{
  "best_metric": 2.1108951568603516,
  "best_model_checkpoint": "/home/nlp/matan_avitan/git/vec2text/saves/train_on_bios/checkpoint-259966",
  "epoch": 49.916666666666664,
  "eval_steps": 625,
  "global_step": 259966,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 9.6e-05,
      "loss": 10.0588,
      "step": 12
    },
    {
      "epoch": 0.0,
      "learning_rate": 0.000192,
      "loss": 9.0806,
      "step": 24
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.000288,
      "loss": 8.8436,
      "step": 36
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.000384,
      "loss": 8.5908,
      "step": 48
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.00048,
      "loss": 8.4078,
      "step": 60
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.000576,
      "loss": 8.254,
      "step": 72
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.0006720000000000001,
      "loss": 8.0822,
      "step": 84
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.000768,
      "loss": 7.9411,
      "step": 96
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.000864,
      "loss": 7.7923,
      "step": 108
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.00096,
      "loss": 7.6285,
      "step": 120
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.001,
      "loss": 7.4645,
      "step": 132
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.001,
      "loss": 7.3039,
      "step": 144
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.001,
      "loss": 7.1368,
      "step": 156
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.001,
      "loss": 6.9908,
      "step": 168
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.001,
      "loss": 6.8706,
      "step": 180
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.001,
      "loss": 6.7611,
      "step": 192
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.001,
      "loss": 6.668,
      "step": 204
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.001,
      "loss": 6.5827,
      "step": 216
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.001,
      "loss": 6.5161,
      "step": 228
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.001,
      "loss": 6.4537,
      "step": 240
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.001,
      "loss": 6.3921,
      "step": 252
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.001,
      "loss": 6.3573,
      "step": 264
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.001,
      "loss": 6.3111,
      "step": 276
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.001,
      "loss": 6.267,
      "step": 288
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.001,
      "loss": 6.2284,
      "step": 300
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.001,
      "loss": 6.1643,
      "step": 312
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.001,
      "loss": 6.1673,
      "step": 324
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.001,
      "loss": 6.1339,
      "step": 336
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.001,
      "loss": 6.1094,
      "step": 348
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.001,
      "loss": 6.0815,
      "step": 360
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.001,
      "loss": 6.058,
      "step": 372
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.001,
      "loss": 6.0346,
      "step": 384
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.001,
      "loss": 5.9907,
      "step": 396
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.001,
      "loss": 5.9701,
      "step": 408
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.001,
      "loss": 5.9687,
      "step": 420
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.001,
      "loss": 5.9362,
      "step": 432
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.001,
      "loss": 5.9169,
      "step": 444
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.001,
      "loss": 5.8856,
      "step": 456
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.001,
      "loss": 5.8743,
      "step": 468
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.001,
      "loss": 5.8533,
      "step": 480
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.001,
      "loss": 5.8393,
      "step": 492
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.001,
      "loss": 5.8127,
      "step": 504
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.001,
      "loss": 5.7933,
      "step": 516
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.001,
      "loss": 5.7799,
      "step": 528
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.001,
      "loss": 5.7717,
      "step": 540
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.001,
      "loss": 5.7435,
      "step": 552
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.001,
      "loss": 5.7376,
      "step": 564
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.001,
      "loss": 5.7223,
      "step": 576
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.001,
      "loss": 5.6961,
      "step": 588
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.001,
      "loss": 5.6851,
      "step": 600
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.001,
      "loss": 5.6679,
      "step": 612
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.001,
      "loss": 5.6604,
      "step": 624
    },
    {
      "epoch": 0.12,
      "eval_ag_news_accuracy": 0.125,
      "eval_ag_news_bleu_score": 0.7076014520866056,
      "eval_ag_news_bleu_score_sem": 0.03414116463338299,
      "eval_ag_news_emb_cos_sim": 0.21779999136924744,
      "eval_ag_news_emb_cos_sim_sem": 0.0047666913456596936,
      "eval_ag_news_emb_top1_equal": 0.015625,
      "eval_ag_news_emb_top1_equal_sem": 0.011004959004867984,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 6.598365306854248,
      "eval_ag_news_n_ngrams_match_1": 2.252,
      "eval_ag_news_n_ngrams_match_2": 0.162,
      "eval_ag_news_n_ngrams_match_3": 0.02,
      "eval_ag_news_num_pred_words": 25.952,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 733.894515808332,
      "eval_ag_news_pred_num_tokens": 62.6171875,
      "eval_ag_news_rouge_score": 0.06449897772590718,
      "eval_ag_news_runtime": 11.491,
      "eval_ag_news_samples_per_second": 43.512,
      "eval_ag_news_steps_per_second": 0.087,
      "eval_ag_news_token_set_f1": 0.08013141855362527,
      "eval_ag_news_token_set_f1_sem": 0.0030168278878077133,
      "eval_ag_news_token_set_precision": 0.049011952780084765,
      "eval_ag_news_token_set_recall": 0.37340358913233485,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 625
    },
    {
      "epoch": 0.12,
      "eval_anthropic_toxic_prompts_accuracy": 0.03975,
      "eval_anthropic_toxic_prompts_bleu_score": 0.6415384184960544,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.04574896028341354,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.16644136607646942,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.006283958794261147,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.011004959004867984,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 6.232697486877441,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 0.846,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.038,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.004,
      "eval_anthropic_toxic_prompts_num_pred_words": 21.81,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 509.1269962366393,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 62.703125,
      "eval_anthropic_toxic_prompts_rouge_score": 0.050466309613359225,
      "eval_anthropic_toxic_prompts_runtime": 11.0626,
      "eval_anthropic_toxic_prompts_samples_per_second": 45.197,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.09,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.07067156960891947,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.003784809498639409,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.04966828658845425,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.21656006934242233,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 625
    },
    {
      "epoch": 0.12,
      "eval_arxiv_accuracy": 0.1473125,
      "eval_arxiv_bleu_score": 0.7817510336725016,
      "eval_arxiv_bleu_score_sem": 0.02753647830505165,
      "eval_arxiv_emb_cos_sim": 0.2147493064403534,
      "eval_arxiv_emb_cos_sim_sem": 0.003958968396644919,
      "eval_arxiv_emb_top1_equal": 0.0390625,
      "eval_arxiv_emb_top1_equal_sem": 0.017191973462108996,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 6.37405252456665,
      "eval_arxiv_n_ngrams_match_1": 2.168,
      "eval_arxiv_n_ngrams_match_2": 0.128,
      "eval_arxiv_n_ngrams_match_3": 0.002,
      "eval_arxiv_num_pred_words": 28.868,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 586.4295400542229,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.05873686213768187,
      "eval_arxiv_runtime": 10.5122,
      "eval_arxiv_samples_per_second": 47.564,
      "eval_arxiv_steps_per_second": 0.095,
      "eval_arxiv_token_set_f1": 0.06343817217647532,
      "eval_arxiv_token_set_f1_sem": 0.0020879189531666778,
      "eval_arxiv_token_set_precision": 0.03457650963707991,
      "eval_arxiv_token_set_recall": 0.6167626984126982,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 625
    },
    {
      "epoch": 0.12,
      "eval_python_code_alpaca_accuracy": 0.06821875,
      "eval_python_code_alpaca_bleu_score": 1.0450194418170362,
      "eval_python_code_alpaca_bleu_score_sem": 0.035346566865746755,
      "eval_python_code_alpaca_emb_cos_sim": 0.1467011570930481,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.005004210568987345,
      "eval_python_code_alpaca_emb_top1_equal": 0.0,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.0,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 6.127904891967773,
      "eval_python_code_alpaca_n_ngrams_match_1": 1.634,
      "eval_python_code_alpaca_n_ngrams_match_2": 0.008,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.002,
      "eval_python_code_alpaca_num_pred_words": 22.842,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 458.47459992795496,
      "eval_python_code_alpaca_pred_num_tokens": 62.765625,
      "eval_python_code_alpaca_rouge_score": 0.08181687675669569,
      "eval_python_code_alpaca_runtime": 10.8834,
      "eval_python_code_alpaca_samples_per_second": 45.942,
      "eval_python_code_alpaca_steps_per_second": 0.092,
      "eval_python_code_alpaca_token_set_f1": 0.11634515823362704,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0026517246590390905,
      "eval_python_code_alpaca_token_set_precision": 0.06694592367448347,
      "eval_python_code_alpaca_token_set_recall": 0.6800358585858585,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 625
    },
    {
      "epoch": 0.12,
      "eval_wikibio_accuracy": 0.13315625,
      "eval_wikibio_bleu_score": 0.7767738705191737,
      "eval_wikibio_bleu_score_sem": 0.059009427236449793,
      "eval_wikibio_emb_cos_sim": 0.2736821472644806,
      "eval_wikibio_emb_cos_sim_sem": 0.007596860125700324,
      "eval_wikibio_emb_top1_equal": 0.03125,
      "eval_wikibio_emb_top1_equal_sem": 0.015439349450344106,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 5.738673210144043,
      "eval_wikibio_n_ngrams_match_1": 2.176,
      "eval_wikibio_n_ngrams_match_2": 0.332,
      "eval_wikibio_n_ngrams_match_3": 0.08,
      "eval_wikibio_num_pred_words": 29.816,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 310.65196754973863,
      "eval_wikibio_pred_num_tokens": 62.0625,
      "eval_wikibio_rouge_score": 0.06659998277535806,
      "eval_wikibio_runtime": 9.9756,
      "eval_wikibio_samples_per_second": 50.122,
      "eval_wikibio_steps_per_second": 0.1,
      "eval_wikibio_token_set_f1": 0.09418651497142766,
      "eval_wikibio_token_set_f1_sem": 0.0044443076778509834,
      "eval_wikibio_token_set_precision": 0.06717098926076812,
      "eval_wikibio_token_set_recall": 0.26601288770994785,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 625
    },
    {
      "epoch": 0.12,
      "eval_nq_accuracy": 0.2171875,
      "eval_nq_bleu_score": 1.5076008565197407,
      "eval_nq_bleu_score_sem": 0.08413672986162012,
      "eval_nq_emb_cos_sim": 0.265103280544281,
      "eval_nq_emb_cos_sim_sem": 0.00787410999773908,
      "eval_nq_emb_top1_equal": 0.078125,
      "eval_nq_emb_top1_equal_sem": 0.023813825516515504,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 5.423642635345459,
      "eval_nq_n_ngrams_match_1": 6.076,
      "eval_nq_n_ngrams_match_2": 1.036,
      "eval_nq_n_ngrams_match_3": 0.226,
      "eval_nq_num_pred_words": 35.068,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 226.70341817027813,
      "eval_nq_pred_num_tokens": 61.453125,
      "eval_nq_rouge_score": 0.1211578668951579,
      "eval_nq_runtime": 10.3634,
      "eval_nq_samples_per_second": 48.247,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.15437157625852624,
      "eval_nq_token_set_f1_sem": 0.00485683145133116,
      "eval_nq_token_set_precision": 0.10064937158566319,
      "eval_nq_token_set_recall": 0.501620294333327,
      "eval_nq_true_num_tokens": 64.0,
      "step": 625
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.001,
      "loss": 5.6436,
      "step": 636
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.001,
      "loss": 5.6172,
      "step": 648
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.001,
      "loss": 5.6093,
      "step": 660
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.001,
      "loss": 5.6059,
      "step": 672
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.001,
      "loss": 5.5919,
      "step": 684
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.001,
      "loss": 5.5829,
      "step": 696
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.001,
      "loss": 5.5524,
      "step": 708
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.001,
      "loss": 5.5551,
      "step": 720
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.001,
      "loss": 5.5233,
      "step": 732
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.001,
      "loss": 5.5187,
      "step": 744
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.001,
      "loss": 5.5111,
      "step": 756
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.001,
      "loss": 5.4952,
      "step": 768
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.001,
      "loss": 5.4776,
      "step": 780
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.001,
      "loss": 5.4606,
      "step": 792
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.001,
      "loss": 5.4627,
      "step": 804
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.001,
      "loss": 5.4523,
      "step": 816
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.001,
      "loss": 5.4317,
      "step": 828
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.001,
      "loss": 5.4328,
      "step": 840
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.001,
      "loss": 5.421,
      "step": 852
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.001,
      "loss": 5.4102,
      "step": 864
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.001,
      "loss": 5.3909,
      "step": 876
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.001,
      "loss": 5.3922,
      "step": 888
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.001,
      "loss": 5.382,
      "step": 900
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.001,
      "loss": 5.3687,
      "step": 912
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.001,
      "loss": 5.3497,
      "step": 924
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.001,
      "loss": 5.345,
      "step": 936
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.001,
      "loss": 5.3393,
      "step": 948
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.001,
      "loss": 5.3281,
      "step": 960
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.001,
      "loss": 5.3162,
      "step": 972
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.001,
      "loss": 5.3198,
      "step": 984
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.001,
      "loss": 5.2957,
      "step": 996
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.001,
      "loss": 5.2922,
      "step": 1008
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.001,
      "loss": 5.284,
      "step": 1020
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.001,
      "loss": 5.2713,
      "step": 1032
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.001,
      "loss": 5.2516,
      "step": 1044
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.001,
      "loss": 5.2495,
      "step": 1056
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.001,
      "loss": 5.2442,
      "step": 1068
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.001,
      "loss": 5.2302,
      "step": 1080
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.001,
      "loss": 5.2222,
      "step": 1092
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.001,
      "loss": 5.2026,
      "step": 1104
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.001,
      "loss": 5.1956,
      "step": 1116
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.001,
      "loss": 5.1944,
      "step": 1128
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.001,
      "loss": 5.1968,
      "step": 1140
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.001,
      "loss": 5.1797,
      "step": 1152
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.001,
      "loss": 5.1708,
      "step": 1164
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.001,
      "loss": 5.1591,
      "step": 1176
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.001,
      "loss": 5.1583,
      "step": 1188
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.001,
      "loss": 5.1363,
      "step": 1200
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.001,
      "loss": 5.1344,
      "step": 1212
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.001,
      "loss": 5.1172,
      "step": 1224
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.001,
      "loss": 5.1126,
      "step": 1236
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.001,
      "loss": 5.1075,
      "step": 1248
    },
    {
      "epoch": 0.24,
      "eval_ag_news_accuracy": 0.14090625,
      "eval_ag_news_bleu_score": 1.2347401162864637,
      "eval_ag_news_bleu_score_sem": 0.049933663695827225,
      "eval_ag_news_emb_cos_sim": 0.27115195989608765,
      "eval_ag_news_emb_cos_sim_sem": 0.007412213465285081,
      "eval_ag_news_emb_top1_equal": 0.03125,
      "eval_ag_news_emb_top1_equal_sem": 0.015439349450344106,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 6.150851249694824,
      "eval_ag_news_n_ngrams_match_1": 4.292,
      "eval_ag_news_n_ngrams_match_2": 0.43,
      "eval_ag_news_n_ngrams_match_3": 0.082,
      "eval_ag_news_num_pred_words": 33.876,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 469.1165521854841,
      "eval_ag_news_pred_num_tokens": 62.28125,
      "eval_ag_news_rouge_score": 0.10640590724771785,
      "eval_ag_news_runtime": 10.8415,
      "eval_ag_news_samples_per_second": 46.119,
      "eval_ag_news_steps_per_second": 0.092,
      "eval_ag_news_token_set_f1": 0.1386755571959265,
      "eval_ag_news_token_set_f1_sem": 0.003893407533770644,
      "eval_ag_news_token_set_precision": 0.09695214653796702,
      "eval_ag_news_token_set_recall": 0.3288685225973883,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 1250
    },
    {
      "epoch": 0.24,
      "eval_anthropic_toxic_prompts_accuracy": 0.04528125,
      "eval_anthropic_toxic_prompts_bleu_score": 1.029767975734896,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.05067200138853742,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.2049163579940796,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.007331099221147833,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.0,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 5.778225421905518,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 1.714,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.134,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.012,
      "eval_anthropic_toxic_prompts_num_pred_words": 30.022,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 323.1851639280978,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 62.2265625,
      "eval_anthropic_toxic_prompts_rouge_score": 0.06978179678525673,
      "eval_anthropic_toxic_prompts_runtime": 9.717,
      "eval_anthropic_toxic_prompts_samples_per_second": 51.456,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.103,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.11283037790319016,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0043284125012183995,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.09766515866904854,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.1815796536295251,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 1250
    },
    {
      "epoch": 0.24,
      "eval_arxiv_accuracy": 0.166125,
      "eval_arxiv_bleu_score": 0.7123611794447666,
      "eval_arxiv_bleu_score_sem": 0.0359630612674515,
      "eval_arxiv_emb_cos_sim": 0.23540450632572174,
      "eval_arxiv_emb_cos_sim_sem": 0.004519463200650853,
      "eval_arxiv_emb_top1_equal": 0.09375,
      "eval_arxiv_emb_top1_equal_sem": 0.025864720141013958,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 5.919602394104004,
      "eval_arxiv_n_ngrams_match_1": 2.836,
      "eval_arxiv_n_ngrams_match_2": 0.14,
      "eval_arxiv_n_ngrams_match_3": 0.004,
      "eval_arxiv_num_pred_words": 16.358,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 372.2636702164734,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.076071754092774,
      "eval_arxiv_runtime": 10.4855,
      "eval_arxiv_samples_per_second": 47.685,
      "eval_arxiv_steps_per_second": 0.095,
      "eval_arxiv_token_set_f1": 0.08661780722950992,
      "eval_arxiv_token_set_f1_sem": 0.0025155717633808484,
      "eval_arxiv_token_set_precision": 0.05187472128557089,
      "eval_arxiv_token_set_recall": 0.34516107648801186,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 1250
    },
    {
      "epoch": 0.24,
      "eval_python_code_alpaca_accuracy": 0.0756875,
      "eval_python_code_alpaca_bleu_score": 1.2812924148682532,
      "eval_python_code_alpaca_bleu_score_sem": 0.051918882153093135,
      "eval_python_code_alpaca_emb_cos_sim": 0.17265605926513672,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.005516537122773645,
      "eval_python_code_alpaca_emb_top1_equal": 0.0078125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.007812499866294757,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 5.793320178985596,
      "eval_python_code_alpaca_n_ngrams_match_1": 2.068,
      "eval_python_code_alpaca_n_ngrams_match_2": 0.05,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.002,
      "eval_python_code_alpaca_num_pred_words": 14.108,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 328.100570572479,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.11775269936913832,
      "eval_python_code_alpaca_runtime": 9.4971,
      "eval_python_code_alpaca_samples_per_second": 52.648,
      "eval_python_code_alpaca_steps_per_second": 0.105,
      "eval_python_code_alpaca_token_set_f1": 0.14949715891184465,
      "eval_python_code_alpaca_token_set_f1_sem": 0.003332086710413263,
      "eval_python_code_alpaca_token_set_precision": 0.10263999337191698,
      "eval_python_code_alpaca_token_set_recall": 0.39765223691856894,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 1250
    },
    {
      "epoch": 0.24,
      "eval_wikibio_accuracy": 0.149375,
      "eval_wikibio_bleu_score": 1.5607964385224509,
      "eval_wikibio_bleu_score_sem": 0.08738723969099557,
      "eval_wikibio_emb_cos_sim": 0.31114959716796875,
      "eval_wikibio_emb_cos_sim_sem": 0.008725569192434765,
      "eval_wikibio_emb_top1_equal": 0.046875,
      "eval_wikibio_emb_top1_equal_sem": 0.01875615101164758,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 5.656447887420654,
      "eval_wikibio_n_ngrams_match_1": 3.778,
      "eval_wikibio_n_ngrams_match_2": 0.722,
      "eval_wikibio_n_ngrams_match_3": 0.166,
      "eval_wikibio_num_pred_words": 24.21,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 286.1304676571906,
      "eval_wikibio_pred_num_tokens": 60.7734375,
      "eval_wikibio_rouge_score": 0.11962860085433155,
      "eval_wikibio_runtime": 9.8295,
      "eval_wikibio_samples_per_second": 50.867,
      "eval_wikibio_steps_per_second": 0.102,
      "eval_wikibio_token_set_f1": 0.14192687736854545,
      "eval_wikibio_token_set_f1_sem": 0.005483227298569409,
      "eval_wikibio_token_set_precision": 0.11340557673465658,
      "eval_wikibio_token_set_recall": 0.2624057504726079,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 1250
    },
    {
      "epoch": 0.24,
      "eval_nq_accuracy": 0.25834375,
      "eval_nq_bleu_score": 2.4396129370396635,
      "eval_nq_bleu_score_sem": 0.12587618744705167,
      "eval_nq_emb_cos_sim": 0.3528878092765808,
      "eval_nq_emb_cos_sim_sem": 0.009834176846607543,
      "eval_nq_emb_top1_equal": 0.0625,
      "eval_nq_emb_top1_equal_sem": 0.02147948148198014,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 4.811520576477051,
      "eval_nq_n_ngrams_match_1": 9.094,
      "eval_nq_n_ngrams_match_2": 1.872,
      "eval_nq_n_ngrams_match_3": 0.484,
      "eval_nq_num_pred_words": 36.498,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 122.91838228692436,
      "eval_nq_pred_num_tokens": 61.9296875,
      "eval_nq_rouge_score": 0.17772016140424224,
      "eval_nq_runtime": 10.0914,
      "eval_nq_samples_per_second": 49.547,
      "eval_nq_steps_per_second": 0.099,
      "eval_nq_token_set_f1": 0.21363380083645164,
      "eval_nq_token_set_f1_sem": 0.004906987320581662,
      "eval_nq_token_set_precision": 0.14957695896721576,
      "eval_nq_token_set_recall": 0.46750880878337747,
      "eval_nq_true_num_tokens": 64.0,
      "step": 1250
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.001,
      "loss": 5.0774,
      "step": 1260
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.001,
      "loss": 5.0766,
      "step": 1272
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.001,
      "loss": 5.0612,
      "step": 1284
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.001,
      "loss": 5.0689,
      "step": 1296
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.001,
      "loss": 5.0523,
      "step": 1308
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.001,
      "loss": 5.0493,
      "step": 1320
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.001,
      "loss": 5.0359,
      "step": 1332
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.001,
      "loss": 5.0325,
      "step": 1344
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.001,
      "loss": 5.0255,
      "step": 1356
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.001,
      "loss": 5.0155,
      "step": 1368
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.001,
      "loss": 5.003,
      "step": 1380
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.001,
      "loss": 4.9981,
      "step": 1392
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.001,
      "loss": 4.9914,
      "step": 1404
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.001,
      "loss": 4.978,
      "step": 1416
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.001,
      "loss": 4.9747,
      "step": 1428
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.001,
      "loss": 4.9638,
      "step": 1440
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.001,
      "loss": 4.9597,
      "step": 1452
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.001,
      "loss": 4.9502,
      "step": 1464
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.001,
      "loss": 4.9458,
      "step": 1476
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.001,
      "loss": 4.9398,
      "step": 1488
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.001,
      "loss": 4.9153,
      "step": 1500
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.001,
      "loss": 4.9193,
      "step": 1512
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.001,
      "loss": 4.9167,
      "step": 1524
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.001,
      "loss": 4.9042,
      "step": 1536
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.001,
      "loss": 4.8995,
      "step": 1548
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.001,
      "loss": 4.8845,
      "step": 1560
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.001,
      "loss": 4.8734,
      "step": 1572
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.001,
      "loss": 4.8802,
      "step": 1584
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.001,
      "loss": 4.855,
      "step": 1596
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.001,
      "loss": 4.876,
      "step": 1608
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.001,
      "loss": 4.8573,
      "step": 1620
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.001,
      "loss": 4.8555,
      "step": 1632
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.001,
      "loss": 4.8417,
      "step": 1644
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.001,
      "loss": 4.837,
      "step": 1656
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.001,
      "loss": 4.8314,
      "step": 1668
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.001,
      "loss": 4.8279,
      "step": 1680
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.001,
      "loss": 4.8274,
      "step": 1692
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.001,
      "loss": 4.8241,
      "step": 1704
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.001,
      "loss": 4.8036,
      "step": 1716
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.001,
      "loss": 4.8028,
      "step": 1728
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.001,
      "loss": 4.803,
      "step": 1740
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.001,
      "loss": 4.796,
      "step": 1752
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.001,
      "loss": 4.7845,
      "step": 1764
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.001,
      "loss": 4.7982,
      "step": 1776
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.001,
      "loss": 4.7735,
      "step": 1788
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.001,
      "loss": 4.766,
      "step": 1800
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.001,
      "loss": 4.7596,
      "step": 1812
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.001,
      "loss": 4.751,
      "step": 1824
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.001,
      "loss": 4.7453,
      "step": 1836
    },
    {
      "epoch": 0.35,
      "learning_rate": 0.001,
      "loss": 4.741,
      "step": 1848
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.001,
      "loss": 4.744,
      "step": 1860
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.001,
      "loss": 4.7302,
      "step": 1872
    },
    {
      "epoch": 0.36,
      "eval_ag_news_accuracy": 0.1599375,
      "eval_ag_news_bleu_score": 1.5492879738464664,
      "eval_ag_news_bleu_score_sem": 0.05764465401039229,
      "eval_ag_news_emb_cos_sim": 0.3223033845424652,
      "eval_ag_news_emb_cos_sim_sem": 0.009460101175560086,
      "eval_ag_news_emb_top1_equal": 0.03125,
      "eval_ag_news_emb_top1_equal_sem": 0.015439349450344106,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 5.799770832061768,
      "eval_ag_news_n_ngrams_match_1": 5.07,
      "eval_ag_news_n_ngrams_match_2": 0.562,
      "eval_ag_news_n_ngrams_match_3": 0.096,
      "eval_ag_news_num_pred_words": 35.554,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 330.22387451316945,
      "eval_ag_news_pred_num_tokens": 61.015625,
      "eval_ag_news_rouge_score": 0.12322004814007076,
      "eval_ag_news_runtime": 10.0912,
      "eval_ag_news_samples_per_second": 49.548,
      "eval_ag_news_steps_per_second": 0.099,
      "eval_ag_news_token_set_f1": 0.1565384936721467,
      "eval_ag_news_token_set_f1_sem": 0.00383566004187165,
      "eval_ag_news_token_set_precision": 0.11254903170419046,
      "eval_ag_news_token_set_recall": 0.3222754532574314,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 1875
    },
    {
      "epoch": 0.36,
      "eval_anthropic_toxic_prompts_accuracy": 0.0491875,
      "eval_anthropic_toxic_prompts_bleu_score": 1.214368903496161,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.06396182987976606,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.22693181037902832,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00968666090118589,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.0,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 5.420243263244629,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 1.762,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.15,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.022,
      "eval_anthropic_toxic_prompts_num_pred_words": 31.006,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 225.93407727423414,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 62.0859375,
      "eval_anthropic_toxic_prompts_rouge_score": 0.0748185811340395,
      "eval_anthropic_toxic_prompts_runtime": 16.2273,
      "eval_anthropic_toxic_prompts_samples_per_second": 30.812,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.062,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.11576930926170863,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.004504946056209329,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.10694872887009735,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.1707539659626981,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 1875
    },
    {
      "epoch": 0.36,
      "eval_arxiv_accuracy": 0.18940625,
      "eval_arxiv_bleu_score": 1.0254523469573953,
      "eval_arxiv_bleu_score_sem": 0.03942704472519825,
      "eval_arxiv_emb_cos_sim": 0.2698151469230652,
      "eval_arxiv_emb_cos_sim_sem": 0.005664845955782909,
      "eval_arxiv_emb_top1_equal": 0.140625,
      "eval_arxiv_emb_top1_equal_sem": 0.030847557647994725,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 5.602920055389404,
      "eval_arxiv_n_ngrams_match_1": 4.622,
      "eval_arxiv_n_ngrams_match_2": 0.442,
      "eval_arxiv_n_ngrams_match_3": 0.012,
      "eval_arxiv_num_pred_words": 22.514,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 271.21722156321505,
      "eval_arxiv_pred_num_tokens": 62.796875,
      "eval_arxiv_rouge_score": 0.12605652585231003,
      "eval_arxiv_runtime": 9.8688,
      "eval_arxiv_samples_per_second": 50.665,
      "eval_arxiv_steps_per_second": 0.101,
      "eval_arxiv_token_set_f1": 0.116413515795875,
      "eval_arxiv_token_set_f1_sem": 0.0030111242923623062,
      "eval_arxiv_token_set_precision": 0.07199300037495716,
      "eval_arxiv_token_set_recall": 0.3818007419738849,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 1875
    },
    {
      "epoch": 0.36,
      "eval_python_code_alpaca_accuracy": 0.07840625,
      "eval_python_code_alpaca_bleu_score": 1.9046998170337497,
      "eval_python_code_alpaca_bleu_score_sem": 0.053333640205557684,
      "eval_python_code_alpaca_emb_cos_sim": 0.19778388738632202,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.005881379341274675,
      "eval_python_code_alpaca_emb_top1_equal": 0.0078125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.007812499866294757,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 5.510037899017334,
      "eval_python_code_alpaca_n_ngrams_match_1": 2.732,
      "eval_python_code_alpaca_n_ngrams_match_2": 0.104,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.004,
      "eval_python_code_alpaca_num_pred_words": 15.638,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 247.1604940299708,
      "eval_python_code_alpaca_pred_num_tokens": 62.8671875,
      "eval_python_code_alpaca_rouge_score": 0.14933505420530963,
      "eval_python_code_alpaca_runtime": 10.4618,
      "eval_python_code_alpaca_samples_per_second": 47.793,
      "eval_python_code_alpaca_steps_per_second": 0.096,
      "eval_python_code_alpaca_token_set_f1": 0.1731586772698891,
      "eval_python_code_alpaca_token_set_f1_sem": 0.003806368806522163,
      "eval_python_code_alpaca_token_set_precision": 0.1299234640088901,
      "eval_python_code_alpaca_token_set_recall": 0.32511951610481055,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 1875
    },
    {
      "epoch": 0.36,
      "eval_wikibio_accuracy": 0.157625,
      "eval_wikibio_bleu_score": 1.9069811407916746,
      "eval_wikibio_bleu_score_sem": 0.09573417862296345,
      "eval_wikibio_emb_cos_sim": 0.33923864364624023,
      "eval_wikibio_emb_cos_sim_sem": 0.008974367966095145,
      "eval_wikibio_emb_top1_equal": 0.015625,
      "eval_wikibio_emb_top1_equal_sem": 0.011004959004867984,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 5.6722869873046875,
      "eval_wikibio_n_ngrams_match_1": 4.24,
      "eval_wikibio_n_ngrams_match_2": 0.85,
      "eval_wikibio_n_ngrams_match_3": 0.204,
      "eval_wikibio_num_pred_words": 28.526,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 290.6985987537537,
      "eval_wikibio_pred_num_tokens": 60.6796875,
      "eval_wikibio_rouge_score": 0.12333534589947309,
      "eval_wikibio_runtime": 9.9222,
      "eval_wikibio_samples_per_second": 50.392,
      "eval_wikibio_steps_per_second": 0.101,
      "eval_wikibio_token_set_f1": 0.1597959754397675,
      "eval_wikibio_token_set_f1_sem": 0.005884556348911185,
      "eval_wikibio_token_set_precision": 0.13065982922471994,
      "eval_wikibio_token_set_recall": 0.2603553779766988,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 1875
    },
    {
      "epoch": 0.36,
      "eval_nq_accuracy": 0.2873125,
      "eval_nq_bleu_score": 3.2101702127784275,
      "eval_nq_bleu_score_sem": 0.1718251519000673,
      "eval_nq_emb_cos_sim": 0.40342414379119873,
      "eval_nq_emb_cos_sim_sem": 0.010269663110113403,
      "eval_nq_emb_top1_equal": 0.1015625,
      "eval_nq_emb_top1_equal_sem": 0.026804565886848545,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 4.413300037384033,
      "eval_nq_n_ngrams_match_1": 11.13,
      "eval_nq_n_ngrams_match_2": 2.414,
      "eval_nq_n_ngrams_match_3": 0.668,
      "eval_nq_num_pred_words": 40.122,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 82.54140426988461,
      "eval_nq_pred_num_tokens": 60.0234375,
      "eval_nq_rouge_score": 0.211280173412138,
      "eval_nq_runtime": 10.3836,
      "eval_nq_samples_per_second": 48.153,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.25103762490627235,
      "eval_nq_token_set_f1_sem": 0.004830772676500314,
      "eval_nq_token_set_precision": 0.18247951034071636,
      "eval_nq_token_set_recall": 0.46558207220765374,
      "eval_nq_true_num_tokens": 64.0,
      "step": 1875
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.001,
      "loss": 4.7273,
      "step": 1884
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.001,
      "loss": 4.7098,
      "step": 1896
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.001,
      "loss": 4.726,
      "step": 1908
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.001,
      "loss": 4.7215,
      "step": 1920
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.001,
      "loss": 4.707,
      "step": 1932
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.001,
      "loss": 4.7097,
      "step": 1944
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.001,
      "loss": 4.6983,
      "step": 1956
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.001,
      "loss": 4.6958,
      "step": 1968
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.001,
      "loss": 4.6876,
      "step": 1980
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.001,
      "loss": 4.6728,
      "step": 1992
    },
    {
      "epoch": 0.38,
      "learning_rate": 0.001,
      "loss": 4.6791,
      "step": 2004
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.001,
      "loss": 4.676,
      "step": 2016
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.001,
      "loss": 4.6698,
      "step": 2028
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.001,
      "loss": 4.6587,
      "step": 2040
    },
    {
      "epoch": 0.39,
      "learning_rate": 0.001,
      "loss": 4.6566,
      "step": 2052
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.001,
      "loss": 4.6514,
      "step": 2064
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.001,
      "loss": 4.6358,
      "step": 2076
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.001,
      "loss": 4.6437,
      "step": 2088
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.001,
      "loss": 4.6498,
      "step": 2100
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.001,
      "loss": 4.6218,
      "step": 2112
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.001,
      "loss": 4.6455,
      "step": 2124
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.001,
      "loss": 4.629,
      "step": 2136
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.001,
      "loss": 4.6146,
      "step": 2148
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.001,
      "loss": 4.6128,
      "step": 2160
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.001,
      "loss": 4.6049,
      "step": 2172
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.001,
      "loss": 4.6053,
      "step": 2184
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.001,
      "loss": 4.6002,
      "step": 2196
    },
    {
      "epoch": 0.42,
      "learning_rate": 0.001,
      "loss": 4.6119,
      "step": 2208
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.001,
      "loss": 4.5884,
      "step": 2220
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.001,
      "loss": 4.5804,
      "step": 2232
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.001,
      "loss": 4.5959,
      "step": 2244
    },
    {
      "epoch": 0.43,
      "learning_rate": 0.001,
      "loss": 4.5874,
      "step": 2256
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.001,
      "loss": 4.5806,
      "step": 2268
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.001,
      "loss": 4.5749,
      "step": 2280
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.001,
      "loss": 4.5643,
      "step": 2292
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.001,
      "loss": 4.5557,
      "step": 2304
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.001,
      "loss": 4.5603,
      "step": 2316
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.001,
      "loss": 4.5447,
      "step": 2328
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.001,
      "loss": 4.5369,
      "step": 2340
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.001,
      "loss": 4.545,
      "step": 2352
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.001,
      "loss": 4.5483,
      "step": 2364
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.001,
      "loss": 4.5438,
      "step": 2376
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.001,
      "loss": 4.5408,
      "step": 2388
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.001,
      "loss": 4.5387,
      "step": 2400
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.001,
      "loss": 4.5375,
      "step": 2412
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.001,
      "loss": 4.5164,
      "step": 2424
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.001,
      "loss": 4.5216,
      "step": 2436
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.001,
      "loss": 4.4997,
      "step": 2448
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.001,
      "loss": 4.5179,
      "step": 2460
    },
    {
      "epoch": 0.47,
      "learning_rate": 0.001,
      "loss": 4.5004,
      "step": 2472
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.001,
      "loss": 4.5037,
      "step": 2484
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.001,
      "loss": 4.4875,
      "step": 2496
    },
    {
      "epoch": 0.48,
      "eval_ag_news_accuracy": 0.173125,
      "eval_ag_news_bleu_score": 1.8445900736953396,
      "eval_ag_news_bleu_score_sem": 0.06262184334737668,
      "eval_ag_news_emb_cos_sim": 0.34900230169296265,
      "eval_ag_news_emb_cos_sim_sem": 0.010179393121681672,
      "eval_ag_news_emb_top1_equal": 0.03125,
      "eval_ag_news_emb_top1_equal_sem": 0.015439349450344106,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 5.598976135253906,
      "eval_ag_news_n_ngrams_match_1": 6.342,
      "eval_ag_news_n_ngrams_match_2": 0.744,
      "eval_ag_news_n_ngrams_match_3": 0.124,
      "eval_ag_news_num_pred_words": 40.05,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 270.1496690566818,
      "eval_ag_news_pred_num_tokens": 62.5390625,
      "eval_ag_news_rouge_score": 0.14732981524733085,
      "eval_ag_news_runtime": 10.1945,
      "eval_ag_news_samples_per_second": 49.046,
      "eval_ag_news_steps_per_second": 0.098,
      "eval_ag_news_token_set_f1": 0.18426404790619708,
      "eval_ag_news_token_set_f1_sem": 0.003983054075476968,
      "eval_ag_news_token_set_precision": 0.14057732009040233,
      "eval_ag_news_token_set_recall": 0.30661308725535086,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 2500
    },
    {
      "epoch": 0.48,
      "eval_anthropic_toxic_prompts_accuracy": 0.05278125,
      "eval_anthropic_toxic_prompts_bleu_score": 1.1399798872031897,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.04618923291342836,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.2499910593032837,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009365932603641098,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0234375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.013424676090873717,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 5.1824951171875,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 2.388,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.244,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.032,
      "eval_anthropic_toxic_prompts_num_pred_words": 39.23,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 178.126703981217,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 62.75,
      "eval_anthropic_toxic_prompts_rouge_score": 0.08504647404745881,
      "eval_anthropic_toxic_prompts_runtime": 10.2011,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.014,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.098,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.1415453788063794,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.004915337389797356,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.1424269313439444,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.1785590425989663,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 2500
    },
    {
      "epoch": 0.48,
      "eval_arxiv_accuracy": 0.2069375,
      "eval_arxiv_bleu_score": 1.0817968392037858,
      "eval_arxiv_bleu_score_sem": 0.043877314138167575,
      "eval_arxiv_emb_cos_sim": 0.2864419221878052,
      "eval_arxiv_emb_cos_sim_sem": 0.006383267306064941,
      "eval_arxiv_emb_top1_equal": 0.1875,
      "eval_arxiv_emb_top1_equal_sem": 0.034634623208270626,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 5.378298759460449,
      "eval_arxiv_n_ngrams_match_1": 4.564,
      "eval_arxiv_n_ngrams_match_2": 0.464,
      "eval_arxiv_n_ngrams_match_3": 0.02,
      "eval_arxiv_num_pred_words": 23.218,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 216.6533822090273,
      "eval_arxiv_pred_num_tokens": 62.8046875,
      "eval_arxiv_rouge_score": 0.12174756200256615,
      "eval_arxiv_runtime": 10.1745,
      "eval_arxiv_samples_per_second": 49.143,
      "eval_arxiv_steps_per_second": 0.098,
      "eval_arxiv_token_set_f1": 0.12196276733542068,
      "eval_arxiv_token_set_f1_sem": 0.0035965513475812072,
      "eval_arxiv_token_set_precision": 0.07694164495554813,
      "eval_arxiv_token_set_recall": 0.39201808371367247,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 2500
    },
    {
      "epoch": 0.48,
      "eval_python_code_alpaca_accuracy": 0.0823125,
      "eval_python_code_alpaca_bleu_score": 1.9885140531274417,
      "eval_python_code_alpaca_bleu_score_sem": 0.05715660482036896,
      "eval_python_code_alpaca_emb_cos_sim": 0.2083890736103058,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.0067747779429612395,
      "eval_python_code_alpaca_emb_top1_equal": 0.0,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.0,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 5.2765960693359375,
      "eval_python_code_alpaca_n_ngrams_match_1": 3.026,
      "eval_python_code_alpaca_n_ngrams_match_2": 0.16,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.012,
      "eval_python_code_alpaca_num_pred_words": 18.666,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 195.70258226560196,
      "eval_python_code_alpaca_pred_num_tokens": 62.90625,
      "eval_python_code_alpaca_rouge_score": 0.14774207463910968,
      "eval_python_code_alpaca_runtime": 9.3776,
      "eval_python_code_alpaca_samples_per_second": 53.319,
      "eval_python_code_alpaca_steps_per_second": 0.107,
      "eval_python_code_alpaca_token_set_f1": 0.19409139501907424,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0041145243937218984,
      "eval_python_code_alpaca_token_set_precision": 0.14745978200103235,
      "eval_python_code_alpaca_token_set_recall": 0.3656227900840133,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 2500
    },
    {
      "epoch": 0.48,
      "eval_wikibio_accuracy": 0.17415625,
      "eval_wikibio_bleu_score": 2.195020967443216,
      "eval_wikibio_bleu_score_sem": 0.11868718815415513,
      "eval_wikibio_emb_cos_sim": 0.3559015989303589,
      "eval_wikibio_emb_cos_sim_sem": 0.010704055531191504,
      "eval_wikibio_emb_top1_equal": 0.046875,
      "eval_wikibio_emb_top1_equal_sem": 0.01875615101164758,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 5.531292915344238,
      "eval_wikibio_n_ngrams_match_1": 4.522,
      "eval_wikibio_n_ngrams_match_2": 1.014,
      "eval_wikibio_n_ngrams_match_3": 0.312,
      "eval_wikibio_num_pred_words": 29.83,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 252.47012259156008,
      "eval_wikibio_pred_num_tokens": 62.765625,
      "eval_wikibio_rouge_score": 0.134333882608771,
      "eval_wikibio_runtime": 9.8046,
      "eval_wikibio_samples_per_second": 50.996,
      "eval_wikibio_steps_per_second": 0.102,
      "eval_wikibio_token_set_f1": 0.16498288286324034,
      "eval_wikibio_token_set_f1_sem": 0.005735143889755407,
      "eval_wikibio_token_set_precision": 0.13822869179740693,
      "eval_wikibio_token_set_recall": 0.2534312355646855,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 2500
    },
    {
      "epoch": 0.48,
      "eval_nq_accuracy": 0.31165625,
      "eval_nq_bleu_score": 3.640149850234891,
      "eval_nq_bleu_score_sem": 0.16268640178805646,
      "eval_nq_emb_cos_sim": 0.44711968302726746,
      "eval_nq_emb_cos_sim_sem": 0.01117219177285864,
      "eval_nq_emb_top1_equal": 0.09375,
      "eval_nq_emb_top1_equal_sem": 0.025864720141013958,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 4.153674602508545,
      "eval_nq_n_ngrams_match_1": 12.522,
      "eval_nq_n_ngrams_match_2": 2.8,
      "eval_nq_n_ngrams_match_3": 0.75,
      "eval_nq_num_pred_words": 43.906,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 63.66752382504825,
      "eval_nq_pred_num_tokens": 62.375,
      "eval_nq_rouge_score": 0.22695565292984432,
      "eval_nq_runtime": 10.08,
      "eval_nq_samples_per_second": 49.603,
      "eval_nq_steps_per_second": 0.099,
      "eval_nq_token_set_f1": 0.2699038320927348,
      "eval_nq_token_set_f1_sem": 0.004896122256853701,
      "eval_nq_token_set_precision": 0.20154225312448248,
      "eval_nq_token_set_recall": 0.46019639800401324,
      "eval_nq_true_num_tokens": 64.0,
      "step": 2500
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.001,
      "loss": 4.5039,
      "step": 2508
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.001,
      "loss": 4.4939,
      "step": 2520
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.001,
      "loss": 4.4845,
      "step": 2532
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.001,
      "loss": 4.4728,
      "step": 2544
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.001,
      "loss": 4.4719,
      "step": 2556
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.001,
      "loss": 4.4653,
      "step": 2568
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.001,
      "loss": 4.4568,
      "step": 2580
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.001,
      "loss": 4.4561,
      "step": 2592
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.001,
      "loss": 4.4529,
      "step": 2604
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.001,
      "loss": 4.4449,
      "step": 2616
    },
    {
      "epoch": 0.5,
      "learning_rate": 0.001,
      "loss": 4.44,
      "step": 2628
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.001,
      "loss": 4.4493,
      "step": 2640
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.001,
      "loss": 4.4416,
      "step": 2652
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.001,
      "loss": 4.4307,
      "step": 2664
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.001,
      "loss": 4.4218,
      "step": 2676
    },
    {
      "epoch": 0.52,
      "learning_rate": 0.001,
      "loss": 4.4288,
      "step": 2688
    },
    {
      "epoch": 0.52,
      "learning_rate": 0.001,
      "loss": 4.4201,
      "step": 2700
    },
    {
      "epoch": 0.52,
      "learning_rate": 0.001,
      "loss": 4.4118,
      "step": 2712
    },
    {
      "epoch": 0.52,
      "learning_rate": 0.001,
      "loss": 4.4214,
      "step": 2724
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.001,
      "loss": 4.4147,
      "step": 2736
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.001,
      "loss": 4.3993,
      "step": 2748
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.001,
      "loss": 4.4106,
      "step": 2760
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.001,
      "loss": 4.3987,
      "step": 2772
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.001,
      "loss": 4.3855,
      "step": 2784
    },
    {
      "epoch": 0.54,
      "learning_rate": 0.001,
      "loss": 4.3955,
      "step": 2796
    },
    {
      "epoch": 0.54,
      "learning_rate": 0.001,
      "loss": 4.3875,
      "step": 2808
    },
    {
      "epoch": 0.54,
      "learning_rate": 0.001,
      "loss": 4.3776,
      "step": 2820
    },
    {
      "epoch": 0.54,
      "learning_rate": 0.001,
      "loss": 4.3871,
      "step": 2832
    },
    {
      "epoch": 0.55,
      "learning_rate": 0.001,
      "loss": 4.3715,
      "step": 2844
    },
    {
      "epoch": 0.55,
      "learning_rate": 0.001,
      "loss": 4.3594,
      "step": 2856
    },
    {
      "epoch": 0.55,
      "learning_rate": 0.001,
      "loss": 4.3716,
      "step": 2868
    },
    {
      "epoch": 0.55,
      "learning_rate": 0.001,
      "loss": 4.36,
      "step": 2880
    },
    {
      "epoch": 0.56,
      "learning_rate": 0.001,
      "loss": 4.3628,
      "step": 2892
    },
    {
      "epoch": 0.56,
      "learning_rate": 0.001,
      "loss": 4.3503,
      "step": 2904
    },
    {
      "epoch": 0.56,
      "learning_rate": 0.001,
      "loss": 4.3434,
      "step": 2916
    },
    {
      "epoch": 0.56,
      "learning_rate": 0.001,
      "loss": 4.348,
      "step": 2928
    },
    {
      "epoch": 0.56,
      "learning_rate": 0.001,
      "loss": 4.3386,
      "step": 2940
    },
    {
      "epoch": 0.57,
      "learning_rate": 0.001,
      "loss": 4.35,
      "step": 2952
    },
    {
      "epoch": 0.57,
      "learning_rate": 0.001,
      "loss": 4.3359,
      "step": 2964
    },
    {
      "epoch": 0.57,
      "learning_rate": 0.001,
      "loss": 4.3359,
      "step": 2976
    },
    {
      "epoch": 0.57,
      "learning_rate": 0.001,
      "loss": 4.331,
      "step": 2988
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.001,
      "loss": 4.3312,
      "step": 3000
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.001,
      "loss": 4.3276,
      "step": 3012
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.001,
      "loss": 4.3165,
      "step": 3024
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.001,
      "loss": 4.3023,
      "step": 3036
    },
    {
      "epoch": 0.59,
      "learning_rate": 0.001,
      "loss": 4.3061,
      "step": 3048
    },
    {
      "epoch": 0.59,
      "learning_rate": 0.001,
      "loss": 4.3034,
      "step": 3060
    },
    {
      "epoch": 0.59,
      "learning_rate": 0.001,
      "loss": 4.3033,
      "step": 3072
    },
    {
      "epoch": 0.59,
      "learning_rate": 0.001,
      "loss": 4.299,
      "step": 3084
    },
    {
      "epoch": 0.59,
      "learning_rate": 0.001,
      "loss": 4.3062,
      "step": 3096
    },
    {
      "epoch": 0.6,
      "learning_rate": 0.001,
      "loss": 4.3031,
      "step": 3108
    },
    {
      "epoch": 0.6,
      "learning_rate": 0.001,
      "loss": 4.2838,
      "step": 3120
    },
    {
      "epoch": 0.6,
      "eval_ag_news_accuracy": 0.187625,
      "eval_ag_news_bleu_score": 1.8920297212136834,
      "eval_ag_news_bleu_score_sem": 0.0733760096541259,
      "eval_ag_news_emb_cos_sim": 0.3659282922744751,
      "eval_ag_news_emb_cos_sim_sem": 0.011287593795348515,
      "eval_ag_news_emb_top1_equal": 0.03125,
      "eval_ag_news_emb_top1_equal_sem": 0.015439349450344106,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 5.389096736907959,
      "eval_ag_news_n_ngrams_match_1": 6.128,
      "eval_ag_news_n_ngrams_match_2": 0.796,
      "eval_ag_news_n_ngrams_match_3": 0.156,
      "eval_ag_news_num_pred_words": 39.094,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 219.00547662150623,
      "eval_ag_news_pred_num_tokens": 62.9921875,
      "eval_ag_news_rouge_score": 0.14004064993515936,
      "eval_ag_news_runtime": 10.1745,
      "eval_ag_news_samples_per_second": 49.142,
      "eval_ag_news_steps_per_second": 0.098,
      "eval_ag_news_token_set_f1": 0.17740503442976457,
      "eval_ag_news_token_set_f1_sem": 0.004214815177846998,
      "eval_ag_news_token_set_precision": 0.13406466089971986,
      "eval_ag_news_token_set_recall": 0.31599748653246834,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 3125
    },
    {
      "epoch": 0.6,
      "eval_anthropic_toxic_prompts_accuracy": 0.05465625,
      "eval_anthropic_toxic_prompts_bleu_score": 1.075822339485072,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.048404389862727715,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.26767683029174805,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010647807927675683,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.011004959004867984,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 5.050642490386963,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 2.176,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.254,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.032,
      "eval_anthropic_toxic_prompts_num_pred_words": 36.548,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 156.12273962943607,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 62.9921875,
      "eval_anthropic_toxic_prompts_rouge_score": 0.07795176579412504,
      "eval_anthropic_toxic_prompts_runtime": 9.8737,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.639,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.101,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.13360386866283155,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005031129819239987,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.12835372473219997,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.19578409827717358,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 3125
    },
    {
      "epoch": 0.6,
      "eval_arxiv_accuracy": 0.22521875,
      "eval_arxiv_bleu_score": 1.2205938925532436,
      "eval_arxiv_bleu_score_sem": 0.0468696597158843,
      "eval_arxiv_emb_cos_sim": 0.2796843647956848,
      "eval_arxiv_emb_cos_sim_sem": 0.00787956538044321,
      "eval_arxiv_emb_top1_equal": 0.140625,
      "eval_arxiv_emb_top1_equal_sem": 0.030847557647994725,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 5.159443378448486,
      "eval_arxiv_n_ngrams_match_1": 5.176,
      "eval_arxiv_n_ngrams_match_2": 0.538,
      "eval_arxiv_n_ngrams_match_3": 0.024,
      "eval_arxiv_num_pred_words": 24.594,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 174.06753889108148,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.13216383019848524,
      "eval_arxiv_runtime": 9.8668,
      "eval_arxiv_samples_per_second": 50.675,
      "eval_arxiv_steps_per_second": 0.101,
      "eval_arxiv_token_set_f1": 0.13262861383240265,
      "eval_arxiv_token_set_f1_sem": 0.003971516873948744,
      "eval_arxiv_token_set_precision": 0.08465219272068342,
      "eval_arxiv_token_set_recall": 0.40846352640942785,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 3125
    },
    {
      "epoch": 0.6,
      "eval_python_code_alpaca_accuracy": 0.08265625,
      "eval_python_code_alpaca_bleu_score": 1.9714998209550776,
      "eval_python_code_alpaca_bleu_score_sem": 0.058439947030860105,
      "eval_python_code_alpaca_emb_cos_sim": 0.2215101569890976,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007273838776178736,
      "eval_python_code_alpaca_emb_top1_equal": 0.0078125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.007812499866294757,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 5.178214073181152,
      "eval_python_code_alpaca_n_ngrams_match_1": 2.906,
      "eval_python_code_alpaca_n_ngrams_match_2": 0.224,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.022,
      "eval_python_code_alpaca_num_pred_words": 20.886,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 177.36576569008702,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.12684910688521828,
      "eval_python_code_alpaca_runtime": 9.6209,
      "eval_python_code_alpaca_samples_per_second": 51.97,
      "eval_python_code_alpaca_steps_per_second": 0.104,
      "eval_python_code_alpaca_token_set_f1": 0.18422290235573677,
      "eval_python_code_alpaca_token_set_f1_sem": 0.004977650352202118,
      "eval_python_code_alpaca_token_set_precision": 0.13892467991777335,
      "eval_python_code_alpaca_token_set_recall": 0.36219662137827774,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 3125
    },
    {
      "epoch": 0.6,
      "eval_wikibio_accuracy": 0.206375,
      "eval_wikibio_bleu_score": 1.1004412593994752,
      "eval_wikibio_bleu_score_sem": 0.0834567302990565,
      "eval_wikibio_emb_cos_sim": 0.3168962001800537,
      "eval_wikibio_emb_cos_sim_sem": 0.010303981840057905,
      "eval_wikibio_emb_top1_equal": 0.0703125,
      "eval_wikibio_emb_top1_equal_sem": 0.022687306110270106,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 5.310210704803467,
      "eval_wikibio_n_ngrams_match_1": 2.192,
      "eval_wikibio_n_ngrams_match_2": 0.454,
      "eval_wikibio_n_ngrams_match_3": 0.13,
      "eval_wikibio_num_pred_words": 26.6,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 202.39286904538946,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.06197691456225775,
      "eval_wikibio_runtime": 10.3005,
      "eval_wikibio_samples_per_second": 48.541,
      "eval_wikibio_steps_per_second": 0.097,
      "eval_wikibio_token_set_f1": 0.07692352378772646,
      "eval_wikibio_token_set_f1_sem": 0.0053252201477126705,
      "eval_wikibio_token_set_precision": 0.06385281469822855,
      "eval_wikibio_token_set_recall": 0.13676271463183348,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 3125
    },
    {
      "epoch": 0.6,
      "eval_nq_accuracy": 0.33565625,
      "eval_nq_bleu_score": 3.6132990904335807,
      "eval_nq_bleu_score_sem": 0.1731479228516426,
      "eval_nq_emb_cos_sim": 0.44149720668792725,
      "eval_nq_emb_cos_sim_sem": 0.012091645029459032,
      "eval_nq_emb_top1_equal": 0.125,
      "eval_nq_emb_top1_equal_sem": 0.02934655822437397,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 3.933677911758423,
      "eval_nq_n_ngrams_match_1": 12.394,
      "eval_nq_n_ngrams_match_2": 2.834,
      "eval_nq_n_ngrams_match_3": 0.772,
      "eval_nq_num_pred_words": 42.312,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 51.094553773691246,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.22498549500134118,
      "eval_nq_runtime": 10.3286,
      "eval_nq_samples_per_second": 48.409,
      "eval_nq_steps_per_second": 0.097,
      "eval_nq_token_set_f1": 0.2641090096234247,
      "eval_nq_token_set_f1_sem": 0.0055344501872298885,
      "eval_nq_token_set_precision": 0.19725537998809747,
      "eval_nq_token_set_recall": 0.45519858497107596,
      "eval_nq_true_num_tokens": 64.0,
      "step": 3125
    },
    {
      "epoch": 0.6,
      "learning_rate": 0.001,
      "loss": 4.2847,
      "step": 3132
    },
    {
      "epoch": 0.6,
      "learning_rate": 0.001,
      "loss": 4.2833,
      "step": 3144
    },
    {
      "epoch": 0.61,
      "learning_rate": 0.001,
      "loss": 4.2858,
      "step": 3156
    },
    {
      "epoch": 0.61,
      "learning_rate": 0.001,
      "loss": 4.2695,
      "step": 3168
    },
    {
      "epoch": 0.61,
      "learning_rate": 0.001,
      "loss": 4.2798,
      "step": 3180
    },
    {
      "epoch": 0.61,
      "learning_rate": 0.001,
      "loss": 4.2716,
      "step": 3192
    },
    {
      "epoch": 0.62,
      "learning_rate": 0.001,
      "loss": 4.2707,
      "step": 3204
    },
    {
      "epoch": 0.62,
      "learning_rate": 0.001,
      "loss": 4.2579,
      "step": 3216
    },
    {
      "epoch": 0.62,
      "learning_rate": 0.001,
      "loss": 4.2496,
      "step": 3228
    },
    {
      "epoch": 0.62,
      "learning_rate": 0.001,
      "loss": 4.2648,
      "step": 3240
    },
    {
      "epoch": 0.62,
      "learning_rate": 0.001,
      "loss": 4.2558,
      "step": 3252
    },
    {
      "epoch": 0.63,
      "learning_rate": 0.001,
      "loss": 4.242,
      "step": 3264
    },
    {
      "epoch": 0.63,
      "learning_rate": 0.001,
      "loss": 4.2317,
      "step": 3276
    },
    {
      "epoch": 0.63,
      "learning_rate": 0.001,
      "loss": 4.2625,
      "step": 3288
    },
    {
      "epoch": 0.63,
      "learning_rate": 0.001,
      "loss": 4.2318,
      "step": 3300
    },
    {
      "epoch": 0.64,
      "learning_rate": 0.001,
      "loss": 4.2332,
      "step": 3312
    },
    {
      "epoch": 0.64,
      "learning_rate": 0.001,
      "loss": 4.2369,
      "step": 3324
    },
    {
      "epoch": 0.64,
      "learning_rate": 0.001,
      "loss": 4.232,
      "step": 3336
    },
    {
      "epoch": 0.64,
      "learning_rate": 0.001,
      "loss": 4.2251,
      "step": 3348
    },
    {
      "epoch": 0.65,
      "learning_rate": 0.001,
      "loss": 4.23,
      "step": 3360
    },
    {
      "epoch": 0.65,
      "learning_rate": 0.001,
      "loss": 4.2216,
      "step": 3372
    },
    {
      "epoch": 0.65,
      "learning_rate": 0.001,
      "loss": 4.216,
      "step": 3384
    },
    {
      "epoch": 0.65,
      "learning_rate": 0.001,
      "loss": 4.2182,
      "step": 3396
    },
    {
      "epoch": 0.65,
      "learning_rate": 0.001,
      "loss": 4.2014,
      "step": 3408
    },
    {
      "epoch": 0.66,
      "learning_rate": 0.001,
      "loss": 4.2048,
      "step": 3420
    },
    {
      "epoch": 0.66,
      "learning_rate": 0.001,
      "loss": 4.2042,
      "step": 3432
    },
    {
      "epoch": 0.66,
      "learning_rate": 0.001,
      "loss": 4.1948,
      "step": 3444
    },
    {
      "epoch": 0.66,
      "learning_rate": 0.001,
      "loss": 4.1884,
      "step": 3456
    },
    {
      "epoch": 0.67,
      "learning_rate": 0.001,
      "loss": 4.1988,
      "step": 3468
    },
    {
      "epoch": 0.67,
      "learning_rate": 0.001,
      "loss": 4.2,
      "step": 3480
    },
    {
      "epoch": 0.67,
      "learning_rate": 0.001,
      "loss": 4.1891,
      "step": 3492
    },
    {
      "epoch": 0.67,
      "learning_rate": 0.001,
      "loss": 4.1916,
      "step": 3504
    },
    {
      "epoch": 0.68,
      "learning_rate": 0.001,
      "loss": 4.1804,
      "step": 3516
    },
    {
      "epoch": 0.68,
      "learning_rate": 0.001,
      "loss": 4.1824,
      "step": 3528
    },
    {
      "epoch": 0.68,
      "learning_rate": 0.001,
      "loss": 4.1767,
      "step": 3540
    },
    {
      "epoch": 0.68,
      "learning_rate": 0.001,
      "loss": 4.1624,
      "step": 3552
    },
    {
      "epoch": 0.68,
      "learning_rate": 0.001,
      "loss": 4.1738,
      "step": 3564
    },
    {
      "epoch": 0.69,
      "learning_rate": 0.001,
      "loss": 4.1647,
      "step": 3576
    },
    {
      "epoch": 0.69,
      "learning_rate": 0.001,
      "loss": 4.1543,
      "step": 3588
    },
    {
      "epoch": 0.69,
      "learning_rate": 0.001,
      "loss": 4.169,
      "step": 3600
    },
    {
      "epoch": 0.69,
      "learning_rate": 0.001,
      "loss": 4.1549,
      "step": 3612
    },
    {
      "epoch": 0.7,
      "learning_rate": 0.001,
      "loss": 4.1656,
      "step": 3624
    },
    {
      "epoch": 0.7,
      "learning_rate": 0.001,
      "loss": 4.1506,
      "step": 3636
    },
    {
      "epoch": 0.7,
      "learning_rate": 0.001,
      "loss": 4.1565,
      "step": 3648
    },
    {
      "epoch": 0.7,
      "learning_rate": 0.001,
      "loss": 4.145,
      "step": 3660
    },
    {
      "epoch": 0.71,
      "learning_rate": 0.001,
      "loss": 4.1563,
      "step": 3672
    },
    {
      "epoch": 0.71,
      "learning_rate": 0.001,
      "loss": 4.1406,
      "step": 3684
    },
    {
      "epoch": 0.71,
      "learning_rate": 0.001,
      "loss": 4.1233,
      "step": 3696
    },
    {
      "epoch": 0.71,
      "learning_rate": 0.001,
      "loss": 4.1229,
      "step": 3708
    },
    {
      "epoch": 0.71,
      "learning_rate": 0.001,
      "loss": 4.1353,
      "step": 3720
    },
    {
      "epoch": 0.72,
      "learning_rate": 0.001,
      "loss": 4.1276,
      "step": 3732
    },
    {
      "epoch": 0.72,
      "learning_rate": 0.001,
      "loss": 4.1091,
      "step": 3744
    },
    {
      "epoch": 0.72,
      "eval_ag_news_accuracy": 0.1999375,
      "eval_ag_news_bleu_score": 1.8619877025600262,
      "eval_ag_news_bleu_score_sem": 0.0650624380628498,
      "eval_ag_news_emb_cos_sim": 0.38478976488113403,
      "eval_ag_news_emb_cos_sim_sem": 0.011722462344105887,
      "eval_ag_news_emb_top1_equal": 0.0546875,
      "eval_ag_news_emb_top1_equal_sem": 0.020175758285348722,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 5.255814552307129,
      "eval_ag_news_n_ngrams_match_1": 6.242,
      "eval_ag_news_n_ngrams_match_2": 0.806,
      "eval_ag_news_n_ngrams_match_3": 0.142,
      "eval_ag_news_num_pred_words": 38.902,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 191.6775536766177,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.14476029481218228,
      "eval_ag_news_runtime": 10.3413,
      "eval_ag_news_samples_per_second": 48.35,
      "eval_ag_news_steps_per_second": 0.097,
      "eval_ag_news_token_set_f1": 0.18357471062976743,
      "eval_ag_news_token_set_f1_sem": 0.0041610935295574025,
      "eval_ag_news_token_set_precision": 0.13850072206670916,
      "eval_ag_news_token_set_recall": 0.32681119622119503,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 3750
    },
    {
      "epoch": 0.72,
      "eval_anthropic_toxic_prompts_accuracy": 0.0558125,
      "eval_anthropic_toxic_prompts_bleu_score": 1.2702086804584747,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.0557275274868974,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.2815111577510834,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011618550606191295,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.011004959004867984,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 4.982417106628418,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 2.44,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.272,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.052,
      "eval_anthropic_toxic_prompts_num_pred_words": 37.69,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 145.8264340484558,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.08829251290210152,
      "eval_anthropic_toxic_prompts_runtime": 9.7406,
      "eval_anthropic_toxic_prompts_samples_per_second": 51.331,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.103,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.1470798080901539,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.004943615235098987,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.14105713088319935,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.2037022235617993,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 3750
    },
    {
      "epoch": 0.72,
      "eval_arxiv_accuracy": 0.230375,
      "eval_arxiv_bleu_score": 1.4517498825506656,
      "eval_arxiv_bleu_score_sem": 0.05051521468972786,
      "eval_arxiv_emb_cos_sim": 0.3207044303417206,
      "eval_arxiv_emb_cos_sim_sem": 0.007364829870846708,
      "eval_arxiv_emb_top1_equal": 0.1328125,
      "eval_arxiv_emb_top1_equal_sem": 0.030114394778901498,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 5.018895626068115,
      "eval_arxiv_n_ngrams_match_1": 5.57,
      "eval_arxiv_n_ngrams_match_2": 0.68,
      "eval_arxiv_n_ngrams_match_3": 0.044,
      "eval_arxiv_num_pred_words": 28.4,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 151.24418139694512,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.13883675375698562,
      "eval_arxiv_runtime": 11.3887,
      "eval_arxiv_samples_per_second": 43.903,
      "eval_arxiv_steps_per_second": 0.088,
      "eval_arxiv_token_set_f1": 0.1404308603982505,
      "eval_arxiv_token_set_f1_sem": 0.0038565303084726805,
      "eval_arxiv_token_set_precision": 0.08953093709897436,
      "eval_arxiv_token_set_recall": 0.43586337262382197,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 3750
    },
    {
      "epoch": 0.72,
      "eval_python_code_alpaca_accuracy": 0.0875625,
      "eval_python_code_alpaca_bleu_score": 1.8910110482747022,
      "eval_python_code_alpaca_bleu_score_sem": 0.0552021381501231,
      "eval_python_code_alpaca_emb_cos_sim": 0.23446616530418396,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.0076346138237984645,
      "eval_python_code_alpaca_emb_top1_equal": 0.015625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.011004959004867984,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 5.079406261444092,
      "eval_python_code_alpaca_n_ngrams_match_1": 2.754,
      "eval_python_code_alpaca_n_ngrams_match_2": 0.17,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.014,
      "eval_python_code_alpaca_num_pred_words": 19.398,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 160.67862650568006,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.131813315690313,
      "eval_python_code_alpaca_runtime": 10.4434,
      "eval_python_code_alpaca_samples_per_second": 47.877,
      "eval_python_code_alpaca_steps_per_second": 0.096,
      "eval_python_code_alpaca_token_set_f1": 0.18785229022437347,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0045256177229473235,
      "eval_python_code_alpaca_token_set_precision": 0.1364999232069587,
      "eval_python_code_alpaca_token_set_recall": 0.39858400063988386,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 3750
    },
    {
      "epoch": 0.72,
      "eval_wikibio_accuracy": 0.22246875,
      "eval_wikibio_bleu_score": 1.4159646195438642,
      "eval_wikibio_bleu_score_sem": 0.09932930841696949,
      "eval_wikibio_emb_cos_sim": 0.35904812812805176,
      "eval_wikibio_emb_cos_sim_sem": 0.01094617600499643,
      "eval_wikibio_emb_top1_equal": 0.0703125,
      "eval_wikibio_emb_top1_equal_sem": 0.022687306110270106,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 5.181839942932129,
      "eval_wikibio_n_ngrams_match_1": 2.942,
      "eval_wikibio_n_ngrams_match_2": 0.632,
      "eval_wikibio_n_ngrams_match_3": 0.19,
      "eval_wikibio_num_pred_words": 27.358,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 178.01003817296467,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.08354534387190918,
      "eval_wikibio_runtime": 9.9779,
      "eval_wikibio_samples_per_second": 50.111,
      "eval_wikibio_steps_per_second": 0.1,
      "eval_wikibio_token_set_f1": 0.10982181694521252,
      "eval_wikibio_token_set_f1_sem": 0.005646462481343112,
      "eval_wikibio_token_set_precision": 0.08833275999052514,
      "eval_wikibio_token_set_recall": 0.19017553563916095,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 3750
    },
    {
      "epoch": 0.72,
      "eval_nq_accuracy": 0.35415625,
      "eval_nq_bleu_score": 3.813795763528188,
      "eval_nq_bleu_score_sem": 0.18499382516869395,
      "eval_nq_emb_cos_sim": 0.4574257731437683,
      "eval_nq_emb_cos_sim_sem": 0.011819713562866744,
      "eval_nq_emb_top1_equal": 0.09375,
      "eval_nq_emb_top1_equal_sem": 0.025864720141013958,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 3.7624354362487793,
      "eval_nq_n_ngrams_match_1": 12.78,
      "eval_nq_n_ngrams_match_2": 3.006,
      "eval_nq_n_ngrams_match_3": 0.844,
      "eval_nq_num_pred_words": 43.766,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 43.05315160675132,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.22971694995211947,
      "eval_nq_runtime": 10.2976,
      "eval_nq_samples_per_second": 48.555,
      "eval_nq_steps_per_second": 0.097,
      "eval_nq_token_set_f1": 0.2739734811777524,
      "eval_nq_token_set_f1_sem": 0.005255913027903776,
      "eval_nq_token_set_precision": 0.20585654841798381,
      "eval_nq_token_set_recall": 0.4722101911422006,
      "eval_nq_true_num_tokens": 64.0,
      "step": 3750
    },
    {
      "epoch": 0.72,
      "learning_rate": 0.001,
      "loss": 4.1213,
      "step": 3756
    },
    {
      "epoch": 0.72,
      "learning_rate": 0.001,
      "loss": 4.1201,
      "step": 3768
    },
    {
      "epoch": 0.73,
      "learning_rate": 0.001,
      "loss": 4.1253,
      "step": 3780
    },
    {
      "epoch": 0.73,
      "learning_rate": 0.001,
      "loss": 4.1286,
      "step": 3792
    },
    {
      "epoch": 0.73,
      "learning_rate": 0.001,
      "loss": 4.1157,
      "step": 3804
    },
    {
      "epoch": 0.73,
      "learning_rate": 0.001,
      "loss": 4.1178,
      "step": 3816
    },
    {
      "epoch": 0.74,
      "learning_rate": 0.001,
      "loss": 4.1197,
      "step": 3828
    },
    {
      "epoch": 0.74,
      "learning_rate": 0.001,
      "loss": 4.1097,
      "step": 3840
    },
    {
      "epoch": 0.74,
      "learning_rate": 0.001,
      "loss": 4.0928,
      "step": 3852
    },
    {
      "epoch": 0.74,
      "learning_rate": 0.001,
      "loss": 4.1009,
      "step": 3864
    },
    {
      "epoch": 0.74,
      "learning_rate": 0.001,
      "loss": 4.1068,
      "step": 3876
    },
    {
      "epoch": 0.75,
      "learning_rate": 0.001,
      "loss": 4.0965,
      "step": 3888
    },
    {
      "epoch": 0.75,
      "learning_rate": 0.001,
      "loss": 4.0932,
      "step": 3900
    },
    {
      "epoch": 0.75,
      "learning_rate": 0.001,
      "loss": 4.0941,
      "step": 3912
    },
    {
      "epoch": 0.75,
      "learning_rate": 0.001,
      "loss": 4.0824,
      "step": 3924
    },
    {
      "epoch": 0.76,
      "learning_rate": 0.001,
      "loss": 4.0905,
      "step": 3936
    },
    {
      "epoch": 0.76,
      "learning_rate": 0.001,
      "loss": 4.0942,
      "step": 3948
    },
    {
      "epoch": 0.76,
      "learning_rate": 0.001,
      "loss": 4.0807,
      "step": 3960
    },
    {
      "epoch": 0.76,
      "learning_rate": 0.001,
      "loss": 4.0921,
      "step": 3972
    },
    {
      "epoch": 0.76,
      "learning_rate": 0.001,
      "loss": 4.0837,
      "step": 3984
    },
    {
      "epoch": 0.77,
      "learning_rate": 0.001,
      "loss": 4.0746,
      "step": 3996
    },
    {
      "epoch": 0.77,
      "learning_rate": 0.001,
      "loss": 4.0648,
      "step": 4008
    },
    {
      "epoch": 0.77,
      "learning_rate": 0.001,
      "loss": 4.0671,
      "step": 4020
    },
    {
      "epoch": 0.77,
      "learning_rate": 0.001,
      "loss": 4.0805,
      "step": 4032
    },
    {
      "epoch": 0.78,
      "learning_rate": 0.001,
      "loss": 4.0727,
      "step": 4044
    },
    {
      "epoch": 0.78,
      "learning_rate": 0.001,
      "loss": 4.0677,
      "step": 4056
    },
    {
      "epoch": 0.78,
      "learning_rate": 0.001,
      "loss": 4.0532,
      "step": 4068
    },
    {
      "epoch": 0.78,
      "learning_rate": 0.001,
      "loss": 4.0569,
      "step": 4080
    },
    {
      "epoch": 0.79,
      "learning_rate": 0.001,
      "loss": 4.055,
      "step": 4092
    },
    {
      "epoch": 0.79,
      "learning_rate": 0.001,
      "loss": 4.0537,
      "step": 4104
    },
    {
      "epoch": 0.79,
      "learning_rate": 0.001,
      "loss": 4.0515,
      "step": 4116
    },
    {
      "epoch": 0.79,
      "learning_rate": 0.001,
      "loss": 4.0491,
      "step": 4128
    },
    {
      "epoch": 0.79,
      "learning_rate": 0.001,
      "loss": 4.0431,
      "step": 4140
    },
    {
      "epoch": 0.8,
      "learning_rate": 0.001,
      "loss": 4.0428,
      "step": 4152
    },
    {
      "epoch": 0.8,
      "learning_rate": 0.001,
      "loss": 4.0421,
      "step": 4164
    },
    {
      "epoch": 0.8,
      "learning_rate": 0.001,
      "loss": 4.0461,
      "step": 4176
    },
    {
      "epoch": 0.8,
      "learning_rate": 0.001,
      "loss": 4.0405,
      "step": 4188
    },
    {
      "epoch": 0.81,
      "learning_rate": 0.001,
      "loss": 4.0363,
      "step": 4200
    },
    {
      "epoch": 0.81,
      "learning_rate": 0.001,
      "loss": 4.0254,
      "step": 4212
    },
    {
      "epoch": 0.81,
      "learning_rate": 0.001,
      "loss": 4.0286,
      "step": 4224
    },
    {
      "epoch": 0.81,
      "learning_rate": 0.001,
      "loss": 4.0117,
      "step": 4236
    },
    {
      "epoch": 0.82,
      "learning_rate": 0.001,
      "loss": 4.0182,
      "step": 4248
    },
    {
      "epoch": 0.82,
      "learning_rate": 0.001,
      "loss": 4.0169,
      "step": 4260
    },
    {
      "epoch": 0.82,
      "learning_rate": 0.001,
      "loss": 4.005,
      "step": 4272
    },
    {
      "epoch": 0.82,
      "learning_rate": 0.001,
      "loss": 4.0182,
      "step": 4284
    },
    {
      "epoch": 0.82,
      "learning_rate": 0.001,
      "loss": 4.022,
      "step": 4296
    },
    {
      "epoch": 0.83,
      "learning_rate": 0.001,
      "loss": 4.0206,
      "step": 4308
    },
    {
      "epoch": 0.83,
      "learning_rate": 0.001,
      "loss": 4.0101,
      "step": 4320
    },
    {
      "epoch": 0.83,
      "learning_rate": 0.001,
      "loss": 4.0047,
      "step": 4332
    },
    {
      "epoch": 0.83,
      "learning_rate": 0.001,
      "loss": 4.0135,
      "step": 4344
    },
    {
      "epoch": 0.84,
      "learning_rate": 0.001,
      "loss": 4.0004,
      "step": 4356
    },
    {
      "epoch": 0.84,
      "learning_rate": 0.001,
      "loss": 4.0053,
      "step": 4368
    },
    {
      "epoch": 0.84,
      "eval_ag_news_accuracy": 0.2065625,
      "eval_ag_news_bleu_score": 1.871361272034575,
      "eval_ag_news_bleu_score_sem": 0.0669665206205916,
      "eval_ag_news_emb_cos_sim": 0.4182249903678894,
      "eval_ag_news_emb_cos_sim_sem": 0.011004487487000269,
      "eval_ag_news_emb_top1_equal": 0.0234375,
      "eval_ag_news_emb_top1_equal_sem": 0.013424676090873717,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 5.128519535064697,
      "eval_ag_news_n_ngrams_match_1": 6.61,
      "eval_ag_news_n_ngrams_match_2": 0.87,
      "eval_ag_news_n_ngrams_match_3": 0.16,
      "eval_ag_news_num_pred_words": 42.274,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 168.76707926065117,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.15322416291529034,
      "eval_ag_news_runtime": 10.7869,
      "eval_ag_news_samples_per_second": 46.353,
      "eval_ag_news_steps_per_second": 0.093,
      "eval_ag_news_token_set_f1": 0.1920964020685739,
      "eval_ag_news_token_set_f1_sem": 0.0042704652911768745,
      "eval_ag_news_token_set_precision": 0.148605437255678,
      "eval_ag_news_token_set_recall": 0.31855377856718653,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 4375
    },
    {
      "epoch": 0.84,
      "eval_anthropic_toxic_prompts_accuracy": 0.0604375,
      "eval_anthropic_toxic_prompts_bleu_score": 1.2532835276454266,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.05331171100443469,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.3187675178050995,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011214266181473656,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.011004959004867984,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 4.8185715675354,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 2.652,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.368,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.082,
      "eval_anthropic_toxic_prompts_num_pred_words": 41.29,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 123.78814142986258,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.09247397774172243,
      "eval_anthropic_toxic_prompts_runtime": 9.9578,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.212,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.1,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.16147119908169896,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005113289051911548,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.16160135370102474,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.20611317144977448,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 4375
    },
    {
      "epoch": 0.84,
      "eval_arxiv_accuracy": 0.23803125,
      "eval_arxiv_bleu_score": 1.5904026286564472,
      "eval_arxiv_bleu_score_sem": 0.051126216309231934,
      "eval_arxiv_emb_cos_sim": 0.3372165560722351,
      "eval_arxiv_emb_cos_sim_sem": 0.007429207206313575,
      "eval_arxiv_emb_top1_equal": 0.15625,
      "eval_arxiv_emb_top1_equal_sem": 0.03221922156442571,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 4.879188060760498,
      "eval_arxiv_n_ngrams_match_1": 6.224,
      "eval_arxiv_n_ngrams_match_2": 0.788,
      "eval_arxiv_n_ngrams_match_3": 0.06,
      "eval_arxiv_num_pred_words": 30.362,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 131.52383116140922,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.15318202399655065,
      "eval_arxiv_runtime": 10.2322,
      "eval_arxiv_samples_per_second": 48.865,
      "eval_arxiv_steps_per_second": 0.098,
      "eval_arxiv_token_set_f1": 0.15064753576784035,
      "eval_arxiv_token_set_f1_sem": 0.0038878295756771355,
      "eval_arxiv_token_set_precision": 0.09639888091771633,
      "eval_arxiv_token_set_recall": 0.44283950377295084,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 4375
    },
    {
      "epoch": 0.84,
      "eval_python_code_alpaca_accuracy": 0.0880625,
      "eval_python_code_alpaca_bleu_score": 1.805712411774633,
      "eval_python_code_alpaca_bleu_score_sem": 0.060076659656152144,
      "eval_python_code_alpaca_emb_cos_sim": 0.24359974265098572,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007952776431940155,
      "eval_python_code_alpaca_emb_top1_equal": 0.015625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.011004959004867984,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 4.917336940765381,
      "eval_python_code_alpaca_n_ngrams_match_1": 3.066,
      "eval_python_code_alpaca_n_ngrams_match_2": 0.278,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.038,
      "eval_python_code_alpaca_num_pred_words": 23.632,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 136.6382524846553,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.1275283288666622,
      "eval_python_code_alpaca_runtime": 10.3593,
      "eval_python_code_alpaca_samples_per_second": 48.266,
      "eval_python_code_alpaca_steps_per_second": 0.097,
      "eval_python_code_alpaca_token_set_f1": 0.19763638405963282,
      "eval_python_code_alpaca_token_set_f1_sem": 0.004958243877292779,
      "eval_python_code_alpaca_token_set_precision": 0.1474496415170886,
      "eval_python_code_alpaca_token_set_recall": 0.41065891886377515,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 4375
    },
    {
      "epoch": 0.84,
      "eval_wikibio_accuracy": 0.21459375,
      "eval_wikibio_bleu_score": 1.7894438470152427,
      "eval_wikibio_bleu_score_sem": 0.11503339179080427,
      "eval_wikibio_emb_cos_sim": 0.39553847908973694,
      "eval_wikibio_emb_cos_sim_sem": 0.011656311548279024,
      "eval_wikibio_emb_top1_equal": 0.078125,
      "eval_wikibio_emb_top1_equal_sem": 0.023813825516515504,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 5.125114440917969,
      "eval_wikibio_n_ngrams_match_1": 3.732,
      "eval_wikibio_n_ngrams_match_2": 0.956,
      "eval_wikibio_n_ngrams_match_3": 0.294,
      "eval_wikibio_num_pred_words": 32.488,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 168.19338875630183,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.10842636858697531,
      "eval_wikibio_runtime": 10.2257,
      "eval_wikibio_samples_per_second": 48.896,
      "eval_wikibio_steps_per_second": 0.098,
      "eval_wikibio_token_set_f1": 0.12859984932092094,
      "eval_wikibio_token_set_f1_sem": 0.006442311437587149,
      "eval_wikibio_token_set_precision": 0.11314253221462418,
      "eval_wikibio_token_set_recall": 0.1890339515749286,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 4375
    },
    {
      "epoch": 0.84,
      "eval_nq_accuracy": 0.3668125,
      "eval_nq_bleu_score": 4.249049033939418,
      "eval_nq_bleu_score_sem": 0.21391976509794372,
      "eval_nq_emb_cos_sim": 0.4965488314628601,
      "eval_nq_emb_cos_sim_sem": 0.011120440711243325,
      "eval_nq_emb_top1_equal": 0.046875,
      "eval_nq_emb_top1_equal_sem": 0.01875615101164758,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 3.631835460662842,
      "eval_nq_n_ngrams_match_1": 13.792,
      "eval_nq_n_ngrams_match_2": 3.378,
      "eval_nq_n_ngrams_match_3": 0.984,
      "eval_nq_num_pred_words": 46.09,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 37.7821005731044,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.24789971514855919,
      "eval_nq_runtime": 10.2112,
      "eval_nq_samples_per_second": 48.966,
      "eval_nq_steps_per_second": 0.098,
      "eval_nq_token_set_f1": 0.29225597797313135,
      "eval_nq_token_set_f1_sem": 0.005254747588326533,
      "eval_nq_token_set_precision": 0.22302434740971663,
      "eval_nq_token_set_recall": 0.47552982854746434,
      "eval_nq_true_num_tokens": 64.0,
      "step": 4375
    },
    {
      "epoch": 0.84,
      "learning_rate": 0.001,
      "loss": 3.9943,
      "step": 4380
    },
    {
      "epoch": 0.84,
      "learning_rate": 0.001,
      "loss": 4.0075,
      "step": 4392
    },
    {
      "epoch": 0.85,
      "learning_rate": 0.001,
      "loss": 4.0101,
      "step": 4404
    },
    {
      "epoch": 0.85,
      "learning_rate": 0.001,
      "loss": 4.0002,
      "step": 4416
    },
    {
      "epoch": 0.85,
      "learning_rate": 0.001,
      "loss": 3.9845,
      "step": 4428
    },
    {
      "epoch": 0.85,
      "learning_rate": 0.001,
      "loss": 3.9987,
      "step": 4440
    },
    {
      "epoch": 0.85,
      "learning_rate": 0.001,
      "loss": 3.987,
      "step": 4452
    },
    {
      "epoch": 0.86,
      "learning_rate": 0.001,
      "loss": 3.9701,
      "step": 4464
    },
    {
      "epoch": 0.86,
      "learning_rate": 0.001,
      "loss": 3.9805,
      "step": 4476
    },
    {
      "epoch": 0.86,
      "learning_rate": 0.001,
      "loss": 3.9753,
      "step": 4488
    },
    {
      "epoch": 0.86,
      "learning_rate": 0.001,
      "loss": 3.9649,
      "step": 4500
    },
    {
      "epoch": 0.87,
      "learning_rate": 0.001,
      "loss": 3.9742,
      "step": 4512
    },
    {
      "epoch": 0.87,
      "learning_rate": 0.001,
      "loss": 3.9667,
      "step": 4524
    },
    {
      "epoch": 0.87,
      "learning_rate": 0.001,
      "loss": 3.9714,
      "step": 4536
    },
    {
      "epoch": 0.87,
      "learning_rate": 0.001,
      "loss": 3.9829,
      "step": 4548
    },
    {
      "epoch": 0.88,
      "learning_rate": 0.001,
      "loss": 3.986,
      "step": 4560
    },
    {
      "epoch": 0.88,
      "learning_rate": 0.001,
      "loss": 3.9601,
      "step": 4572
    },
    {
      "epoch": 0.88,
      "learning_rate": 0.001,
      "loss": 3.9694,
      "step": 4584
    },
    {
      "epoch": 0.88,
      "learning_rate": 0.001,
      "loss": 3.9547,
      "step": 4596
    },
    {
      "epoch": 0.88,
      "learning_rate": 0.001,
      "loss": 3.9506,
      "step": 4608
    },
    {
      "epoch": 0.89,
      "learning_rate": 0.001,
      "loss": 3.9554,
      "step": 4620
    },
    {
      "epoch": 0.89,
      "learning_rate": 0.001,
      "loss": 3.9578,
      "step": 4632
    },
    {
      "epoch": 0.89,
      "learning_rate": 0.001,
      "loss": 3.9557,
      "step": 4644
    },
    {
      "epoch": 0.89,
      "learning_rate": 0.001,
      "loss": 3.9428,
      "step": 4656
    },
    {
      "epoch": 0.9,
      "learning_rate": 0.001,
      "loss": 3.9414,
      "step": 4668
    },
    {
      "epoch": 0.9,
      "learning_rate": 0.001,
      "loss": 3.9395,
      "step": 4680
    },
    {
      "epoch": 0.9,
      "learning_rate": 0.001,
      "loss": 3.9461,
      "step": 4692
    },
    {
      "epoch": 0.9,
      "learning_rate": 0.001,
      "loss": 3.9403,
      "step": 4704
    },
    {
      "epoch": 0.91,
      "learning_rate": 0.001,
      "loss": 3.9436,
      "step": 4716
    },
    {
      "epoch": 0.91,
      "learning_rate": 0.001,
      "loss": 3.9382,
      "step": 4728
    },
    {
      "epoch": 0.91,
      "learning_rate": 0.001,
      "loss": 3.9338,
      "step": 4740
    },
    {
      "epoch": 0.91,
      "learning_rate": 0.001,
      "loss": 3.9329,
      "step": 4752
    },
    {
      "epoch": 0.91,
      "learning_rate": 0.001,
      "loss": 3.9366,
      "step": 4764
    },
    {
      "epoch": 0.92,
      "learning_rate": 0.001,
      "loss": 3.938,
      "step": 4776
    },
    {
      "epoch": 0.92,
      "learning_rate": 0.001,
      "loss": 3.9251,
      "step": 4788
    },
    {
      "epoch": 0.92,
      "learning_rate": 0.001,
      "loss": 3.9298,
      "step": 4800
    },
    {
      "epoch": 0.92,
      "learning_rate": 0.001,
      "loss": 3.9208,
      "step": 4812
    },
    {
      "epoch": 0.93,
      "learning_rate": 0.001,
      "loss": 3.9149,
      "step": 4824
    },
    {
      "epoch": 0.93,
      "learning_rate": 0.001,
      "loss": 3.911,
      "step": 4836
    },
    {
      "epoch": 0.93,
      "learning_rate": 0.001,
      "loss": 3.9112,
      "step": 4848
    },
    {
      "epoch": 0.93,
      "learning_rate": 0.001,
      "loss": 3.9072,
      "step": 4860
    },
    {
      "epoch": 0.94,
      "learning_rate": 0.001,
      "loss": 3.9138,
      "step": 4872
    },
    {
      "epoch": 0.94,
      "learning_rate": 0.001,
      "loss": 3.9082,
      "step": 4884
    },
    {
      "epoch": 0.94,
      "learning_rate": 0.001,
      "loss": 3.9071,
      "step": 4896
    },
    {
      "epoch": 0.94,
      "learning_rate": 0.001,
      "loss": 3.9095,
      "step": 4908
    },
    {
      "epoch": 0.94,
      "learning_rate": 0.001,
      "loss": 3.9083,
      "step": 4920
    },
    {
      "epoch": 0.95,
      "learning_rate": 0.001,
      "loss": 3.9102,
      "step": 4932
    },
    {
      "epoch": 0.95,
      "learning_rate": 0.001,
      "loss": 3.9082,
      "step": 4944
    },
    {
      "epoch": 0.95,
      "learning_rate": 0.001,
      "loss": 3.9016,
      "step": 4956
    },
    {
      "epoch": 0.95,
      "learning_rate": 0.001,
      "loss": 3.9075,
      "step": 4968
    },
    {
      "epoch": 0.96,
      "learning_rate": 0.001,
      "loss": 3.8916,
      "step": 4980
    },
    {
      "epoch": 0.96,
      "learning_rate": 0.001,
      "loss": 3.8996,
      "step": 4992
    },
    {
      "epoch": 0.96,
      "eval_ag_news_accuracy": 0.2135,
      "eval_ag_news_bleu_score": 2.106691925728337,
      "eval_ag_news_bleu_score_sem": 0.077828882577352,
      "eval_ag_news_emb_cos_sim": 0.4324933886528015,
      "eval_ag_news_emb_cos_sim_sem": 0.010735109197465429,
      "eval_ag_news_emb_top1_equal": 0.0546875,
      "eval_ag_news_emb_top1_equal_sem": 0.020175758285348722,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 5.042166709899902,
      "eval_ag_news_n_ngrams_match_1": 6.972,
      "eval_ag_news_n_ngrams_match_2": 0.918,
      "eval_ag_news_n_ngrams_match_3": 0.196,
      "eval_ag_news_num_pred_words": 41.766,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 154.80506958862475,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.16074004098204467,
      "eval_ag_news_runtime": 10.7798,
      "eval_ag_news_samples_per_second": 46.383,
      "eval_ag_news_steps_per_second": 0.093,
      "eval_ag_news_token_set_f1": 0.19950133558324398,
      "eval_ag_news_token_set_f1_sem": 0.00428042751071768,
      "eval_ag_news_token_set_precision": 0.15435039810338688,
      "eval_ag_news_token_set_recall": 0.3339399142544686,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 5000
    },
    {
      "epoch": 0.96,
      "eval_anthropic_toxic_prompts_accuracy": 0.06021875,
      "eval_anthropic_toxic_prompts_bleu_score": 1.2758001126979703,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.055173894615778714,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.32390105724334717,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.01071198638270114,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0078125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.007812499866294757,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 4.792735576629639,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 2.536,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.348,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.072,
      "eval_anthropic_toxic_prompts_num_pred_words": 38.92,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 120.6309128169782,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.08958896386189649,
      "eval_anthropic_toxic_prompts_runtime": 9.8634,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.692,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.101,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.157478964395896,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005107972361080511,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.1552205257452103,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.21541269712103858,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 5000
    },
    {
      "epoch": 0.96,
      "eval_arxiv_accuracy": 0.24671875,
      "eval_arxiv_bleu_score": 1.61273635765576,
      "eval_arxiv_bleu_score_sem": 0.05466130060070739,
      "eval_arxiv_emb_cos_sim": 0.3310911953449249,
      "eval_arxiv_emb_cos_sim_sem": 0.008236715799091002,
      "eval_arxiv_emb_top1_equal": 0.1484375,
      "eval_arxiv_emb_top1_equal_sem": 0.031548465007086954,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 4.787303447723389,
      "eval_arxiv_n_ngrams_match_1": 6.426,
      "eval_arxiv_n_ngrams_match_2": 0.758,
      "eval_arxiv_n_ngrams_match_3": 0.058,
      "eval_arxiv_num_pred_words": 29.766,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 119.97740672012935,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.15514998462643764,
      "eval_arxiv_runtime": 9.9264,
      "eval_arxiv_samples_per_second": 50.371,
      "eval_arxiv_steps_per_second": 0.101,
      "eval_arxiv_token_set_f1": 0.16217454625457758,
      "eval_arxiv_token_set_f1_sem": 0.004081556464616621,
      "eval_arxiv_token_set_precision": 0.10590238642913431,
      "eval_arxiv_token_set_recall": 0.43923258693272826,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 5000
    },
    {
      "epoch": 0.96,
      "eval_python_code_alpaca_accuracy": 0.090875,
      "eval_python_code_alpaca_bleu_score": 1.799517111232777,
      "eval_python_code_alpaca_bleu_score_sem": 0.056360050673125536,
      "eval_python_code_alpaca_emb_cos_sim": 0.24102574586868286,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007653049118754933,
      "eval_python_code_alpaca_emb_top1_equal": 0.0078125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.007812499866294757,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 4.885411262512207,
      "eval_python_code_alpaca_n_ngrams_match_1": 2.828,
      "eval_python_code_alpaca_n_ngrams_match_2": 0.204,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.022,
      "eval_python_code_alpaca_num_pred_words": 21.402,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 132.34488263253957,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.12120203822033535,
      "eval_python_code_alpaca_runtime": 9.7802,
      "eval_python_code_alpaca_samples_per_second": 51.124,
      "eval_python_code_alpaca_steps_per_second": 0.102,
      "eval_python_code_alpaca_token_set_f1": 0.18527682422778052,
      "eval_python_code_alpaca_token_set_f1_sem": 0.004852335637347622,
      "eval_python_code_alpaca_token_set_precision": 0.14098828842063427,
      "eval_python_code_alpaca_token_set_recall": 0.3938896141698001,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 5000
    },
    {
      "epoch": 0.96,
      "eval_wikibio_accuracy": 0.2325625,
      "eval_wikibio_bleu_score": 2.338179388643287,
      "eval_wikibio_bleu_score_sem": 0.1289790578864121,
      "eval_wikibio_emb_cos_sim": 0.42805343866348267,
      "eval_wikibio_emb_cos_sim_sem": 0.014510646277584588,
      "eval_wikibio_emb_top1_equal": 0.0625,
      "eval_wikibio_emb_top1_equal_sem": 0.02147948148198014,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 5.084056377410889,
      "eval_wikibio_n_ngrams_match_1": 4.716,
      "eval_wikibio_n_ngrams_match_2": 1.188,
      "eval_wikibio_n_ngrams_match_3": 0.356,
      "eval_wikibio_num_pred_words": 32.336,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 161.42754067166106,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.1416051293678927,
      "eval_wikibio_runtime": 10.0452,
      "eval_wikibio_samples_per_second": 49.775,
      "eval_wikibio_steps_per_second": 0.1,
      "eval_wikibio_token_set_f1": 0.165290252508428,
      "eval_wikibio_token_set_f1_sem": 0.00636284664152837,
      "eval_wikibio_token_set_precision": 0.1467745112418667,
      "eval_wikibio_token_set_recall": 0.2362651750709804,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 5000
    },
    {
      "epoch": 0.96,
      "eval_nq_accuracy": 0.37525,
      "eval_nq_bleu_score": 4.557390861796473,
      "eval_nq_bleu_score_sem": 0.23243420417502222,
      "eval_nq_emb_cos_sim": 0.5068303346633911,
      "eval_nq_emb_cos_sim_sem": 0.011473194028211318,
      "eval_nq_emb_top1_equal": 0.0625,
      "eval_nq_emb_top1_equal_sem": 0.02147948148198014,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 3.537583589553833,
      "eval_nq_n_ngrams_match_1": 14.022,
      "eval_nq_n_ngrams_match_2": 3.49,
      "eval_nq_n_ngrams_match_3": 1.104,
      "eval_nq_num_pred_words": 45.286,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 34.38373351312476,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.2531003356575011,
      "eval_nq_runtime": 10.5932,
      "eval_nq_samples_per_second": 47.2,
      "eval_nq_steps_per_second": 0.094,
      "eval_nq_token_set_f1": 0.2972630078217043,
      "eval_nq_token_set_f1_sem": 0.0054111995513725095,
      "eval_nq_token_set_precision": 0.22921126040451803,
      "eval_nq_token_set_recall": 0.47529325357232133,
      "eval_nq_true_num_tokens": 64.0,
      "step": 5000
    },
    {
      "epoch": 0.96,
      "learning_rate": 0.001,
      "loss": 3.8936,
      "step": 5004
    },
    {
      "epoch": 0.96,
      "learning_rate": 0.001,
      "loss": 3.888,
      "step": 5016
    },
    {
      "epoch": 0.97,
      "learning_rate": 0.001,
      "loss": 3.8867,
      "step": 5028
    },
    {
      "epoch": 0.97,
      "learning_rate": 0.001,
      "loss": 3.8891,
      "step": 5040
    },
    {
      "epoch": 0.97,
      "learning_rate": 0.001,
      "loss": 3.8846,
      "step": 5052
    },
    {
      "epoch": 0.97,
      "learning_rate": 0.001,
      "loss": 3.8876,
      "step": 5064
    },
    {
      "epoch": 0.97,
      "learning_rate": 0.001,
      "loss": 3.8796,
      "step": 5076
    },
    {
      "epoch": 0.98,
      "learning_rate": 0.001,
      "loss": 3.8946,
      "step": 5088
    },
    {
      "epoch": 0.98,
      "learning_rate": 0.001,
      "loss": 3.8845,
      "step": 5100
    },
    {
      "epoch": 0.98,
      "learning_rate": 0.001,
      "loss": 3.881,
      "step": 5112
    },
    {
      "epoch": 0.98,
      "learning_rate": 0.001,
      "loss": 3.8666,
      "step": 5124
    },
    {
      "epoch": 0.99,
      "learning_rate": 0.001,
      "loss": 3.8721,
      "step": 5136
    },
    {
      "epoch": 0.99,
      "learning_rate": 0.001,
      "loss": 3.881,
      "step": 5148
    },
    {
      "epoch": 0.99,
      "learning_rate": 0.001,
      "loss": 3.8671,
      "step": 5160
    },
    {
      "epoch": 0.99,
      "learning_rate": 0.001,
      "loss": 3.8598,
      "step": 5172
    },
    {
      "epoch": 1.0,
      "learning_rate": 0.001,
      "loss": 3.8737,
      "step": 5184
    },
    {
      "epoch": 1.0,
      "learning_rate": 0.001,
      "loss": 3.8576,
      "step": 5196
    },
    {
      "epoch": 1.0,
      "learning_rate": 0.001,
      "loss": 3.8691,
      "step": 5208
    },
    {
      "epoch": 1.0,
      "learning_rate": 0.001,
      "loss": 3.8648,
      "step": 5220
    },
    {
      "epoch": 1.0,
      "learning_rate": 0.001,
      "loss": 3.8565,
      "step": 5232
    },
    {
      "epoch": 1.01,
      "learning_rate": 0.001,
      "loss": 3.8543,
      "step": 5244
    },
    {
      "epoch": 1.01,
      "learning_rate": 0.001,
      "loss": 3.8476,
      "step": 5256
    },
    {
      "epoch": 1.01,
      "learning_rate": 0.001,
      "loss": 3.8492,
      "step": 5268
    },
    {
      "epoch": 1.01,
      "learning_rate": 0.001,
      "loss": 3.8457,
      "step": 5280
    },
    {
      "epoch": 1.02,
      "learning_rate": 0.001,
      "loss": 3.829,
      "step": 5292
    },
    {
      "epoch": 1.02,
      "learning_rate": 0.001,
      "loss": 3.8285,
      "step": 5304
    },
    {
      "epoch": 1.02,
      "learning_rate": 0.001,
      "loss": 3.831,
      "step": 5316
    },
    {
      "epoch": 1.02,
      "learning_rate": 0.001,
      "loss": 3.8359,
      "step": 5328
    },
    {
      "epoch": 1.03,
      "learning_rate": 0.001,
      "loss": 3.8366,
      "step": 5340
    },
    {
      "epoch": 1.03,
      "learning_rate": 0.001,
      "loss": 3.8273,
      "step": 5352
    },
    {
      "epoch": 1.03,
      "learning_rate": 0.001,
      "loss": 3.835,
      "step": 5364
    },
    {
      "epoch": 1.03,
      "learning_rate": 0.001,
      "loss": 3.8325,
      "step": 5376
    },
    {
      "epoch": 1.03,
      "learning_rate": 0.001,
      "loss": 3.831,
      "step": 5388
    },
    {
      "epoch": 1.04,
      "learning_rate": 0.001,
      "loss": 3.8246,
      "step": 5400
    },
    {
      "epoch": 1.04,
      "learning_rate": 0.001,
      "loss": 3.8218,
      "step": 5412
    },
    {
      "epoch": 1.04,
      "learning_rate": 0.001,
      "loss": 3.8199,
      "step": 5424
    },
    {
      "epoch": 1.04,
      "learning_rate": 0.001,
      "loss": 3.8166,
      "step": 5436
    },
    {
      "epoch": 1.05,
      "learning_rate": 0.001,
      "loss": 3.8245,
      "step": 5448
    },
    {
      "epoch": 1.05,
      "learning_rate": 0.001,
      "loss": 3.8157,
      "step": 5460
    },
    {
      "epoch": 1.05,
      "learning_rate": 0.001,
      "loss": 3.8211,
      "step": 5472
    },
    {
      "epoch": 1.05,
      "learning_rate": 0.001,
      "loss": 3.8138,
      "step": 5484
    },
    {
      "epoch": 1.06,
      "learning_rate": 0.001,
      "loss": 3.8078,
      "step": 5496
    },
    {
      "epoch": 1.06,
      "learning_rate": 0.001,
      "loss": 3.8049,
      "step": 5508
    },
    {
      "epoch": 1.06,
      "learning_rate": 0.001,
      "loss": 3.8187,
      "step": 5520
    },
    {
      "epoch": 1.06,
      "learning_rate": 0.001,
      "loss": 3.8156,
      "step": 5532
    },
    {
      "epoch": 1.06,
      "learning_rate": 0.001,
      "loss": 3.8031,
      "step": 5544
    },
    {
      "epoch": 1.07,
      "learning_rate": 0.001,
      "loss": 3.7988,
      "step": 5556
    },
    {
      "epoch": 1.07,
      "learning_rate": 0.001,
      "loss": 3.8095,
      "step": 5568
    },
    {
      "epoch": 1.07,
      "learning_rate": 0.001,
      "loss": 3.7981,
      "step": 5580
    },
    {
      "epoch": 1.07,
      "learning_rate": 0.001,
      "loss": 3.7886,
      "step": 5592
    },
    {
      "epoch": 1.08,
      "learning_rate": 0.001,
      "loss": 3.8051,
      "step": 5604
    },
    {
      "epoch": 1.08,
      "learning_rate": 0.001,
      "loss": 3.7981,
      "step": 5616
    },
    {
      "epoch": 1.08,
      "eval_ag_news_accuracy": 0.21903125,
      "eval_ag_news_bleu_score": 2.0852212979487286,
      "eval_ag_news_bleu_score_sem": 0.0772518840990318,
      "eval_ag_news_emb_cos_sim": 0.45783761143684387,
      "eval_ag_news_emb_cos_sim_sem": 0.011666158105762381,
      "eval_ag_news_emb_top1_equal": 0.078125,
      "eval_ag_news_emb_top1_equal_sem": 0.023813825516515504,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 4.948986530303955,
      "eval_ag_news_n_ngrams_match_1": 6.886,
      "eval_ag_news_n_ngrams_match_2": 0.95,
      "eval_ag_news_n_ngrams_match_3": 0.168,
      "eval_ag_news_num_pred_words": 41.724,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 141.0319598510883,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.16286868877192381,
      "eval_ag_news_runtime": 13.7559,
      "eval_ag_news_samples_per_second": 36.348,
      "eval_ag_news_steps_per_second": 0.073,
      "eval_ag_news_token_set_f1": 0.2019419518206525,
      "eval_ag_news_token_set_f1_sem": 0.00438958396582974,
      "eval_ag_news_token_set_precision": 0.15462844390118893,
      "eval_ag_news_token_set_recall": 0.3451922715581034,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 5625
    },
    {
      "epoch": 1.08,
      "eval_anthropic_toxic_prompts_accuracy": 0.063125,
      "eval_anthropic_toxic_prompts_bleu_score": 1.3700619974288806,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.065019037336038,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.3364288806915283,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011141132179684142,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.011004959004867984,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 4.73537540435791,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 2.64,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.358,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.072,
      "eval_anthropic_toxic_prompts_num_pred_words": 39.0,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 113.90621158112307,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.09601927839850205,
      "eval_anthropic_toxic_prompts_runtime": 14.1978,
      "eval_anthropic_toxic_prompts_samples_per_second": 35.217,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.07,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.17096730800689502,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0054358416627761875,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.1633684673481088,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.23170527979202382,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 5625
    },
    {
      "epoch": 1.08,
      "eval_arxiv_accuracy": 0.25628125,
      "eval_arxiv_bleu_score": 1.5205202660326982,
      "eval_arxiv_bleu_score_sem": 0.05244642374548896,
      "eval_arxiv_emb_cos_sim": 0.35741275548934937,
      "eval_arxiv_emb_cos_sim_sem": 0.009298090665517733,
      "eval_arxiv_emb_top1_equal": 0.109375,
      "eval_arxiv_emb_top1_equal_sem": 0.027695207821224692,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 4.674412250518799,
      "eval_arxiv_n_ngrams_match_1": 6.126,
      "eval_arxiv_n_ngrams_match_2": 0.69,
      "eval_arxiv_n_ngrams_match_3": 0.052,
      "eval_arxiv_num_pred_words": 28.542,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 107.1695597244559,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.1524461639578788,
      "eval_arxiv_runtime": 13.2423,
      "eval_arxiv_samples_per_second": 37.758,
      "eval_arxiv_steps_per_second": 0.076,
      "eval_arxiv_token_set_f1": 0.16228751855782006,
      "eval_arxiv_token_set_f1_sem": 0.0040447406491193215,
      "eval_arxiv_token_set_precision": 0.10593070802353624,
      "eval_arxiv_token_set_recall": 0.43974020961337784,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 5625
    },
    {
      "epoch": 1.08,
      "eval_python_code_alpaca_accuracy": 0.092875,
      "eval_python_code_alpaca_bleu_score": 1.9706582151946554,
      "eval_python_code_alpaca_bleu_score_sem": 0.05821691067171762,
      "eval_python_code_alpaca_emb_cos_sim": 0.25949281454086304,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007641797885836048,
      "eval_python_code_alpaca_emb_top1_equal": 0.0078125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.007812499866294757,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 4.772955894470215,
      "eval_python_code_alpaca_n_ngrams_match_1": 3.392,
      "eval_python_code_alpaca_n_ngrams_match_2": 0.268,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.036,
      "eval_python_code_alpaca_num_pred_words": 24.73,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 118.26831445247535,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.13994791171993992,
      "eval_python_code_alpaca_runtime": 12.8739,
      "eval_python_code_alpaca_samples_per_second": 38.838,
      "eval_python_code_alpaca_steps_per_second": 0.078,
      "eval_python_code_alpaca_token_set_f1": 0.2212643767192042,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0048426131391084365,
      "eval_python_code_alpaca_token_set_precision": 0.16368758003188943,
      "eval_python_code_alpaca_token_set_recall": 0.44087733508321764,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 5625
    },
    {
      "epoch": 1.08,
      "eval_wikibio_accuracy": 0.23365625,
      "eval_wikibio_bleu_score": 2.568111483794025,
      "eval_wikibio_bleu_score_sem": 0.14700819701895634,
      "eval_wikibio_emb_cos_sim": 0.4436691403388977,
      "eval_wikibio_emb_cos_sim_sem": 0.013843581520757589,
      "eval_wikibio_emb_top1_equal": 0.046875,
      "eval_wikibio_emb_top1_equal_sem": 0.01875615101164758,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.976119041442871,
      "eval_wikibio_n_ngrams_match_1": 4.93,
      "eval_wikibio_n_ngrams_match_2": 1.328,
      "eval_wikibio_n_ngrams_match_3": 0.382,
      "eval_wikibio_num_pred_words": 31.756,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 144.91089574727437,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.15660539151735578,
      "eval_wikibio_runtime": 12.4222,
      "eval_wikibio_samples_per_second": 40.251,
      "eval_wikibio_steps_per_second": 0.081,
      "eval_wikibio_token_set_f1": 0.1738614240702313,
      "eval_wikibio_token_set_f1_sem": 0.006722789829280542,
      "eval_wikibio_token_set_precision": 0.15653899173453673,
      "eval_wikibio_token_set_recall": 0.23529594477293445,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 5625
    },
    {
      "epoch": 1.08,
      "eval_nq_accuracy": 0.3849375,
      "eval_nq_bleu_score": 4.976035605292294,
      "eval_nq_bleu_score_sem": 0.2442060204228987,
      "eval_nq_emb_cos_sim": 0.5293945074081421,
      "eval_nq_emb_cos_sim_sem": 0.0113166395594176,
      "eval_nq_emb_top1_equal": 0.125,
      "eval_nq_emb_top1_equal_sem": 0.02934655822437397,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 3.4444541931152344,
      "eval_nq_n_ngrams_match_1": 14.606,
      "eval_nq_n_ngrams_match_2": 3.786,
      "eval_nq_n_ngrams_match_3": 1.268,
      "eval_nq_num_pred_words": 45.808,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 31.326180734586615,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.26750302501509104,
      "eval_nq_runtime": 13.693,
      "eval_nq_samples_per_second": 36.515,
      "eval_nq_steps_per_second": 0.073,
      "eval_nq_token_set_f1": 0.3101056589551926,
      "eval_nq_token_set_f1_sem": 0.005158972462744809,
      "eval_nq_token_set_precision": 0.24073283826221548,
      "eval_nq_token_set_recall": 0.48357738017709817,
      "eval_nq_true_num_tokens": 64.0,
      "step": 5625
    },
    {
      "epoch": 1.08,
      "learning_rate": 0.001,
      "loss": 3.797,
      "step": 5628
    },
    {
      "epoch": 1.08,
      "learning_rate": 0.001,
      "loss": 3.7992,
      "step": 5640
    },
    {
      "epoch": 1.09,
      "learning_rate": 0.001,
      "loss": 3.7971,
      "step": 5652
    },
    {
      "epoch": 1.09,
      "learning_rate": 0.001,
      "loss": 3.7993,
      "step": 5664
    },
    {
      "epoch": 1.09,
      "learning_rate": 0.001,
      "loss": 3.7937,
      "step": 5676
    },
    {
      "epoch": 1.09,
      "learning_rate": 0.001,
      "loss": 3.7853,
      "step": 5688
    },
    {
      "epoch": 1.09,
      "learning_rate": 0.001,
      "loss": 3.789,
      "step": 5700
    },
    {
      "epoch": 1.1,
      "learning_rate": 0.001,
      "loss": 3.7834,
      "step": 5712
    },
    {
      "epoch": 1.1,
      "learning_rate": 0.001,
      "loss": 3.7776,
      "step": 5724
    },
    {
      "epoch": 1.1,
      "learning_rate": 0.001,
      "loss": 3.7867,
      "step": 5736
    },
    {
      "epoch": 1.1,
      "learning_rate": 0.001,
      "loss": 3.7764,
      "step": 5748
    },
    {
      "epoch": 1.11,
      "learning_rate": 0.001,
      "loss": 3.778,
      "step": 5760
    },
    {
      "epoch": 1.11,
      "learning_rate": 0.001,
      "loss": 3.7792,
      "step": 5772
    },
    {
      "epoch": 1.11,
      "learning_rate": 0.001,
      "loss": 3.7707,
      "step": 5784
    },
    {
      "epoch": 1.11,
      "learning_rate": 0.001,
      "loss": 3.7826,
      "step": 5796
    },
    {
      "epoch": 1.12,
      "learning_rate": 0.001,
      "loss": 3.7763,
      "step": 5808
    },
    {
      "epoch": 1.12,
      "learning_rate": 0.001,
      "loss": 3.7738,
      "step": 5820
    },
    {
      "epoch": 1.12,
      "learning_rate": 0.001,
      "loss": 3.7572,
      "step": 5832
    },
    {
      "epoch": 1.12,
      "learning_rate": 0.001,
      "loss": 3.7643,
      "step": 5844
    },
    {
      "epoch": 1.12,
      "learning_rate": 0.001,
      "loss": 3.7509,
      "step": 5856
    },
    {
      "epoch": 1.13,
      "learning_rate": 0.001,
      "loss": 3.7663,
      "step": 5868
    },
    {
      "epoch": 1.13,
      "learning_rate": 0.001,
      "loss": 3.7724,
      "step": 5880
    },
    {
      "epoch": 1.13,
      "learning_rate": 0.001,
      "loss": 3.7559,
      "step": 5892
    },
    {
      "epoch": 1.13,
      "learning_rate": 0.001,
      "loss": 3.7517,
      "step": 5904
    },
    {
      "epoch": 1.14,
      "learning_rate": 0.001,
      "loss": 3.7599,
      "step": 5916
    },
    {
      "epoch": 1.14,
      "learning_rate": 0.001,
      "loss": 3.7574,
      "step": 5928
    },
    {
      "epoch": 1.14,
      "learning_rate": 0.001,
      "loss": 3.7599,
      "step": 5940
    },
    {
      "epoch": 1.14,
      "learning_rate": 0.001,
      "loss": 3.7517,
      "step": 5952
    },
    {
      "epoch": 1.15,
      "learning_rate": 0.001,
      "loss": 3.7533,
      "step": 5964
    },
    {
      "epoch": 1.15,
      "learning_rate": 0.001,
      "loss": 3.7509,
      "step": 5976
    },
    {
      "epoch": 1.15,
      "learning_rate": 0.001,
      "loss": 3.7469,
      "step": 5988
    },
    {
      "epoch": 1.15,
      "learning_rate": 0.001,
      "loss": 3.7452,
      "step": 6000
    },
    {
      "epoch": 1.15,
      "learning_rate": 0.001,
      "loss": 3.7334,
      "step": 6012
    },
    {
      "epoch": 1.16,
      "learning_rate": 0.001,
      "loss": 3.7406,
      "step": 6024
    },
    {
      "epoch": 1.16,
      "learning_rate": 0.001,
      "loss": 3.743,
      "step": 6036
    },
    {
      "epoch": 1.16,
      "learning_rate": 0.001,
      "loss": 3.7536,
      "step": 6048
    },
    {
      "epoch": 1.16,
      "learning_rate": 0.001,
      "loss": 3.7452,
      "step": 6060
    },
    {
      "epoch": 1.17,
      "learning_rate": 0.001,
      "loss": 3.7401,
      "step": 6072
    },
    {
      "epoch": 1.17,
      "learning_rate": 0.001,
      "loss": 3.7392,
      "step": 6084
    },
    {
      "epoch": 1.17,
      "learning_rate": 0.001,
      "loss": 3.737,
      "step": 6096
    },
    {
      "epoch": 1.17,
      "learning_rate": 0.001,
      "loss": 3.7437,
      "step": 6108
    },
    {
      "epoch": 1.18,
      "learning_rate": 0.001,
      "loss": 3.7357,
      "step": 6120
    },
    {
      "epoch": 1.18,
      "learning_rate": 0.001,
      "loss": 3.7265,
      "step": 6132
    },
    {
      "epoch": 1.18,
      "learning_rate": 0.001,
      "loss": 3.7375,
      "step": 6144
    },
    {
      "epoch": 1.18,
      "learning_rate": 0.001,
      "loss": 3.7297,
      "step": 6156
    },
    {
      "epoch": 1.18,
      "learning_rate": 0.001,
      "loss": 3.7449,
      "step": 6168
    },
    {
      "epoch": 1.19,
      "learning_rate": 0.001,
      "loss": 3.7342,
      "step": 6180
    },
    {
      "epoch": 1.19,
      "learning_rate": 0.001,
      "loss": 3.7372,
      "step": 6192
    },
    {
      "epoch": 1.19,
      "learning_rate": 0.001,
      "loss": 3.7281,
      "step": 6204
    },
    {
      "epoch": 1.19,
      "learning_rate": 0.001,
      "loss": 3.7244,
      "step": 6216
    },
    {
      "epoch": 1.2,
      "learning_rate": 0.001,
      "loss": 3.7202,
      "step": 6228
    },
    {
      "epoch": 1.2,
      "learning_rate": 0.001,
      "loss": 3.7292,
      "step": 6240
    },
    {
      "epoch": 1.2,
      "eval_ag_news_accuracy": 0.223625,
      "eval_ag_news_bleu_score": 2.2003893664469225,
      "eval_ag_news_bleu_score_sem": 0.07600295288295764,
      "eval_ag_news_emb_cos_sim": 0.46764129400253296,
      "eval_ag_news_emb_cos_sim_sem": 0.012417467827317579,
      "eval_ag_news_emb_top1_equal": 0.078125,
      "eval_ag_news_emb_top1_equal_sem": 0.023813825516515504,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 4.886475086212158,
      "eval_ag_news_n_ngrams_match_1": 7.52,
      "eval_ag_news_n_ngrams_match_2": 1.008,
      "eval_ag_news_n_ngrams_match_3": 0.162,
      "eval_ag_news_num_pred_words": 42.634,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 132.48574917054702,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.17402363367654597,
      "eval_ag_news_runtime": 13.7861,
      "eval_ag_news_samples_per_second": 36.268,
      "eval_ag_news_steps_per_second": 0.073,
      "eval_ag_news_token_set_f1": 0.21561945554735026,
      "eval_ag_news_token_set_f1_sem": 0.004142957390752193,
      "eval_ag_news_token_set_precision": 0.16989426677959563,
      "eval_ag_news_token_set_recall": 0.34137294115375916,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 6250
    },
    {
      "epoch": 1.2,
      "eval_anthropic_toxic_prompts_accuracy": 0.0645625,
      "eval_anthropic_toxic_prompts_bleu_score": 1.39568488779284,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.05493467295759028,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.35283318161964417,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.012606663713193966,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.011004959004867984,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 4.620936393737793,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 2.878,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.438,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.102,
      "eval_anthropic_toxic_prompts_num_pred_words": 41.662,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 101.58911501621725,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.10442383902310974,
      "eval_anthropic_toxic_prompts_runtime": 12.5933,
      "eval_anthropic_toxic_prompts_samples_per_second": 39.704,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.079,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.18060501522240538,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0052871735653605475,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.18159040562726841,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.22196139809902857,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 6250
    },
    {
      "epoch": 1.2,
      "eval_arxiv_accuracy": 0.2576875,
      "eval_arxiv_bleu_score": 1.78556157209702,
      "eval_arxiv_bleu_score_sem": 0.054154260798773265,
      "eval_arxiv_emb_cos_sim": 0.3743008077144623,
      "eval_arxiv_emb_cos_sim_sem": 0.009504373805742749,
      "eval_arxiv_emb_top1_equal": 0.1484375,
      "eval_arxiv_emb_top1_equal_sem": 0.031548465007086954,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 4.627006530761719,
      "eval_arxiv_n_ngrams_match_1": 7.1,
      "eval_arxiv_n_ngrams_match_2": 0.86,
      "eval_arxiv_n_ngrams_match_3": 0.064,
      "eval_arxiv_num_pred_words": 32.822,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 102.20765026211305,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.17126011039447742,
      "eval_arxiv_runtime": 13.9229,
      "eval_arxiv_samples_per_second": 35.912,
      "eval_arxiv_steps_per_second": 0.072,
      "eval_arxiv_token_set_f1": 0.17412617347653625,
      "eval_arxiv_token_set_f1_sem": 0.0041455396705183,
      "eval_arxiv_token_set_precision": 0.11760084720737768,
      "eval_arxiv_token_set_recall": 0.41740520863090214,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 6250
    },
    {
      "epoch": 1.2,
      "eval_python_code_alpaca_accuracy": 0.0935625,
      "eval_python_code_alpaca_bleu_score": 1.8913629355214616,
      "eval_python_code_alpaca_bleu_score_sem": 0.053854219366134104,
      "eval_python_code_alpaca_emb_cos_sim": 0.2851085662841797,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008405237338683778,
      "eval_python_code_alpaca_emb_top1_equal": 0.0078125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.007812499866294757,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 4.660333156585693,
      "eval_python_code_alpaca_n_ngrams_match_1": 3.96,
      "eval_python_code_alpaca_n_ngrams_match_2": 0.354,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.056,
      "eval_python_code_alpaca_num_pred_words": 29.21,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 105.67128137621717,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.15402839704368126,
      "eval_python_code_alpaca_runtime": 13.0877,
      "eval_python_code_alpaca_samples_per_second": 38.204,
      "eval_python_code_alpaca_steps_per_second": 0.076,
      "eval_python_code_alpaca_token_set_f1": 0.2385022744973973,
      "eval_python_code_alpaca_token_set_f1_sem": 0.004628218158418873,
      "eval_python_code_alpaca_token_set_precision": 0.19286711580654978,
      "eval_python_code_alpaca_token_set_recall": 0.3931378201845569,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 6250
    },
    {
      "epoch": 1.2,
      "eval_wikibio_accuracy": 0.2395625,
      "eval_wikibio_bleu_score": 2.898854425610419,
      "eval_wikibio_bleu_score_sem": 0.1332226159733259,
      "eval_wikibio_emb_cos_sim": 0.49516212940216064,
      "eval_wikibio_emb_cos_sim_sem": 0.014601578103246842,
      "eval_wikibio_emb_top1_equal": 0.046875,
      "eval_wikibio_emb_top1_equal_sem": 0.01875615101164758,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.961064338684082,
      "eval_wikibio_n_ngrams_match_1": 6.064,
      "eval_wikibio_n_ngrams_match_2": 1.618,
      "eval_wikibio_n_ngrams_match_3": 0.464,
      "eval_wikibio_num_pred_words": 32.302,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 142.74564478506045,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.18800090028871008,
      "eval_wikibio_runtime": 13.1337,
      "eval_wikibio_samples_per_second": 38.07,
      "eval_wikibio_steps_per_second": 0.076,
      "eval_wikibio_token_set_f1": 0.20637467220203892,
      "eval_wikibio_token_set_f1_sem": 0.006449955585241066,
      "eval_wikibio_token_set_precision": 0.1918647587293939,
      "eval_wikibio_token_set_recall": 0.25269720979397575,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 6250
    },
    {
      "epoch": 1.2,
      "eval_nq_accuracy": 0.38953125,
      "eval_nq_bleu_score": 4.960028595506113,
      "eval_nq_bleu_score_sem": 0.21695526516078933,
      "eval_nq_emb_cos_sim": 0.5501593351364136,
      "eval_nq_emb_cos_sim_sem": 0.011242187151523107,
      "eval_nq_emb_top1_equal": 0.1171875,
      "eval_nq_emb_top1_equal_sem": 0.02854125312152025,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 3.377890110015869,
      "eval_nq_n_ngrams_match_1": 15.26,
      "eval_nq_n_ngrams_match_2": 3.924,
      "eval_nq_n_ngrams_match_3": 1.21,
      "eval_nq_num_pred_words": 47.27,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 29.308867345525517,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.27758628528050355,
      "eval_nq_runtime": 13.1406,
      "eval_nq_samples_per_second": 38.05,
      "eval_nq_steps_per_second": 0.076,
      "eval_nq_token_set_f1": 0.321318805484751,
      "eval_nq_token_set_f1_sem": 0.005013425778853512,
      "eval_nq_token_set_precision": 0.25456366425467947,
      "eval_nq_token_set_recall": 0.47418945549125713,
      "eval_nq_true_num_tokens": 64.0,
      "step": 6250
    },
    {
      "epoch": 1.2,
      "learning_rate": 0.001,
      "loss": 3.7221,
      "step": 6252
    },
    {
      "epoch": 1.2,
      "learning_rate": 0.001,
      "loss": 3.7203,
      "step": 6264
    },
    {
      "epoch": 1.21,
      "learning_rate": 0.001,
      "loss": 3.7212,
      "step": 6276
    },
    {
      "epoch": 1.21,
      "learning_rate": 0.001,
      "loss": 3.7213,
      "step": 6288
    },
    {
      "epoch": 1.21,
      "learning_rate": 0.001,
      "loss": 3.7104,
      "step": 6300
    },
    {
      "epoch": 1.21,
      "learning_rate": 0.001,
      "loss": 3.7183,
      "step": 6312
    },
    {
      "epoch": 1.21,
      "learning_rate": 0.001,
      "loss": 3.7201,
      "step": 6324
    },
    {
      "epoch": 1.22,
      "learning_rate": 0.001,
      "loss": 3.7067,
      "step": 6336
    },
    {
      "epoch": 1.22,
      "learning_rate": 0.001,
      "loss": 3.7074,
      "step": 6348
    },
    {
      "epoch": 1.22,
      "learning_rate": 0.001,
      "loss": 3.7074,
      "step": 6360
    },
    {
      "epoch": 1.22,
      "learning_rate": 0.001,
      "loss": 3.707,
      "step": 6372
    },
    {
      "epoch": 1.23,
      "learning_rate": 0.001,
      "loss": 3.7127,
      "step": 6384
    },
    {
      "epoch": 1.23,
      "learning_rate": 0.001,
      "loss": 3.7058,
      "step": 6396
    },
    {
      "epoch": 1.23,
      "learning_rate": 0.001,
      "loss": 3.7065,
      "step": 6408
    },
    {
      "epoch": 1.23,
      "learning_rate": 0.001,
      "loss": 3.7012,
      "step": 6420
    },
    {
      "epoch": 1.24,
      "learning_rate": 0.001,
      "loss": 3.6866,
      "step": 6432
    },
    {
      "epoch": 1.24,
      "learning_rate": 0.001,
      "loss": 3.7008,
      "step": 6444
    },
    {
      "epoch": 1.24,
      "learning_rate": 0.001,
      "loss": 3.6935,
      "step": 6456
    },
    {
      "epoch": 1.24,
      "learning_rate": 0.001,
      "loss": 3.6989,
      "step": 6468
    },
    {
      "epoch": 1.24,
      "learning_rate": 0.001,
      "loss": 3.6949,
      "step": 6480
    },
    {
      "epoch": 1.25,
      "learning_rate": 0.001,
      "loss": 3.6986,
      "step": 6492
    },
    {
      "epoch": 1.25,
      "learning_rate": 0.001,
      "loss": 3.7,
      "step": 6504
    },
    {
      "epoch": 1.25,
      "learning_rate": 0.001,
      "loss": 3.6935,
      "step": 6516
    },
    {
      "epoch": 1.25,
      "learning_rate": 0.001,
      "loss": 3.6948,
      "step": 6528
    },
    {
      "epoch": 1.26,
      "learning_rate": 0.001,
      "loss": 3.6923,
      "step": 6540
    },
    {
      "epoch": 1.26,
      "learning_rate": 0.001,
      "loss": 3.6941,
      "step": 6552
    },
    {
      "epoch": 1.26,
      "learning_rate": 0.001,
      "loss": 3.679,
      "step": 6564
    },
    {
      "epoch": 1.26,
      "learning_rate": 0.001,
      "loss": 3.6793,
      "step": 6576
    },
    {
      "epoch": 1.26,
      "learning_rate": 0.001,
      "loss": 3.6846,
      "step": 6588
    },
    {
      "epoch": 1.27,
      "learning_rate": 0.001,
      "loss": 3.6882,
      "step": 6600
    },
    {
      "epoch": 1.27,
      "learning_rate": 0.001,
      "loss": 3.6831,
      "step": 6612
    },
    {
      "epoch": 1.27,
      "learning_rate": 0.001,
      "loss": 3.6803,
      "step": 6624
    },
    {
      "epoch": 1.27,
      "learning_rate": 0.001,
      "loss": 3.6874,
      "step": 6636
    },
    {
      "epoch": 1.28,
      "learning_rate": 0.001,
      "loss": 3.6734,
      "step": 6648
    },
    {
      "epoch": 1.28,
      "learning_rate": 0.001,
      "loss": 3.6657,
      "step": 6660
    },
    {
      "epoch": 1.28,
      "learning_rate": 0.001,
      "loss": 3.6804,
      "step": 6672
    },
    {
      "epoch": 1.28,
      "learning_rate": 0.001,
      "loss": 3.6708,
      "step": 6684
    },
    {
      "epoch": 1.29,
      "learning_rate": 0.001,
      "loss": 3.6885,
      "step": 6696
    },
    {
      "epoch": 1.29,
      "learning_rate": 0.001,
      "loss": 3.6637,
      "step": 6708
    },
    {
      "epoch": 1.29,
      "learning_rate": 0.001,
      "loss": 3.6682,
      "step": 6720
    },
    {
      "epoch": 1.29,
      "learning_rate": 0.001,
      "loss": 3.6622,
      "step": 6732
    },
    {
      "epoch": 1.29,
      "learning_rate": 0.001,
      "loss": 3.6721,
      "step": 6744
    },
    {
      "epoch": 1.3,
      "learning_rate": 0.001,
      "loss": 3.6739,
      "step": 6756
    },
    {
      "epoch": 1.3,
      "learning_rate": 0.001,
      "loss": 3.6681,
      "step": 6768
    },
    {
      "epoch": 1.3,
      "learning_rate": 0.001,
      "loss": 3.6696,
      "step": 6780
    },
    {
      "epoch": 1.3,
      "learning_rate": 0.001,
      "loss": 3.665,
      "step": 6792
    },
    {
      "epoch": 1.31,
      "learning_rate": 0.001,
      "loss": 3.6653,
      "step": 6804
    },
    {
      "epoch": 1.31,
      "learning_rate": 0.001,
      "loss": 3.6636,
      "step": 6816
    },
    {
      "epoch": 1.31,
      "learning_rate": 0.001,
      "loss": 3.662,
      "step": 6828
    },
    {
      "epoch": 1.31,
      "learning_rate": 0.001,
      "loss": 3.6646,
      "step": 6840
    },
    {
      "epoch": 1.32,
      "learning_rate": 0.001,
      "loss": 3.6456,
      "step": 6852
    },
    {
      "epoch": 1.32,
      "learning_rate": 0.001,
      "loss": 3.656,
      "step": 6864
    },
    {
      "epoch": 1.32,
      "eval_ag_news_accuracy": 0.2288125,
      "eval_ag_news_bleu_score": 2.359588628164019,
      "eval_ag_news_bleu_score_sem": 0.07888955746474915,
      "eval_ag_news_emb_cos_sim": 0.49569883942604065,
      "eval_ag_news_emb_cos_sim_sem": 0.01201664998088996,
      "eval_ag_news_emb_top1_equal": 0.09375,
      "eval_ag_news_emb_top1_equal_sem": 0.025864720141013958,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 4.809776306152344,
      "eval_ag_news_n_ngrams_match_1": 7.838,
      "eval_ag_news_n_ngrams_match_2": 1.138,
      "eval_ag_news_n_ngrams_match_3": 0.254,
      "eval_ag_news_num_pred_words": 42.598,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 122.7041662799646,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.1838621076063327,
      "eval_ag_news_runtime": 10.7475,
      "eval_ag_news_samples_per_second": 46.522,
      "eval_ag_news_steps_per_second": 0.093,
      "eval_ag_news_token_set_f1": 0.22020565527904667,
      "eval_ag_news_token_set_f1_sem": 0.004352469007725244,
      "eval_ag_news_token_set_precision": 0.17789081828133854,
      "eval_ag_news_token_set_recall": 0.3290245425284207,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 6875
    },
    {
      "epoch": 1.32,
      "eval_anthropic_toxic_prompts_accuracy": 0.06628125,
      "eval_anthropic_toxic_prompts_bleu_score": 1.426577494431888,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.05917457012513431,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.352634072303772,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011052422283191822,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0078125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.007812499866294757,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 4.573147296905518,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 2.974,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.454,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.104,
      "eval_anthropic_toxic_prompts_num_pred_words": 41.436,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 96.84844141057704,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.10815269320897025,
      "eval_anthropic_toxic_prompts_runtime": 10.3478,
      "eval_anthropic_toxic_prompts_samples_per_second": 48.319,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.097,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.1814536768355274,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005195580488782654,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.19031185557393956,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.2187545456208069,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 6875
    },
    {
      "epoch": 1.32,
      "eval_arxiv_accuracy": 0.2654375,
      "eval_arxiv_bleu_score": 1.8755043412212522,
      "eval_arxiv_bleu_score_sem": 0.05895104761607006,
      "eval_arxiv_emb_cos_sim": 0.39819103479385376,
      "eval_arxiv_emb_cos_sim_sem": 0.00897163698202054,
      "eval_arxiv_emb_top1_equal": 0.1171875,
      "eval_arxiv_emb_top1_equal_sem": 0.02854125312152025,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 4.549220561981201,
      "eval_arxiv_n_ngrams_match_1": 7.446,
      "eval_arxiv_n_ngrams_match_2": 0.942,
      "eval_arxiv_n_ngrams_match_3": 0.088,
      "eval_arxiv_num_pred_words": 33.468,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 94.55867695631777,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.17669009847892378,
      "eval_arxiv_runtime": 10.13,
      "eval_arxiv_samples_per_second": 49.358,
      "eval_arxiv_steps_per_second": 0.099,
      "eval_arxiv_token_set_f1": 0.18453232926216256,
      "eval_arxiv_token_set_f1_sem": 0.004204942824465272,
      "eval_arxiv_token_set_precision": 0.1267731601602522,
      "eval_arxiv_token_set_recall": 0.41309199186316686,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 6875
    },
    {
      "epoch": 1.32,
      "eval_python_code_alpaca_accuracy": 0.096375,
      "eval_python_code_alpaca_bleu_score": 2.0690713383744743,
      "eval_python_code_alpaca_bleu_score_sem": 0.06504442325736357,
      "eval_python_code_alpaca_emb_cos_sim": 0.2938317656517029,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008932365338831499,
      "eval_python_code_alpaca_emb_top1_equal": 0.0078125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.007812499866294757,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 4.578368186950684,
      "eval_python_code_alpaca_n_ngrams_match_1": 3.982,
      "eval_python_code_alpaca_n_ngrams_match_2": 0.452,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.07,
      "eval_python_code_alpaca_num_pred_words": 28.472,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 97.35539870683805,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.14943516113949668,
      "eval_python_code_alpaca_runtime": 10.0591,
      "eval_python_code_alpaca_samples_per_second": 49.706,
      "eval_python_code_alpaca_steps_per_second": 0.099,
      "eval_python_code_alpaca_token_set_f1": 0.2358850037931322,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005003715120287452,
      "eval_python_code_alpaca_token_set_precision": 0.19193841138712459,
      "eval_python_code_alpaca_token_set_recall": 0.39438893841796385,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 6875
    },
    {
      "epoch": 1.32,
      "eval_wikibio_accuracy": 0.24259375,
      "eval_wikibio_bleu_score": 3.2213731771417753,
      "eval_wikibio_bleu_score_sem": 0.15564141135127274,
      "eval_wikibio_emb_cos_sim": 0.5043472051620483,
      "eval_wikibio_emb_cos_sim_sem": 0.014252313755082142,
      "eval_wikibio_emb_top1_equal": 0.078125,
      "eval_wikibio_emb_top1_equal_sem": 0.023813825516515504,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.8358540534973145,
      "eval_wikibio_n_ngrams_match_1": 6.44,
      "eval_wikibio_n_ngrams_match_2": 1.822,
      "eval_wikibio_n_ngrams_match_3": 0.55,
      "eval_wikibio_num_pred_words": 33.56,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 125.94610199582124,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.2040199948293262,
      "eval_wikibio_runtime": 10.1916,
      "eval_wikibio_samples_per_second": 49.06,
      "eval_wikibio_steps_per_second": 0.098,
      "eval_wikibio_token_set_f1": 0.21533535497686207,
      "eval_wikibio_token_set_f1_sem": 0.006517295930449522,
      "eval_wikibio_token_set_precision": 0.20380042686358332,
      "eval_wikibio_token_set_recall": 0.2625841405555016,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 6875
    },
    {
      "epoch": 1.32,
      "eval_nq_accuracy": 0.39715625,
      "eval_nq_bleu_score": 5.412779838370413,
      "eval_nq_bleu_score_sem": 0.251853099021192,
      "eval_nq_emb_cos_sim": 0.5769795775413513,
      "eval_nq_emb_cos_sim_sem": 0.011507360634374476,
      "eval_nq_emb_top1_equal": 0.1328125,
      "eval_nq_emb_top1_equal_sem": 0.030114394778901498,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 3.3065907955169678,
      "eval_nq_n_ngrams_match_1": 15.466,
      "eval_nq_n_ngrams_match_2": 4.076,
      "eval_nq_n_ngrams_match_3": 1.442,
      "eval_nq_num_pred_words": 46.512,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 27.291922942887755,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.2815078504978107,
      "eval_nq_runtime": 10.4269,
      "eval_nq_samples_per_second": 47.953,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.3240591933206866,
      "eval_nq_token_set_f1_sem": 0.00509215338977105,
      "eval_nq_token_set_precision": 0.25852701274901707,
      "eval_nq_token_set_recall": 0.47132384335838723,
      "eval_nq_true_num_tokens": 64.0,
      "step": 6875
    },
    {
      "epoch": 1.32,
      "learning_rate": 0.001,
      "loss": 3.6588,
      "step": 6876
    },
    {
      "epoch": 1.32,
      "learning_rate": 0.001,
      "loss": 3.644,
      "step": 6888
    },
    {
      "epoch": 1.32,
      "learning_rate": 0.001,
      "loss": 3.6498,
      "step": 6900
    },
    {
      "epoch": 1.33,
      "learning_rate": 0.001,
      "loss": 3.6553,
      "step": 6912
    },
    {
      "epoch": 1.33,
      "learning_rate": 0.001,
      "loss": 3.6518,
      "step": 6924
    },
    {
      "epoch": 1.33,
      "learning_rate": 0.001,
      "loss": 3.6361,
      "step": 6936
    },
    {
      "epoch": 1.33,
      "learning_rate": 0.001,
      "loss": 3.6353,
      "step": 6948
    },
    {
      "epoch": 1.34,
      "learning_rate": 0.001,
      "loss": 3.6405,
      "step": 6960
    },
    {
      "epoch": 1.34,
      "learning_rate": 0.001,
      "loss": 3.6307,
      "step": 6972
    },
    {
      "epoch": 1.34,
      "learning_rate": 0.001,
      "loss": 3.6434,
      "step": 6984
    },
    {
      "epoch": 1.34,
      "learning_rate": 0.001,
      "loss": 3.6407,
      "step": 6996
    },
    {
      "epoch": 1.35,
      "learning_rate": 0.001,
      "loss": 3.6464,
      "step": 7008
    },
    {
      "epoch": 1.35,
      "learning_rate": 0.001,
      "loss": 3.6493,
      "step": 7020
    },
    {
      "epoch": 1.35,
      "learning_rate": 0.001,
      "loss": 3.6467,
      "step": 7032
    },
    {
      "epoch": 1.35,
      "learning_rate": 0.001,
      "loss": 3.6396,
      "step": 7044
    },
    {
      "epoch": 1.35,
      "learning_rate": 0.001,
      "loss": 3.6316,
      "step": 7056
    },
    {
      "epoch": 1.36,
      "learning_rate": 0.001,
      "loss": 3.6286,
      "step": 7068
    },
    {
      "epoch": 1.36,
      "learning_rate": 0.001,
      "loss": 3.6355,
      "step": 7080
    },
    {
      "epoch": 1.36,
      "learning_rate": 0.001,
      "loss": 3.6289,
      "step": 7092
    },
    {
      "epoch": 1.36,
      "learning_rate": 0.001,
      "loss": 3.6338,
      "step": 7104
    },
    {
      "epoch": 1.37,
      "learning_rate": 0.001,
      "loss": 3.6279,
      "step": 7116
    },
    {
      "epoch": 1.37,
      "learning_rate": 0.001,
      "loss": 3.632,
      "step": 7128
    },
    {
      "epoch": 1.37,
      "learning_rate": 0.001,
      "loss": 3.624,
      "step": 7140
    },
    {
      "epoch": 1.37,
      "learning_rate": 0.001,
      "loss": 3.6223,
      "step": 7152
    },
    {
      "epoch": 1.38,
      "learning_rate": 0.001,
      "loss": 3.6262,
      "step": 7164
    },
    {
      "epoch": 1.38,
      "learning_rate": 0.001,
      "loss": 3.6229,
      "step": 7176
    },
    {
      "epoch": 1.38,
      "learning_rate": 0.001,
      "loss": 3.6279,
      "step": 7188
    },
    {
      "epoch": 1.38,
      "learning_rate": 0.001,
      "loss": 3.617,
      "step": 7200
    },
    {
      "epoch": 1.38,
      "learning_rate": 0.001,
      "loss": 3.603,
      "step": 7212
    },
    {
      "epoch": 1.39,
      "learning_rate": 0.001,
      "loss": 3.615,
      "step": 7224
    },
    {
      "epoch": 1.39,
      "learning_rate": 0.001,
      "loss": 3.6169,
      "step": 7236
    },
    {
      "epoch": 1.39,
      "learning_rate": 0.001,
      "loss": 3.6196,
      "step": 7248
    },
    {
      "epoch": 1.39,
      "learning_rate": 0.001,
      "loss": 3.6218,
      "step": 7260
    },
    {
      "epoch": 1.4,
      "learning_rate": 0.001,
      "loss": 3.6292,
      "step": 7272
    },
    {
      "epoch": 1.4,
      "learning_rate": 0.001,
      "loss": 3.615,
      "step": 7284
    },
    {
      "epoch": 1.4,
      "learning_rate": 0.001,
      "loss": 3.6064,
      "step": 7296
    },
    {
      "epoch": 1.4,
      "learning_rate": 0.001,
      "loss": 3.6111,
      "step": 7308
    },
    {
      "epoch": 1.41,
      "learning_rate": 0.001,
      "loss": 3.6091,
      "step": 7320
    },
    {
      "epoch": 1.41,
      "learning_rate": 0.001,
      "loss": 3.6179,
      "step": 7332
    },
    {
      "epoch": 1.41,
      "learning_rate": 0.001,
      "loss": 3.6042,
      "step": 7344
    },
    {
      "epoch": 1.41,
      "learning_rate": 0.001,
      "loss": 3.6149,
      "step": 7356
    },
    {
      "epoch": 1.41,
      "learning_rate": 0.001,
      "loss": 3.6052,
      "step": 7368
    },
    {
      "epoch": 1.42,
      "learning_rate": 0.001,
      "loss": 3.6146,
      "step": 7380
    },
    {
      "epoch": 1.42,
      "learning_rate": 0.001,
      "loss": 3.5991,
      "step": 7392
    },
    {
      "epoch": 1.42,
      "learning_rate": 0.001,
      "loss": 3.6124,
      "step": 7404
    },
    {
      "epoch": 1.42,
      "learning_rate": 0.001,
      "loss": 3.6048,
      "step": 7416
    },
    {
      "epoch": 1.43,
      "learning_rate": 0.001,
      "loss": 3.6032,
      "step": 7428
    },
    {
      "epoch": 1.43,
      "learning_rate": 0.001,
      "loss": 3.6121,
      "step": 7440
    },
    {
      "epoch": 1.43,
      "learning_rate": 0.001,
      "loss": 3.6015,
      "step": 7452
    },
    {
      "epoch": 1.43,
      "learning_rate": 0.001,
      "loss": 3.5937,
      "step": 7464
    },
    {
      "epoch": 1.44,
      "learning_rate": 0.001,
      "loss": 3.6078,
      "step": 7476
    },
    {
      "epoch": 1.44,
      "learning_rate": 0.001,
      "loss": 3.5833,
      "step": 7488
    },
    {
      "epoch": 1.44,
      "learning_rate": 0.001,
      "loss": 3.5957,
      "step": 7500
    },
    {
      "epoch": 1.44,
      "eval_ag_news_accuracy": 0.2325,
      "eval_ag_news_bleu_score": 2.322543213016337,
      "eval_ag_news_bleu_score_sem": 0.07634130693049505,
      "eval_ag_news_emb_cos_sim": 0.5002522468566895,
      "eval_ag_news_emb_cos_sim_sem": 0.012586354200567779,
      "eval_ag_news_emb_top1_equal": 0.109375,
      "eval_ag_news_emb_top1_equal_sem": 0.027695207821224692,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 4.758804798126221,
      "eval_ag_news_n_ngrams_match_1": 7.97,
      "eval_ag_news_n_ngrams_match_2": 1.164,
      "eval_ag_news_n_ngrams_match_3": 0.248,
      "eval_ag_news_num_pred_words": 42.966,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 116.60647430260462,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.1884654914558187,
      "eval_ag_news_runtime": 11.2505,
      "eval_ag_news_samples_per_second": 44.443,
      "eval_ag_news_steps_per_second": 0.089,
      "eval_ag_news_token_set_f1": 0.2279867938606758,
      "eval_ag_news_token_set_f1_sem": 0.00423510729547071,
      "eval_ag_news_token_set_precision": 0.18120436383489927,
      "eval_ag_news_token_set_recall": 0.3558759956074076,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 7500
    },
    {
      "epoch": 1.44,
      "eval_anthropic_toxic_prompts_accuracy": 0.06959375,
      "eval_anthropic_toxic_prompts_bleu_score": 1.4314448699640545,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.056008639486394375,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.3752596378326416,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011371612220765966,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.011004959004867984,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 4.4814229011535645,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 3.016,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.458,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.096,
      "eval_anthropic_toxic_prompts_num_pred_words": 43.096,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 88.36031125762983,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.10682182327045758,
      "eval_anthropic_toxic_prompts_runtime": 9.9976,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.012,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.1,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.19416970230318364,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005311672541103707,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.19473575142660043,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.23870822025649782,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 7500
    },
    {
      "epoch": 1.44,
      "eval_arxiv_accuracy": 0.26834375,
      "eval_arxiv_bleu_score": 1.9023309392053043,
      "eval_arxiv_bleu_score_sem": 0.06088470989707018,
      "eval_arxiv_emb_cos_sim": 0.4109267592430115,
      "eval_arxiv_emb_cos_sim_sem": 0.00933169355758206,
      "eval_arxiv_emb_top1_equal": 0.171875,
      "eval_arxiv_emb_top1_equal_sem": 0.03347745514062371,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 4.517910003662109,
      "eval_arxiv_n_ngrams_match_1": 7.428,
      "eval_arxiv_n_ngrams_match_2": 0.966,
      "eval_arxiv_n_ngrams_match_3": 0.102,
      "eval_arxiv_num_pred_words": 32.916,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 91.64386234787013,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.1792532314009046,
      "eval_arxiv_runtime": 10.2043,
      "eval_arxiv_samples_per_second": 48.999,
      "eval_arxiv_steps_per_second": 0.098,
      "eval_arxiv_token_set_f1": 0.1857198052309753,
      "eval_arxiv_token_set_f1_sem": 0.004262952480010026,
      "eval_arxiv_token_set_precision": 0.12788930066399967,
      "eval_arxiv_token_set_recall": 0.4141109842090706,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 7500
    },
    {
      "epoch": 1.44,
      "eval_python_code_alpaca_accuracy": 0.09915625,
      "eval_python_code_alpaca_bleu_score": 2.104059565783548,
      "eval_python_code_alpaca_bleu_score_sem": 0.058495974532946075,
      "eval_python_code_alpaca_emb_cos_sim": 0.33410459756851196,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008442297589415015,
      "eval_python_code_alpaca_emb_top1_equal": 0.0078125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.007812499866294757,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 4.495411396026611,
      "eval_python_code_alpaca_n_ngrams_match_1": 4.276,
      "eval_python_code_alpaca_n_ngrams_match_2": 0.44,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.082,
      "eval_python_code_alpaca_num_pred_words": 31.144,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 89.60502455457436,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.15650192715310313,
      "eval_python_code_alpaca_runtime": 9.8854,
      "eval_python_code_alpaca_samples_per_second": 50.58,
      "eval_python_code_alpaca_steps_per_second": 0.101,
      "eval_python_code_alpaca_token_set_f1": 0.25125026346892704,
      "eval_python_code_alpaca_token_set_f1_sem": 0.004747398014444095,
      "eval_python_code_alpaca_token_set_precision": 0.20902072223053395,
      "eval_python_code_alpaca_token_set_recall": 0.3877599743416273,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 7500
    },
    {
      "epoch": 1.44,
      "eval_wikibio_accuracy": 0.2471875,
      "eval_wikibio_bleu_score": 3.337191110123847,
      "eval_wikibio_bleu_score_sem": 0.14538434540431933,
      "eval_wikibio_emb_cos_sim": 0.5329362154006958,
      "eval_wikibio_emb_cos_sim_sem": 0.01433505328706509,
      "eval_wikibio_emb_top1_equal": 0.0546875,
      "eval_wikibio_emb_top1_equal_sem": 0.020175758285348722,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.786613464355469,
      "eval_wikibio_n_ngrams_match_1": 6.854,
      "eval_wikibio_n_ngrams_match_2": 1.882,
      "eval_wikibio_n_ngrams_match_3": 0.546,
      "eval_wikibio_num_pred_words": 34.36,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 119.89465285764378,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.2151911417248924,
      "eval_wikibio_runtime": 10.1195,
      "eval_wikibio_samples_per_second": 49.41,
      "eval_wikibio_steps_per_second": 0.099,
      "eval_wikibio_token_set_f1": 0.2283322086217695,
      "eval_wikibio_token_set_f1_sem": 0.006563763563716991,
      "eval_wikibio_token_set_precision": 0.2180544644561713,
      "eval_wikibio_token_set_recall": 0.2689410196017307,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 7500
    },
    {
      "epoch": 1.44,
      "eval_nq_accuracy": 0.40428125,
      "eval_nq_bleu_score": 5.407400379278418,
      "eval_nq_bleu_score_sem": 0.2571898131032587,
      "eval_nq_emb_cos_sim": 0.5908706784248352,
      "eval_nq_emb_cos_sim_sem": 0.012227750374190868,
      "eval_nq_emb_top1_equal": 0.125,
      "eval_nq_emb_top1_equal_sem": 0.02934655822437397,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 3.2414193153381348,
      "eval_nq_n_ngrams_match_1": 15.792,
      "eval_nq_n_ngrams_match_2": 4.186,
      "eval_nq_n_ngrams_match_3": 1.412,
      "eval_nq_num_pred_words": 47.138,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 25.569987880718834,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.2855418074436533,
      "eval_nq_runtime": 10.5726,
      "eval_nq_samples_per_second": 47.292,
      "eval_nq_steps_per_second": 0.095,
      "eval_nq_token_set_f1": 0.32821161027984797,
      "eval_nq_token_set_f1_sem": 0.005102521485572631,
      "eval_nq_token_set_precision": 0.26286134082350254,
      "eval_nq_token_set_recall": 0.476825728387506,
      "eval_nq_true_num_tokens": 64.0,
      "step": 7500
    },
    {
      "epoch": 1.44,
      "learning_rate": 0.001,
      "loss": 3.5943,
      "step": 7512
    },
    {
      "epoch": 1.44,
      "learning_rate": 0.001,
      "loss": 3.585,
      "step": 7524
    },
    {
      "epoch": 1.45,
      "learning_rate": 0.001,
      "loss": 3.5871,
      "step": 7536
    },
    {
      "epoch": 1.45,
      "learning_rate": 0.001,
      "loss": 3.5742,
      "step": 7548
    },
    {
      "epoch": 1.45,
      "learning_rate": 0.001,
      "loss": 3.5853,
      "step": 7560
    },
    {
      "epoch": 1.45,
      "learning_rate": 0.001,
      "loss": 3.584,
      "step": 7572
    },
    {
      "epoch": 1.46,
      "learning_rate": 0.001,
      "loss": 3.5859,
      "step": 7584
    },
    {
      "epoch": 1.46,
      "learning_rate": 0.001,
      "loss": 3.5973,
      "step": 7596
    },
    {
      "epoch": 1.46,
      "learning_rate": 0.001,
      "loss": 3.584,
      "step": 7608
    },
    {
      "epoch": 1.46,
      "learning_rate": 0.001,
      "loss": 3.5711,
      "step": 7620
    },
    {
      "epoch": 1.47,
      "learning_rate": 0.001,
      "loss": 3.5689,
      "step": 7632
    },
    {
      "epoch": 1.47,
      "learning_rate": 0.001,
      "loss": 3.5813,
      "step": 7644
    },
    {
      "epoch": 1.47,
      "learning_rate": 0.001,
      "loss": 3.582,
      "step": 7656
    },
    {
      "epoch": 1.47,
      "learning_rate": 0.001,
      "loss": 3.586,
      "step": 7668
    },
    {
      "epoch": 1.47,
      "learning_rate": 0.001,
      "loss": 3.5783,
      "step": 7680
    },
    {
      "epoch": 1.48,
      "learning_rate": 0.001,
      "loss": 3.5728,
      "step": 7692
    },
    {
      "epoch": 1.48,
      "learning_rate": 0.001,
      "loss": 3.5719,
      "step": 7704
    },
    {
      "epoch": 1.48,
      "learning_rate": 0.001,
      "loss": 3.5842,
      "step": 7716
    },
    {
      "epoch": 1.48,
      "learning_rate": 0.001,
      "loss": 3.5801,
      "step": 7728
    },
    {
      "epoch": 1.49,
      "learning_rate": 0.001,
      "loss": 3.5589,
      "step": 7740
    },
    {
      "epoch": 1.49,
      "learning_rate": 0.001,
      "loss": 3.573,
      "step": 7752
    },
    {
      "epoch": 1.49,
      "learning_rate": 0.001,
      "loss": 3.5721,
      "step": 7764
    },
    {
      "epoch": 1.49,
      "learning_rate": 0.001,
      "loss": 3.5748,
      "step": 7776
    },
    {
      "epoch": 1.5,
      "learning_rate": 0.001,
      "loss": 3.5474,
      "step": 7788
    },
    {
      "epoch": 1.5,
      "learning_rate": 0.001,
      "loss": 3.5707,
      "step": 7800
    },
    {
      "epoch": 1.5,
      "learning_rate": 0.001,
      "loss": 3.5677,
      "step": 7812
    },
    {
      "epoch": 1.5,
      "learning_rate": 0.001,
      "loss": 3.5741,
      "step": 7824
    },
    {
      "epoch": 1.5,
      "learning_rate": 0.001,
      "loss": 3.5701,
      "step": 7836
    },
    {
      "epoch": 1.51,
      "learning_rate": 0.001,
      "loss": 3.5639,
      "step": 7848
    },
    {
      "epoch": 1.51,
      "learning_rate": 0.001,
      "loss": 3.5625,
      "step": 7860
    },
    {
      "epoch": 1.51,
      "learning_rate": 0.001,
      "loss": 3.5553,
      "step": 7872
    },
    {
      "epoch": 1.51,
      "learning_rate": 0.001,
      "loss": 3.5686,
      "step": 7884
    },
    {
      "epoch": 1.52,
      "learning_rate": 0.001,
      "loss": 3.5667,
      "step": 7896
    },
    {
      "epoch": 1.52,
      "learning_rate": 0.001,
      "loss": 3.556,
      "step": 7908
    },
    {
      "epoch": 1.52,
      "learning_rate": 0.001,
      "loss": 3.5595,
      "step": 7920
    },
    {
      "epoch": 1.52,
      "learning_rate": 0.001,
      "loss": 3.5614,
      "step": 7932
    },
    {
      "epoch": 1.53,
      "learning_rate": 0.001,
      "loss": 3.5585,
      "step": 7944
    },
    {
      "epoch": 1.53,
      "learning_rate": 0.001,
      "loss": 3.5488,
      "step": 7956
    },
    {
      "epoch": 1.53,
      "learning_rate": 0.001,
      "loss": 3.5476,
      "step": 7968
    },
    {
      "epoch": 1.53,
      "learning_rate": 0.001,
      "loss": 3.5541,
      "step": 7980
    },
    {
      "epoch": 1.53,
      "learning_rate": 0.001,
      "loss": 3.5543,
      "step": 7992
    },
    {
      "epoch": 1.54,
      "learning_rate": 0.001,
      "loss": 3.5492,
      "step": 8004
    },
    {
      "epoch": 1.54,
      "learning_rate": 0.001,
      "loss": 3.5435,
      "step": 8016
    },
    {
      "epoch": 1.54,
      "learning_rate": 0.001,
      "loss": 3.5357,
      "step": 8028
    },
    {
      "epoch": 1.54,
      "learning_rate": 0.001,
      "loss": 3.5477,
      "step": 8040
    },
    {
      "epoch": 1.55,
      "learning_rate": 0.001,
      "loss": 3.5515,
      "step": 8052
    },
    {
      "epoch": 1.55,
      "learning_rate": 0.001,
      "loss": 3.5559,
      "step": 8064
    },
    {
      "epoch": 1.55,
      "learning_rate": 0.001,
      "loss": 3.5472,
      "step": 8076
    },
    {
      "epoch": 1.55,
      "learning_rate": 0.001,
      "loss": 3.5544,
      "step": 8088
    },
    {
      "epoch": 1.56,
      "learning_rate": 0.001,
      "loss": 3.5394,
      "step": 8100
    },
    {
      "epoch": 1.56,
      "learning_rate": 0.001,
      "loss": 3.5457,
      "step": 8112
    },
    {
      "epoch": 1.56,
      "learning_rate": 0.001,
      "loss": 3.5442,
      "step": 8124
    },
    {
      "epoch": 1.56,
      "eval_ag_news_accuracy": 0.2358125,
      "eval_ag_news_bleu_score": 2.4883831524173123,
      "eval_ag_news_bleu_score_sem": 0.08879235078149196,
      "eval_ag_news_emb_cos_sim": 0.535577654838562,
      "eval_ag_news_emb_cos_sim_sem": 0.01108892408818029,
      "eval_ag_news_emb_top1_equal": 0.1015625,
      "eval_ag_news_emb_top1_equal_sem": 0.026804565886848545,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 4.701244354248047,
      "eval_ag_news_n_ngrams_match_1": 8.354,
      "eval_ag_news_n_ngrams_match_2": 1.29,
      "eval_ag_news_n_ngrams_match_3": 0.264,
      "eval_ag_news_num_pred_words": 44.442,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 110.08407084060602,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.19311281601745423,
      "eval_ag_news_runtime": 11.1378,
      "eval_ag_news_samples_per_second": 44.892,
      "eval_ag_news_steps_per_second": 0.09,
      "eval_ag_news_token_set_f1": 0.2328350888237543,
      "eval_ag_news_token_set_f1_sem": 0.004361067956988328,
      "eval_ag_news_token_set_precision": 0.18868677528427935,
      "eval_ag_news_token_set_recall": 0.3524863948893839,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 8125
    },
    {
      "epoch": 1.56,
      "eval_anthropic_toxic_prompts_accuracy": 0.0703125,
      "eval_anthropic_toxic_prompts_bleu_score": 1.545958566368486,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.0686130866006481,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.3970567286014557,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011643941448241076,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0390625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.017191973462108996,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 4.448799133300781,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 3.144,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.566,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.13,
      "eval_anthropic_toxic_prompts_num_pred_words": 43.016,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 85.52417917237305,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.11145307880516193,
      "eval_anthropic_toxic_prompts_runtime": 9.9448,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.278,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.101,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.20310301259868394,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005631626723020884,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.20265703593220222,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.2510094551800592,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 8125
    },
    {
      "epoch": 1.56,
      "eval_arxiv_accuracy": 0.2689375,
      "eval_arxiv_bleu_score": 1.9417341197111846,
      "eval_arxiv_bleu_score_sem": 0.06158943468102233,
      "eval_arxiv_emb_cos_sim": 0.4218406677246094,
      "eval_arxiv_emb_cos_sim_sem": 0.009850202527209301,
      "eval_arxiv_emb_top1_equal": 0.1171875,
      "eval_arxiv_emb_top1_equal_sem": 0.02854125312152025,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 4.472835540771484,
      "eval_arxiv_n_ngrams_match_1": 7.666,
      "eval_arxiv_n_ngrams_match_2": 1.002,
      "eval_arxiv_n_ngrams_match_3": 0.1,
      "eval_arxiv_num_pred_words": 34.67,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 87.60477808213642,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.18041833384422198,
      "eval_arxiv_runtime": 10.4142,
      "eval_arxiv_samples_per_second": 48.011,
      "eval_arxiv_steps_per_second": 0.096,
      "eval_arxiv_token_set_f1": 0.18937989286851253,
      "eval_arxiv_token_set_f1_sem": 0.004361374369966269,
      "eval_arxiv_token_set_precision": 0.13118151385965218,
      "eval_arxiv_token_set_recall": 0.41357298590123737,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 8125
    },
    {
      "epoch": 1.56,
      "eval_python_code_alpaca_accuracy": 0.10209375,
      "eval_python_code_alpaca_bleu_score": 2.0361606743426623,
      "eval_python_code_alpaca_bleu_score_sem": 0.059958079090687615,
      "eval_python_code_alpaca_emb_cos_sim": 0.31711506843566895,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008342725659808028,
      "eval_python_code_alpaca_emb_top1_equal": 0.0,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.0,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 4.409636497497559,
      "eval_python_code_alpaca_n_ngrams_match_1": 4.076,
      "eval_python_code_alpaca_n_ngrams_match_2": 0.464,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.098,
      "eval_python_code_alpaca_num_pred_words": 31.592,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 82.23956378298436,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.1523855505706459,
      "eval_python_code_alpaca_runtime": 9.7335,
      "eval_python_code_alpaca_samples_per_second": 51.369,
      "eval_python_code_alpaca_steps_per_second": 0.103,
      "eval_python_code_alpaca_token_set_f1": 0.2411080441921459,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005053151733454404,
      "eval_python_code_alpaca_token_set_precision": 0.1960890074593007,
      "eval_python_code_alpaca_token_set_recall": 0.39624581528722413,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 8125
    },
    {
      "epoch": 1.56,
      "eval_wikibio_accuracy": 0.24809375,
      "eval_wikibio_bleu_score": 3.456584045194845,
      "eval_wikibio_bleu_score_sem": 0.14630321622658643,
      "eval_wikibio_emb_cos_sim": 0.5349158644676208,
      "eval_wikibio_emb_cos_sim_sem": 0.014574552753728248,
      "eval_wikibio_emb_top1_equal": 0.1015625,
      "eval_wikibio_emb_top1_equal_sem": 0.026804565886848545,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.757209777832031,
      "eval_wikibio_n_ngrams_match_1": 7.354,
      "eval_wikibio_n_ngrams_match_2": 2.136,
      "eval_wikibio_n_ngrams_match_3": 0.666,
      "eval_wikibio_num_pred_words": 35.106,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 116.4206328594945,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.22421175876502936,
      "eval_wikibio_runtime": 10.2578,
      "eval_wikibio_samples_per_second": 48.743,
      "eval_wikibio_steps_per_second": 0.097,
      "eval_wikibio_token_set_f1": 0.24044633626780249,
      "eval_wikibio_token_set_f1_sem": 0.006852145991997033,
      "eval_wikibio_token_set_precision": 0.23076434597942477,
      "eval_wikibio_token_set_recall": 0.26870808290421594,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 8125
    },
    {
      "epoch": 1.56,
      "eval_nq_accuracy": 0.4089375,
      "eval_nq_bleu_score": 5.701252720797398,
      "eval_nq_bleu_score_sem": 0.26315345818436453,
      "eval_nq_emb_cos_sim": 0.6082754731178284,
      "eval_nq_emb_cos_sim_sem": 0.011521773539476187,
      "eval_nq_emb_top1_equal": 0.0390625,
      "eval_nq_emb_top1_equal_sem": 0.017191973462108996,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 3.189636468887329,
      "eval_nq_n_ngrams_match_1": 16.084,
      "eval_nq_n_ngrams_match_2": 4.454,
      "eval_nq_n_ngrams_match_3": 1.532,
      "eval_nq_num_pred_words": 48.046,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 24.279599448763665,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.293869560026674,
      "eval_nq_runtime": 10.3857,
      "eval_nq_samples_per_second": 48.143,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.3357395812187461,
      "eval_nq_token_set_f1_sem": 0.005076817269316643,
      "eval_nq_token_set_precision": 0.27053565815348857,
      "eval_nq_token_set_recall": 0.4853697765823322,
      "eval_nq_true_num_tokens": 64.0,
      "step": 8125
    },
    {
      "epoch": 1.56,
      "learning_rate": 0.001,
      "loss": 3.5404,
      "step": 8136
    },
    {
      "epoch": 1.56,
      "learning_rate": 0.001,
      "loss": 3.5336,
      "step": 8148
    },
    {
      "epoch": 1.57,
      "learning_rate": 0.001,
      "loss": 3.5427,
      "step": 8160
    },
    {
      "epoch": 1.57,
      "learning_rate": 0.001,
      "loss": 3.5372,
      "step": 8172
    },
    {
      "epoch": 1.57,
      "learning_rate": 0.001,
      "loss": 3.5478,
      "step": 8184
    },
    {
      "epoch": 1.57,
      "learning_rate": 0.001,
      "loss": 3.5418,
      "step": 8196
    },
    {
      "epoch": 1.58,
      "learning_rate": 0.001,
      "loss": 3.5378,
      "step": 8208
    },
    {
      "epoch": 1.58,
      "learning_rate": 0.001,
      "loss": 3.5395,
      "step": 8220
    },
    {
      "epoch": 1.58,
      "learning_rate": 0.001,
      "loss": 3.5339,
      "step": 8232
    },
    {
      "epoch": 1.58,
      "learning_rate": 0.001,
      "loss": 3.5413,
      "step": 8244
    },
    {
      "epoch": 1.59,
      "learning_rate": 0.001,
      "loss": 3.5412,
      "step": 8256
    },
    {
      "epoch": 1.59,
      "learning_rate": 0.001,
      "loss": 3.5465,
      "step": 8268
    },
    {
      "epoch": 1.59,
      "learning_rate": 0.001,
      "loss": 3.5309,
      "step": 8280
    },
    {
      "epoch": 1.59,
      "learning_rate": 0.001,
      "loss": 3.5358,
      "step": 8292
    },
    {
      "epoch": 1.59,
      "learning_rate": 0.001,
      "loss": 3.5349,
      "step": 8304
    },
    {
      "epoch": 1.6,
      "learning_rate": 0.001,
      "loss": 3.5212,
      "step": 8316
    },
    {
      "epoch": 1.6,
      "learning_rate": 0.001,
      "loss": 3.5231,
      "step": 8328
    },
    {
      "epoch": 1.6,
      "learning_rate": 0.001,
      "loss": 3.5279,
      "step": 8340
    },
    {
      "epoch": 1.6,
      "learning_rate": 0.001,
      "loss": 3.5235,
      "step": 8352
    },
    {
      "epoch": 1.61,
      "learning_rate": 0.001,
      "loss": 3.5204,
      "step": 8364
    },
    {
      "epoch": 1.61,
      "learning_rate": 0.001,
      "loss": 3.5289,
      "step": 8376
    },
    {
      "epoch": 1.61,
      "learning_rate": 0.001,
      "loss": 3.5242,
      "step": 8388
    },
    {
      "epoch": 1.61,
      "learning_rate": 0.001,
      "loss": 3.5169,
      "step": 8400
    },
    {
      "epoch": 1.62,
      "learning_rate": 0.001,
      "loss": 3.5191,
      "step": 8412
    },
    {
      "epoch": 1.62,
      "learning_rate": 0.001,
      "loss": 3.5221,
      "step": 8424
    },
    {
      "epoch": 1.62,
      "learning_rate": 0.001,
      "loss": 3.5202,
      "step": 8436
    },
    {
      "epoch": 1.62,
      "learning_rate": 0.001,
      "loss": 3.5161,
      "step": 8448
    },
    {
      "epoch": 1.62,
      "learning_rate": 0.001,
      "loss": 3.5158,
      "step": 8460
    },
    {
      "epoch": 1.63,
      "learning_rate": 0.001,
      "loss": 3.5224,
      "step": 8472
    },
    {
      "epoch": 1.63,
      "learning_rate": 0.001,
      "loss": 3.5148,
      "step": 8484
    },
    {
      "epoch": 1.63,
      "learning_rate": 0.001,
      "loss": 3.5192,
      "step": 8496
    },
    {
      "epoch": 1.63,
      "learning_rate": 0.001,
      "loss": 3.501,
      "step": 8508
    },
    {
      "epoch": 1.64,
      "learning_rate": 0.001,
      "loss": 3.524,
      "step": 8520
    },
    {
      "epoch": 1.64,
      "learning_rate": 0.001,
      "loss": 3.5008,
      "step": 8532
    },
    {
      "epoch": 1.64,
      "learning_rate": 0.001,
      "loss": 3.5206,
      "step": 8544
    },
    {
      "epoch": 1.64,
      "learning_rate": 0.001,
      "loss": 3.5107,
      "step": 8556
    },
    {
      "epoch": 1.65,
      "learning_rate": 0.001,
      "loss": 3.5069,
      "step": 8568
    },
    {
      "epoch": 1.65,
      "learning_rate": 0.001,
      "loss": 3.5055,
      "step": 8580
    },
    {
      "epoch": 1.65,
      "learning_rate": 0.001,
      "loss": 3.5152,
      "step": 8592
    },
    {
      "epoch": 1.65,
      "learning_rate": 0.001,
      "loss": 3.5064,
      "step": 8604
    },
    {
      "epoch": 1.65,
      "learning_rate": 0.001,
      "loss": 3.5062,
      "step": 8616
    },
    {
      "epoch": 1.66,
      "learning_rate": 0.001,
      "loss": 3.4983,
      "step": 8628
    },
    {
      "epoch": 1.66,
      "learning_rate": 0.001,
      "loss": 3.5099,
      "step": 8640
    },
    {
      "epoch": 1.66,
      "learning_rate": 0.001,
      "loss": 3.4936,
      "step": 8652
    },
    {
      "epoch": 1.66,
      "learning_rate": 0.001,
      "loss": 3.5028,
      "step": 8664
    },
    {
      "epoch": 1.67,
      "learning_rate": 0.001,
      "loss": 3.5084,
      "step": 8676
    },
    {
      "epoch": 1.67,
      "learning_rate": 0.001,
      "loss": 3.5103,
      "step": 8688
    },
    {
      "epoch": 1.67,
      "learning_rate": 0.001,
      "loss": 3.5102,
      "step": 8700
    },
    {
      "epoch": 1.67,
      "learning_rate": 0.001,
      "loss": 3.4951,
      "step": 8712
    },
    {
      "epoch": 1.68,
      "learning_rate": 0.001,
      "loss": 3.5018,
      "step": 8724
    },
    {
      "epoch": 1.68,
      "learning_rate": 0.001,
      "loss": 3.4946,
      "step": 8736
    },
    {
      "epoch": 1.68,
      "learning_rate": 0.001,
      "loss": 3.4883,
      "step": 8748
    },
    {
      "epoch": 1.68,
      "eval_ag_news_accuracy": 0.23778125,
      "eval_ag_news_bleu_score": 2.3704854141674105,
      "eval_ag_news_bleu_score_sem": 0.08805929234025343,
      "eval_ag_news_emb_cos_sim": 0.5304872989654541,
      "eval_ag_news_emb_cos_sim_sem": 0.013255226003524682,
      "eval_ag_news_emb_top1_equal": 0.140625,
      "eval_ag_news_emb_top1_equal_sem": 0.030847557647994725,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 4.644641399383545,
      "eval_ag_news_n_ngrams_match_1": 7.906,
      "eval_ag_news_n_ngrams_match_2": 1.166,
      "eval_ag_news_n_ngrams_match_3": 0.266,
      "eval_ag_news_num_pred_words": 43.142,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 104.0260552884777,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.1867053586924297,
      "eval_ag_news_runtime": 10.3761,
      "eval_ag_news_samples_per_second": 48.188,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.22633767821861886,
      "eval_ag_news_token_set_f1_sem": 0.00459927641854774,
      "eval_ag_news_token_set_precision": 0.17909611571602405,
      "eval_ag_news_token_set_recall": 0.36599050598760197,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 8750
    },
    {
      "epoch": 1.68,
      "eval_anthropic_toxic_prompts_accuracy": 0.0720625,
      "eval_anthropic_toxic_prompts_bleu_score": 1.5380644383019462,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.06938048383491413,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.38932090997695923,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011143205277795273,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0234375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.013424676090873717,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 4.367114543914795,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 3.028,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.542,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.116,
      "eval_anthropic_toxic_prompts_num_pred_words": 42.348,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 78.81588350819912,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.10855016633582844,
      "eval_anthropic_toxic_prompts_runtime": 12.2397,
      "eval_anthropic_toxic_prompts_samples_per_second": 40.851,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.082,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.20107549111213632,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005727311625011449,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.19489493214241732,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.2561713415112732,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 8750
    },
    {
      "epoch": 1.68,
      "eval_arxiv_accuracy": 0.27196875,
      "eval_arxiv_bleu_score": 1.9063170645113854,
      "eval_arxiv_bleu_score_sem": 0.06346725348494,
      "eval_arxiv_emb_cos_sim": 0.4322901964187622,
      "eval_arxiv_emb_cos_sim_sem": 0.009643776482826055,
      "eval_arxiv_emb_top1_equal": 0.1640625,
      "eval_arxiv_emb_top1_equal_sem": 0.03286167651298939,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 4.439461708068848,
      "eval_arxiv_n_ngrams_match_1": 7.572,
      "eval_arxiv_n_ngrams_match_2": 0.976,
      "eval_arxiv_n_ngrams_match_3": 0.114,
      "eval_arxiv_num_pred_words": 33.218,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 84.72932028667496,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.17953503878779442,
      "eval_arxiv_runtime": 10.2275,
      "eval_arxiv_samples_per_second": 48.888,
      "eval_arxiv_steps_per_second": 0.098,
      "eval_arxiv_token_set_f1": 0.18809067298590038,
      "eval_arxiv_token_set_f1_sem": 0.004465375225601672,
      "eval_arxiv_token_set_precision": 0.12943848809496553,
      "eval_arxiv_token_set_recall": 0.4167102154679495,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 8750
    },
    {
      "epoch": 1.68,
      "eval_python_code_alpaca_accuracy": 0.1026875,
      "eval_python_code_alpaca_bleu_score": 2.2041862755943775,
      "eval_python_code_alpaca_bleu_score_sem": 0.06834122070214509,
      "eval_python_code_alpaca_emb_cos_sim": 0.3348689675331116,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008170157364213696,
      "eval_python_code_alpaca_emb_top1_equal": 0.0,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.0,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 4.387907981872559,
      "eval_python_code_alpaca_n_ngrams_match_1": 4.416,
      "eval_python_code_alpaca_n_ngrams_match_2": 0.54,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.12,
      "eval_python_code_alpaca_num_pred_words": 31.672,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 80.47189410217852,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.16196784827312427,
      "eval_python_code_alpaca_runtime": 10.5638,
      "eval_python_code_alpaca_samples_per_second": 47.331,
      "eval_python_code_alpaca_steps_per_second": 0.095,
      "eval_python_code_alpaca_token_set_f1": 0.26282985117419794,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0049658116580926165,
      "eval_python_code_alpaca_token_set_precision": 0.21515742623335407,
      "eval_python_code_alpaca_token_set_recall": 0.4139358274702672,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 8750
    },
    {
      "epoch": 1.68,
      "eval_wikibio_accuracy": 0.2573125,
      "eval_wikibio_bleu_score": 3.2162036203404467,
      "eval_wikibio_bleu_score_sem": 0.1456760587055847,
      "eval_wikibio_emb_cos_sim": 0.5134081840515137,
      "eval_wikibio_emb_cos_sim_sem": 0.015979585120378715,
      "eval_wikibio_emb_top1_equal": 0.0859375,
      "eval_wikibio_emb_top1_equal_sem": 0.02487009666300537,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.720954895019531,
      "eval_wikibio_n_ngrams_match_1": 6.634,
      "eval_wikibio_n_ngrams_match_2": 1.802,
      "eval_wikibio_n_ngrams_match_3": 0.514,
      "eval_wikibio_num_pred_words": 33.094,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 112.27541272879289,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.2122320603838448,
      "eval_wikibio_runtime": 10.1467,
      "eval_wikibio_samples_per_second": 49.277,
      "eval_wikibio_steps_per_second": 0.099,
      "eval_wikibio_token_set_f1": 0.22346104983996015,
      "eval_wikibio_token_set_f1_sem": 0.00664109296140654,
      "eval_wikibio_token_set_precision": 0.21128450768421647,
      "eval_wikibio_token_set_recall": 0.26172893569192507,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 8750
    },
    {
      "epoch": 1.68,
      "eval_nq_accuracy": 0.41128125,
      "eval_nq_bleu_score": 5.820829272424745,
      "eval_nq_bleu_score_sem": 0.2595851636937686,
      "eval_nq_emb_cos_sim": 0.6050189137458801,
      "eval_nq_emb_cos_sim_sem": 0.011790530821539998,
      "eval_nq_emb_top1_equal": 0.140625,
      "eval_nq_emb_top1_equal_sem": 0.030847557647994725,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 3.144869327545166,
      "eval_nq_n_ngrams_match_1": 16.144,
      "eval_nq_n_ngrams_match_2": 4.418,
      "eval_nq_n_ngrams_match_3": 1.56,
      "eval_nq_num_pred_words": 47.164,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 23.216641499522527,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.2968378615393472,
      "eval_nq_runtime": 10.6142,
      "eval_nq_samples_per_second": 47.107,
      "eval_nq_steps_per_second": 0.094,
      "eval_nq_token_set_f1": 0.3378667476349911,
      "eval_nq_token_set_f1_sem": 0.00477608986858229,
      "eval_nq_token_set_precision": 0.272093226303916,
      "eval_nq_token_set_recall": 0.4856166580211932,
      "eval_nq_true_num_tokens": 64.0,
      "step": 8750
    },
    {
      "epoch": 1.68,
      "learning_rate": 0.001,
      "loss": 3.4996,
      "step": 8760
    },
    {
      "epoch": 1.68,
      "learning_rate": 0.001,
      "loss": 3.4984,
      "step": 8772
    },
    {
      "epoch": 1.69,
      "learning_rate": 0.001,
      "loss": 3.4953,
      "step": 8784
    },
    {
      "epoch": 1.69,
      "learning_rate": 0.001,
      "loss": 3.4944,
      "step": 8796
    },
    {
      "epoch": 1.69,
      "learning_rate": 0.001,
      "loss": 3.5001,
      "step": 8808
    },
    {
      "epoch": 1.69,
      "learning_rate": 0.001,
      "loss": 3.4954,
      "step": 8820
    },
    {
      "epoch": 1.7,
      "learning_rate": 0.001,
      "loss": 3.4957,
      "step": 8832
    },
    {
      "epoch": 1.7,
      "learning_rate": 0.001,
      "loss": 3.4775,
      "step": 8844
    },
    {
      "epoch": 1.7,
      "learning_rate": 0.001,
      "loss": 3.499,
      "step": 8856
    },
    {
      "epoch": 1.7,
      "learning_rate": 0.001,
      "loss": 3.501,
      "step": 8868
    },
    {
      "epoch": 1.71,
      "learning_rate": 0.001,
      "loss": 3.4837,
      "step": 8880
    },
    {
      "epoch": 1.71,
      "learning_rate": 0.001,
      "loss": 3.4904,
      "step": 8892
    },
    {
      "epoch": 1.71,
      "learning_rate": 0.001,
      "loss": 3.4952,
      "step": 8904
    },
    {
      "epoch": 1.71,
      "learning_rate": 0.001,
      "loss": 3.4865,
      "step": 8916
    },
    {
      "epoch": 1.71,
      "learning_rate": 0.001,
      "loss": 3.4925,
      "step": 8928
    },
    {
      "epoch": 1.72,
      "learning_rate": 0.001,
      "loss": 3.4793,
      "step": 8940
    },
    {
      "epoch": 1.72,
      "learning_rate": 0.001,
      "loss": 3.4854,
      "step": 8952
    },
    {
      "epoch": 1.72,
      "learning_rate": 0.001,
      "loss": 3.4812,
      "step": 8964
    },
    {
      "epoch": 1.72,
      "learning_rate": 0.001,
      "loss": 3.4835,
      "step": 8976
    },
    {
      "epoch": 1.73,
      "learning_rate": 0.001,
      "loss": 3.4805,
      "step": 8988
    },
    {
      "epoch": 1.73,
      "learning_rate": 0.001,
      "loss": 3.4922,
      "step": 9000
    },
    {
      "epoch": 1.73,
      "learning_rate": 0.001,
      "loss": 3.4939,
      "step": 9012
    },
    {
      "epoch": 1.73,
      "learning_rate": 0.001,
      "loss": 3.4814,
      "step": 9024
    },
    {
      "epoch": 1.74,
      "learning_rate": 0.001,
      "loss": 3.4842,
      "step": 9036
    },
    {
      "epoch": 1.74,
      "learning_rate": 0.001,
      "loss": 3.4645,
      "step": 9048
    },
    {
      "epoch": 1.74,
      "learning_rate": 0.001,
      "loss": 3.4831,
      "step": 9060
    },
    {
      "epoch": 1.74,
      "learning_rate": 0.001,
      "loss": 3.4698,
      "step": 9072
    },
    {
      "epoch": 1.74,
      "learning_rate": 0.001,
      "loss": 3.4693,
      "step": 9084
    },
    {
      "epoch": 1.75,
      "learning_rate": 0.001,
      "loss": 3.4736,
      "step": 9096
    },
    {
      "epoch": 1.75,
      "learning_rate": 0.001,
      "loss": 3.4687,
      "step": 9108
    },
    {
      "epoch": 1.75,
      "learning_rate": 0.001,
      "loss": 3.4786,
      "step": 9120
    },
    {
      "epoch": 1.75,
      "learning_rate": 0.001,
      "loss": 3.473,
      "step": 9132
    },
    {
      "epoch": 1.76,
      "learning_rate": 0.001,
      "loss": 3.462,
      "step": 9144
    },
    {
      "epoch": 1.76,
      "learning_rate": 0.001,
      "loss": 3.485,
      "step": 9156
    },
    {
      "epoch": 1.76,
      "learning_rate": 0.001,
      "loss": 3.4625,
      "step": 9168
    },
    {
      "epoch": 1.76,
      "learning_rate": 0.001,
      "loss": 3.4494,
      "step": 9180
    },
    {
      "epoch": 1.76,
      "learning_rate": 0.001,
      "loss": 3.4763,
      "step": 9192
    },
    {
      "epoch": 1.77,
      "learning_rate": 0.001,
      "loss": 3.4664,
      "step": 9204
    },
    {
      "epoch": 1.77,
      "learning_rate": 0.001,
      "loss": 3.4651,
      "step": 9216
    },
    {
      "epoch": 1.77,
      "learning_rate": 0.001,
      "loss": 3.466,
      "step": 9228
    },
    {
      "epoch": 1.77,
      "learning_rate": 0.001,
      "loss": 3.4596,
      "step": 9240
    },
    {
      "epoch": 1.78,
      "learning_rate": 0.001,
      "loss": 3.466,
      "step": 9252
    },
    {
      "epoch": 1.78,
      "learning_rate": 0.001,
      "loss": 3.4631,
      "step": 9264
    },
    {
      "epoch": 1.78,
      "learning_rate": 0.001,
      "loss": 3.4606,
      "step": 9276
    },
    {
      "epoch": 1.78,
      "learning_rate": 0.001,
      "loss": 3.4501,
      "step": 9288
    },
    {
      "epoch": 1.79,
      "learning_rate": 0.001,
      "loss": 3.463,
      "step": 9300
    },
    {
      "epoch": 1.79,
      "learning_rate": 0.001,
      "loss": 3.4535,
      "step": 9312
    },
    {
      "epoch": 1.79,
      "learning_rate": 0.001,
      "loss": 3.4639,
      "step": 9324
    },
    {
      "epoch": 1.79,
      "learning_rate": 0.001,
      "loss": 3.469,
      "step": 9336
    },
    {
      "epoch": 1.79,
      "learning_rate": 0.001,
      "loss": 3.4589,
      "step": 9348
    },
    {
      "epoch": 1.8,
      "learning_rate": 0.001,
      "loss": 3.4613,
      "step": 9360
    },
    {
      "epoch": 1.8,
      "learning_rate": 0.001,
      "loss": 3.4576,
      "step": 9372
    },
    {
      "epoch": 1.8,
      "eval_ag_news_accuracy": 0.2413125,
      "eval_ag_news_bleu_score": 2.5544844559070983,
      "eval_ag_news_bleu_score_sem": 0.08223989162897455,
      "eval_ag_news_emb_cos_sim": 0.5481958389282227,
      "eval_ag_news_emb_cos_sim_sem": 0.011590007970126309,
      "eval_ag_news_emb_top1_equal": 0.03125,
      "eval_ag_news_emb_top1_equal_sem": 0.015439349450344106,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 4.605099678039551,
      "eval_ag_news_n_ngrams_match_1": 8.48,
      "eval_ag_news_n_ngrams_match_2": 1.362,
      "eval_ag_news_n_ngrams_match_3": 0.29,
      "eval_ag_news_num_pred_words": 43.434,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 99.99294945370863,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.20146325608356472,
      "eval_ag_news_runtime": 11.8026,
      "eval_ag_news_samples_per_second": 42.363,
      "eval_ag_news_steps_per_second": 0.085,
      "eval_ag_news_token_set_f1": 0.23863517867807965,
      "eval_ag_news_token_set_f1_sem": 0.0043812817770934155,
      "eval_ag_news_token_set_precision": 0.19362816222352633,
      "eval_ag_news_token_set_recall": 0.36013605266415294,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 9375
    },
    {
      "epoch": 1.8,
      "eval_anthropic_toxic_prompts_accuracy": 0.072875,
      "eval_anthropic_toxic_prompts_bleu_score": 1.5905957428879907,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.06562406450273717,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.4287947714328766,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.012074247600647145,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.011004959004867984,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 4.347011566162109,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 3.15,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.554,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.128,
      "eval_anthropic_toxic_prompts_num_pred_words": 42.386,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 77.24726929044871,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.1139319311985194,
      "eval_anthropic_toxic_prompts_runtime": 10.4937,
      "eval_anthropic_toxic_prompts_samples_per_second": 47.648,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.095,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.20623150340487895,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005574846580783877,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.20602393665230287,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.25728422074732116,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 9375
    },
    {
      "epoch": 1.8,
      "eval_arxiv_accuracy": 0.27428125,
      "eval_arxiv_bleu_score": 2.024296253189745,
      "eval_arxiv_bleu_score_sem": 0.06256252374453004,
      "eval_arxiv_emb_cos_sim": 0.44694453477859497,
      "eval_arxiv_emb_cos_sim_sem": 0.009536469289428303,
      "eval_arxiv_emb_top1_equal": 0.1796875,
      "eval_arxiv_emb_top1_equal_sem": 0.034068008879424266,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 4.406665802001953,
      "eval_arxiv_n_ngrams_match_1": 8.296,
      "eval_arxiv_n_ngrams_match_2": 1.064,
      "eval_arxiv_n_ngrams_match_3": 0.114,
      "eval_arxiv_num_pred_words": 34.622,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 81.99561760560155,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.19660889423345324,
      "eval_arxiv_runtime": 11.235,
      "eval_arxiv_samples_per_second": 44.504,
      "eval_arxiv_steps_per_second": 0.089,
      "eval_arxiv_token_set_f1": 0.2050662739932027,
      "eval_arxiv_token_set_f1_sem": 0.004335582554494625,
      "eval_arxiv_token_set_precision": 0.14419046096994229,
      "eval_arxiv_token_set_recall": 0.41716972507487776,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 9375
    },
    {
      "epoch": 1.8,
      "eval_python_code_alpaca_accuracy": 0.10359375,
      "eval_python_code_alpaca_bleu_score": 2.2027003126845086,
      "eval_python_code_alpaca_bleu_score_sem": 0.06224611498459122,
      "eval_python_code_alpaca_emb_cos_sim": 0.359014630317688,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009481825241789421,
      "eval_python_code_alpaca_emb_top1_equal": 0.015625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.011004959004867984,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 4.309894561767578,
      "eval_python_code_alpaca_n_ngrams_match_1": 4.75,
      "eval_python_code_alpaca_n_ngrams_match_2": 0.61,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.134,
      "eval_python_code_alpaca_num_pred_words": 34.37,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 74.43264048054209,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.1742315144054959,
      "eval_python_code_alpaca_runtime": 10.8202,
      "eval_python_code_alpaca_samples_per_second": 46.21,
      "eval_python_code_alpaca_steps_per_second": 0.092,
      "eval_python_code_alpaca_token_set_f1": 0.2747307998613838,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0048689995867937695,
      "eval_python_code_alpaca_token_set_precision": 0.2353429153331676,
      "eval_python_code_alpaca_token_set_recall": 0.39477668375484204,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 9375
    },
    {
      "epoch": 1.8,
      "eval_wikibio_accuracy": 0.252375,
      "eval_wikibio_bleu_score": 3.754466439365244,
      "eval_wikibio_bleu_score_sem": 0.14811967447223032,
      "eval_wikibio_emb_cos_sim": 0.5836482048034668,
      "eval_wikibio_emb_cos_sim_sem": 0.013556932901500902,
      "eval_wikibio_emb_top1_equal": 0.1328125,
      "eval_wikibio_emb_top1_equal_sem": 0.030114394778901498,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.745633602142334,
      "eval_wikibio_n_ngrams_match_1": 7.97,
      "eval_wikibio_n_ngrams_match_2": 2.228,
      "eval_wikibio_n_ngrams_match_3": 0.65,
      "eval_wikibio_num_pred_words": 36.424,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 115.0806977849757,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.2555147154181383,
      "eval_wikibio_runtime": 10.875,
      "eval_wikibio_samples_per_second": 45.977,
      "eval_wikibio_steps_per_second": 0.092,
      "eval_wikibio_token_set_f1": 0.2584773880953907,
      "eval_wikibio_token_set_f1_sem": 0.006084545337543673,
      "eval_wikibio_token_set_precision": 0.2550555861799195,
      "eval_wikibio_token_set_recall": 0.28019213217280126,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 9375
    },
    {
      "epoch": 1.8,
      "eval_nq_accuracy": 0.417125,
      "eval_nq_bleu_score": 6.20073225862314,
      "eval_nq_bleu_score_sem": 0.266347373932815,
      "eval_nq_emb_cos_sim": 0.634925127029419,
      "eval_nq_emb_cos_sim_sem": 0.01126875942242021,
      "eval_nq_emb_top1_equal": 0.109375,
      "eval_nq_emb_top1_equal_sem": 0.027695207821224692,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 3.1027681827545166,
      "eval_nq_n_ngrams_match_1": 16.838,
      "eval_nq_n_ngrams_match_2": 4.704,
      "eval_nq_n_ngrams_match_3": 1.672,
      "eval_nq_num_pred_words": 47.424,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 22.259484395523963,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.3107978405072947,
      "eval_nq_runtime": 11.5849,
      "eval_nq_samples_per_second": 43.16,
      "eval_nq_steps_per_second": 0.086,
      "eval_nq_token_set_f1": 0.3496079670088032,
      "eval_nq_token_set_f1_sem": 0.00473141815781879,
      "eval_nq_token_set_precision": 0.2838536832792571,
      "eval_nq_token_set_recall": 0.48512943651616086,
      "eval_nq_true_num_tokens": 64.0,
      "step": 9375
    },
    {
      "epoch": 1.8,
      "learning_rate": 0.001,
      "loss": 3.4419,
      "step": 9384
    },
    {
      "epoch": 1.8,
      "learning_rate": 0.001,
      "loss": 3.4663,
      "step": 9396
    },
    {
      "epoch": 1.81,
      "learning_rate": 0.001,
      "loss": 3.4548,
      "step": 9408
    },
    {
      "epoch": 1.81,
      "learning_rate": 0.001,
      "loss": 3.4706,
      "step": 9420
    },
    {
      "epoch": 1.81,
      "learning_rate": 0.001,
      "loss": 3.4466,
      "step": 9432
    },
    {
      "epoch": 1.81,
      "learning_rate": 0.001,
      "loss": 3.4504,
      "step": 9444
    },
    {
      "epoch": 1.82,
      "learning_rate": 0.001,
      "loss": 3.4421,
      "step": 9456
    },
    {
      "epoch": 1.82,
      "learning_rate": 0.001,
      "loss": 3.4526,
      "step": 9468
    },
    {
      "epoch": 1.82,
      "learning_rate": 0.001,
      "loss": 3.4514,
      "step": 9480
    },
    {
      "epoch": 1.82,
      "learning_rate": 0.001,
      "loss": 3.4362,
      "step": 9492
    },
    {
      "epoch": 1.82,
      "learning_rate": 0.001,
      "loss": 3.4481,
      "step": 9504
    },
    {
      "epoch": 1.83,
      "learning_rate": 0.001,
      "loss": 3.4493,
      "step": 9516
    },
    {
      "epoch": 1.83,
      "learning_rate": 0.001,
      "loss": 3.4461,
      "step": 9528
    },
    {
      "epoch": 1.83,
      "learning_rate": 0.001,
      "loss": 3.4476,
      "step": 9540
    },
    {
      "epoch": 1.83,
      "learning_rate": 0.001,
      "loss": 3.4497,
      "step": 9552
    },
    {
      "epoch": 1.84,
      "learning_rate": 0.001,
      "loss": 3.4436,
      "step": 9564
    },
    {
      "epoch": 1.84,
      "learning_rate": 0.001,
      "loss": 3.4514,
      "step": 9576
    },
    {
      "epoch": 1.84,
      "learning_rate": 0.001,
      "loss": 3.452,
      "step": 9588
    },
    {
      "epoch": 1.84,
      "learning_rate": 0.001,
      "loss": 3.4462,
      "step": 9600
    },
    {
      "epoch": 1.85,
      "learning_rate": 0.001,
      "loss": 3.4447,
      "step": 9612
    },
    {
      "epoch": 1.85,
      "learning_rate": 0.001,
      "loss": 3.4387,
      "step": 9624
    },
    {
      "epoch": 1.85,
      "learning_rate": 0.001,
      "loss": 3.445,
      "step": 9636
    },
    {
      "epoch": 1.85,
      "learning_rate": 0.001,
      "loss": 3.4333,
      "step": 9648
    },
    {
      "epoch": 1.85,
      "learning_rate": 0.001,
      "loss": 3.4435,
      "step": 9660
    },
    {
      "epoch": 1.86,
      "learning_rate": 0.001,
      "loss": 3.4408,
      "step": 9672
    },
    {
      "epoch": 1.86,
      "learning_rate": 0.001,
      "loss": 3.4445,
      "step": 9684
    },
    {
      "epoch": 1.86,
      "learning_rate": 0.001,
      "loss": 3.4346,
      "step": 9696
    },
    {
      "epoch": 1.86,
      "learning_rate": 0.001,
      "loss": 3.4328,
      "step": 9708
    },
    {
      "epoch": 1.87,
      "learning_rate": 0.001,
      "loss": 3.4498,
      "step": 9720
    },
    {
      "epoch": 1.87,
      "learning_rate": 0.001,
      "loss": 3.4271,
      "step": 9732
    },
    {
      "epoch": 1.87,
      "learning_rate": 0.001,
      "loss": 3.4355,
      "step": 9744
    },
    {
      "epoch": 1.87,
      "learning_rate": 0.001,
      "loss": 3.4384,
      "step": 9756
    },
    {
      "epoch": 1.88,
      "learning_rate": 0.001,
      "loss": 3.4385,
      "step": 9768
    },
    {
      "epoch": 1.88,
      "learning_rate": 0.001,
      "loss": 3.4475,
      "step": 9780
    },
    {
      "epoch": 1.88,
      "learning_rate": 0.001,
      "loss": 3.4299,
      "step": 9792
    },
    {
      "epoch": 1.88,
      "learning_rate": 0.001,
      "loss": 3.4292,
      "step": 9804
    },
    {
      "epoch": 1.88,
      "learning_rate": 0.001,
      "loss": 3.4305,
      "step": 9816
    },
    {
      "epoch": 1.89,
      "learning_rate": 0.001,
      "loss": 3.4308,
      "step": 9828
    },
    {
      "epoch": 1.89,
      "learning_rate": 0.001,
      "loss": 3.4247,
      "step": 9840
    },
    {
      "epoch": 1.89,
      "learning_rate": 0.001,
      "loss": 3.4309,
      "step": 9852
    },
    {
      "epoch": 1.89,
      "learning_rate": 0.001,
      "loss": 3.4209,
      "step": 9864
    },
    {
      "epoch": 1.9,
      "learning_rate": 0.001,
      "loss": 3.4368,
      "step": 9876
    },
    {
      "epoch": 1.9,
      "learning_rate": 0.001,
      "loss": 3.4361,
      "step": 9888
    },
    {
      "epoch": 1.9,
      "learning_rate": 0.001,
      "loss": 3.4188,
      "step": 9900
    },
    {
      "epoch": 1.9,
      "learning_rate": 0.001,
      "loss": 3.424,
      "step": 9912
    },
    {
      "epoch": 1.91,
      "learning_rate": 0.001,
      "loss": 3.4236,
      "step": 9924
    },
    {
      "epoch": 1.91,
      "learning_rate": 0.001,
      "loss": 3.4265,
      "step": 9936
    },
    {
      "epoch": 1.91,
      "learning_rate": 0.001,
      "loss": 3.4117,
      "step": 9948
    },
    {
      "epoch": 1.91,
      "learning_rate": 0.001,
      "loss": 3.4115,
      "step": 9960
    },
    {
      "epoch": 1.91,
      "learning_rate": 0.001,
      "loss": 3.4166,
      "step": 9972
    },
    {
      "epoch": 1.92,
      "learning_rate": 0.001,
      "loss": 3.4291,
      "step": 9984
    },
    {
      "epoch": 1.92,
      "learning_rate": 0.001,
      "loss": 3.4302,
      "step": 9996
    },
    {
      "epoch": 1.92,
      "eval_ag_news_accuracy": 0.2426875,
      "eval_ag_news_bleu_score": 2.562018860710131,
      "eval_ag_news_bleu_score_sem": 0.08929048573768676,
      "eval_ag_news_emb_cos_sim": 0.557389497756958,
      "eval_ag_news_emb_cos_sim_sem": 0.01211784983252068,
      "eval_ag_news_emb_top1_equal": 0.078125,
      "eval_ag_news_emb_top1_equal_sem": 0.023813825516515504,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 4.576404094696045,
      "eval_ag_news_n_ngrams_match_1": 8.74,
      "eval_ag_news_n_ngrams_match_2": 1.382,
      "eval_ag_news_n_ngrams_match_3": 0.298,
      "eval_ag_news_num_pred_words": 44.534,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 97.16437138134575,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.2046196696514389,
      "eval_ag_news_runtime": 11.0855,
      "eval_ag_news_samples_per_second": 45.104,
      "eval_ag_news_steps_per_second": 0.09,
      "eval_ag_news_token_set_f1": 0.24116201266850124,
      "eval_ag_news_token_set_f1_sem": 0.0043896089504334036,
      "eval_ag_news_token_set_precision": 0.19924948641894807,
      "eval_ag_news_token_set_recall": 0.346416504838885,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 10000
    },
    {
      "epoch": 1.92,
      "eval_anthropic_toxic_prompts_accuracy": 0.073375,
      "eval_anthropic_toxic_prompts_bleu_score": 1.5396786676637733,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.05939510111961594,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.4273219406604767,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.01240072367465761,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0078125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.007812499866294757,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 4.297293663024902,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 3.308,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.592,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.124,
      "eval_anthropic_toxic_prompts_num_pred_words": 44.402,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 73.50060687775462,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.11638285444199434,
      "eval_anthropic_toxic_prompts_runtime": 9.8467,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.778,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.102,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.21663816968739305,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005421078331209499,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.21744291581774905,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.2613636287290501,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 10000
    },
    {
      "epoch": 1.92,
      "eval_arxiv_accuracy": 0.27715625,
      "eval_arxiv_bleu_score": 2.1180178192309875,
      "eval_arxiv_bleu_score_sem": 0.06668480554139618,
      "eval_arxiv_emb_cos_sim": 0.45609450340270996,
      "eval_arxiv_emb_cos_sim_sem": 0.010038108982939059,
      "eval_arxiv_emb_top1_equal": 0.140625,
      "eval_arxiv_emb_top1_equal_sem": 0.030847557647994725,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 4.365558624267578,
      "eval_arxiv_n_ngrams_match_1": 8.536,
      "eval_arxiv_n_ngrams_match_2": 1.09,
      "eval_arxiv_n_ngrams_match_3": 0.14,
      "eval_arxiv_num_pred_words": 36.128,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 78.6933476792083,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.20092655363184458,
      "eval_arxiv_runtime": 10.6599,
      "eval_arxiv_samples_per_second": 46.905,
      "eval_arxiv_steps_per_second": 0.094,
      "eval_arxiv_token_set_f1": 0.20857388365008178,
      "eval_arxiv_token_set_f1_sem": 0.004363607045164412,
      "eval_arxiv_token_set_precision": 0.14981453338106698,
      "eval_arxiv_token_set_recall": 0.3936188877012378,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 10000
    },
    {
      "epoch": 1.92,
      "eval_python_code_alpaca_accuracy": 0.106625,
      "eval_python_code_alpaca_bleu_score": 2.306136867673515,
      "eval_python_code_alpaca_bleu_score_sem": 0.06733216200187893,
      "eval_python_code_alpaca_emb_cos_sim": 0.36762118339538574,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009133783810776045,
      "eval_python_code_alpaca_emb_top1_equal": 0.0234375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.013424676090873717,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 4.2383131980896,
      "eval_python_code_alpaca_n_ngrams_match_1": 4.888,
      "eval_python_code_alpaca_n_ngrams_match_2": 0.696,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.156,
      "eval_python_code_alpaca_num_pred_words": 33.856,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 69.29087322928436,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.17943855514448526,
      "eval_python_code_alpaca_runtime": 10.1289,
      "eval_python_code_alpaca_samples_per_second": 49.364,
      "eval_python_code_alpaca_steps_per_second": 0.099,
      "eval_python_code_alpaca_token_set_f1": 0.27891009379117004,
      "eval_python_code_alpaca_token_set_f1_sem": 0.004791397788533149,
      "eval_python_code_alpaca_token_set_precision": 0.2380736863803701,
      "eval_python_code_alpaca_token_set_recall": 0.4021618677653544,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 10000
    },
    {
      "epoch": 1.92,
      "eval_wikibio_accuracy": 0.2575625,
      "eval_wikibio_bleu_score": 4.0528717657567555,
      "eval_wikibio_bleu_score_sem": 0.15489019946329716,
      "eval_wikibio_emb_cos_sim": 0.6026620864868164,
      "eval_wikibio_emb_cos_sim_sem": 0.011716515687198803,
      "eval_wikibio_emb_top1_equal": 0.046875,
      "eval_wikibio_emb_top1_equal_sem": 0.01875615101164758,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.687716960906982,
      "eval_wikibio_n_ngrams_match_1": 8.38,
      "eval_wikibio_n_ngrams_match_2": 2.432,
      "eval_wikibio_n_ngrams_match_3": 0.738,
      "eval_wikibio_num_pred_words": 37.304,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 108.60494721269654,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.2670667623430598,
      "eval_wikibio_runtime": 10.2666,
      "eval_wikibio_samples_per_second": 48.702,
      "eval_wikibio_steps_per_second": 0.097,
      "eval_wikibio_token_set_f1": 0.27028002450450317,
      "eval_wikibio_token_set_f1_sem": 0.005603489513180497,
      "eval_wikibio_token_set_precision": 0.26877049911201645,
      "eval_wikibio_token_set_recall": 0.2900711922225668,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 10000
    },
    {
      "epoch": 1.92,
      "eval_nq_accuracy": 0.42325,
      "eval_nq_bleu_score": 6.175180435775883,
      "eval_nq_bleu_score_sem": 0.2765888910925053,
      "eval_nq_emb_cos_sim": 0.62808758020401,
      "eval_nq_emb_cos_sim_sem": 0.011437373158244104,
      "eval_nq_emb_top1_equal": 0.1171875,
      "eval_nq_emb_top1_equal_sem": 0.02854125312152025,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 3.0615806579589844,
      "eval_nq_n_ngrams_match_1": 16.964,
      "eval_nq_n_ngrams_match_2": 4.73,
      "eval_nq_n_ngrams_match_3": 1.672,
      "eval_nq_num_pred_words": 47.94,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 21.361295392277313,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.3126946565468729,
      "eval_nq_runtime": 10.6225,
      "eval_nq_samples_per_second": 47.07,
      "eval_nq_steps_per_second": 0.094,
      "eval_nq_token_set_f1": 0.34982885493782756,
      "eval_nq_token_set_f1_sem": 0.0048272711436809015,
      "eval_nq_token_set_precision": 0.2884604033974762,
      "eval_nq_token_set_recall": 0.47479353318137724,
      "eval_nq_true_num_tokens": 64.0,
      "step": 10000
    },
    {
      "epoch": 1.92,
      "learning_rate": 0.001,
      "loss": 3.431,
      "step": 10008
    },
    {
      "epoch": 1.92,
      "learning_rate": 0.001,
      "loss": 3.4281,
      "step": 10020
    },
    {
      "epoch": 1.93,
      "learning_rate": 0.001,
      "loss": 3.4155,
      "step": 10032
    },
    {
      "epoch": 1.93,
      "learning_rate": 0.001,
      "loss": 3.4156,
      "step": 10044
    },
    {
      "epoch": 1.93,
      "learning_rate": 0.001,
      "loss": 3.4122,
      "step": 10056
    },
    {
      "epoch": 1.93,
      "learning_rate": 0.001,
      "loss": 3.4074,
      "step": 10068
    },
    {
      "epoch": 1.94,
      "learning_rate": 0.001,
      "loss": 3.4036,
      "step": 10080
    },
    {
      "epoch": 1.94,
      "learning_rate": 0.001,
      "loss": 3.4091,
      "step": 10092
    },
    {
      "epoch": 1.94,
      "learning_rate": 0.001,
      "loss": 3.4035,
      "step": 10104
    },
    {
      "epoch": 1.94,
      "learning_rate": 0.001,
      "loss": 3.4123,
      "step": 10116
    },
    {
      "epoch": 1.94,
      "learning_rate": 0.001,
      "loss": 3.4017,
      "step": 10128
    },
    {
      "epoch": 1.95,
      "learning_rate": 0.001,
      "loss": 3.4128,
      "step": 10140
    },
    {
      "epoch": 1.95,
      "learning_rate": 0.001,
      "loss": 3.3969,
      "step": 10152
    },
    {
      "epoch": 1.95,
      "learning_rate": 0.001,
      "loss": 3.4173,
      "step": 10164
    },
    {
      "epoch": 1.95,
      "learning_rate": 0.001,
      "loss": 3.4068,
      "step": 10176
    },
    {
      "epoch": 1.96,
      "learning_rate": 0.001,
      "loss": 3.4229,
      "step": 10188
    },
    {
      "epoch": 1.96,
      "learning_rate": 0.001,
      "loss": 3.4106,
      "step": 10200
    },
    {
      "epoch": 1.96,
      "learning_rate": 0.001,
      "loss": 3.4135,
      "step": 10212
    },
    {
      "epoch": 1.96,
      "learning_rate": 0.001,
      "loss": 3.4101,
      "step": 10224
    },
    {
      "epoch": 1.97,
      "learning_rate": 0.001,
      "loss": 3.4037,
      "step": 10236
    },
    {
      "epoch": 1.97,
      "learning_rate": 0.001,
      "loss": 3.3929,
      "step": 10248
    },
    {
      "epoch": 1.97,
      "learning_rate": 0.001,
      "loss": 3.4174,
      "step": 10260
    },
    {
      "epoch": 1.97,
      "learning_rate": 0.001,
      "loss": 3.4023,
      "step": 10272
    },
    {
      "epoch": 1.97,
      "learning_rate": 0.001,
      "loss": 3.4014,
      "step": 10284
    },
    {
      "epoch": 1.98,
      "learning_rate": 0.001,
      "loss": 3.4067,
      "step": 10296
    },
    {
      "epoch": 1.98,
      "learning_rate": 0.001,
      "loss": 3.4152,
      "step": 10308
    },
    {
      "epoch": 1.98,
      "learning_rate": 0.001,
      "loss": 3.404,
      "step": 10320
    },
    {
      "epoch": 1.98,
      "learning_rate": 0.001,
      "loss": 3.3939,
      "step": 10332
    },
    {
      "epoch": 1.99,
      "learning_rate": 0.001,
      "loss": 3.3946,
      "step": 10344
    },
    {
      "epoch": 1.99,
      "learning_rate": 0.001,
      "loss": 3.3971,
      "step": 10356
    },
    {
      "epoch": 1.99,
      "learning_rate": 0.001,
      "loss": 3.4069,
      "step": 10368
    },
    {
      "epoch": 1.99,
      "learning_rate": 0.001,
      "loss": 3.4033,
      "step": 10380
    },
    {
      "epoch": 2.0,
      "learning_rate": 0.001,
      "loss": 3.4036,
      "step": 10392
    },
    {
      "epoch": 2.0,
      "learning_rate": 0.001,
      "loss": 3.3982,
      "step": 10404
    },
    {
      "epoch": 2.0,
      "learning_rate": 0.001,
      "loss": 3.3945,
      "step": 10416
    },
    {
      "epoch": 2.0,
      "learning_rate": 0.001,
      "loss": 3.3884,
      "step": 10428
    },
    {
      "epoch": 2.0,
      "learning_rate": 0.001,
      "loss": 3.379,
      "step": 10440
    },
    {
      "epoch": 2.01,
      "learning_rate": 0.001,
      "loss": 3.3761,
      "step": 10452
    },
    {
      "epoch": 2.01,
      "learning_rate": 0.001,
      "loss": 3.3945,
      "step": 10464
    },
    {
      "epoch": 2.01,
      "learning_rate": 0.001,
      "loss": 3.3764,
      "step": 10476
    },
    {
      "epoch": 2.01,
      "learning_rate": 0.001,
      "loss": 3.3722,
      "step": 10488
    },
    {
      "epoch": 2.02,
      "learning_rate": 0.001,
      "loss": 3.3789,
      "step": 10500
    },
    {
      "epoch": 2.02,
      "learning_rate": 0.001,
      "loss": 3.3745,
      "step": 10512
    },
    {
      "epoch": 2.02,
      "learning_rate": 0.001,
      "loss": 3.3741,
      "step": 10524
    },
    {
      "epoch": 2.02,
      "learning_rate": 0.001,
      "loss": 3.3759,
      "step": 10536
    },
    {
      "epoch": 2.03,
      "learning_rate": 0.001,
      "loss": 3.3591,
      "step": 10548
    },
    {
      "epoch": 2.03,
      "learning_rate": 0.001,
      "loss": 3.3719,
      "step": 10560
    },
    {
      "epoch": 2.03,
      "learning_rate": 0.001,
      "loss": 3.3824,
      "step": 10572
    },
    {
      "epoch": 2.03,
      "learning_rate": 0.001,
      "loss": 3.3717,
      "step": 10584
    },
    {
      "epoch": 2.03,
      "learning_rate": 0.001,
      "loss": 3.3741,
      "step": 10596
    },
    {
      "epoch": 2.04,
      "learning_rate": 0.001,
      "loss": 3.3633,
      "step": 10608
    },
    {
      "epoch": 2.04,
      "learning_rate": 0.001,
      "loss": 3.378,
      "step": 10620
    },
    {
      "epoch": 2.04,
      "eval_ag_news_accuracy": 0.2460625,
      "eval_ag_news_bleu_score": 2.5823070923245934,
      "eval_ag_news_bleu_score_sem": 0.09557406089950654,
      "eval_ag_news_emb_cos_sim": 0.5710728168487549,
      "eval_ag_news_emb_cos_sim_sem": 0.012378251501867207,
      "eval_ag_news_emb_top1_equal": 0.109375,
      "eval_ag_news_emb_top1_equal_sem": 0.027695207821224692,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 4.5237579345703125,
      "eval_ag_news_n_ngrams_match_1": 8.692,
      "eval_ag_news_n_ngrams_match_2": 1.34,
      "eval_ag_news_n_ngrams_match_3": 0.308,
      "eval_ag_news_num_pred_words": 44.374,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 92.18135941410526,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.2078462630268746,
      "eval_ag_news_runtime": 10.746,
      "eval_ag_news_samples_per_second": 46.529,
      "eval_ag_news_steps_per_second": 0.093,
      "eval_ag_news_token_set_f1": 0.23999342859317674,
      "eval_ag_news_token_set_f1_sem": 0.0045327772425795745,
      "eval_ag_news_token_set_precision": 0.19763210209260546,
      "eval_ag_news_token_set_recall": 0.34862594959553017,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 10625
    },
    {
      "epoch": 2.04,
      "eval_anthropic_toxic_prompts_accuracy": 0.07575,
      "eval_anthropic_toxic_prompts_bleu_score": 1.5667824704383029,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.0684543929000709,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.42869311571121216,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.012086998339410706,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0078125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.007812499866294757,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 4.234001636505127,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 3.188,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.6,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.164,
      "eval_anthropic_toxic_prompts_num_pred_words": 44.038,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 68.99276448103703,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.11331182812465268,
      "eval_anthropic_toxic_prompts_runtime": 9.8803,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.606,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.101,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.2063507067504228,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005421787028026613,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.2067507138332873,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.25887168962558404,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 10625
    },
    {
      "epoch": 2.04,
      "eval_arxiv_accuracy": 0.27859375,
      "eval_arxiv_bleu_score": 2.2033503045504976,
      "eval_arxiv_bleu_score_sem": 0.06797319733926978,
      "eval_arxiv_emb_cos_sim": 0.4704684019088745,
      "eval_arxiv_emb_cos_sim_sem": 0.010251886359664918,
      "eval_arxiv_emb_top1_equal": 0.171875,
      "eval_arxiv_emb_top1_equal_sem": 0.03347745514062371,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 4.335144519805908,
      "eval_arxiv_n_ngrams_match_1": 8.744,
      "eval_arxiv_n_ngrams_match_2": 1.182,
      "eval_arxiv_n_ngrams_match_3": 0.15,
      "eval_arxiv_num_pred_words": 35.222,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 76.33599015511138,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.20541628902508738,
      "eval_arxiv_runtime": 10.3443,
      "eval_arxiv_samples_per_second": 48.336,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.2136171243119018,
      "eval_arxiv_token_set_f1_sem": 0.004410207072698844,
      "eval_arxiv_token_set_precision": 0.15367142683030394,
      "eval_arxiv_token_set_recall": 0.40260993530323613,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 10625
    },
    {
      "epoch": 2.04,
      "eval_python_code_alpaca_accuracy": 0.10709375,
      "eval_python_code_alpaca_bleu_score": 2.4331331266225,
      "eval_python_code_alpaca_bleu_score_sem": 0.07723671548849642,
      "eval_python_code_alpaca_emb_cos_sim": 0.3782823979854584,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010019162657444396,
      "eval_python_code_alpaca_emb_top1_equal": 0.015625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.011004959004867984,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 4.180656909942627,
      "eval_python_code_alpaca_n_ngrams_match_1": 5.046,
      "eval_python_code_alpaca_n_ngrams_match_2": 0.798,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.206,
      "eval_python_code_alpaca_num_pred_words": 35.73,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 65.4088067996686,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.17535980862514963,
      "eval_python_code_alpaca_runtime": 10.6607,
      "eval_python_code_alpaca_samples_per_second": 46.901,
      "eval_python_code_alpaca_steps_per_second": 0.094,
      "eval_python_code_alpaca_token_set_f1": 0.2851954812355219,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005362792811425221,
      "eval_python_code_alpaca_token_set_precision": 0.2508346643902476,
      "eval_python_code_alpaca_token_set_recall": 0.3983363955063751,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 10625
    },
    {
      "epoch": 2.04,
      "eval_wikibio_accuracy": 0.26234375,
      "eval_wikibio_bleu_score": 4.121821244516629,
      "eval_wikibio_bleu_score_sem": 0.17431240658392042,
      "eval_wikibio_emb_cos_sim": 0.5726078748703003,
      "eval_wikibio_emb_cos_sim_sem": 0.013797334574139994,
      "eval_wikibio_emb_top1_equal": 0.046875,
      "eval_wikibio_emb_top1_equal_sem": 0.01875615101164758,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.681370258331299,
      "eval_wikibio_n_ngrams_match_1": 7.89,
      "eval_wikibio_n_ngrams_match_2": 2.302,
      "eval_wikibio_n_ngrams_match_3": 0.71,
      "eval_wikibio_num_pred_words": 34.322,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 107.91784663240344,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.25907671373595287,
      "eval_wikibio_runtime": 10.0235,
      "eval_wikibio_samples_per_second": 49.883,
      "eval_wikibio_steps_per_second": 0.1,
      "eval_wikibio_token_set_f1": 0.2583744902020269,
      "eval_wikibio_token_set_f1_sem": 0.0062210127601358195,
      "eval_wikibio_token_set_precision": 0.25181756012882145,
      "eval_wikibio_token_set_recall": 0.2830897168428563,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 10625
    },
    {
      "epoch": 2.04,
      "eval_nq_accuracy": 0.42671875,
      "eval_nq_bleu_score": 6.514900534831086,
      "eval_nq_bleu_score_sem": 0.287436139214196,
      "eval_nq_emb_cos_sim": 0.6404000520706177,
      "eval_nq_emb_cos_sim_sem": 0.012100701333532811,
      "eval_nq_emb_top1_equal": 0.15625,
      "eval_nq_emb_top1_equal_sem": 0.03221922156442571,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 3.0305635929107666,
      "eval_nq_n_ngrams_match_1": 17.164,
      "eval_nq_n_ngrams_match_2": 4.976,
      "eval_nq_n_ngrams_match_3": 1.864,
      "eval_nq_num_pred_words": 47.74,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 20.708900690670102,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.31591395402792466,
      "eval_nq_runtime": 10.8708,
      "eval_nq_samples_per_second": 45.995,
      "eval_nq_steps_per_second": 0.092,
      "eval_nq_token_set_f1": 0.3545936741460492,
      "eval_nq_token_set_f1_sem": 0.004861322315256306,
      "eval_nq_token_set_precision": 0.29108134708930233,
      "eval_nq_token_set_recall": 0.48476600959067834,
      "eval_nq_true_num_tokens": 64.0,
      "step": 10625
    },
    {
      "epoch": 2.04,
      "learning_rate": 0.001,
      "loss": 3.3669,
      "step": 10632
    },
    {
      "epoch": 2.04,
      "learning_rate": 0.001,
      "loss": 3.3898,
      "step": 10644
    },
    {
      "epoch": 2.05,
      "learning_rate": 0.001,
      "loss": 3.366,
      "step": 10656
    },
    {
      "epoch": 2.05,
      "learning_rate": 0.001,
      "loss": 3.3637,
      "step": 10668
    },
    {
      "epoch": 2.05,
      "learning_rate": 0.001,
      "loss": 3.3699,
      "step": 10680
    },
    {
      "epoch": 2.05,
      "learning_rate": 0.001,
      "loss": 3.3736,
      "step": 10692
    },
    {
      "epoch": 2.06,
      "learning_rate": 0.001,
      "loss": 3.3761,
      "step": 10704
    },
    {
      "epoch": 2.06,
      "learning_rate": 0.001,
      "loss": 3.3563,
      "step": 10716
    },
    {
      "epoch": 2.06,
      "learning_rate": 0.001,
      "loss": 3.3725,
      "step": 10728
    },
    {
      "epoch": 2.06,
      "learning_rate": 0.001,
      "loss": 3.372,
      "step": 10740
    },
    {
      "epoch": 2.06,
      "learning_rate": 0.001,
      "loss": 3.3638,
      "step": 10752
    },
    {
      "epoch": 2.07,
      "learning_rate": 0.001,
      "loss": 3.3702,
      "step": 10764
    },
    {
      "epoch": 2.07,
      "learning_rate": 0.001,
      "loss": 3.3725,
      "step": 10776
    },
    {
      "epoch": 2.07,
      "learning_rate": 0.001,
      "loss": 3.3669,
      "step": 10788
    },
    {
      "epoch": 2.07,
      "learning_rate": 0.001,
      "loss": 3.3658,
      "step": 10800
    },
    {
      "epoch": 2.08,
      "learning_rate": 0.001,
      "loss": 3.3695,
      "step": 10812
    },
    {
      "epoch": 2.08,
      "learning_rate": 0.001,
      "loss": 3.3622,
      "step": 10824
    },
    {
      "epoch": 2.08,
      "learning_rate": 0.001,
      "loss": 3.3703,
      "step": 10836
    },
    {
      "epoch": 2.08,
      "learning_rate": 0.001,
      "loss": 3.3648,
      "step": 10848
    },
    {
      "epoch": 2.09,
      "learning_rate": 0.001,
      "loss": 3.3652,
      "step": 10860
    },
    {
      "epoch": 2.09,
      "learning_rate": 0.001,
      "loss": 3.37,
      "step": 10872
    },
    {
      "epoch": 2.09,
      "learning_rate": 0.001,
      "loss": 3.3578,
      "step": 10884
    },
    {
      "epoch": 2.09,
      "learning_rate": 0.001,
      "loss": 3.3616,
      "step": 10896
    },
    {
      "epoch": 2.09,
      "learning_rate": 0.001,
      "loss": 3.3533,
      "step": 10908
    },
    {
      "epoch": 2.1,
      "learning_rate": 0.001,
      "loss": 3.3664,
      "step": 10920
    },
    {
      "epoch": 2.1,
      "learning_rate": 0.001,
      "loss": 3.3681,
      "step": 10932
    },
    {
      "epoch": 2.1,
      "learning_rate": 0.001,
      "loss": 3.3596,
      "step": 10944
    },
    {
      "epoch": 2.1,
      "learning_rate": 0.001,
      "loss": 3.3547,
      "step": 10956
    },
    {
      "epoch": 2.11,
      "learning_rate": 0.001,
      "loss": 3.3687,
      "step": 10968
    },
    {
      "epoch": 2.11,
      "learning_rate": 0.001,
      "loss": 3.365,
      "step": 10980
    },
    {
      "epoch": 2.11,
      "learning_rate": 0.001,
      "loss": 3.3633,
      "step": 10992
    },
    {
      "epoch": 2.11,
      "learning_rate": 0.001,
      "loss": 3.351,
      "step": 11004
    },
    {
      "epoch": 2.12,
      "learning_rate": 0.001,
      "loss": 3.3533,
      "step": 11016
    },
    {
      "epoch": 2.12,
      "learning_rate": 0.001,
      "loss": 3.3579,
      "step": 11028
    },
    {
      "epoch": 2.12,
      "learning_rate": 0.001,
      "loss": 3.3604,
      "step": 11040
    },
    {
      "epoch": 2.12,
      "learning_rate": 0.001,
      "loss": 3.3567,
      "step": 11052
    },
    {
      "epoch": 2.12,
      "learning_rate": 0.001,
      "loss": 3.3409,
      "step": 11064
    },
    {
      "epoch": 2.13,
      "learning_rate": 0.001,
      "loss": 3.3481,
      "step": 11076
    },
    {
      "epoch": 2.13,
      "learning_rate": 0.001,
      "loss": 3.3667,
      "step": 11088
    },
    {
      "epoch": 2.13,
      "learning_rate": 0.001,
      "loss": 3.3462,
      "step": 11100
    },
    {
      "epoch": 2.13,
      "learning_rate": 0.001,
      "loss": 3.3446,
      "step": 11112
    },
    {
      "epoch": 2.14,
      "learning_rate": 0.001,
      "loss": 3.3504,
      "step": 11124
    },
    {
      "epoch": 2.14,
      "learning_rate": 0.001,
      "loss": 3.3443,
      "step": 11136
    },
    {
      "epoch": 2.14,
      "learning_rate": 0.001,
      "loss": 3.3672,
      "step": 11148
    },
    {
      "epoch": 2.14,
      "learning_rate": 0.001,
      "loss": 3.3504,
      "step": 11160
    },
    {
      "epoch": 2.15,
      "learning_rate": 0.001,
      "loss": 3.3632,
      "step": 11172
    },
    {
      "epoch": 2.15,
      "learning_rate": 0.001,
      "loss": 3.348,
      "step": 11184
    },
    {
      "epoch": 2.15,
      "learning_rate": 0.001,
      "loss": 3.3525,
      "step": 11196
    },
    {
      "epoch": 2.15,
      "learning_rate": 0.001,
      "loss": 3.3268,
      "step": 11208
    },
    {
      "epoch": 2.15,
      "learning_rate": 0.001,
      "loss": 3.3477,
      "step": 11220
    },
    {
      "epoch": 2.16,
      "learning_rate": 0.001,
      "loss": 3.347,
      "step": 11232
    },
    {
      "epoch": 2.16,
      "learning_rate": 0.001,
      "loss": 3.3552,
      "step": 11244
    },
    {
      "epoch": 2.16,
      "eval_ag_news_accuracy": 0.24784375,
      "eval_ag_news_bleu_score": 2.7445599258443236,
      "eval_ag_news_bleu_score_sem": 0.09877952293750877,
      "eval_ag_news_emb_cos_sim": 0.5810866355895996,
      "eval_ag_news_emb_cos_sim_sem": 0.01221696078097208,
      "eval_ag_news_emb_top1_equal": 0.1171875,
      "eval_ag_news_emb_top1_equal_sem": 0.02854125312152025,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 4.501502990722656,
      "eval_ag_news_n_ngrams_match_1": 8.896,
      "eval_ag_news_n_ngrams_match_2": 1.484,
      "eval_ag_news_n_ngrams_match_3": 0.336,
      "eval_ag_news_num_pred_words": 43.876,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 90.15252793820358,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.2109255283508678,
      "eval_ag_news_runtime": 10.3507,
      "eval_ag_news_samples_per_second": 48.306,
      "eval_ag_news_steps_per_second": 0.097,
      "eval_ag_news_token_set_f1": 0.24467123734232962,
      "eval_ag_news_token_set_f1_sem": 0.00464406244716166,
      "eval_ag_news_token_set_precision": 0.20432879044949856,
      "eval_ag_news_token_set_recall": 0.3474431784687123,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 11250
    },
    {
      "epoch": 2.16,
      "eval_anthropic_toxic_prompts_accuracy": 0.076,
      "eval_anthropic_toxic_prompts_bleu_score": 1.6010230290027194,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.06516102392011566,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.4394418001174927,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011667782076519067,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.046875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01875615101164758,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 4.201409339904785,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 3.322,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.664,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.158,
      "eval_anthropic_toxic_prompts_num_pred_words": 44.876,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 66.7803810069828,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.11635369473081233,
      "eval_anthropic_toxic_prompts_runtime": 9.951,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.246,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.1,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.21500305601408232,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0055104148309599145,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.22044425128328066,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.2552014335409628,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 11250
    },
    {
      "epoch": 2.16,
      "eval_arxiv_accuracy": 0.2788125,
      "eval_arxiv_bleu_score": 2.331719697644204,
      "eval_arxiv_bleu_score_sem": 0.0718559434105871,
      "eval_arxiv_emb_cos_sim": 0.47331756353378296,
      "eval_arxiv_emb_cos_sim_sem": 0.010864395972361612,
      "eval_arxiv_emb_top1_equal": 0.1328125,
      "eval_arxiv_emb_top1_equal_sem": 0.030114394778901498,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 4.310990810394287,
      "eval_arxiv_n_ngrams_match_1": 8.764,
      "eval_arxiv_n_ngrams_match_2": 1.314,
      "eval_arxiv_n_ngrams_match_3": 0.182,
      "eval_arxiv_num_pred_words": 36.87,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 74.51428190192313,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.20464207246615473,
      "eval_arxiv_runtime": 10.7068,
      "eval_arxiv_samples_per_second": 46.699,
      "eval_arxiv_steps_per_second": 0.093,
      "eval_arxiv_token_set_f1": 0.2151186928719234,
      "eval_arxiv_token_set_f1_sem": 0.004250104115131363,
      "eval_arxiv_token_set_precision": 0.15530441116875587,
      "eval_arxiv_token_set_recall": 0.40453428097621075,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 11250
    },
    {
      "epoch": 2.16,
      "eval_python_code_alpaca_accuracy": 0.10753125,
      "eval_python_code_alpaca_bleu_score": 2.33818332320569,
      "eval_python_code_alpaca_bleu_score_sem": 0.07790112017255417,
      "eval_python_code_alpaca_emb_cos_sim": 0.38225212693214417,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009633005987397997,
      "eval_python_code_alpaca_emb_top1_equal": 0.0078125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.007812499866294757,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 4.1621551513671875,
      "eval_python_code_alpaca_n_ngrams_match_1": 4.762,
      "eval_python_code_alpaca_n_ngrams_match_2": 0.676,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.144,
      "eval_python_code_alpaca_num_pred_words": 33.514,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 64.20975533219276,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.17286769300553573,
      "eval_python_code_alpaca_runtime": 10.8297,
      "eval_python_code_alpaca_samples_per_second": 46.169,
      "eval_python_code_alpaca_steps_per_second": 0.092,
      "eval_python_code_alpaca_token_set_f1": 0.27530757024370717,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005168029282860783,
      "eval_python_code_alpaca_token_set_precision": 0.2346115716046044,
      "eval_python_code_alpaca_token_set_recall": 0.3960837590032956,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 11250
    },
    {
      "epoch": 2.16,
      "eval_wikibio_accuracy": 0.2603125,
      "eval_wikibio_bleu_score": 4.025779789448843,
      "eval_wikibio_bleu_score_sem": 0.17339246261718744,
      "eval_wikibio_emb_cos_sim": 0.6065972447395325,
      "eval_wikibio_emb_cos_sim_sem": 0.012248833018069427,
      "eval_wikibio_emb_top1_equal": 0.0703125,
      "eval_wikibio_emb_top1_equal_sem": 0.022687306110270106,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.674853324890137,
      "eval_wikibio_n_ngrams_match_1": 8.23,
      "eval_wikibio_n_ngrams_match_2": 2.33,
      "eval_wikibio_n_ngrams_match_3": 0.736,
      "eval_wikibio_num_pred_words": 36.226,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 107.21683989690757,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.2648880309162607,
      "eval_wikibio_runtime": 10.1697,
      "eval_wikibio_samples_per_second": 49.166,
      "eval_wikibio_steps_per_second": 0.098,
      "eval_wikibio_token_set_f1": 0.264549726186732,
      "eval_wikibio_token_set_f1_sem": 0.005905943122690257,
      "eval_wikibio_token_set_precision": 0.26461674832557164,
      "eval_wikibio_token_set_recall": 0.28152007760025227,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 11250
    },
    {
      "epoch": 2.16,
      "eval_nq_accuracy": 0.4299375,
      "eval_nq_bleu_score": 6.468436321292758,
      "eval_nq_bleu_score_sem": 0.2825036321460263,
      "eval_nq_emb_cos_sim": 0.647667407989502,
      "eval_nq_emb_cos_sim_sem": 0.011768005965188956,
      "eval_nq_emb_top1_equal": 0.109375,
      "eval_nq_emb_top1_equal_sem": 0.027695207821224692,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.998945474624634,
      "eval_nq_n_ngrams_match_1": 17.144,
      "eval_nq_n_ngrams_match_2": 4.894,
      "eval_nq_n_ngrams_match_3": 1.824,
      "eval_nq_num_pred_words": 47.63,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 20.06436737869699,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.318101444874767,
      "eval_nq_runtime": 10.4086,
      "eval_nq_samples_per_second": 48.037,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.35412734518069694,
      "eval_nq_token_set_f1_sem": 0.0049829476778019055,
      "eval_nq_token_set_precision": 0.29336270290241306,
      "eval_nq_token_set_recall": 0.4751326232364711,
      "eval_nq_true_num_tokens": 64.0,
      "step": 11250
    },
    {
      "epoch": 2.16,
      "learning_rate": 0.001,
      "loss": 3.3426,
      "step": 11256
    },
    {
      "epoch": 2.16,
      "learning_rate": 0.001,
      "loss": 3.3484,
      "step": 11268
    },
    {
      "epoch": 2.17,
      "learning_rate": 0.001,
      "loss": 3.3409,
      "step": 11280
    },
    {
      "epoch": 2.17,
      "learning_rate": 0.001,
      "loss": 3.347,
      "step": 11292
    },
    {
      "epoch": 2.17,
      "learning_rate": 0.001,
      "loss": 3.3473,
      "step": 11304
    },
    {
      "epoch": 2.17,
      "learning_rate": 0.001,
      "loss": 3.3389,
      "step": 11316
    },
    {
      "epoch": 2.18,
      "learning_rate": 0.001,
      "loss": 3.3433,
      "step": 11328
    },
    {
      "epoch": 2.18,
      "learning_rate": 0.001,
      "loss": 3.3393,
      "step": 11340
    },
    {
      "epoch": 2.18,
      "learning_rate": 0.001,
      "loss": 3.3541,
      "step": 11352
    },
    {
      "epoch": 2.18,
      "learning_rate": 0.001,
      "loss": 3.3482,
      "step": 11364
    },
    {
      "epoch": 2.18,
      "learning_rate": 0.001,
      "loss": 3.3396,
      "step": 11376
    },
    {
      "epoch": 2.19,
      "learning_rate": 0.001,
      "loss": 3.3381,
      "step": 11388
    },
    {
      "epoch": 2.19,
      "learning_rate": 0.001,
      "loss": 3.3423,
      "step": 11400
    },
    {
      "epoch": 2.19,
      "learning_rate": 0.001,
      "loss": 3.3383,
      "step": 11412
    },
    {
      "epoch": 2.19,
      "learning_rate": 0.001,
      "loss": 3.3368,
      "step": 11424
    },
    {
      "epoch": 2.2,
      "learning_rate": 0.001,
      "loss": 3.3477,
      "step": 11436
    },
    {
      "epoch": 2.2,
      "learning_rate": 0.001,
      "loss": 3.3431,
      "step": 11448
    },
    {
      "epoch": 2.2,
      "learning_rate": 0.001,
      "loss": 3.3404,
      "step": 11460
    },
    {
      "epoch": 2.2,
      "learning_rate": 0.001,
      "loss": 3.3363,
      "step": 11472
    },
    {
      "epoch": 2.21,
      "learning_rate": 0.001,
      "loss": 3.3391,
      "step": 11484
    },
    {
      "epoch": 2.21,
      "learning_rate": 0.001,
      "loss": 3.3476,
      "step": 11496
    },
    {
      "epoch": 2.21,
      "learning_rate": 0.001,
      "loss": 3.3461,
      "step": 11508
    },
    {
      "epoch": 2.21,
      "learning_rate": 0.001,
      "loss": 3.334,
      "step": 11520
    },
    {
      "epoch": 2.21,
      "learning_rate": 0.001,
      "loss": 3.349,
      "step": 11532
    },
    {
      "epoch": 2.22,
      "learning_rate": 0.001,
      "loss": 3.3412,
      "step": 11544
    },
    {
      "epoch": 2.22,
      "learning_rate": 0.001,
      "loss": 3.3314,
      "step": 11556
    },
    {
      "epoch": 2.22,
      "learning_rate": 0.001,
      "loss": 3.3283,
      "step": 11568
    },
    {
      "epoch": 2.22,
      "learning_rate": 0.001,
      "loss": 3.3298,
      "step": 11580
    },
    {
      "epoch": 2.23,
      "learning_rate": 0.001,
      "loss": 3.3416,
      "step": 11592
    },
    {
      "epoch": 2.23,
      "learning_rate": 0.001,
      "loss": 3.3344,
      "step": 11604
    },
    {
      "epoch": 2.23,
      "learning_rate": 0.001,
      "loss": 3.3361,
      "step": 11616
    },
    {
      "epoch": 2.23,
      "learning_rate": 0.001,
      "loss": 3.3334,
      "step": 11628
    },
    {
      "epoch": 2.24,
      "learning_rate": 0.001,
      "loss": 3.3384,
      "step": 11640
    },
    {
      "epoch": 2.24,
      "learning_rate": 0.001,
      "loss": 3.3416,
      "step": 11652
    },
    {
      "epoch": 2.24,
      "learning_rate": 0.001,
      "loss": 3.3276,
      "step": 11664
    },
    {
      "epoch": 2.24,
      "learning_rate": 0.001,
      "loss": 3.3344,
      "step": 11676
    },
    {
      "epoch": 2.24,
      "learning_rate": 0.001,
      "loss": 3.3316,
      "step": 11688
    },
    {
      "epoch": 2.25,
      "learning_rate": 0.001,
      "loss": 3.3375,
      "step": 11700
    },
    {
      "epoch": 2.25,
      "learning_rate": 0.001,
      "loss": 3.3329,
      "step": 11712
    },
    {
      "epoch": 2.25,
      "learning_rate": 0.001,
      "loss": 3.3339,
      "step": 11724
    },
    {
      "epoch": 2.25,
      "learning_rate": 0.001,
      "loss": 3.3366,
      "step": 11736
    },
    {
      "epoch": 2.26,
      "learning_rate": 0.001,
      "loss": 3.3447,
      "step": 11748
    },
    {
      "epoch": 2.26,
      "learning_rate": 0.001,
      "loss": 3.3198,
      "step": 11760
    },
    {
      "epoch": 2.26,
      "learning_rate": 0.001,
      "loss": 3.3338,
      "step": 11772
    },
    {
      "epoch": 2.26,
      "learning_rate": 0.001,
      "loss": 3.318,
      "step": 11784
    },
    {
      "epoch": 2.26,
      "learning_rate": 0.001,
      "loss": 3.3252,
      "step": 11796
    },
    {
      "epoch": 2.27,
      "learning_rate": 0.001,
      "loss": 3.3258,
      "step": 11808
    },
    {
      "epoch": 2.27,
      "learning_rate": 0.001,
      "loss": 3.3341,
      "step": 11820
    },
    {
      "epoch": 2.27,
      "learning_rate": 0.001,
      "loss": 3.32,
      "step": 11832
    },
    {
      "epoch": 2.27,
      "learning_rate": 0.001,
      "loss": 3.3266,
      "step": 11844
    },
    {
      "epoch": 2.28,
      "learning_rate": 0.001,
      "loss": 3.318,
      "step": 11856
    },
    {
      "epoch": 2.28,
      "learning_rate": 0.001,
      "loss": 3.3183,
      "step": 11868
    },
    {
      "epoch": 2.28,
      "eval_ag_news_accuracy": 0.24703125,
      "eval_ag_news_bleu_score": 2.7937015627662416,
      "eval_ag_news_bleu_score_sem": 0.09449300742565286,
      "eval_ag_news_emb_cos_sim": 0.5981181859970093,
      "eval_ag_news_emb_cos_sim_sem": 0.010756656773766624,
      "eval_ag_news_emb_top1_equal": 0.1328125,
      "eval_ag_news_emb_top1_equal_sem": 0.030114394778901498,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 4.4853644371032715,
      "eval_ag_news_n_ngrams_match_1": 9.444,
      "eval_ag_news_n_ngrams_match_2": 1.514,
      "eval_ag_news_n_ngrams_match_3": 0.324,
      "eval_ag_news_num_pred_words": 45.914,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 88.70927387354537,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.2223248034476243,
      "eval_ag_news_runtime": 10.5258,
      "eval_ag_news_samples_per_second": 47.502,
      "eval_ag_news_steps_per_second": 0.095,
      "eval_ag_news_token_set_f1": 0.2553805768334155,
      "eval_ag_news_token_set_f1_sem": 0.004149271939107957,
      "eval_ag_news_token_set_precision": 0.21490000811978566,
      "eval_ag_news_token_set_recall": 0.3457388318396937,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 11875
    },
    {
      "epoch": 2.28,
      "eval_anthropic_toxic_prompts_accuracy": 0.0768125,
      "eval_anthropic_toxic_prompts_bleu_score": 1.6148785945359487,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.06695166853089543,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.4605611562728882,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.012429816853931271,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0234375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.013424676090873717,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 4.194695949554443,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 3.338,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.618,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.152,
      "eval_anthropic_toxic_prompts_num_pred_words": 44.854,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 66.33355976242449,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.11675054713114878,
      "eval_anthropic_toxic_prompts_runtime": 10.0662,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.671,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.099,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.21671206336021467,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005479358237617751,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.22181571243632656,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.25917747894962717,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 11875
    },
    {
      "epoch": 2.28,
      "eval_arxiv_accuracy": 0.2815625,
      "eval_arxiv_bleu_score": 2.4608083050923413,
      "eval_arxiv_bleu_score_sem": 0.07339472201132706,
      "eval_arxiv_emb_cos_sim": 0.49826580286026,
      "eval_arxiv_emb_cos_sim_sem": 0.009700269723443353,
      "eval_arxiv_emb_top1_equal": 0.1796875,
      "eval_arxiv_emb_top1_equal_sem": 0.034068008879424266,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 4.294712543487549,
      "eval_arxiv_n_ngrams_match_1": 9.472,
      "eval_arxiv_n_ngrams_match_2": 1.352,
      "eval_arxiv_n_ngrams_match_3": 0.212,
      "eval_arxiv_num_pred_words": 37.294,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 73.31113765188522,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.21865798895946734,
      "eval_arxiv_runtime": 10.1772,
      "eval_arxiv_samples_per_second": 49.13,
      "eval_arxiv_steps_per_second": 0.098,
      "eval_arxiv_token_set_f1": 0.22903631514810155,
      "eval_arxiv_token_set_f1_sem": 0.004112179214568153,
      "eval_arxiv_token_set_precision": 0.168405257229587,
      "eval_arxiv_token_set_recall": 0.3998475453420297,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 11875
    },
    {
      "epoch": 2.28,
      "eval_python_code_alpaca_accuracy": 0.1076875,
      "eval_python_code_alpaca_bleu_score": 2.4303940005366877,
      "eval_python_code_alpaca_bleu_score_sem": 0.08203146448186399,
      "eval_python_code_alpaca_emb_cos_sim": 0.39726346731185913,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010924835870215251,
      "eval_python_code_alpaca_emb_top1_equal": 0.015625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.011004959004867984,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 4.135598659515381,
      "eval_python_code_alpaca_n_ngrams_match_1": 5.21,
      "eval_python_code_alpaca_n_ngrams_match_2": 0.806,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.19,
      "eval_python_code_alpaca_num_pred_words": 33.626,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 62.527012258929766,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.18712196486011473,
      "eval_python_code_alpaca_runtime": 10.6872,
      "eval_python_code_alpaca_samples_per_second": 46.785,
      "eval_python_code_alpaca_steps_per_second": 0.094,
      "eval_python_code_alpaca_token_set_f1": 0.2889798734703638,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0053625081564402585,
      "eval_python_code_alpaca_token_set_precision": 0.25991972374284805,
      "eval_python_code_alpaca_token_set_recall": 0.3795541481451212,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 11875
    },
    {
      "epoch": 2.28,
      "eval_wikibio_accuracy": 0.2579375,
      "eval_wikibio_bleu_score": 4.146975743252501,
      "eval_wikibio_bleu_score_sem": 0.1704537263692191,
      "eval_wikibio_emb_cos_sim": 0.5991557836532593,
      "eval_wikibio_emb_cos_sim_sem": 0.013391245737470482,
      "eval_wikibio_emb_top1_equal": 0.1015625,
      "eval_wikibio_emb_top1_equal_sem": 0.026804565886848545,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.702815532684326,
      "eval_wikibio_n_ngrams_match_1": 8.508,
      "eval_wikibio_n_ngrams_match_2": 2.502,
      "eval_wikibio_n_ngrams_match_3": 0.788,
      "eval_wikibio_num_pred_words": 37.052,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 110.25716850693988,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.27106466548154684,
      "eval_wikibio_runtime": 10.0116,
      "eval_wikibio_samples_per_second": 49.942,
      "eval_wikibio_steps_per_second": 0.1,
      "eval_wikibio_token_set_f1": 0.26870454642046887,
      "eval_wikibio_token_set_f1_sem": 0.0059767885878211574,
      "eval_wikibio_token_set_precision": 0.2682234422428167,
      "eval_wikibio_token_set_recall": 0.2876715398715617,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 11875
    },
    {
      "epoch": 2.28,
      "eval_nq_accuracy": 0.432125,
      "eval_nq_bleu_score": 6.927951880533188,
      "eval_nq_bleu_score_sem": 0.32423319331243844,
      "eval_nq_emb_cos_sim": 0.6612377762794495,
      "eval_nq_emb_cos_sim_sem": 0.011943865007689678,
      "eval_nq_emb_top1_equal": 0.140625,
      "eval_nq_emb_top1_equal_sem": 0.030847557647994725,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.968759059906006,
      "eval_nq_n_ngrams_match_1": 17.66,
      "eval_nq_n_ngrams_match_2": 5.206,
      "eval_nq_n_ngrams_match_3": 2.034,
      "eval_nq_num_pred_words": 48.306,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 19.46774629340781,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.32456458019104223,
      "eval_nq_runtime": 10.4751,
      "eval_nq_samples_per_second": 47.732,
      "eval_nq_steps_per_second": 0.095,
      "eval_nq_token_set_f1": 0.3622490786429294,
      "eval_nq_token_set_f1_sem": 0.004978429005560884,
      "eval_nq_token_set_precision": 0.299877244152493,
      "eval_nq_token_set_recall": 0.48329076637595264,
      "eval_nq_true_num_tokens": 64.0,
      "step": 11875
    },
    {
      "epoch": 2.28,
      "learning_rate": 0.001,
      "loss": 3.3233,
      "step": 11880
    },
    {
      "epoch": 2.28,
      "learning_rate": 0.001,
      "loss": 3.3263,
      "step": 11892
    },
    {
      "epoch": 2.29,
      "learning_rate": 0.001,
      "loss": 3.3272,
      "step": 11904
    },
    {
      "epoch": 2.29,
      "learning_rate": 0.001,
      "loss": 3.3166,
      "step": 11916
    },
    {
      "epoch": 2.29,
      "learning_rate": 0.001,
      "loss": 3.3213,
      "step": 11928
    },
    {
      "epoch": 2.29,
      "learning_rate": 0.001,
      "loss": 3.3213,
      "step": 11940
    },
    {
      "epoch": 2.29,
      "learning_rate": 0.001,
      "loss": 3.3227,
      "step": 11952
    },
    {
      "epoch": 2.3,
      "learning_rate": 0.001,
      "loss": 3.3134,
      "step": 11964
    },
    {
      "epoch": 2.3,
      "learning_rate": 0.001,
      "loss": 3.319,
      "step": 11976
    },
    {
      "epoch": 2.3,
      "learning_rate": 0.001,
      "loss": 3.3222,
      "step": 11988
    },
    {
      "epoch": 2.3,
      "learning_rate": 0.001,
      "loss": 3.3201,
      "step": 12000
    },
    {
      "epoch": 2.31,
      "learning_rate": 0.001,
      "loss": 3.3125,
      "step": 12012
    },
    {
      "epoch": 2.31,
      "learning_rate": 0.001,
      "loss": 3.3249,
      "step": 12024
    },
    {
      "epoch": 2.31,
      "learning_rate": 0.001,
      "loss": 3.3003,
      "step": 12036
    },
    {
      "epoch": 2.31,
      "learning_rate": 0.001,
      "loss": 3.3122,
      "step": 12048
    },
    {
      "epoch": 2.32,
      "learning_rate": 0.001,
      "loss": 3.3112,
      "step": 12060
    },
    {
      "epoch": 2.32,
      "learning_rate": 0.001,
      "loss": 3.3167,
      "step": 12072
    },
    {
      "epoch": 2.32,
      "learning_rate": 0.001,
      "loss": 3.3116,
      "step": 12084
    },
    {
      "epoch": 2.32,
      "learning_rate": 0.001,
      "loss": 3.3118,
      "step": 12096
    },
    {
      "epoch": 2.32,
      "learning_rate": 0.001,
      "loss": 3.3067,
      "step": 12108
    },
    {
      "epoch": 2.33,
      "learning_rate": 0.001,
      "loss": 3.3098,
      "step": 12120
    },
    {
      "epoch": 2.33,
      "learning_rate": 0.001,
      "loss": 3.3024,
      "step": 12132
    },
    {
      "epoch": 2.33,
      "learning_rate": 0.001,
      "loss": 3.3164,
      "step": 12144
    },
    {
      "epoch": 2.33,
      "learning_rate": 0.001,
      "loss": 3.3169,
      "step": 12156
    },
    {
      "epoch": 2.34,
      "learning_rate": 0.001,
      "loss": 3.2913,
      "step": 12168
    },
    {
      "epoch": 2.34,
      "learning_rate": 0.001,
      "loss": 3.3111,
      "step": 12180
    },
    {
      "epoch": 2.34,
      "learning_rate": 0.001,
      "loss": 3.3134,
      "step": 12192
    },
    {
      "epoch": 2.34,
      "learning_rate": 0.001,
      "loss": 3.3113,
      "step": 12204
    },
    {
      "epoch": 2.35,
      "learning_rate": 0.001,
      "loss": 3.3067,
      "step": 12216
    },
    {
      "epoch": 2.35,
      "learning_rate": 0.001,
      "loss": 3.3071,
      "step": 12228
    },
    {
      "epoch": 2.35,
      "learning_rate": 0.001,
      "loss": 3.2984,
      "step": 12240
    },
    {
      "epoch": 2.35,
      "learning_rate": 0.001,
      "loss": 3.2973,
      "step": 12252
    },
    {
      "epoch": 2.35,
      "learning_rate": 0.001,
      "loss": 3.2992,
      "step": 12264
    },
    {
      "epoch": 2.36,
      "learning_rate": 0.001,
      "loss": 3.2974,
      "step": 12276
    },
    {
      "epoch": 2.36,
      "learning_rate": 0.001,
      "loss": 3.3075,
      "step": 12288
    },
    {
      "epoch": 2.36,
      "learning_rate": 0.001,
      "loss": 3.3089,
      "step": 12300
    },
    {
      "epoch": 2.36,
      "learning_rate": 0.001,
      "loss": 3.2975,
      "step": 12312
    },
    {
      "epoch": 2.37,
      "learning_rate": 0.001,
      "loss": 3.311,
      "step": 12324
    },
    {
      "epoch": 2.37,
      "learning_rate": 0.001,
      "loss": 3.3121,
      "step": 12336
    },
    {
      "epoch": 2.37,
      "learning_rate": 0.001,
      "loss": 3.3084,
      "step": 12348
    },
    {
      "epoch": 2.37,
      "learning_rate": 0.001,
      "loss": 3.2951,
      "step": 12360
    },
    {
      "epoch": 2.38,
      "learning_rate": 0.001,
      "loss": 3.2934,
      "step": 12372
    },
    {
      "epoch": 2.38,
      "learning_rate": 0.001,
      "loss": 3.2989,
      "step": 12384
    },
    {
      "epoch": 2.38,
      "learning_rate": 0.001,
      "loss": 3.3096,
      "step": 12396
    },
    {
      "epoch": 2.38,
      "learning_rate": 0.001,
      "loss": 3.2998,
      "step": 12408
    },
    {
      "epoch": 2.38,
      "learning_rate": 0.001,
      "loss": 3.2956,
      "step": 12420
    },
    {
      "epoch": 2.39,
      "learning_rate": 0.001,
      "loss": 3.3046,
      "step": 12432
    },
    {
      "epoch": 2.39,
      "learning_rate": 0.001,
      "loss": 3.2991,
      "step": 12444
    },
    {
      "epoch": 2.39,
      "learning_rate": 0.001,
      "loss": 3.3048,
      "step": 12456
    },
    {
      "epoch": 2.39,
      "learning_rate": 0.001,
      "loss": 3.3134,
      "step": 12468
    },
    {
      "epoch": 2.4,
      "learning_rate": 0.001,
      "loss": 3.2976,
      "step": 12480
    },
    {
      "epoch": 2.4,
      "learning_rate": 0.001,
      "loss": 3.3001,
      "step": 12492
    },
    {
      "epoch": 2.4,
      "eval_ag_news_accuracy": 0.24971875,
      "eval_ag_news_bleu_score": 2.826092698746348,
      "eval_ag_news_bleu_score_sem": 0.10405368399502497,
      "eval_ag_news_emb_cos_sim": 0.6051392555236816,
      "eval_ag_news_emb_cos_sim_sem": 0.011451116982125704,
      "eval_ag_news_emb_top1_equal": 0.1328125,
      "eval_ag_news_emb_top1_equal_sem": 0.030114394778901498,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 4.447063446044922,
      "eval_ag_news_n_ngrams_match_1": 9.228,
      "eval_ag_news_n_ngrams_match_2": 1.486,
      "eval_ag_news_n_ngrams_match_3": 0.328,
      "eval_ag_news_num_pred_words": 44.924,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 85.37586469552387,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.2190468766261795,
      "eval_ag_news_runtime": 12.4371,
      "eval_ag_news_samples_per_second": 40.202,
      "eval_ag_news_steps_per_second": 0.08,
      "eval_ag_news_token_set_f1": 0.252671136839368,
      "eval_ag_news_token_set_f1_sem": 0.00416211692282695,
      "eval_ag_news_token_set_precision": 0.2116944573657276,
      "eval_ag_news_token_set_recall": 0.34707036925453266,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 12500
    },
    {
      "epoch": 2.4,
      "eval_anthropic_toxic_prompts_accuracy": 0.07859375,
      "eval_anthropic_toxic_prompts_bleu_score": 1.6646446228802656,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.06766547312689805,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.45219215750694275,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.01256326694720692,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0390625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.017191973462108996,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 4.135889530181885,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 3.384,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.664,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.166,
      "eval_anthropic_toxic_prompts_num_pred_words": 43.678,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 62.54520217798871,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.11992763643210105,
      "eval_anthropic_toxic_prompts_runtime": 9.7978,
      "eval_anthropic_toxic_prompts_samples_per_second": 51.032,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.102,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.2172559367052368,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005754577580731006,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.22574504442036494,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.2507367217451066,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 12500
    },
    {
      "epoch": 2.4,
      "eval_arxiv_accuracy": 0.28115625,
      "eval_arxiv_bleu_score": 2.3457166495172768,
      "eval_arxiv_bleu_score_sem": 0.0671725321596959,
      "eval_arxiv_emb_cos_sim": 0.48694080114364624,
      "eval_arxiv_emb_cos_sim_sem": 0.01065699528210655,
      "eval_arxiv_emb_top1_equal": 0.1953125,
      "eval_arxiv_emb_top1_equal_sem": 0.035178457165496856,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 4.275970935821533,
      "eval_arxiv_n_ngrams_match_1": 9.372,
      "eval_arxiv_n_ngrams_match_2": 1.272,
      "eval_arxiv_n_ngrams_match_3": 0.158,
      "eval_arxiv_num_pred_words": 35.53,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 71.94996420403176,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.21996145432350012,
      "eval_arxiv_runtime": 10.2338,
      "eval_arxiv_samples_per_second": 48.858,
      "eval_arxiv_steps_per_second": 0.098,
      "eval_arxiv_token_set_f1": 0.22896068861045651,
      "eval_arxiv_token_set_f1_sem": 0.004288100563487217,
      "eval_arxiv_token_set_precision": 0.16931357812446676,
      "eval_arxiv_token_set_recall": 0.3940005563945889,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 12500
    },
    {
      "epoch": 2.4,
      "eval_python_code_alpaca_accuracy": 0.10940625,
      "eval_python_code_alpaca_bleu_score": 2.440995180886357,
      "eval_python_code_alpaca_bleu_score_sem": 0.07114712202953336,
      "eval_python_code_alpaca_emb_cos_sim": 0.4028877019882202,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009777283869473899,
      "eval_python_code_alpaca_emb_top1_equal": 0.015625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.011004959004867984,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 4.087093353271484,
      "eval_python_code_alpaca_n_ngrams_match_1": 5.048,
      "eval_python_code_alpaca_n_ngrams_match_2": 0.754,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.164,
      "eval_python_code_alpaca_num_pred_words": 33.442,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 59.56650105811409,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.18386204582450022,
      "eval_python_code_alpaca_runtime": 9.9476,
      "eval_python_code_alpaca_samples_per_second": 50.263,
      "eval_python_code_alpaca_steps_per_second": 0.101,
      "eval_python_code_alpaca_token_set_f1": 0.28525719592563015,
      "eval_python_code_alpaca_token_set_f1_sem": 0.00517466331528906,
      "eval_python_code_alpaca_token_set_precision": 0.25021821502350705,
      "eval_python_code_alpaca_token_set_recall": 0.39145710504919656,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 12500
    },
    {
      "epoch": 2.4,
      "eval_wikibio_accuracy": 0.26415625,
      "eval_wikibio_bleu_score": 3.8662112814379053,
      "eval_wikibio_bleu_score_sem": 0.1616383110079937,
      "eval_wikibio_emb_cos_sim": 0.5877749919891357,
      "eval_wikibio_emb_cos_sim_sem": 0.014569499083173986,
      "eval_wikibio_emb_top1_equal": 0.09375,
      "eval_wikibio_emb_top1_equal_sem": 0.025864720141013958,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.646920204162598,
      "eval_wikibio_n_ngrams_match_1": 8.052,
      "eval_wikibio_n_ngrams_match_2": 2.288,
      "eval_wikibio_n_ngrams_match_3": 0.668,
      "eval_wikibio_num_pred_words": 35.422,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 104.26338066681669,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.26084861896744355,
      "eval_wikibio_runtime": 10.8319,
      "eval_wikibio_samples_per_second": 46.16,
      "eval_wikibio_steps_per_second": 0.092,
      "eval_wikibio_token_set_f1": 0.2563883128449072,
      "eval_wikibio_token_set_f1_sem": 0.006257974487319957,
      "eval_wikibio_token_set_precision": 0.2543646756806158,
      "eval_wikibio_token_set_recall": 0.27274956503895254,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 12500
    },
    {
      "epoch": 2.4,
      "eval_nq_accuracy": 0.4365,
      "eval_nq_bleu_score": 6.746635596538757,
      "eval_nq_bleu_score_sem": 0.2874480202467293,
      "eval_nq_emb_cos_sim": 0.6585381031036377,
      "eval_nq_emb_cos_sim_sem": 0.01168807973534411,
      "eval_nq_emb_top1_equal": 0.234375,
      "eval_nq_emb_top1_equal_sem": 0.03758909358128201,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.9432129859924316,
      "eval_nq_n_ngrams_match_1": 17.83,
      "eval_nq_n_ngrams_match_2": 5.082,
      "eval_nq_n_ngrams_match_3": 1.918,
      "eval_nq_num_pred_words": 48.308,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 18.97672040292327,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.3324887701032784,
      "eval_nq_runtime": 10.4015,
      "eval_nq_samples_per_second": 48.07,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.36427021711641167,
      "eval_nq_token_set_f1_sem": 0.004655915477029171,
      "eval_nq_token_set_precision": 0.30350370361311,
      "eval_nq_token_set_recall": 0.47646411215695267,
      "eval_nq_true_num_tokens": 64.0,
      "step": 12500
    },
    {
      "epoch": 2.4,
      "learning_rate": 0.001,
      "loss": 3.2849,
      "step": 12504
    },
    {
      "epoch": 2.4,
      "learning_rate": 0.001,
      "loss": 3.293,
      "step": 12516
    },
    {
      "epoch": 2.41,
      "learning_rate": 0.001,
      "loss": 3.303,
      "step": 12528
    },
    {
      "epoch": 2.41,
      "learning_rate": 0.001,
      "loss": 3.2915,
      "step": 12540
    },
    {
      "epoch": 2.41,
      "learning_rate": 0.001,
      "loss": 3.2959,
      "step": 12552
    },
    {
      "epoch": 2.41,
      "learning_rate": 0.001,
      "loss": 3.3051,
      "step": 12564
    },
    {
      "epoch": 2.41,
      "learning_rate": 0.001,
      "loss": 3.3144,
      "step": 12576
    },
    {
      "epoch": 2.42,
      "learning_rate": 0.001,
      "loss": 3.2956,
      "step": 12588
    },
    {
      "epoch": 2.42,
      "learning_rate": 0.001,
      "loss": 3.2904,
      "step": 12600
    },
    {
      "epoch": 2.42,
      "learning_rate": 0.001,
      "loss": 3.2985,
      "step": 12612
    },
    {
      "epoch": 2.42,
      "learning_rate": 0.001,
      "loss": 3.2916,
      "step": 12624
    },
    {
      "epoch": 2.43,
      "learning_rate": 0.001,
      "loss": 3.2831,
      "step": 12636
    },
    {
      "epoch": 2.43,
      "learning_rate": 0.001,
      "loss": 3.2799,
      "step": 12648
    },
    {
      "epoch": 2.43,
      "learning_rate": 0.001,
      "loss": 3.287,
      "step": 12660
    },
    {
      "epoch": 2.43,
      "learning_rate": 0.001,
      "loss": 3.284,
      "step": 12672
    },
    {
      "epoch": 2.44,
      "learning_rate": 0.001,
      "loss": 3.2984,
      "step": 12684
    },
    {
      "epoch": 2.44,
      "learning_rate": 0.001,
      "loss": 3.2869,
      "step": 12696
    },
    {
      "epoch": 2.44,
      "learning_rate": 0.001,
      "loss": 3.2933,
      "step": 12708
    },
    {
      "epoch": 2.44,
      "learning_rate": 0.001,
      "loss": 3.2856,
      "step": 12720
    },
    {
      "epoch": 2.44,
      "learning_rate": 0.001,
      "loss": 3.2864,
      "step": 12732
    },
    {
      "epoch": 2.45,
      "learning_rate": 0.001,
      "loss": 3.2873,
      "step": 12744
    },
    {
      "epoch": 2.45,
      "learning_rate": 0.001,
      "loss": 3.2849,
      "step": 12756
    },
    {
      "epoch": 2.45,
      "learning_rate": 0.001,
      "loss": 3.2829,
      "step": 12768
    },
    {
      "epoch": 2.45,
      "learning_rate": 0.001,
      "loss": 3.2822,
      "step": 12780
    },
    {
      "epoch": 2.46,
      "learning_rate": 0.001,
      "loss": 3.2721,
      "step": 12792
    },
    {
      "epoch": 2.46,
      "learning_rate": 0.001,
      "loss": 3.2747,
      "step": 12804
    },
    {
      "epoch": 2.46,
      "learning_rate": 0.001,
      "loss": 3.2763,
      "step": 12816
    },
    {
      "epoch": 2.46,
      "learning_rate": 0.001,
      "loss": 3.2932,
      "step": 12828
    },
    {
      "epoch": 2.47,
      "learning_rate": 0.001,
      "loss": 3.2863,
      "step": 12840
    },
    {
      "epoch": 2.47,
      "learning_rate": 0.001,
      "loss": 3.2846,
      "step": 12852
    },
    {
      "epoch": 2.47,
      "learning_rate": 0.001,
      "loss": 3.2786,
      "step": 12864
    },
    {
      "epoch": 2.47,
      "learning_rate": 0.001,
      "loss": 3.2894,
      "step": 12876
    },
    {
      "epoch": 2.47,
      "learning_rate": 0.001,
      "loss": 3.2768,
      "step": 12888
    },
    {
      "epoch": 2.48,
      "learning_rate": 0.001,
      "loss": 3.2748,
      "step": 12900
    },
    {
      "epoch": 2.48,
      "learning_rate": 0.001,
      "loss": 3.2761,
      "step": 12912
    },
    {
      "epoch": 2.48,
      "learning_rate": 0.001,
      "loss": 3.282,
      "step": 12924
    },
    {
      "epoch": 2.48,
      "learning_rate": 0.001,
      "loss": 3.2828,
      "step": 12936
    },
    {
      "epoch": 2.49,
      "learning_rate": 0.001,
      "loss": 3.2652,
      "step": 12948
    },
    {
      "epoch": 2.49,
      "learning_rate": 0.001,
      "loss": 3.2758,
      "step": 12960
    },
    {
      "epoch": 2.49,
      "learning_rate": 0.001,
      "loss": 3.2806,
      "step": 12972
    },
    {
      "epoch": 2.49,
      "learning_rate": 0.001,
      "loss": 3.2714,
      "step": 12984
    },
    {
      "epoch": 2.5,
      "learning_rate": 0.001,
      "loss": 3.264,
      "step": 12996
    },
    {
      "epoch": 2.5,
      "learning_rate": 0.001,
      "loss": 3.2716,
      "step": 13008
    },
    {
      "epoch": 2.5,
      "learning_rate": 0.001,
      "loss": 3.2926,
      "step": 13020
    },
    {
      "epoch": 2.5,
      "learning_rate": 0.001,
      "loss": 3.2747,
      "step": 13032
    },
    {
      "epoch": 2.5,
      "learning_rate": 0.001,
      "loss": 3.2642,
      "step": 13044
    },
    {
      "epoch": 2.51,
      "learning_rate": 0.001,
      "loss": 3.2794,
      "step": 13056
    },
    {
      "epoch": 2.51,
      "learning_rate": 0.001,
      "loss": 3.2818,
      "step": 13068
    },
    {
      "epoch": 2.51,
      "learning_rate": 0.001,
      "loss": 3.2701,
      "step": 13080
    },
    {
      "epoch": 2.51,
      "learning_rate": 0.001,
      "loss": 3.2793,
      "step": 13092
    },
    {
      "epoch": 2.52,
      "learning_rate": 0.001,
      "loss": 3.2623,
      "step": 13104
    },
    {
      "epoch": 2.52,
      "learning_rate": 0.001,
      "loss": 3.2681,
      "step": 13116
    },
    {
      "epoch": 2.52,
      "eval_ag_news_accuracy": 0.2536875,
      "eval_ag_news_bleu_score": 2.9449891964169406,
      "eval_ag_news_bleu_score_sem": 0.11170758560265776,
      "eval_ag_news_emb_cos_sim": 0.6044542789459229,
      "eval_ag_news_emb_cos_sim_sem": 0.012544080911511352,
      "eval_ag_news_emb_top1_equal": 0.0859375,
      "eval_ag_news_emb_top1_equal_sem": 0.02487009666300537,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 4.407440662384033,
      "eval_ag_news_n_ngrams_match_1": 9.36,
      "eval_ag_news_n_ngrams_match_2": 1.544,
      "eval_ag_news_n_ngrams_match_3": 0.362,
      "eval_ag_news_num_pred_words": 43.174,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 82.05917738298486,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.22583801754747374,
      "eval_ag_news_runtime": 15.3408,
      "eval_ag_news_samples_per_second": 32.593,
      "eval_ag_news_steps_per_second": 0.065,
      "eval_ag_news_token_set_f1": 0.25504214343372367,
      "eval_ag_news_token_set_f1_sem": 0.004312547375032933,
      "eval_ag_news_token_set_precision": 0.2148031596987005,
      "eval_ag_news_token_set_recall": 0.34836961628933527,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 13125
    },
    {
      "epoch": 2.52,
      "eval_anthropic_toxic_prompts_accuracy": 0.077875,
      "eval_anthropic_toxic_prompts_bleu_score": 1.751248530201478,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.06713836266353478,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.4631584584712982,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.012515903109103787,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.03125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015439349450344106,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 4.127745151519775,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 3.592,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.696,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.17,
      "eval_anthropic_toxic_prompts_num_pred_words": 42.91,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 62.03787908789136,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.12692583998457163,
      "eval_anthropic_toxic_prompts_runtime": 10.2293,
      "eval_anthropic_toxic_prompts_samples_per_second": 48.879,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.098,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.2325835861480348,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005848272577870726,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.23768760026944197,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.2732707689420043,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 13125
    },
    {
      "epoch": 2.52,
      "eval_arxiv_accuracy": 0.28484375,
      "eval_arxiv_bleu_score": 2.5811135225852504,
      "eval_arxiv_bleu_score_sem": 0.07618768579731194,
      "eval_arxiv_emb_cos_sim": 0.5199321508407593,
      "eval_arxiv_emb_cos_sim_sem": 0.008200699341404919,
      "eval_arxiv_emb_top1_equal": 0.171875,
      "eval_arxiv_emb_top1_equal_sem": 0.03347745514062371,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 4.23995304107666,
      "eval_arxiv_n_ngrams_match_1": 9.88,
      "eval_arxiv_n_ngrams_match_2": 1.476,
      "eval_arxiv_n_ngrams_match_3": 0.226,
      "eval_arxiv_num_pred_words": 37.59,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 69.40459259728735,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.23090852795615785,
      "eval_arxiv_runtime": 10.4624,
      "eval_arxiv_samples_per_second": 47.79,
      "eval_arxiv_steps_per_second": 0.096,
      "eval_arxiv_token_set_f1": 0.238395627428193,
      "eval_arxiv_token_set_f1_sem": 0.0039886683522581765,
      "eval_arxiv_token_set_precision": 0.1773076603557147,
      "eval_arxiv_token_set_recall": 0.40447644208248185,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 13125
    },
    {
      "epoch": 2.52,
      "eval_python_code_alpaca_accuracy": 0.1119375,
      "eval_python_code_alpaca_bleu_score": 2.561315340916709,
      "eval_python_code_alpaca_bleu_score_sem": 0.08306839700716231,
      "eval_python_code_alpaca_emb_cos_sim": 0.42342567443847656,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009762105077109406,
      "eval_python_code_alpaca_emb_top1_equal": 0.0234375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.013424676090873717,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.990710973739624,
      "eval_python_code_alpaca_n_ngrams_match_1": 5.514,
      "eval_python_code_alpaca_n_ngrams_match_2": 0.91,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.224,
      "eval_python_code_alpaca_num_pred_words": 35.608,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 54.09333463533215,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.19147301206757505,
      "eval_python_code_alpaca_runtime": 9.9754,
      "eval_python_code_alpaca_samples_per_second": 50.123,
      "eval_python_code_alpaca_steps_per_second": 0.1,
      "eval_python_code_alpaca_token_set_f1": 0.30585590490609965,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005042719737604739,
      "eval_python_code_alpaca_token_set_precision": 0.2745375740186613,
      "eval_python_code_alpaca_token_set_recall": 0.3981217515573972,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 13125
    },
    {
      "epoch": 2.52,
      "eval_wikibio_accuracy": 0.26803125,
      "eval_wikibio_bleu_score": 4.323228286961814,
      "eval_wikibio_bleu_score_sem": 0.17612224389306505,
      "eval_wikibio_emb_cos_sim": 0.6059797406196594,
      "eval_wikibio_emb_cos_sim_sem": 0.012709243874113417,
      "eval_wikibio_emb_top1_equal": 0.09375,
      "eval_wikibio_emb_top1_equal_sem": 0.025864720141013958,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.592751979827881,
      "eval_wikibio_n_ngrams_match_1": 8.424,
      "eval_wikibio_n_ngrams_match_2": 2.436,
      "eval_wikibio_n_ngrams_match_3": 0.786,
      "eval_wikibio_num_pred_words": 35.794,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 98.76585815779272,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.2725388227904081,
      "eval_wikibio_runtime": 10.3485,
      "eval_wikibio_samples_per_second": 48.316,
      "eval_wikibio_steps_per_second": 0.097,
      "eval_wikibio_token_set_f1": 0.2719072575997325,
      "eval_wikibio_token_set_f1_sem": 0.00600774684149003,
      "eval_wikibio_token_set_precision": 0.26944509606780204,
      "eval_wikibio_token_set_recall": 0.29515541713185645,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 13125
    },
    {
      "epoch": 2.52,
      "eval_nq_accuracy": 0.4376875,
      "eval_nq_bleu_score": 6.738113226507277,
      "eval_nq_bleu_score_sem": 0.30399523897515385,
      "eval_nq_emb_cos_sim": 0.6823140978813171,
      "eval_nq_emb_cos_sim_sem": 0.010425863281961824,
      "eval_nq_emb_top1_equal": 0.1484375,
      "eval_nq_emb_top1_equal_sem": 0.031548465007086954,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.915091037750244,
      "eval_nq_n_ngrams_match_1": 17.774,
      "eval_nq_n_ngrams_match_2": 5.142,
      "eval_nq_n_ngrams_match_3": 1.908,
      "eval_nq_num_pred_words": 47.11,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 18.450492017575765,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.33280402860050506,
      "eval_nq_runtime": 10.6217,
      "eval_nq_samples_per_second": 47.073,
      "eval_nq_steps_per_second": 0.094,
      "eval_nq_token_set_f1": 0.3666457291777285,
      "eval_nq_token_set_f1_sem": 0.004688051975408419,
      "eval_nq_token_set_precision": 0.30308058586002634,
      "eval_nq_token_set_recall": 0.486597131621353,
      "eval_nq_true_num_tokens": 64.0,
      "step": 13125
    },
    {
      "epoch": 2.52,
      "learning_rate": 0.001,
      "loss": 3.2737,
      "step": 13128
    },
    {
      "epoch": 2.52,
      "learning_rate": 0.001,
      "loss": 3.2603,
      "step": 13140
    },
    {
      "epoch": 2.53,
      "learning_rate": 0.001,
      "loss": 3.2597,
      "step": 13152
    },
    {
      "epoch": 2.53,
      "learning_rate": 0.001,
      "loss": 3.2711,
      "step": 13164
    },
    {
      "epoch": 2.53,
      "learning_rate": 0.001,
      "loss": 3.2802,
      "step": 13176
    },
    {
      "epoch": 2.53,
      "learning_rate": 0.001,
      "loss": 3.272,
      "step": 13188
    },
    {
      "epoch": 2.53,
      "learning_rate": 0.001,
      "loss": 3.2725,
      "step": 13200
    },
    {
      "epoch": 2.54,
      "learning_rate": 0.001,
      "loss": 3.2786,
      "step": 13212
    },
    {
      "epoch": 2.54,
      "learning_rate": 0.001,
      "loss": 3.2792,
      "step": 13224
    },
    {
      "epoch": 2.54,
      "learning_rate": 0.001,
      "loss": 3.272,
      "step": 13236
    },
    {
      "epoch": 2.54,
      "learning_rate": 0.001,
      "loss": 3.2692,
      "step": 13248
    },
    {
      "epoch": 2.55,
      "learning_rate": 0.001,
      "loss": 3.2652,
      "step": 13260
    },
    {
      "epoch": 2.55,
      "learning_rate": 0.001,
      "loss": 3.2718,
      "step": 13272
    },
    {
      "epoch": 2.55,
      "learning_rate": 0.001,
      "loss": 3.2621,
      "step": 13284
    },
    {
      "epoch": 2.55,
      "learning_rate": 0.001,
      "loss": 3.2689,
      "step": 13296
    },
    {
      "epoch": 2.56,
      "learning_rate": 0.001,
      "loss": 3.2621,
      "step": 13308
    },
    {
      "epoch": 2.56,
      "learning_rate": 0.001,
      "loss": 3.2754,
      "step": 13320
    },
    {
      "epoch": 2.56,
      "learning_rate": 0.001,
      "loss": 3.2726,
      "step": 13332
    },
    {
      "epoch": 2.56,
      "learning_rate": 0.001,
      "loss": 3.2598,
      "step": 13344
    },
    {
      "epoch": 2.56,
      "learning_rate": 0.001,
      "loss": 3.2694,
      "step": 13356
    },
    {
      "epoch": 2.57,
      "learning_rate": 0.001,
      "loss": 3.2706,
      "step": 13368
    },
    {
      "epoch": 2.57,
      "learning_rate": 0.001,
      "loss": 3.2583,
      "step": 13380
    },
    {
      "epoch": 2.57,
      "learning_rate": 0.001,
      "loss": 3.2653,
      "step": 13392
    },
    {
      "epoch": 2.57,
      "learning_rate": 0.001,
      "loss": 3.26,
      "step": 13404
    },
    {
      "epoch": 2.58,
      "learning_rate": 0.001,
      "loss": 3.2639,
      "step": 13416
    },
    {
      "epoch": 2.58,
      "learning_rate": 0.001,
      "loss": 3.2541,
      "step": 13428
    },
    {
      "epoch": 2.58,
      "learning_rate": 0.001,
      "loss": 3.2659,
      "step": 13440
    },
    {
      "epoch": 2.58,
      "learning_rate": 0.001,
      "loss": 3.2545,
      "step": 13452
    },
    {
      "epoch": 2.59,
      "learning_rate": 0.001,
      "loss": 3.2692,
      "step": 13464
    },
    {
      "epoch": 2.59,
      "learning_rate": 0.001,
      "loss": 3.2641,
      "step": 13476
    },
    {
      "epoch": 2.59,
      "learning_rate": 0.001,
      "loss": 3.2666,
      "step": 13488
    },
    {
      "epoch": 2.59,
      "learning_rate": 0.001,
      "loss": 3.2633,
      "step": 13500
    },
    {
      "epoch": 2.59,
      "learning_rate": 0.001,
      "loss": 3.2579,
      "step": 13512
    },
    {
      "epoch": 2.6,
      "learning_rate": 0.001,
      "loss": 3.261,
      "step": 13524
    },
    {
      "epoch": 2.6,
      "learning_rate": 0.001,
      "loss": 3.251,
      "step": 13536
    },
    {
      "epoch": 2.6,
      "learning_rate": 0.001,
      "loss": 3.2652,
      "step": 13548
    },
    {
      "epoch": 2.6,
      "learning_rate": 0.001,
      "loss": 3.2574,
      "step": 13560
    },
    {
      "epoch": 2.61,
      "learning_rate": 0.001,
      "loss": 3.2446,
      "step": 13572
    },
    {
      "epoch": 2.61,
      "learning_rate": 0.001,
      "loss": 3.2615,
      "step": 13584
    },
    {
      "epoch": 2.61,
      "learning_rate": 0.001,
      "loss": 3.2528,
      "step": 13596
    },
    {
      "epoch": 2.61,
      "learning_rate": 0.001,
      "loss": 3.2607,
      "step": 13608
    },
    {
      "epoch": 2.62,
      "learning_rate": 0.001,
      "loss": 3.255,
      "step": 13620
    },
    {
      "epoch": 2.62,
      "learning_rate": 0.001,
      "loss": 3.2528,
      "step": 13632
    },
    {
      "epoch": 2.62,
      "learning_rate": 0.001,
      "loss": 3.2607,
      "step": 13644
    },
    {
      "epoch": 2.62,
      "learning_rate": 0.001,
      "loss": 3.2503,
      "step": 13656
    },
    {
      "epoch": 2.62,
      "learning_rate": 0.001,
      "loss": 3.2505,
      "step": 13668
    },
    {
      "epoch": 2.63,
      "learning_rate": 0.001,
      "loss": 3.2545,
      "step": 13680
    },
    {
      "epoch": 2.63,
      "learning_rate": 0.001,
      "loss": 3.2544,
      "step": 13692
    },
    {
      "epoch": 2.63,
      "learning_rate": 0.001,
      "loss": 3.2668,
      "step": 13704
    },
    {
      "epoch": 2.63,
      "learning_rate": 0.001,
      "loss": 3.2631,
      "step": 13716
    },
    {
      "epoch": 2.64,
      "learning_rate": 0.001,
      "loss": 3.242,
      "step": 13728
    },
    {
      "epoch": 2.64,
      "learning_rate": 0.001,
      "loss": 3.2561,
      "step": 13740
    },
    {
      "epoch": 2.64,
      "eval_ag_news_accuracy": 0.2550625,
      "eval_ag_news_bleu_score": 3.0595852539348525,
      "eval_ag_news_bleu_score_sem": 0.10125286244043642,
      "eval_ag_news_emb_cos_sim": 0.6164160966873169,
      "eval_ag_news_emb_cos_sim_sem": 0.011056885897866929,
      "eval_ag_news_emb_top1_equal": 0.125,
      "eval_ag_news_emb_top1_equal_sem": 0.02934655822437397,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 4.388191223144531,
      "eval_ag_news_n_ngrams_match_1": 9.738,
      "eval_ag_news_n_ngrams_match_2": 1.69,
      "eval_ag_news_n_ngrams_match_3": 0.416,
      "eval_ag_news_num_pred_words": 45.606,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 80.49469029208058,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.22959446391039756,
      "eval_ag_news_runtime": 11.6257,
      "eval_ag_news_samples_per_second": 43.008,
      "eval_ag_news_steps_per_second": 0.086,
      "eval_ag_news_token_set_f1": 0.26311258895517087,
      "eval_ag_news_token_set_f1_sem": 0.004360061114056252,
      "eval_ag_news_token_set_precision": 0.2244133253321986,
      "eval_ag_news_token_set_recall": 0.3514068328125633,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 13750
    },
    {
      "epoch": 2.64,
      "eval_anthropic_toxic_prompts_accuracy": 0.0798125,
      "eval_anthropic_toxic_prompts_bleu_score": 1.7996500465751293,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.06931926454336522,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.48772984743118286,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.013242475264761121,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0234375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.013424676090873717,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 4.096881866455078,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 3.838,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.848,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.246,
      "eval_anthropic_toxic_prompts_num_pred_words": 45.678,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 60.15243155481487,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.13198045152000265,
      "eval_anthropic_toxic_prompts_runtime": 11.7437,
      "eval_anthropic_toxic_prompts_samples_per_second": 42.576,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.085,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.2450321035635283,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006188998623815163,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.2594393760445298,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.27603164540936503,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 13750
    },
    {
      "epoch": 2.64,
      "eval_arxiv_accuracy": 0.287,
      "eval_arxiv_bleu_score": 2.632543308552264,
      "eval_arxiv_bleu_score_sem": 0.07272150154183635,
      "eval_arxiv_emb_cos_sim": 0.5344278216362,
      "eval_arxiv_emb_cos_sim_sem": 0.008885583654073423,
      "eval_arxiv_emb_top1_equal": 0.1953125,
      "eval_arxiv_emb_top1_equal_sem": 0.035178457165496856,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 4.203517436981201,
      "eval_arxiv_n_ngrams_match_1": 10.338,
      "eval_arxiv_n_ngrams_match_2": 1.5,
      "eval_arxiv_n_ngrams_match_3": 0.22,
      "eval_arxiv_num_pred_words": 38.834,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 66.92130902572652,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.23994740816249588,
      "eval_arxiv_runtime": 12.495,
      "eval_arxiv_samples_per_second": 40.016,
      "eval_arxiv_steps_per_second": 0.08,
      "eval_arxiv_token_set_f1": 0.2490406751942718,
      "eval_arxiv_token_set_f1_sem": 0.0038308698796611497,
      "eval_arxiv_token_set_precision": 0.18654490094102041,
      "eval_arxiv_token_set_recall": 0.4134002024260476,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 13750
    },
    {
      "epoch": 2.64,
      "eval_python_code_alpaca_accuracy": 0.11378125,
      "eval_python_code_alpaca_bleu_score": 2.5490362239244195,
      "eval_python_code_alpaca_bleu_score_sem": 0.08137361657241758,
      "eval_python_code_alpaca_emb_cos_sim": 0.4182761609554291,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010729133564599994,
      "eval_python_code_alpaca_emb_top1_equal": 0.03125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.015439349450344106,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 4.002032279968262,
      "eval_python_code_alpaca_n_ngrams_match_1": 5.458,
      "eval_python_code_alpaca_n_ngrams_match_2": 0.958,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.222,
      "eval_python_code_alpaca_num_pred_words": 35.752,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 54.709221585777854,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.18824091787657266,
      "eval_python_code_alpaca_runtime": 14.4755,
      "eval_python_code_alpaca_samples_per_second": 34.541,
      "eval_python_code_alpaca_steps_per_second": 0.069,
      "eval_python_code_alpaca_token_set_f1": 0.3069438577192991,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0053490704214093675,
      "eval_python_code_alpaca_token_set_precision": 0.274700933677545,
      "eval_python_code_alpaca_token_set_recall": 0.4007708336990634,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 13750
    },
    {
      "epoch": 2.64,
      "eval_wikibio_accuracy": 0.26978125,
      "eval_wikibio_bleu_score": 4.288860876153392,
      "eval_wikibio_bleu_score_sem": 0.1662797453557564,
      "eval_wikibio_emb_cos_sim": 0.629534125328064,
      "eval_wikibio_emb_cos_sim_sem": 0.013088037358016062,
      "eval_wikibio_emb_top1_equal": 0.1015625,
      "eval_wikibio_emb_top1_equal_sem": 0.026804565886848545,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.626995086669922,
      "eval_wikibio_n_ngrams_match_1": 8.706,
      "eval_wikibio_n_ngrams_match_2": 2.566,
      "eval_wikibio_n_ngrams_match_3": 0.768,
      "eval_wikibio_num_pred_words": 36.79,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 102.20648059507401,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.28293588694026894,
      "eval_wikibio_runtime": 15.3486,
      "eval_wikibio_samples_per_second": 32.576,
      "eval_wikibio_steps_per_second": 0.065,
      "eval_wikibio_token_set_f1": 0.2771370476635763,
      "eval_wikibio_token_set_f1_sem": 0.005909443023008455,
      "eval_wikibio_token_set_precision": 0.2793071172081303,
      "eval_wikibio_token_set_recall": 0.29145766768540515,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 13750
    },
    {
      "epoch": 2.64,
      "eval_nq_accuracy": 0.441375,
      "eval_nq_bleu_score": 6.936821043395195,
      "eval_nq_bleu_score_sem": 0.30734579894337744,
      "eval_nq_emb_cos_sim": 0.6749643087387085,
      "eval_nq_emb_cos_sim_sem": 0.011556900303531984,
      "eval_nq_emb_top1_equal": 0.1640625,
      "eval_nq_emb_top1_equal_sem": 0.03286167651298939,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.8895976543426514,
      "eval_nq_n_ngrams_match_1": 17.95,
      "eval_nq_n_ngrams_match_2": 5.266,
      "eval_nq_n_ngrams_match_3": 1.988,
      "eval_nq_num_pred_words": 48.37,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 17.98607152777222,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.3366740775514444,
      "eval_nq_runtime": 13.0683,
      "eval_nq_samples_per_second": 38.26,
      "eval_nq_steps_per_second": 0.077,
      "eval_nq_token_set_f1": 0.3699267581375482,
      "eval_nq_token_set_f1_sem": 0.0047740210719885885,
      "eval_nq_token_set_precision": 0.30951190839774545,
      "eval_nq_token_set_recall": 0.483800685180859,
      "eval_nq_true_num_tokens": 64.0,
      "step": 13750
    },
    {
      "epoch": 2.64,
      "learning_rate": 0.001,
      "loss": 3.2637,
      "step": 13752
    },
    {
      "epoch": 2.64,
      "learning_rate": 0.001,
      "loss": 3.2672,
      "step": 13764
    },
    {
      "epoch": 2.65,
      "learning_rate": 0.001,
      "loss": 3.2434,
      "step": 13776
    },
    {
      "epoch": 2.65,
      "learning_rate": 0.001,
      "loss": 3.2529,
      "step": 13788
    },
    {
      "epoch": 2.65,
      "learning_rate": 0.001,
      "loss": 3.2508,
      "step": 13800
    },
    {
      "epoch": 2.65,
      "learning_rate": 0.001,
      "loss": 3.2417,
      "step": 13812
    },
    {
      "epoch": 2.65,
      "learning_rate": 0.001,
      "loss": 3.2503,
      "step": 13824
    },
    {
      "epoch": 2.66,
      "learning_rate": 0.001,
      "loss": 3.245,
      "step": 13836
    },
    {
      "epoch": 2.66,
      "learning_rate": 0.001,
      "loss": 3.2398,
      "step": 13848
    },
    {
      "epoch": 2.66,
      "learning_rate": 0.001,
      "loss": 3.2501,
      "step": 13860
    },
    {
      "epoch": 2.66,
      "learning_rate": 0.001,
      "loss": 3.2507,
      "step": 13872
    },
    {
      "epoch": 2.67,
      "learning_rate": 0.001,
      "loss": 3.2506,
      "step": 13884
    },
    {
      "epoch": 2.67,
      "learning_rate": 0.001,
      "loss": 3.2511,
      "step": 13896
    },
    {
      "epoch": 2.67,
      "learning_rate": 0.001,
      "loss": 3.2521,
      "step": 13908
    },
    {
      "epoch": 2.67,
      "learning_rate": 0.001,
      "loss": 3.2431,
      "step": 13920
    },
    {
      "epoch": 2.68,
      "learning_rate": 0.001,
      "loss": 3.2436,
      "step": 13932
    },
    {
      "epoch": 2.68,
      "learning_rate": 0.001,
      "loss": 3.2263,
      "step": 13944
    },
    {
      "epoch": 2.68,
      "learning_rate": 0.001,
      "loss": 3.2548,
      "step": 13956
    },
    {
      "epoch": 2.68,
      "learning_rate": 0.001,
      "loss": 3.2387,
      "step": 13968
    },
    {
      "epoch": 2.68,
      "learning_rate": 0.001,
      "loss": 3.247,
      "step": 13980
    },
    {
      "epoch": 2.69,
      "learning_rate": 0.001,
      "loss": 3.246,
      "step": 13992
    },
    {
      "epoch": 2.69,
      "learning_rate": 0.001,
      "loss": 3.2473,
      "step": 14004
    },
    {
      "epoch": 2.69,
      "learning_rate": 0.001,
      "loss": 3.249,
      "step": 14016
    },
    {
      "epoch": 2.69,
      "learning_rate": 0.001,
      "loss": 3.2356,
      "step": 14028
    },
    {
      "epoch": 2.7,
      "learning_rate": 0.001,
      "loss": 3.2423,
      "step": 14040
    },
    {
      "epoch": 2.7,
      "learning_rate": 0.001,
      "loss": 3.2445,
      "step": 14052
    },
    {
      "epoch": 2.7,
      "learning_rate": 0.001,
      "loss": 3.2361,
      "step": 14064
    },
    {
      "epoch": 2.7,
      "learning_rate": 0.001,
      "loss": 3.2529,
      "step": 14076
    },
    {
      "epoch": 2.71,
      "learning_rate": 0.001,
      "loss": 3.2334,
      "step": 14088
    },
    {
      "epoch": 2.71,
      "learning_rate": 0.001,
      "loss": 3.2309,
      "step": 14100
    },
    {
      "epoch": 2.71,
      "learning_rate": 0.001,
      "loss": 3.2429,
      "step": 14112
    },
    {
      "epoch": 2.71,
      "learning_rate": 0.001,
      "loss": 3.2419,
      "step": 14124
    },
    {
      "epoch": 2.71,
      "learning_rate": 0.001,
      "loss": 3.2352,
      "step": 14136
    },
    {
      "epoch": 2.72,
      "learning_rate": 0.001,
      "loss": 3.2317,
      "step": 14148
    },
    {
      "epoch": 2.72,
      "learning_rate": 0.001,
      "loss": 3.2368,
      "step": 14160
    },
    {
      "epoch": 2.72,
      "learning_rate": 0.001,
      "loss": 3.2424,
      "step": 14172
    },
    {
      "epoch": 2.72,
      "learning_rate": 0.001,
      "loss": 3.2388,
      "step": 14184
    },
    {
      "epoch": 2.73,
      "learning_rate": 0.001,
      "loss": 3.2367,
      "step": 14196
    },
    {
      "epoch": 2.73,
      "learning_rate": 0.001,
      "loss": 3.2314,
      "step": 14208
    },
    {
      "epoch": 2.73,
      "learning_rate": 0.001,
      "loss": 3.2157,
      "step": 14220
    },
    {
      "epoch": 2.73,
      "learning_rate": 0.001,
      "loss": 3.2254,
      "step": 14232
    },
    {
      "epoch": 2.74,
      "learning_rate": 0.001,
      "loss": 3.2334,
      "step": 14244
    },
    {
      "epoch": 2.74,
      "learning_rate": 0.001,
      "loss": 3.2297,
      "step": 14256
    },
    {
      "epoch": 2.74,
      "learning_rate": 0.001,
      "loss": 3.2353,
      "step": 14268
    },
    {
      "epoch": 2.74,
      "learning_rate": 0.001,
      "loss": 3.2344,
      "step": 14280
    },
    {
      "epoch": 2.74,
      "learning_rate": 0.001,
      "loss": 3.2369,
      "step": 14292
    },
    {
      "epoch": 2.75,
      "learning_rate": 0.001,
      "loss": 3.2255,
      "step": 14304
    },
    {
      "epoch": 2.75,
      "learning_rate": 0.001,
      "loss": 3.2334,
      "step": 14316
    },
    {
      "epoch": 2.75,
      "learning_rate": 0.001,
      "loss": 3.2351,
      "step": 14328
    },
    {
      "epoch": 2.75,
      "learning_rate": 0.001,
      "loss": 3.2212,
      "step": 14340
    },
    {
      "epoch": 2.76,
      "learning_rate": 0.001,
      "loss": 3.2223,
      "step": 14352
    },
    {
      "epoch": 2.76,
      "learning_rate": 0.001,
      "loss": 3.2268,
      "step": 14364
    },
    {
      "epoch": 2.76,
      "eval_ag_news_accuracy": 0.25671875,
      "eval_ag_news_bleu_score": 2.9228933524795804,
      "eval_ag_news_bleu_score_sem": 0.09653019308025049,
      "eval_ag_news_emb_cos_sim": 0.6249127388000488,
      "eval_ag_news_emb_cos_sim_sem": 0.01130702612669007,
      "eval_ag_news_emb_top1_equal": 0.109375,
      "eval_ag_news_emb_top1_equal_sem": 0.027695207821224692,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 4.3742289543151855,
      "eval_ag_news_n_ngrams_match_1": 9.716,
      "eval_ag_news_n_ngrams_match_2": 1.65,
      "eval_ag_news_n_ngrams_match_3": 0.366,
      "eval_ag_news_num_pred_words": 44.996,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 79.37861141465126,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.22996264353717513,
      "eval_ag_news_runtime": 11.1257,
      "eval_ag_news_samples_per_second": 44.941,
      "eval_ag_news_steps_per_second": 0.09,
      "eval_ag_news_token_set_f1": 0.26167208160282823,
      "eval_ag_news_token_set_f1_sem": 0.0041832893715841825,
      "eval_ag_news_token_set_precision": 0.22363620007574808,
      "eval_ag_news_token_set_recall": 0.34746237249697737,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 14375
    },
    {
      "epoch": 2.76,
      "eval_anthropic_toxic_prompts_accuracy": 0.08171875,
      "eval_anthropic_toxic_prompts_bleu_score": 1.8160183436173238,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.0799183565327385,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.4794783592224121,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011927572616562186,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0078125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.007812499866294757,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 4.034400463104248,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 3.716,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.812,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.228,
      "eval_anthropic_toxic_prompts_num_pred_words": 45.804,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 56.50903083143656,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.1293065098026931,
      "eval_anthropic_toxic_prompts_runtime": 10.1683,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.173,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.098,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.23886608170548068,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005991189123483841,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.2483372762205062,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.2699679507572635,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 14375
    },
    {
      "epoch": 2.76,
      "eval_arxiv_accuracy": 0.2880625,
      "eval_arxiv_bleu_score": 2.5976802419398055,
      "eval_arxiv_bleu_score_sem": 0.07470704319548123,
      "eval_arxiv_emb_cos_sim": 0.5267419219017029,
      "eval_arxiv_emb_cos_sim_sem": 0.010388814226487224,
      "eval_arxiv_emb_top1_equal": 0.15625,
      "eval_arxiv_emb_top1_equal_sem": 0.03221922156442571,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 4.1842041015625,
      "eval_arxiv_n_ngrams_match_1": 9.898,
      "eval_arxiv_n_ngrams_match_2": 1.492,
      "eval_arxiv_n_ngrams_match_3": 0.218,
      "eval_arxiv_num_pred_words": 37.536,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 65.64123636340126,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.23095370274952293,
      "eval_arxiv_runtime": 11.0414,
      "eval_arxiv_samples_per_second": 45.284,
      "eval_arxiv_steps_per_second": 0.091,
      "eval_arxiv_token_set_f1": 0.23907810694460827,
      "eval_arxiv_token_set_f1_sem": 0.004133082865537206,
      "eval_arxiv_token_set_precision": 0.17881862594052503,
      "eval_arxiv_token_set_recall": 0.3997951484317853,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 14375
    },
    {
      "epoch": 2.76,
      "eval_python_code_alpaca_accuracy": 0.111875,
      "eval_python_code_alpaca_bleu_score": 2.3207171005376215,
      "eval_python_code_alpaca_bleu_score_sem": 0.07783894784708693,
      "eval_python_code_alpaca_emb_cos_sim": 0.42236027121543884,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010205397727212894,
      "eval_python_code_alpaca_emb_top1_equal": 0.0234375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.013424676090873717,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.9959185123443604,
      "eval_python_code_alpaca_n_ngrams_match_1": 5.32,
      "eval_python_code_alpaca_n_ngrams_match_2": 0.822,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.176,
      "eval_python_code_alpaca_num_pred_words": 37.104,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 54.375762502464724,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.1815364623681976,
      "eval_python_code_alpaca_runtime": 11.6638,
      "eval_python_code_alpaca_samples_per_second": 42.868,
      "eval_python_code_alpaca_steps_per_second": 0.086,
      "eval_python_code_alpaca_token_set_f1": 0.2983793633896166,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005223507485486749,
      "eval_python_code_alpaca_token_set_precision": 0.26527427441961166,
      "eval_python_code_alpaca_token_set_recall": 0.38938111330511516,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 14375
    },
    {
      "epoch": 2.76,
      "eval_wikibio_accuracy": 0.2723125,
      "eval_wikibio_bleu_score": 4.344365844835519,
      "eval_wikibio_bleu_score_sem": 0.16470560340428783,
      "eval_wikibio_emb_cos_sim": 0.6137693524360657,
      "eval_wikibio_emb_cos_sim_sem": 0.01276244505073531,
      "eval_wikibio_emb_top1_equal": 0.0625,
      "eval_wikibio_emb_top1_equal_sem": 0.02147948148198014,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.514110088348389,
      "eval_wikibio_n_ngrams_match_1": 8.652,
      "eval_wikibio_n_ngrams_match_2": 2.51,
      "eval_wikibio_n_ngrams_match_3": 0.79,
      "eval_wikibio_num_pred_words": 36.946,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 91.2962842338586,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.28470175904362316,
      "eval_wikibio_runtime": 10.714,
      "eval_wikibio_samples_per_second": 46.668,
      "eval_wikibio_steps_per_second": 0.093,
      "eval_wikibio_token_set_f1": 0.2786252165977963,
      "eval_wikibio_token_set_f1_sem": 0.005705800909168101,
      "eval_wikibio_token_set_precision": 0.2789999887690485,
      "eval_wikibio_token_set_recall": 0.2914470857535824,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 14375
    },
    {
      "epoch": 2.76,
      "eval_nq_accuracy": 0.44321875,
      "eval_nq_bleu_score": 7.142864121366401,
      "eval_nq_bleu_score_sem": 0.3259829282669103,
      "eval_nq_emb_cos_sim": 0.6839103698730469,
      "eval_nq_emb_cos_sim_sem": 0.011409034670976592,
      "eval_nq_emb_top1_equal": 0.1171875,
      "eval_nq_emb_top1_equal_sem": 0.02854125312152025,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.8703842163085938,
      "eval_nq_n_ngrams_match_1": 18.118,
      "eval_nq_n_ngrams_match_2": 5.434,
      "eval_nq_n_ngrams_match_3": 2.086,
      "eval_nq_num_pred_words": 48.448,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 17.64379593183887,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.3374483407607677,
      "eval_nq_runtime": 19.6069,
      "eval_nq_samples_per_second": 25.501,
      "eval_nq_steps_per_second": 0.051,
      "eval_nq_token_set_f1": 0.36932619798037547,
      "eval_nq_token_set_f1_sem": 0.004742159484697793,
      "eval_nq_token_set_precision": 0.3094702412379923,
      "eval_nq_token_set_recall": 0.4798212823065362,
      "eval_nq_true_num_tokens": 64.0,
      "step": 14375
    },
    {
      "epoch": 2.76,
      "learning_rate": 0.001,
      "loss": 3.2386,
      "step": 14376
    },
    {
      "epoch": 2.76,
      "learning_rate": 0.001,
      "loss": 3.2286,
      "step": 14388
    },
    {
      "epoch": 2.76,
      "learning_rate": 0.001,
      "loss": 3.2141,
      "step": 14400
    },
    {
      "epoch": 2.77,
      "learning_rate": 0.001,
      "loss": 3.2231,
      "step": 14412
    },
    {
      "epoch": 2.77,
      "learning_rate": 0.001,
      "loss": 3.2178,
      "step": 14424
    },
    {
      "epoch": 2.77,
      "learning_rate": 0.001,
      "loss": 3.2316,
      "step": 14436
    },
    {
      "epoch": 2.77,
      "learning_rate": 0.001,
      "loss": 3.2307,
      "step": 14448
    },
    {
      "epoch": 2.78,
      "learning_rate": 0.001,
      "loss": 3.2246,
      "step": 14460
    },
    {
      "epoch": 2.78,
      "learning_rate": 0.001,
      "loss": 3.227,
      "step": 14472
    },
    {
      "epoch": 2.78,
      "learning_rate": 0.001,
      "loss": 3.2174,
      "step": 14484
    },
    {
      "epoch": 2.78,
      "learning_rate": 0.001,
      "loss": 3.2122,
      "step": 14496
    },
    {
      "epoch": 2.79,
      "learning_rate": 0.001,
      "loss": 3.2225,
      "step": 14508
    },
    {
      "epoch": 2.79,
      "learning_rate": 0.001,
      "loss": 3.2192,
      "step": 14520
    },
    {
      "epoch": 2.79,
      "learning_rate": 0.001,
      "loss": 3.2307,
      "step": 14532
    },
    {
      "epoch": 2.79,
      "learning_rate": 0.001,
      "loss": 3.221,
      "step": 14544
    },
    {
      "epoch": 2.79,
      "learning_rate": 0.001,
      "loss": 3.2284,
      "step": 14556
    },
    {
      "epoch": 2.8,
      "learning_rate": 0.001,
      "loss": 3.2116,
      "step": 14568
    },
    {
      "epoch": 2.8,
      "learning_rate": 0.001,
      "loss": 3.2036,
      "step": 14580
    },
    {
      "epoch": 2.8,
      "learning_rate": 0.001,
      "loss": 3.222,
      "step": 14592
    },
    {
      "epoch": 2.8,
      "learning_rate": 0.001,
      "loss": 3.2189,
      "step": 14604
    },
    {
      "epoch": 2.81,
      "learning_rate": 0.001,
      "loss": 3.2201,
      "step": 14616
    },
    {
      "epoch": 2.81,
      "learning_rate": 0.001,
      "loss": 3.214,
      "step": 14628
    },
    {
      "epoch": 2.81,
      "learning_rate": 0.001,
      "loss": 3.2172,
      "step": 14640
    },
    {
      "epoch": 2.81,
      "learning_rate": 0.001,
      "loss": 3.2111,
      "step": 14652
    },
    {
      "epoch": 2.82,
      "learning_rate": 0.001,
      "loss": 3.2105,
      "step": 14664
    },
    {
      "epoch": 2.82,
      "learning_rate": 0.001,
      "loss": 3.2106,
      "step": 14676
    },
    {
      "epoch": 2.82,
      "learning_rate": 0.001,
      "loss": 3.2145,
      "step": 14688
    },
    {
      "epoch": 2.82,
      "learning_rate": 0.001,
      "loss": 3.2146,
      "step": 14700
    },
    {
      "epoch": 2.82,
      "learning_rate": 0.001,
      "loss": 3.2141,
      "step": 14712
    },
    {
      "epoch": 2.83,
      "learning_rate": 0.001,
      "loss": 3.2206,
      "step": 14724
    },
    {
      "epoch": 2.83,
      "learning_rate": 0.001,
      "loss": 3.2162,
      "step": 14736
    },
    {
      "epoch": 2.83,
      "learning_rate": 0.001,
      "loss": 3.2175,
      "step": 14748
    },
    {
      "epoch": 2.83,
      "learning_rate": 0.001,
      "loss": 3.2075,
      "step": 14760
    },
    {
      "epoch": 2.84,
      "learning_rate": 0.001,
      "loss": 3.2282,
      "step": 14772
    },
    {
      "epoch": 2.84,
      "learning_rate": 0.001,
      "loss": 3.2177,
      "step": 14784
    },
    {
      "epoch": 2.84,
      "learning_rate": 0.001,
      "loss": 3.2168,
      "step": 14796
    },
    {
      "epoch": 2.84,
      "learning_rate": 0.001,
      "loss": 3.211,
      "step": 14808
    },
    {
      "epoch": 2.85,
      "learning_rate": 0.001,
      "loss": 3.221,
      "step": 14820
    },
    {
      "epoch": 2.85,
      "learning_rate": 0.001,
      "loss": 3.2264,
      "step": 14832
    },
    {
      "epoch": 2.85,
      "learning_rate": 0.001,
      "loss": 3.2003,
      "step": 14844
    },
    {
      "epoch": 2.85,
      "learning_rate": 0.001,
      "loss": 3.2169,
      "step": 14856
    },
    {
      "epoch": 2.85,
      "learning_rate": 0.001,
      "loss": 3.2082,
      "step": 14868
    },
    {
      "epoch": 2.86,
      "learning_rate": 0.001,
      "loss": 3.2176,
      "step": 14880
    },
    {
      "epoch": 2.86,
      "learning_rate": 0.001,
      "loss": 3.1955,
      "step": 14892
    },
    {
      "epoch": 2.86,
      "learning_rate": 0.001,
      "loss": 3.2079,
      "step": 14904
    },
    {
      "epoch": 2.86,
      "learning_rate": 0.001,
      "loss": 3.2158,
      "step": 14916
    },
    {
      "epoch": 2.87,
      "learning_rate": 0.001,
      "loss": 3.2051,
      "step": 14928
    },
    {
      "epoch": 2.87,
      "learning_rate": 0.001,
      "loss": 3.2095,
      "step": 14940
    },
    {
      "epoch": 2.87,
      "learning_rate": 0.001,
      "loss": 3.2042,
      "step": 14952
    },
    {
      "epoch": 2.87,
      "learning_rate": 0.001,
      "loss": 3.2064,
      "step": 14964
    },
    {
      "epoch": 2.88,
      "learning_rate": 0.001,
      "loss": 3.2085,
      "step": 14976
    },
    {
      "epoch": 2.88,
      "learning_rate": 0.001,
      "loss": 3.205,
      "step": 14988
    },
    {
      "epoch": 2.88,
      "learning_rate": 0.001,
      "loss": 3.2067,
      "step": 15000
    },
    {
      "epoch": 2.88,
      "eval_ag_news_accuracy": 0.25921875,
      "eval_ag_news_bleu_score": 3.1146728829454373,
      "eval_ag_news_bleu_score_sem": 0.11705073789729323,
      "eval_ag_news_emb_cos_sim": 0.6343845129013062,
      "eval_ag_news_emb_cos_sim_sem": 0.01061068182249365,
      "eval_ag_news_emb_top1_equal": 0.1171875,
      "eval_ag_news_emb_top1_equal_sem": 0.02854125312152025,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 4.330489158630371,
      "eval_ag_news_n_ngrams_match_1": 9.858,
      "eval_ag_news_n_ngrams_match_2": 1.73,
      "eval_ag_news_n_ngrams_match_3": 0.418,
      "eval_ag_news_num_pred_words": 44.542,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 75.98144445970051,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.23708583069714723,
      "eval_ag_news_runtime": 11.0115,
      "eval_ag_news_samples_per_second": 45.407,
      "eval_ag_news_steps_per_second": 0.091,
      "eval_ag_news_token_set_f1": 0.26458062076987976,
      "eval_ag_news_token_set_f1_sem": 0.00436743089470579,
      "eval_ag_news_token_set_precision": 0.22692343709668275,
      "eval_ag_news_token_set_recall": 0.3478912418817946,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 15000
    },
    {
      "epoch": 2.88,
      "eval_anthropic_toxic_prompts_accuracy": 0.0831875,
      "eval_anthropic_toxic_prompts_bleu_score": 1.9228351405685142,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.0778504739820073,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5017146468162537,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.012301013450703946,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0390625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.017191973462108996,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 4.030600547790527,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 3.848,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.858,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.254,
      "eval_anthropic_toxic_prompts_num_pred_words": 44.712,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 56.29470876056559,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.13774657993685357,
      "eval_anthropic_toxic_prompts_runtime": 10.4249,
      "eval_anthropic_toxic_prompts_samples_per_second": 47.962,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.096,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.23852053641150797,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0058289375314690695,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.25924826553107483,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.2587102409923538,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 15000
    },
    {
      "epoch": 2.88,
      "eval_arxiv_accuracy": 0.288625,
      "eval_arxiv_bleu_score": 2.6195801248021557,
      "eval_arxiv_bleu_score_sem": 0.08564188071800778,
      "eval_arxiv_emb_cos_sim": 0.5326679348945618,
      "eval_arxiv_emb_cos_sim_sem": 0.009802819591315437,
      "eval_arxiv_emb_top1_equal": 0.15625,
      "eval_arxiv_emb_top1_equal_sem": 0.03221922156442571,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 4.175990581512451,
      "eval_arxiv_n_ngrams_match_1": 10.0,
      "eval_arxiv_n_ngrams_match_2": 1.562,
      "eval_arxiv_n_ngrams_match_3": 0.238,
      "eval_arxiv_num_pred_words": 37.344,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 65.10429884452482,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.23304834470136332,
      "eval_arxiv_runtime": 11.1299,
      "eval_arxiv_samples_per_second": 44.924,
      "eval_arxiv_steps_per_second": 0.09,
      "eval_arxiv_token_set_f1": 0.23857942787074587,
      "eval_arxiv_token_set_f1_sem": 0.004409098720889074,
      "eval_arxiv_token_set_precision": 0.1782094588867365,
      "eval_arxiv_token_set_recall": 0.4031503223975969,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 15000
    },
    {
      "epoch": 2.88,
      "eval_python_code_alpaca_accuracy": 0.1154375,
      "eval_python_code_alpaca_bleu_score": 2.602421954645876,
      "eval_python_code_alpaca_bleu_score_sem": 0.08119990895067912,
      "eval_python_code_alpaca_emb_cos_sim": 0.4306205213069916,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009464545033899954,
      "eval_python_code_alpaca_emb_top1_equal": 0.03125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.015439349450344106,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.8729326725006104,
      "eval_python_code_alpaca_n_ngrams_match_1": 5.614,
      "eval_python_code_alpaca_n_ngrams_match_2": 1.004,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.236,
      "eval_python_code_alpaca_num_pred_words": 36.152,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 48.08319176565465,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.1942210687228555,
      "eval_python_code_alpaca_runtime": 10.6812,
      "eval_python_code_alpaca_samples_per_second": 46.811,
      "eval_python_code_alpaca_steps_per_second": 0.094,
      "eval_python_code_alpaca_token_set_f1": 0.3129188630928717,
      "eval_python_code_alpaca_token_set_f1_sem": 0.00527457308292601,
      "eval_python_code_alpaca_token_set_precision": 0.2849465040152018,
      "eval_python_code_alpaca_token_set_recall": 0.3965390098016386,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 15000
    },
    {
      "epoch": 2.88,
      "eval_wikibio_accuracy": 0.2720625,
      "eval_wikibio_bleu_score": 4.657730721407051,
      "eval_wikibio_bleu_score_sem": 0.19389034286767964,
      "eval_wikibio_emb_cos_sim": 0.6262930631637573,
      "eval_wikibio_emb_cos_sim_sem": 0.01195884557815727,
      "eval_wikibio_emb_top1_equal": 0.1171875,
      "eval_wikibio_emb_top1_equal_sem": 0.02854125312152025,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.540160655975342,
      "eval_wikibio_n_ngrams_match_1": 8.852,
      "eval_wikibio_n_ngrams_match_2": 2.612,
      "eval_wikibio_n_ngrams_match_3": 0.84,
      "eval_wikibio_num_pred_words": 36.068,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 93.70585331550829,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.2899552860106226,
      "eval_wikibio_runtime": 10.6219,
      "eval_wikibio_samples_per_second": 47.073,
      "eval_wikibio_steps_per_second": 0.094,
      "eval_wikibio_token_set_f1": 0.2817308915432452,
      "eval_wikibio_token_set_f1_sem": 0.005963242266797947,
      "eval_wikibio_token_set_precision": 0.28220718950235946,
      "eval_wikibio_token_set_recall": 0.2949596795865249,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 15000
    },
    {
      "epoch": 2.88,
      "eval_nq_accuracy": 0.44459375,
      "eval_nq_bleu_score": 7.449978514961194,
      "eval_nq_bleu_score_sem": 0.32287563581044465,
      "eval_nq_emb_cos_sim": 0.6952831745147705,
      "eval_nq_emb_cos_sim_sem": 0.011135441038046156,
      "eval_nq_emb_top1_equal": 0.1640625,
      "eval_nq_emb_top1_equal_sem": 0.03286167651298939,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.845280170440674,
      "eval_nq_n_ngrams_match_1": 18.454,
      "eval_nq_n_ngrams_match_2": 5.65,
      "eval_nq_n_ngrams_match_3": 2.206,
      "eval_nq_num_pred_words": 48.112,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 17.20637871238331,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.3457408374951795,
      "eval_nq_runtime": 11.4399,
      "eval_nq_samples_per_second": 43.706,
      "eval_nq_steps_per_second": 0.087,
      "eval_nq_token_set_f1": 0.37587631562377505,
      "eval_nq_token_set_f1_sem": 0.004711899644655883,
      "eval_nq_token_set_precision": 0.3183044542889795,
      "eval_nq_token_set_recall": 0.4760862717565977,
      "eval_nq_true_num_tokens": 64.0,
      "step": 15000
    },
    {
      "epoch": 2.88,
      "learning_rate": 0.001,
      "loss": 3.2003,
      "step": 15012
    },
    {
      "epoch": 2.88,
      "learning_rate": 0.001,
      "loss": 3.206,
      "step": 15024
    },
    {
      "epoch": 2.89,
      "learning_rate": 0.001,
      "loss": 3.2072,
      "step": 15036
    },
    {
      "epoch": 2.89,
      "learning_rate": 0.001,
      "loss": 3.207,
      "step": 15048
    },
    {
      "epoch": 2.89,
      "learning_rate": 0.001,
      "loss": 3.2047,
      "step": 15060
    },
    {
      "epoch": 2.89,
      "learning_rate": 0.001,
      "loss": 3.2034,
      "step": 15072
    },
    {
      "epoch": 2.9,
      "learning_rate": 0.001,
      "loss": 3.2035,
      "step": 15084
    },
    {
      "epoch": 2.9,
      "learning_rate": 0.001,
      "loss": 3.2006,
      "step": 15096
    },
    {
      "epoch": 2.9,
      "learning_rate": 0.001,
      "loss": 3.2006,
      "step": 15108
    },
    {
      "epoch": 2.9,
      "learning_rate": 0.001,
      "loss": 3.2184,
      "step": 15120
    },
    {
      "epoch": 2.91,
      "learning_rate": 0.001,
      "loss": 3.2001,
      "step": 15132
    },
    {
      "epoch": 2.91,
      "learning_rate": 0.001,
      "loss": 3.1975,
      "step": 15144
    },
    {
      "epoch": 2.91,
      "learning_rate": 0.001,
      "loss": 3.2084,
      "step": 15156
    },
    {
      "epoch": 2.91,
      "learning_rate": 0.001,
      "loss": 3.2001,
      "step": 15168
    },
    {
      "epoch": 2.91,
      "learning_rate": 0.001,
      "loss": 3.1977,
      "step": 15180
    },
    {
      "epoch": 2.92,
      "learning_rate": 0.001,
      "loss": 3.1937,
      "step": 15192
    },
    {
      "epoch": 2.92,
      "learning_rate": 0.001,
      "loss": 3.1947,
      "step": 15204
    },
    {
      "epoch": 2.92,
      "learning_rate": 0.001,
      "loss": 3.2067,
      "step": 15216
    },
    {
      "epoch": 2.92,
      "learning_rate": 0.001,
      "loss": 3.2051,
      "step": 15228
    },
    {
      "epoch": 2.93,
      "learning_rate": 0.001,
      "loss": 3.2091,
      "step": 15240
    },
    {
      "epoch": 2.93,
      "learning_rate": 0.001,
      "loss": 3.1982,
      "step": 15252
    },
    {
      "epoch": 2.93,
      "learning_rate": 0.001,
      "loss": 3.1982,
      "step": 15264
    },
    {
      "epoch": 2.93,
      "learning_rate": 0.001,
      "loss": 3.1999,
      "step": 15276
    },
    {
      "epoch": 2.94,
      "learning_rate": 0.001,
      "loss": 3.1946,
      "step": 15288
    },
    {
      "epoch": 2.94,
      "learning_rate": 0.001,
      "loss": 3.2004,
      "step": 15300
    },
    {
      "epoch": 2.94,
      "learning_rate": 0.001,
      "loss": 3.1929,
      "step": 15312
    },
    {
      "epoch": 2.94,
      "learning_rate": 0.001,
      "loss": 3.1967,
      "step": 15324
    },
    {
      "epoch": 2.94,
      "learning_rate": 0.001,
      "loss": 3.2117,
      "step": 15336
    },
    {
      "epoch": 2.95,
      "learning_rate": 0.001,
      "loss": 3.209,
      "step": 15348
    },
    {
      "epoch": 2.95,
      "learning_rate": 0.001,
      "loss": 3.1961,
      "step": 15360
    },
    {
      "epoch": 2.95,
      "learning_rate": 0.001,
      "loss": 3.192,
      "step": 15372
    },
    {
      "epoch": 2.95,
      "learning_rate": 0.001,
      "loss": 3.1932,
      "step": 15384
    },
    {
      "epoch": 2.96,
      "learning_rate": 0.001,
      "loss": 3.1985,
      "step": 15396
    },
    {
      "epoch": 2.96,
      "learning_rate": 0.001,
      "loss": 3.1908,
      "step": 15408
    },
    {
      "epoch": 2.96,
      "learning_rate": 0.001,
      "loss": 3.2049,
      "step": 15420
    },
    {
      "epoch": 2.96,
      "learning_rate": 0.001,
      "loss": 3.2007,
      "step": 15432
    },
    {
      "epoch": 2.97,
      "learning_rate": 0.001,
      "loss": 3.1823,
      "step": 15444
    },
    {
      "epoch": 2.97,
      "learning_rate": 0.001,
      "loss": 3.1852,
      "step": 15456
    },
    {
      "epoch": 2.97,
      "learning_rate": 0.001,
      "loss": 3.1955,
      "step": 15468
    },
    {
      "epoch": 2.97,
      "learning_rate": 0.001,
      "loss": 3.197,
      "step": 15480
    },
    {
      "epoch": 2.97,
      "learning_rate": 0.001,
      "loss": 3.2003,
      "step": 15492
    },
    {
      "epoch": 2.98,
      "learning_rate": 0.001,
      "loss": 3.197,
      "step": 15504
    },
    {
      "epoch": 2.98,
      "learning_rate": 0.001,
      "loss": 3.191,
      "step": 15516
    },
    {
      "epoch": 2.98,
      "learning_rate": 0.001,
      "loss": 3.1927,
      "step": 15528
    },
    {
      "epoch": 2.98,
      "learning_rate": 0.001,
      "loss": 3.1963,
      "step": 15540
    },
    {
      "epoch": 2.99,
      "learning_rate": 0.001,
      "loss": 3.1868,
      "step": 15552
    },
    {
      "epoch": 2.99,
      "learning_rate": 0.001,
      "loss": 3.1884,
      "step": 15564
    },
    {
      "epoch": 2.99,
      "learning_rate": 0.001,
      "loss": 3.1942,
      "step": 15576
    },
    {
      "epoch": 2.99,
      "learning_rate": 0.001,
      "loss": 3.2021,
      "step": 15588
    },
    {
      "epoch": 3.0,
      "learning_rate": 0.001,
      "loss": 3.1847,
      "step": 15600
    },
    {
      "epoch": 3.0,
      "learning_rate": 0.001,
      "loss": 3.1873,
      "step": 15612
    },
    {
      "epoch": 3.0,
      "learning_rate": 0.001,
      "loss": 3.1711,
      "step": 15624
    },
    {
      "epoch": 3.0,
      "eval_ag_news_accuracy": 0.2603125,
      "eval_ag_news_bleu_score": 3.268182348171603,
      "eval_ag_news_bleu_score_sem": 0.11873960172756134,
      "eval_ag_news_emb_cos_sim": 0.6460250020027161,
      "eval_ag_news_emb_cos_sim_sem": 0.011290181875264888,
      "eval_ag_news_emb_top1_equal": 0.0859375,
      "eval_ag_news_emb_top1_equal_sem": 0.02487009666300537,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 4.309754848480225,
      "eval_ag_news_n_ngrams_match_1": 10.272,
      "eval_ag_news_n_ngrams_match_2": 1.886,
      "eval_ag_news_n_ngrams_match_3": 0.466,
      "eval_ag_news_num_pred_words": 46.156,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 74.42224197807555,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.2440984893400114,
      "eval_ag_news_runtime": 10.637,
      "eval_ag_news_samples_per_second": 47.006,
      "eval_ag_news_steps_per_second": 0.094,
      "eval_ag_news_token_set_f1": 0.27347343185235684,
      "eval_ag_news_token_set_f1_sem": 0.004372880692145891,
      "eval_ag_news_token_set_precision": 0.23585201604261652,
      "eval_ag_news_token_set_recall": 0.35244043449096946,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 15625
    },
    {
      "epoch": 3.0,
      "eval_anthropic_toxic_prompts_accuracy": 0.08459375,
      "eval_anthropic_toxic_prompts_bleu_score": 1.9100364629326811,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.07250582354424716,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5081263184547424,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011363577977768578,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.03125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015439349450344106,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.976088285446167,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.004,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.932,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.27,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.7,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 53.3080997782062,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.13961901513372155,
      "eval_anthropic_toxic_prompts_runtime": 10.1073,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.469,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.099,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.2496937537485495,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0057900691088243975,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.27453209078491864,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.26925618875136736,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 15625
    },
    {
      "epoch": 3.0,
      "eval_arxiv_accuracy": 0.29284375,
      "eval_arxiv_bleu_score": 2.696982060820797,
      "eval_arxiv_bleu_score_sem": 0.08248124219938613,
      "eval_arxiv_emb_cos_sim": 0.5395243167877197,
      "eval_arxiv_emb_cos_sim_sem": 0.010061520240197845,
      "eval_arxiv_emb_top1_equal": 0.1796875,
      "eval_arxiv_emb_top1_equal_sem": 0.034068008879424266,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 4.148216247558594,
      "eval_arxiv_n_ngrams_match_1": 10.214,
      "eval_arxiv_n_ngrams_match_2": 1.606,
      "eval_arxiv_n_ngrams_match_3": 0.254,
      "eval_arxiv_num_pred_words": 38.316,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 63.32095060153439,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.2366294890814875,
      "eval_arxiv_runtime": 17.6163,
      "eval_arxiv_samples_per_second": 28.383,
      "eval_arxiv_steps_per_second": 0.057,
      "eval_arxiv_token_set_f1": 0.2455057804067272,
      "eval_arxiv_token_set_f1_sem": 0.004128954099926646,
      "eval_arxiv_token_set_precision": 0.18625161758459255,
      "eval_arxiv_token_set_recall": 0.39564808691839587,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 15625
    },
    {
      "epoch": 3.0,
      "eval_python_code_alpaca_accuracy": 0.11765625,
      "eval_python_code_alpaca_bleu_score": 2.795027764789367,
      "eval_python_code_alpaca_bleu_score_sem": 0.08584239624961469,
      "eval_python_code_alpaca_emb_cos_sim": 0.46135619282722473,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009966544951256578,
      "eval_python_code_alpaca_emb_top1_equal": 0.0078125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.007812499866294757,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.8526418209075928,
      "eval_python_code_alpaca_n_ngrams_match_1": 6.012,
      "eval_python_code_alpaca_n_ngrams_match_2": 1.094,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.278,
      "eval_python_code_alpaca_num_pred_words": 36.364,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 47.1173746204497,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.2102465222751838,
      "eval_python_code_alpaca_runtime": 9.8792,
      "eval_python_code_alpaca_samples_per_second": 50.612,
      "eval_python_code_alpaca_steps_per_second": 0.101,
      "eval_python_code_alpaca_token_set_f1": 0.3280570871663888,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0049616657410164735,
      "eval_python_code_alpaca_token_set_precision": 0.30581218258487314,
      "eval_python_code_alpaca_token_set_recall": 0.3993076825225803,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 15625
    },
    {
      "epoch": 3.0,
      "eval_wikibio_accuracy": 0.26859375,
      "eval_wikibio_bleu_score": 4.3952367121655245,
      "eval_wikibio_bleu_score_sem": 0.16632628594435483,
      "eval_wikibio_emb_cos_sim": 0.6318996548652649,
      "eval_wikibio_emb_cos_sim_sem": 0.012864224421533055,
      "eval_wikibio_emb_top1_equal": 0.1640625,
      "eval_wikibio_emb_top1_equal_sem": 0.03286167651298939,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.579071521759033,
      "eval_wikibio_n_ngrams_match_1": 8.778,
      "eval_wikibio_n_ngrams_match_2": 2.546,
      "eval_wikibio_n_ngrams_match_3": 0.784,
      "eval_wikibio_num_pred_words": 36.46,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 97.42389623305286,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.2871338496619885,
      "eval_wikibio_runtime": 10.4046,
      "eval_wikibio_samples_per_second": 48.056,
      "eval_wikibio_steps_per_second": 0.096,
      "eval_wikibio_token_set_f1": 0.2791240929968217,
      "eval_wikibio_token_set_f1_sem": 0.005736262864538556,
      "eval_wikibio_token_set_precision": 0.27854953209470507,
      "eval_wikibio_token_set_recall": 0.2935683917036409,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 15625
    },
    {
      "epoch": 3.0,
      "eval_nq_accuracy": 0.44878125,
      "eval_nq_bleu_score": 7.387878440285374,
      "eval_nq_bleu_score_sem": 0.29598220306382333,
      "eval_nq_emb_cos_sim": 0.7005928754806519,
      "eval_nq_emb_cos_sim_sem": 0.011368274717199511,
      "eval_nq_emb_top1_equal": 0.1796875,
      "eval_nq_emb_top1_equal_sem": 0.034068008879424266,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.824287176132202,
      "eval_nq_n_ngrams_match_1": 18.572,
      "eval_nq_n_ngrams_match_2": 5.672,
      "eval_nq_n_ngrams_match_3": 2.136,
      "eval_nq_num_pred_words": 48.676,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 16.848930384725364,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.3483729051240489,
      "eval_nq_runtime": 10.8993,
      "eval_nq_samples_per_second": 45.875,
      "eval_nq_steps_per_second": 0.092,
      "eval_nq_token_set_f1": 0.377098468533167,
      "eval_nq_token_set_f1_sem": 0.004663483861263476,
      "eval_nq_token_set_precision": 0.319404770270989,
      "eval_nq_token_set_recall": 0.4766853399137281,
      "eval_nq_true_num_tokens": 64.0,
      "step": 15625
    },
    {
      "epoch": 3.0,
      "learning_rate": 0.001,
      "loss": 3.1806,
      "step": 15636
    },
    {
      "epoch": 3.0,
      "learning_rate": 0.001,
      "loss": 3.1734,
      "step": 15648
    },
    {
      "epoch": 3.01,
      "learning_rate": 0.001,
      "loss": 3.1738,
      "step": 15660
    },
    {
      "epoch": 3.01,
      "learning_rate": 0.001,
      "loss": 3.1834,
      "step": 15672
    },
    {
      "epoch": 3.01,
      "learning_rate": 0.001,
      "loss": 3.1724,
      "step": 15684
    },
    {
      "epoch": 3.01,
      "learning_rate": 0.001,
      "loss": 3.1817,
      "step": 15696
    },
    {
      "epoch": 3.02,
      "learning_rate": 0.001,
      "loss": 3.176,
      "step": 15708
    },
    {
      "epoch": 3.02,
      "learning_rate": 0.001,
      "loss": 3.1563,
      "step": 15720
    },
    {
      "epoch": 3.02,
      "learning_rate": 0.001,
      "loss": 3.1622,
      "step": 15732
    },
    {
      "epoch": 3.02,
      "learning_rate": 0.001,
      "loss": 3.1693,
      "step": 15744
    },
    {
      "epoch": 3.03,
      "learning_rate": 0.001,
      "loss": 3.1809,
      "step": 15756
    },
    {
      "epoch": 3.03,
      "learning_rate": 0.001,
      "loss": 3.1824,
      "step": 15768
    },
    {
      "epoch": 3.03,
      "learning_rate": 0.001,
      "loss": 3.1667,
      "step": 15780
    },
    {
      "epoch": 3.03,
      "learning_rate": 0.001,
      "loss": 3.1679,
      "step": 15792
    },
    {
      "epoch": 3.03,
      "learning_rate": 0.001,
      "loss": 3.1847,
      "step": 15804
    },
    {
      "epoch": 3.04,
      "learning_rate": 0.001,
      "loss": 3.1623,
      "step": 15816
    },
    {
      "epoch": 3.04,
      "learning_rate": 0.001,
      "loss": 3.1641,
      "step": 15828
    },
    {
      "epoch": 3.04,
      "learning_rate": 0.001,
      "loss": 3.1671,
      "step": 15840
    },
    {
      "epoch": 3.04,
      "learning_rate": 0.001,
      "loss": 3.1782,
      "step": 15852
    },
    {
      "epoch": 3.05,
      "learning_rate": 0.001,
      "loss": 3.1654,
      "step": 15864
    },
    {
      "epoch": 3.05,
      "learning_rate": 0.001,
      "loss": 3.1616,
      "step": 15876
    },
    {
      "epoch": 3.05,
      "learning_rate": 0.001,
      "loss": 3.1711,
      "step": 15888
    },
    {
      "epoch": 3.05,
      "learning_rate": 0.001,
      "loss": 3.1624,
      "step": 15900
    },
    {
      "epoch": 3.06,
      "learning_rate": 0.001,
      "loss": 3.1787,
      "step": 15912
    },
    {
      "epoch": 3.06,
      "learning_rate": 0.001,
      "loss": 3.1705,
      "step": 15924
    },
    {
      "epoch": 3.06,
      "learning_rate": 0.001,
      "loss": 3.1592,
      "step": 15936
    },
    {
      "epoch": 3.06,
      "learning_rate": 0.001,
      "loss": 3.1644,
      "step": 15948
    },
    {
      "epoch": 3.06,
      "learning_rate": 0.001,
      "loss": 3.1611,
      "step": 15960
    },
    {
      "epoch": 3.07,
      "learning_rate": 0.001,
      "loss": 3.1743,
      "step": 15972
    },
    {
      "epoch": 3.07,
      "learning_rate": 0.001,
      "loss": 3.1627,
      "step": 15984
    },
    {
      "epoch": 3.07,
      "learning_rate": 0.001,
      "loss": 3.1836,
      "step": 15996
    },
    {
      "epoch": 3.07,
      "learning_rate": 0.001,
      "loss": 3.1657,
      "step": 16008
    },
    {
      "epoch": 3.08,
      "learning_rate": 0.001,
      "loss": 3.159,
      "step": 16020
    },
    {
      "epoch": 3.08,
      "learning_rate": 0.001,
      "loss": 3.1646,
      "step": 16032
    },
    {
      "epoch": 3.08,
      "learning_rate": 0.001,
      "loss": 3.1568,
      "step": 16044
    },
    {
      "epoch": 3.08,
      "learning_rate": 0.001,
      "loss": 3.1673,
      "step": 16056
    },
    {
      "epoch": 3.09,
      "learning_rate": 0.001,
      "loss": 3.1648,
      "step": 16068
    },
    {
      "epoch": 3.09,
      "learning_rate": 0.001,
      "loss": 3.1641,
      "step": 16080
    },
    {
      "epoch": 3.09,
      "learning_rate": 0.001,
      "loss": 3.1565,
      "step": 16092
    },
    {
      "epoch": 3.09,
      "learning_rate": 0.001,
      "loss": 3.1479,
      "step": 16104
    },
    {
      "epoch": 3.09,
      "learning_rate": 0.001,
      "loss": 3.1622,
      "step": 16116
    },
    {
      "epoch": 3.1,
      "learning_rate": 0.001,
      "loss": 3.1675,
      "step": 16128
    },
    {
      "epoch": 3.1,
      "learning_rate": 0.001,
      "loss": 3.179,
      "step": 16140
    },
    {
      "epoch": 3.1,
      "learning_rate": 0.001,
      "loss": 3.1674,
      "step": 16152
    },
    {
      "epoch": 3.1,
      "learning_rate": 0.001,
      "loss": 3.159,
      "step": 16164
    },
    {
      "epoch": 3.11,
      "learning_rate": 0.001,
      "loss": 3.1536,
      "step": 16176
    },
    {
      "epoch": 3.11,
      "learning_rate": 0.001,
      "loss": 3.1582,
      "step": 16188
    },
    {
      "epoch": 3.11,
      "learning_rate": 0.001,
      "loss": 3.1621,
      "step": 16200
    },
    {
      "epoch": 3.11,
      "learning_rate": 0.001,
      "loss": 3.1679,
      "step": 16212
    },
    {
      "epoch": 3.12,
      "learning_rate": 0.001,
      "loss": 3.158,
      "step": 16224
    },
    {
      "epoch": 3.12,
      "learning_rate": 0.001,
      "loss": 3.1478,
      "step": 16236
    },
    {
      "epoch": 3.12,
      "learning_rate": 0.001,
      "loss": 3.1548,
      "step": 16248
    },
    {
      "epoch": 3.12,
      "eval_ag_news_accuracy": 0.2639375,
      "eval_ag_news_bleu_score": 3.1814057161788645,
      "eval_ag_news_bleu_score_sem": 0.11700248963789446,
      "eval_ag_news_emb_cos_sim": 0.6401171088218689,
      "eval_ag_news_emb_cos_sim_sem": 0.011685395507929573,
      "eval_ag_news_emb_top1_equal": 0.1015625,
      "eval_ag_news_emb_top1_equal_sem": 0.026804565886848545,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 4.290400505065918,
      "eval_ag_news_n_ngrams_match_1": 9.948,
      "eval_ag_news_n_ngrams_match_2": 1.774,
      "eval_ag_news_n_ngrams_match_3": 0.438,
      "eval_ag_news_num_pred_words": 44.398,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 72.99569779275832,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.23894300331323687,
      "eval_ag_news_runtime": 10.5754,
      "eval_ag_news_samples_per_second": 47.28,
      "eval_ag_news_steps_per_second": 0.095,
      "eval_ag_news_token_set_f1": 0.26816691817397154,
      "eval_ag_news_token_set_f1_sem": 0.004438205105594491,
      "eval_ag_news_token_set_precision": 0.22896652228378245,
      "eval_ag_news_token_set_recall": 0.357512889401445,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 16250
    },
    {
      "epoch": 3.12,
      "eval_anthropic_toxic_prompts_accuracy": 0.08340625,
      "eval_anthropic_toxic_prompts_bleu_score": 1.936660342823424,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.08287166779429654,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5021185874938965,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.012252040129823304,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.9689595699310303,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 3.944,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.858,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.246,
      "eval_anthropic_toxic_prompts_num_pred_words": 44.708,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 52.92943280839483,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.1385605902191907,
      "eval_anthropic_toxic_prompts_runtime": 10.123,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.392,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.099,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.25415028179892357,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005881566021876812,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.2693754887360548,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.2811603069470758,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 16250
    },
    {
      "epoch": 3.12,
      "eval_arxiv_accuracy": 0.29425,
      "eval_arxiv_bleu_score": 2.5639858200829724,
      "eval_arxiv_bleu_score_sem": 0.07629532420099573,
      "eval_arxiv_emb_cos_sim": 0.5470216274261475,
      "eval_arxiv_emb_cos_sim_sem": 0.009226406120190291,
      "eval_arxiv_emb_top1_equal": 0.203125,
      "eval_arxiv_emb_top1_equal_sem": 0.03570055125142555,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 4.116957187652588,
      "eval_arxiv_n_ngrams_match_1": 9.904,
      "eval_arxiv_n_ngrams_match_2": 1.54,
      "eval_arxiv_n_ngrams_match_3": 0.236,
      "eval_arxiv_num_pred_words": 36.468,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 61.372213732635416,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.23360398366144824,
      "eval_arxiv_runtime": 10.4672,
      "eval_arxiv_samples_per_second": 47.768,
      "eval_arxiv_steps_per_second": 0.096,
      "eval_arxiv_token_set_f1": 0.24120323309675257,
      "eval_arxiv_token_set_f1_sem": 0.004272308361307677,
      "eval_arxiv_token_set_precision": 0.17836754131634255,
      "eval_arxiv_token_set_recall": 0.42066364898546915,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 16250
    },
    {
      "epoch": 3.12,
      "eval_python_code_alpaca_accuracy": 0.1181875,
      "eval_python_code_alpaca_bleu_score": 2.6655154167411714,
      "eval_python_code_alpaca_bleu_score_sem": 0.08819318507159991,
      "eval_python_code_alpaca_emb_cos_sim": 0.4437231421470642,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.00913315226659289,
      "eval_python_code_alpaca_emb_top1_equal": 0.015625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.011004959004867984,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.8443946838378906,
      "eval_python_code_alpaca_n_ngrams_match_1": 5.638,
      "eval_python_code_alpaca_n_ngrams_match_2": 1.044,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.264,
      "eval_python_code_alpaca_num_pred_words": 36.442,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 46.73038912819738,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.19533444208456124,
      "eval_python_code_alpaca_runtime": 10.6622,
      "eval_python_code_alpaca_samples_per_second": 46.894,
      "eval_python_code_alpaca_steps_per_second": 0.094,
      "eval_python_code_alpaca_token_set_f1": 0.3157780856226203,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005365401957471515,
      "eval_python_code_alpaca_token_set_precision": 0.28295346829435153,
      "eval_python_code_alpaca_token_set_recall": 0.408937565343303,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 16250
    },
    {
      "epoch": 3.12,
      "eval_wikibio_accuracy": 0.2725,
      "eval_wikibio_bleu_score": 4.485603617690607,
      "eval_wikibio_bleu_score_sem": 0.17564340019764493,
      "eval_wikibio_emb_cos_sim": 0.6322147846221924,
      "eval_wikibio_emb_cos_sim_sem": 0.012563671293534822,
      "eval_wikibio_emb_top1_equal": 0.078125,
      "eval_wikibio_emb_top1_equal_sem": 0.023813825516515504,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.520080089569092,
      "eval_wikibio_n_ngrams_match_1": 8.686,
      "eval_wikibio_n_ngrams_match_2": 2.554,
      "eval_wikibio_n_ngrams_match_3": 0.8,
      "eval_wikibio_num_pred_words": 36.01,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 91.84295334616633,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.2866793295831782,
      "eval_wikibio_runtime": 11.7506,
      "eval_wikibio_samples_per_second": 42.551,
      "eval_wikibio_steps_per_second": 0.085,
      "eval_wikibio_token_set_f1": 0.2830691048525941,
      "eval_wikibio_token_set_f1_sem": 0.005442402733291205,
      "eval_wikibio_token_set_precision": 0.2795208030179828,
      "eval_wikibio_token_set_recall": 0.3099275265012032,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 16250
    },
    {
      "epoch": 3.12,
      "eval_nq_accuracy": 0.4508125,
      "eval_nq_bleu_score": 7.534441912378916,
      "eval_nq_bleu_score_sem": 0.3328234962053897,
      "eval_nq_emb_cos_sim": 0.7037692070007324,
      "eval_nq_emb_cos_sim_sem": 0.010573599181249144,
      "eval_nq_emb_top1_equal": 0.1875,
      "eval_nq_emb_top1_equal_sem": 0.034634623208270626,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.8061931133270264,
      "eval_nq_n_ngrams_match_1": 18.704,
      "eval_nq_n_ngrams_match_2": 5.722,
      "eval_nq_n_ngrams_match_3": 2.24,
      "eval_nq_num_pred_words": 48.152,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 16.54680634856935,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.3485770031977412,
      "eval_nq_runtime": 11.3821,
      "eval_nq_samples_per_second": 43.929,
      "eval_nq_steps_per_second": 0.088,
      "eval_nq_token_set_f1": 0.382374129631992,
      "eval_nq_token_set_f1_sem": 0.00479487791611988,
      "eval_nq_token_set_precision": 0.32179220544616005,
      "eval_nq_token_set_recall": 0.49008944412534033,
      "eval_nq_true_num_tokens": 64.0,
      "step": 16250
    },
    {
      "epoch": 3.12,
      "learning_rate": 0.001,
      "loss": 3.1707,
      "step": 16260
    },
    {
      "epoch": 3.12,
      "learning_rate": 0.001,
      "loss": 3.1629,
      "step": 16272
    },
    {
      "epoch": 3.13,
      "learning_rate": 0.001,
      "loss": 3.1492,
      "step": 16284
    },
    {
      "epoch": 3.13,
      "learning_rate": 0.001,
      "loss": 3.1674,
      "step": 16296
    },
    {
      "epoch": 3.13,
      "learning_rate": 0.001,
      "loss": 3.1614,
      "step": 16308
    },
    {
      "epoch": 3.13,
      "learning_rate": 0.001,
      "loss": 3.1557,
      "step": 16320
    },
    {
      "epoch": 3.14,
      "learning_rate": 0.001,
      "loss": 3.1557,
      "step": 16332
    },
    {
      "epoch": 3.14,
      "learning_rate": 0.001,
      "loss": 3.1664,
      "step": 16344
    },
    {
      "epoch": 3.14,
      "learning_rate": 0.001,
      "loss": 3.1599,
      "step": 16356
    },
    {
      "epoch": 3.14,
      "learning_rate": 0.001,
      "loss": 3.1541,
      "step": 16368
    },
    {
      "epoch": 3.15,
      "learning_rate": 0.001,
      "loss": 3.153,
      "step": 16380
    },
    {
      "epoch": 3.15,
      "learning_rate": 0.001,
      "loss": 3.1648,
      "step": 16392
    },
    {
      "epoch": 3.15,
      "learning_rate": 0.001,
      "loss": 3.1364,
      "step": 16404
    },
    {
      "epoch": 3.15,
      "learning_rate": 0.001,
      "loss": 3.155,
      "step": 16416
    },
    {
      "epoch": 3.15,
      "learning_rate": 0.001,
      "loss": 3.1549,
      "step": 16428
    },
    {
      "epoch": 3.16,
      "learning_rate": 0.001,
      "loss": 3.149,
      "step": 16440
    },
    {
      "epoch": 3.16,
      "learning_rate": 0.001,
      "loss": 3.1526,
      "step": 16452
    },
    {
      "epoch": 3.16,
      "learning_rate": 0.001,
      "loss": 3.1595,
      "step": 16464
    },
    {
      "epoch": 3.16,
      "learning_rate": 0.001,
      "loss": 3.1534,
      "step": 16476
    },
    {
      "epoch": 3.17,
      "learning_rate": 0.001,
      "loss": 3.1485,
      "step": 16488
    },
    {
      "epoch": 3.17,
      "learning_rate": 0.001,
      "loss": 3.1539,
      "step": 16500
    },
    {
      "epoch": 3.17,
      "learning_rate": 0.001,
      "loss": 3.1537,
      "step": 16512
    },
    {
      "epoch": 3.17,
      "learning_rate": 0.001,
      "loss": 3.1413,
      "step": 16524
    },
    {
      "epoch": 3.18,
      "learning_rate": 0.001,
      "loss": 3.1461,
      "step": 16536
    },
    {
      "epoch": 3.18,
      "learning_rate": 0.001,
      "loss": 3.1507,
      "step": 16548
    },
    {
      "epoch": 3.18,
      "learning_rate": 0.001,
      "loss": 3.1534,
      "step": 16560
    },
    {
      "epoch": 3.18,
      "learning_rate": 0.001,
      "loss": 3.1528,
      "step": 16572
    },
    {
      "epoch": 3.18,
      "learning_rate": 0.001,
      "loss": 3.1506,
      "step": 16584
    },
    {
      "epoch": 3.19,
      "learning_rate": 0.001,
      "loss": 3.154,
      "step": 16596
    },
    {
      "epoch": 3.19,
      "learning_rate": 0.001,
      "loss": 3.1533,
      "step": 16608
    },
    {
      "epoch": 3.19,
      "learning_rate": 0.001,
      "loss": 3.1509,
      "step": 16620
    },
    {
      "epoch": 3.19,
      "learning_rate": 0.001,
      "loss": 3.1583,
      "step": 16632
    },
    {
      "epoch": 3.2,
      "learning_rate": 0.001,
      "loss": 3.1663,
      "step": 16644
    },
    {
      "epoch": 3.2,
      "learning_rate": 0.001,
      "loss": 3.1597,
      "step": 16656
    },
    {
      "epoch": 3.2,
      "learning_rate": 0.001,
      "loss": 3.1477,
      "step": 16668
    },
    {
      "epoch": 3.2,
      "learning_rate": 0.001,
      "loss": 3.146,
      "step": 16680
    },
    {
      "epoch": 3.21,
      "learning_rate": 0.001,
      "loss": 3.1472,
      "step": 16692
    },
    {
      "epoch": 3.21,
      "learning_rate": 0.001,
      "loss": 3.1509,
      "step": 16704
    },
    {
      "epoch": 3.21,
      "learning_rate": 0.001,
      "loss": 3.151,
      "step": 16716
    },
    {
      "epoch": 3.21,
      "learning_rate": 0.001,
      "loss": 3.1417,
      "step": 16728
    },
    {
      "epoch": 3.21,
      "learning_rate": 0.001,
      "loss": 3.1468,
      "step": 16740
    },
    {
      "epoch": 3.22,
      "learning_rate": 0.001,
      "loss": 3.1415,
      "step": 16752
    },
    {
      "epoch": 3.22,
      "learning_rate": 0.001,
      "loss": 3.1549,
      "step": 16764
    },
    {
      "epoch": 3.22,
      "learning_rate": 0.001,
      "loss": 3.1339,
      "step": 16776
    },
    {
      "epoch": 3.22,
      "learning_rate": 0.001,
      "loss": 3.1514,
      "step": 16788
    },
    {
      "epoch": 3.23,
      "learning_rate": 0.001,
      "loss": 3.1544,
      "step": 16800
    },
    {
      "epoch": 3.23,
      "learning_rate": 0.001,
      "loss": 3.1501,
      "step": 16812
    },
    {
      "epoch": 3.23,
      "learning_rate": 0.001,
      "loss": 3.1483,
      "step": 16824
    },
    {
      "epoch": 3.23,
      "learning_rate": 0.001,
      "loss": 3.1459,
      "step": 16836
    },
    {
      "epoch": 3.24,
      "learning_rate": 0.001,
      "loss": 3.1415,
      "step": 16848
    },
    {
      "epoch": 3.24,
      "learning_rate": 0.001,
      "loss": 3.1496,
      "step": 16860
    },
    {
      "epoch": 3.24,
      "learning_rate": 0.001,
      "loss": 3.1354,
      "step": 16872
    },
    {
      "epoch": 3.24,
      "eval_ag_news_accuracy": 0.266,
      "eval_ag_news_bleu_score": 3.303569834022567,
      "eval_ag_news_bleu_score_sem": 0.12057156154320481,
      "eval_ag_news_emb_cos_sim": 0.6400679349899292,
      "eval_ag_news_emb_cos_sim_sem": 0.01229078493740596,
      "eval_ag_news_emb_top1_equal": 0.1171875,
      "eval_ag_news_emb_top1_equal_sem": 0.02854125312152025,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 4.255241394042969,
      "eval_ag_news_n_ngrams_match_1": 10.288,
      "eval_ag_news_n_ngrams_match_2": 1.854,
      "eval_ag_news_n_ngrams_match_3": 0.478,
      "eval_ag_news_num_pred_words": 44.574,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 70.47382709735112,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.24354215636578458,
      "eval_ag_news_runtime": 10.7818,
      "eval_ag_news_samples_per_second": 46.375,
      "eval_ag_news_steps_per_second": 0.093,
      "eval_ag_news_token_set_f1": 0.2760969694638503,
      "eval_ag_news_token_set_f1_sem": 0.004610602165884031,
      "eval_ag_news_token_set_precision": 0.23593322373411124,
      "eval_ag_news_token_set_recall": 0.3631982850513387,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 16875
    },
    {
      "epoch": 3.24,
      "eval_anthropic_toxic_prompts_accuracy": 0.0853125,
      "eval_anthropic_toxic_prompts_bleu_score": 1.9637353748833446,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.07792872705331727,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5194891691207886,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.012998227692195411,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.046875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01875615101164758,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.9574742317199707,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.016,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.934,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.276,
      "eval_anthropic_toxic_prompts_num_pred_words": 45.818,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 52.32499808427228,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.13946636146453192,
      "eval_anthropic_toxic_prompts_runtime": 9.9617,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.192,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.1,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.262844829612984,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00598170911421396,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.27494351000750017,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.29621014492748643,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 16875
    },
    {
      "epoch": 3.24,
      "eval_arxiv_accuracy": 0.29409375,
      "eval_arxiv_bleu_score": 2.7263133784274958,
      "eval_arxiv_bleu_score_sem": 0.08284921304694123,
      "eval_arxiv_emb_cos_sim": 0.5507416129112244,
      "eval_arxiv_emb_cos_sim_sem": 0.009650875592622109,
      "eval_arxiv_emb_top1_equal": 0.15625,
      "eval_arxiv_emb_top1_equal_sem": 0.03221922156442571,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 4.089741230010986,
      "eval_arxiv_n_ngrams_match_1": 10.364,
      "eval_arxiv_n_ngrams_match_2": 1.562,
      "eval_arxiv_n_ngrams_match_3": 0.266,
      "eval_arxiv_num_pred_words": 38.226,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 59.724434813001274,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.2396616667609068,
      "eval_arxiv_runtime": 10.2405,
      "eval_arxiv_samples_per_second": 48.826,
      "eval_arxiv_steps_per_second": 0.098,
      "eval_arxiv_token_set_f1": 0.25243020170923214,
      "eval_arxiv_token_set_f1_sem": 0.004041100372068284,
      "eval_arxiv_token_set_precision": 0.18850721985936827,
      "eval_arxiv_token_set_recall": 0.41718000763134117,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 16875
    },
    {
      "epoch": 3.24,
      "eval_python_code_alpaca_accuracy": 0.11846875,
      "eval_python_code_alpaca_bleu_score": 2.7778647195056383,
      "eval_python_code_alpaca_bleu_score_sem": 0.08894850527051816,
      "eval_python_code_alpaca_emb_cos_sim": 0.47885242104530334,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009847563739365815,
      "eval_python_code_alpaca_emb_top1_equal": 0.015625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.011004959004867984,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.8238985538482666,
      "eval_python_code_alpaca_n_ngrams_match_1": 6.128,
      "eval_python_code_alpaca_n_ngrams_match_2": 1.15,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.298,
      "eval_python_code_alpaca_num_pred_words": 37.428,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 45.78234579634366,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.2096884216673367,
      "eval_python_code_alpaca_runtime": 19.5585,
      "eval_python_code_alpaca_samples_per_second": 25.564,
      "eval_python_code_alpaca_steps_per_second": 0.051,
      "eval_python_code_alpaca_token_set_f1": 0.3299429882816963,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005303511076664485,
      "eval_python_code_alpaca_token_set_precision": 0.30877943636792743,
      "eval_python_code_alpaca_token_set_recall": 0.39720967992619755,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 16875
    },
    {
      "epoch": 3.24,
      "eval_wikibio_accuracy": 0.27390625,
      "eval_wikibio_bleu_score": 4.70871177857877,
      "eval_wikibio_bleu_score_sem": 0.1792521367134163,
      "eval_wikibio_emb_cos_sim": 0.6363104581832886,
      "eval_wikibio_emb_cos_sim_sem": 0.011500971435558028,
      "eval_wikibio_emb_top1_equal": 0.0625,
      "eval_wikibio_emb_top1_equal_sem": 0.02147948148198014,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.529972076416016,
      "eval_wikibio_n_ngrams_match_1": 8.762,
      "eval_wikibio_n_ngrams_match_2": 2.618,
      "eval_wikibio_n_ngrams_match_3": 0.834,
      "eval_wikibio_num_pred_words": 36.068,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 92.75597096680391,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.2894653343321614,
      "eval_wikibio_runtime": 10.2644,
      "eval_wikibio_samples_per_second": 48.712,
      "eval_wikibio_steps_per_second": 0.097,
      "eval_wikibio_token_set_f1": 0.2810842430578845,
      "eval_wikibio_token_set_f1_sem": 0.005703281804297346,
      "eval_wikibio_token_set_precision": 0.2791533800186806,
      "eval_wikibio_token_set_recall": 0.298001260891474,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 16875
    },
    {
      "epoch": 3.24,
      "eval_nq_accuracy": 0.453625,
      "eval_nq_bleu_score": 7.799947295850558,
      "eval_nq_bleu_score_sem": 0.34341544883029496,
      "eval_nq_emb_cos_sim": 0.7063493728637695,
      "eval_nq_emb_cos_sim_sem": 0.010792150347113368,
      "eval_nq_emb_top1_equal": 0.125,
      "eval_nq_emb_top1_equal_sem": 0.02934655822437397,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.7839088439941406,
      "eval_nq_n_ngrams_match_1": 18.848,
      "eval_nq_n_ngrams_match_2": 5.8,
      "eval_nq_n_ngrams_match_3": 2.364,
      "eval_nq_num_pred_words": 48.476,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 16.182150988611035,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.3511610581123732,
      "eval_nq_runtime": 11.575,
      "eval_nq_samples_per_second": 43.196,
      "eval_nq_steps_per_second": 0.086,
      "eval_nq_token_set_f1": 0.38475317847984153,
      "eval_nq_token_set_f1_sem": 0.004827815823807924,
      "eval_nq_token_set_precision": 0.3250383639370381,
      "eval_nq_token_set_recall": 0.492457164008117,
      "eval_nq_true_num_tokens": 64.0,
      "step": 16875
    },
    {
      "epoch": 3.24,
      "learning_rate": 0.001,
      "loss": 3.1462,
      "step": 16884
    },
    {
      "epoch": 3.24,
      "learning_rate": 0.001,
      "loss": 3.1494,
      "step": 16896
    },
    {
      "epoch": 3.25,
      "learning_rate": 0.001,
      "loss": 3.14,
      "step": 16908
    },
    {
      "epoch": 3.25,
      "learning_rate": 0.001,
      "loss": 3.154,
      "step": 16920
    },
    {
      "epoch": 3.25,
      "learning_rate": 0.001,
      "loss": 3.1333,
      "step": 16932
    },
    {
      "epoch": 3.25,
      "learning_rate": 0.001,
      "loss": 3.1471,
      "step": 16944
    },
    {
      "epoch": 3.26,
      "learning_rate": 0.001,
      "loss": 3.1428,
      "step": 16956
    },
    {
      "epoch": 3.26,
      "learning_rate": 0.001,
      "loss": 3.1304,
      "step": 16968
    },
    {
      "epoch": 3.26,
      "learning_rate": 0.001,
      "loss": 3.1343,
      "step": 16980
    },
    {
      "epoch": 3.26,
      "learning_rate": 0.001,
      "loss": 3.144,
      "step": 16992
    },
    {
      "epoch": 3.26,
      "learning_rate": 0.001,
      "loss": 3.1491,
      "step": 17004
    },
    {
      "epoch": 3.27,
      "learning_rate": 0.001,
      "loss": 3.1504,
      "step": 17016
    },
    {
      "epoch": 3.27,
      "learning_rate": 0.001,
      "loss": 3.1335,
      "step": 17028
    },
    {
      "epoch": 3.27,
      "learning_rate": 0.001,
      "loss": 3.136,
      "step": 17040
    },
    {
      "epoch": 3.27,
      "learning_rate": 0.001,
      "loss": 3.1514,
      "step": 17052
    },
    {
      "epoch": 3.28,
      "learning_rate": 0.001,
      "loss": 3.1317,
      "step": 17064
    },
    {
      "epoch": 3.28,
      "learning_rate": 0.001,
      "loss": 3.128,
      "step": 17076
    },
    {
      "epoch": 3.28,
      "learning_rate": 0.001,
      "loss": 3.1309,
      "step": 17088
    },
    {
      "epoch": 3.28,
      "learning_rate": 0.001,
      "loss": 3.1312,
      "step": 17100
    },
    {
      "epoch": 3.29,
      "learning_rate": 0.001,
      "loss": 3.1446,
      "step": 17112
    },
    {
      "epoch": 3.29,
      "learning_rate": 0.001,
      "loss": 3.1496,
      "step": 17124
    },
    {
      "epoch": 3.29,
      "learning_rate": 0.001,
      "loss": 3.1473,
      "step": 17136
    },
    {
      "epoch": 3.29,
      "learning_rate": 0.001,
      "loss": 3.1476,
      "step": 17148
    },
    {
      "epoch": 3.29,
      "learning_rate": 0.001,
      "loss": 3.1391,
      "step": 17160
    },
    {
      "epoch": 3.3,
      "learning_rate": 0.001,
      "loss": 3.1305,
      "step": 17172
    },
    {
      "epoch": 3.3,
      "learning_rate": 0.001,
      "loss": 3.1441,
      "step": 17184
    },
    {
      "epoch": 3.3,
      "learning_rate": 0.001,
      "loss": 3.1377,
      "step": 17196
    },
    {
      "epoch": 3.3,
      "learning_rate": 0.001,
      "loss": 3.1193,
      "step": 17208
    },
    {
      "epoch": 3.31,
      "learning_rate": 0.001,
      "loss": 3.1296,
      "step": 17220
    },
    {
      "epoch": 3.31,
      "learning_rate": 0.001,
      "loss": 3.1363,
      "step": 17232
    },
    {
      "epoch": 3.31,
      "learning_rate": 0.001,
      "loss": 3.1334,
      "step": 17244
    },
    {
      "epoch": 3.31,
      "learning_rate": 0.001,
      "loss": 3.1382,
      "step": 17256
    },
    {
      "epoch": 3.32,
      "learning_rate": 0.001,
      "loss": 3.1419,
      "step": 17268
    },
    {
      "epoch": 3.32,
      "learning_rate": 0.001,
      "loss": 3.1289,
      "step": 17280
    },
    {
      "epoch": 3.32,
      "learning_rate": 0.001,
      "loss": 3.1375,
      "step": 17292
    },
    {
      "epoch": 3.32,
      "learning_rate": 0.001,
      "loss": 3.1312,
      "step": 17304
    },
    {
      "epoch": 3.32,
      "learning_rate": 0.001,
      "loss": 3.1386,
      "step": 17316
    },
    {
      "epoch": 3.33,
      "learning_rate": 0.001,
      "loss": 3.1428,
      "step": 17328
    },
    {
      "epoch": 3.33,
      "learning_rate": 0.001,
      "loss": 3.1167,
      "step": 17340
    },
    {
      "epoch": 3.33,
      "learning_rate": 0.001,
      "loss": 3.1223,
      "step": 17352
    },
    {
      "epoch": 3.33,
      "learning_rate": 0.001,
      "loss": 3.1361,
      "step": 17364
    },
    {
      "epoch": 3.34,
      "learning_rate": 0.001,
      "loss": 3.1261,
      "step": 17376
    },
    {
      "epoch": 3.34,
      "learning_rate": 0.001,
      "loss": 3.1137,
      "step": 17388
    },
    {
      "epoch": 3.34,
      "learning_rate": 0.001,
      "loss": 3.125,
      "step": 17400
    },
    {
      "epoch": 3.34,
      "learning_rate": 0.001,
      "loss": 3.1294,
      "step": 17412
    },
    {
      "epoch": 3.35,
      "learning_rate": 0.001,
      "loss": 3.1268,
      "step": 17424
    },
    {
      "epoch": 3.35,
      "learning_rate": 0.001,
      "loss": 3.1255,
      "step": 17436
    },
    {
      "epoch": 3.35,
      "learning_rate": 0.001,
      "loss": 3.1188,
      "step": 17448
    },
    {
      "epoch": 3.35,
      "learning_rate": 0.001,
      "loss": 3.1168,
      "step": 17460
    },
    {
      "epoch": 3.35,
      "learning_rate": 0.001,
      "loss": 3.1214,
      "step": 17472
    },
    {
      "epoch": 3.36,
      "learning_rate": 0.001,
      "loss": 3.1382,
      "step": 17484
    },
    {
      "epoch": 3.36,
      "learning_rate": 0.001,
      "loss": 3.134,
      "step": 17496
    },
    {
      "epoch": 3.36,
      "eval_ag_news_accuracy": 0.26496875,
      "eval_ag_news_bleu_score": 3.2441573621282522,
      "eval_ag_news_bleu_score_sem": 0.12257956427677824,
      "eval_ag_news_emb_cos_sim": 0.641579270362854,
      "eval_ag_news_emb_cos_sim_sem": 0.011090903672971286,
      "eval_ag_news_emb_top1_equal": 0.1328125,
      "eval_ag_news_emb_top1_equal_sem": 0.030114394778901498,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 4.234482288360596,
      "eval_ag_news_n_ngrams_match_1": 10.064,
      "eval_ag_news_n_ngrams_match_2": 1.832,
      "eval_ag_news_n_ngrams_match_3": 0.45,
      "eval_ag_news_num_pred_words": 44.004,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 69.02593395214409,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.24424833659897957,
      "eval_ag_news_runtime": 10.9964,
      "eval_ag_news_samples_per_second": 45.469,
      "eval_ag_news_steps_per_second": 0.091,
      "eval_ag_news_token_set_f1": 0.2700086157637954,
      "eval_ag_news_token_set_f1_sem": 0.004523843277511473,
      "eval_ag_news_token_set_precision": 0.232610492572598,
      "eval_ag_news_token_set_recall": 0.3508298426646458,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 17500
    },
    {
      "epoch": 3.36,
      "eval_anthropic_toxic_prompts_accuracy": 0.08559375,
      "eval_anthropic_toxic_prompts_bleu_score": 1.9210587842054136,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.07415382958891022,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5235103368759155,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011947159047318608,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0390625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.017191973462108996,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.9652905464172363,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.028,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.902,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.244,
      "eval_anthropic_toxic_prompts_num_pred_words": 44.714,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 52.735589300499036,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.14236314841014502,
      "eval_anthropic_toxic_prompts_runtime": 13.2336,
      "eval_anthropic_toxic_prompts_samples_per_second": 37.782,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.076,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.25788811721480054,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005946434149208624,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.2731150146252883,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.28795132078417035,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 17500
    },
    {
      "epoch": 3.36,
      "eval_arxiv_accuracy": 0.293375,
      "eval_arxiv_bleu_score": 2.589116157824733,
      "eval_arxiv_bleu_score_sem": 0.07323929283421372,
      "eval_arxiv_emb_cos_sim": 0.5551767349243164,
      "eval_arxiv_emb_cos_sim_sem": 0.009073357742357076,
      "eval_arxiv_emb_top1_equal": 0.1640625,
      "eval_arxiv_emb_top1_equal_sem": 0.03286167651298939,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 4.081957817077637,
      "eval_arxiv_n_ngrams_match_1": 10.096,
      "eval_arxiv_n_ngrams_match_2": 1.508,
      "eval_arxiv_n_ngrams_match_3": 0.228,
      "eval_arxiv_num_pred_words": 37.246,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 59.26137928853457,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.23836521185924064,
      "eval_arxiv_runtime": 12.7021,
      "eval_arxiv_samples_per_second": 39.363,
      "eval_arxiv_steps_per_second": 0.079,
      "eval_arxiv_token_set_f1": 0.2476084861650361,
      "eval_arxiv_token_set_f1_sem": 0.0041366555700042525,
      "eval_arxiv_token_set_precision": 0.18490047755050426,
      "eval_arxiv_token_set_recall": 0.4137503932332973,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 17500
    },
    {
      "epoch": 3.36,
      "eval_python_code_alpaca_accuracy": 0.11753125,
      "eval_python_code_alpaca_bleu_score": 2.813648902263996,
      "eval_python_code_alpaca_bleu_score_sem": 0.09021030212828986,
      "eval_python_code_alpaca_emb_cos_sim": 0.4602062404155731,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.01032508885008326,
      "eval_python_code_alpaca_emb_top1_equal": 0.0234375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.013424676090873717,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.79595685005188,
      "eval_python_code_alpaca_n_ngrams_match_1": 5.726,
      "eval_python_code_alpaca_n_ngrams_match_2": 1.02,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.254,
      "eval_python_code_alpaca_num_pred_words": 34.27,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 44.52081577612871,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.20778673161659977,
      "eval_python_code_alpaca_runtime": 9.9902,
      "eval_python_code_alpaca_samples_per_second": 50.049,
      "eval_python_code_alpaca_steps_per_second": 0.1,
      "eval_python_code_alpaca_token_set_f1": 0.3189752979213727,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005422379906865857,
      "eval_python_code_alpaca_token_set_precision": 0.28872587777116143,
      "eval_python_code_alpaca_token_set_recall": 0.399636632201648,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 17500
    },
    {
      "epoch": 3.36,
      "eval_wikibio_accuracy": 0.2739375,
      "eval_wikibio_bleu_score": 4.387326107726963,
      "eval_wikibio_bleu_score_sem": 0.17358579823292636,
      "eval_wikibio_emb_cos_sim": 0.6526084542274475,
      "eval_wikibio_emb_cos_sim_sem": 0.012171598918173215,
      "eval_wikibio_emb_top1_equal": 0.140625,
      "eval_wikibio_emb_top1_equal_sem": 0.030847557647994725,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.484732627868652,
      "eval_wikibio_n_ngrams_match_1": 8.788,
      "eval_wikibio_n_ngrams_match_2": 2.584,
      "eval_wikibio_n_ngrams_match_3": 0.79,
      "eval_wikibio_num_pred_words": 36.746,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 88.65324423700044,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.28826452861928664,
      "eval_wikibio_runtime": 19.9755,
      "eval_wikibio_samples_per_second": 25.031,
      "eval_wikibio_steps_per_second": 0.05,
      "eval_wikibio_token_set_f1": 0.2780186223496822,
      "eval_wikibio_token_set_f1_sem": 0.006014848414323814,
      "eval_wikibio_token_set_precision": 0.2802470958934345,
      "eval_wikibio_token_set_recall": 0.2878118735590263,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 17500
    },
    {
      "epoch": 3.36,
      "eval_nq_accuracy": 0.45309375,
      "eval_nq_bleu_score": 7.424779607535211,
      "eval_nq_bleu_score_sem": 0.33447787501293735,
      "eval_nq_emb_cos_sim": 0.7163922786712646,
      "eval_nq_emb_cos_sim_sem": 0.009680097846619334,
      "eval_nq_emb_top1_equal": 0.171875,
      "eval_nq_emb_top1_equal_sem": 0.03347745514062371,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.7687456607818604,
      "eval_nq_n_ngrams_match_1": 18.592,
      "eval_nq_n_ngrams_match_2": 5.604,
      "eval_nq_n_ngrams_match_3": 2.178,
      "eval_nq_num_pred_words": 47.842,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 15.93862901843315,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.3510003130491518,
      "eval_nq_runtime": 11.0912,
      "eval_nq_samples_per_second": 45.081,
      "eval_nq_steps_per_second": 0.09,
      "eval_nq_token_set_f1": 0.37989460490433385,
      "eval_nq_token_set_f1_sem": 0.004829807244000491,
      "eval_nq_token_set_precision": 0.3192371971832584,
      "eval_nq_token_set_recall": 0.4916528337544238,
      "eval_nq_true_num_tokens": 64.0,
      "step": 17500
    },
    {
      "epoch": 3.36,
      "learning_rate": 0.001,
      "loss": 3.1351,
      "step": 17508
    },
    {
      "epoch": 3.36,
      "learning_rate": 0.001,
      "loss": 3.1163,
      "step": 17520
    },
    {
      "epoch": 3.37,
      "learning_rate": 0.001,
      "loss": 3.1215,
      "step": 17532
    },
    {
      "epoch": 3.37,
      "learning_rate": 0.001,
      "loss": 3.1281,
      "step": 17544
    },
    {
      "epoch": 3.37,
      "learning_rate": 0.001,
      "loss": 3.1285,
      "step": 17556
    },
    {
      "epoch": 3.37,
      "learning_rate": 0.001,
      "loss": 3.1262,
      "step": 17568
    },
    {
      "epoch": 3.38,
      "learning_rate": 0.001,
      "loss": 3.1244,
      "step": 17580
    },
    {
      "epoch": 3.38,
      "learning_rate": 0.001,
      "loss": 3.1269,
      "step": 17592
    },
    {
      "epoch": 3.38,
      "learning_rate": 0.001,
      "loss": 3.1296,
      "step": 17604
    },
    {
      "epoch": 3.38,
      "learning_rate": 0.001,
      "loss": 3.119,
      "step": 17616
    },
    {
      "epoch": 3.38,
      "learning_rate": 0.001,
      "loss": 3.1211,
      "step": 17628
    },
    {
      "epoch": 3.39,
      "learning_rate": 0.001,
      "loss": 3.1183,
      "step": 17640
    },
    {
      "epoch": 3.39,
      "learning_rate": 0.001,
      "loss": 3.1234,
      "step": 17652
    },
    {
      "epoch": 3.39,
      "learning_rate": 0.001,
      "loss": 3.1201,
      "step": 17664
    },
    {
      "epoch": 3.39,
      "learning_rate": 0.001,
      "loss": 3.128,
      "step": 17676
    },
    {
      "epoch": 3.4,
      "learning_rate": 0.001,
      "loss": 3.1192,
      "step": 17688
    },
    {
      "epoch": 3.4,
      "learning_rate": 0.001,
      "loss": 3.1265,
      "step": 17700
    },
    {
      "epoch": 3.4,
      "learning_rate": 0.001,
      "loss": 3.1276,
      "step": 17712
    },
    {
      "epoch": 3.4,
      "learning_rate": 0.001,
      "loss": 3.1162,
      "step": 17724
    },
    {
      "epoch": 3.41,
      "learning_rate": 0.001,
      "loss": 3.1165,
      "step": 17736
    },
    {
      "epoch": 3.41,
      "learning_rate": 0.001,
      "loss": 3.1288,
      "step": 17748
    },
    {
      "epoch": 3.41,
      "learning_rate": 0.001,
      "loss": 3.1183,
      "step": 17760
    },
    {
      "epoch": 3.41,
      "learning_rate": 0.001,
      "loss": 3.1217,
      "step": 17772
    },
    {
      "epoch": 3.41,
      "learning_rate": 0.001,
      "loss": 3.1272,
      "step": 17784
    },
    {
      "epoch": 3.42,
      "learning_rate": 0.001,
      "loss": 3.1272,
      "step": 17796
    },
    {
      "epoch": 3.42,
      "learning_rate": 0.001,
      "loss": 3.1151,
      "step": 17808
    },
    {
      "epoch": 3.42,
      "learning_rate": 0.001,
      "loss": 3.1173,
      "step": 17820
    },
    {
      "epoch": 3.42,
      "learning_rate": 0.001,
      "loss": 3.1184,
      "step": 17832
    },
    {
      "epoch": 3.43,
      "learning_rate": 0.001,
      "loss": 3.1184,
      "step": 17844
    },
    {
      "epoch": 3.43,
      "learning_rate": 0.001,
      "loss": 3.1173,
      "step": 17856
    },
    {
      "epoch": 3.43,
      "learning_rate": 0.001,
      "loss": 3.129,
      "step": 17868
    },
    {
      "epoch": 3.43,
      "learning_rate": 0.001,
      "loss": 3.1168,
      "step": 17880
    },
    {
      "epoch": 3.44,
      "learning_rate": 0.001,
      "loss": 3.1307,
      "step": 17892
    },
    {
      "epoch": 3.44,
      "learning_rate": 0.001,
      "loss": 3.1187,
      "step": 17904
    },
    {
      "epoch": 3.44,
      "learning_rate": 0.001,
      "loss": 3.1256,
      "step": 17916
    },
    {
      "epoch": 3.44,
      "learning_rate": 0.001,
      "loss": 3.1134,
      "step": 17928
    },
    {
      "epoch": 3.44,
      "learning_rate": 0.001,
      "loss": 3.1152,
      "step": 17940
    },
    {
      "epoch": 3.45,
      "learning_rate": 0.001,
      "loss": 3.1066,
      "step": 17952
    },
    {
      "epoch": 3.45,
      "learning_rate": 0.001,
      "loss": 3.118,
      "step": 17964
    },
    {
      "epoch": 3.45,
      "learning_rate": 0.001,
      "loss": 3.1183,
      "step": 17976
    },
    {
      "epoch": 3.45,
      "learning_rate": 0.001,
      "loss": 3.118,
      "step": 17988
    },
    {
      "epoch": 3.46,
      "learning_rate": 0.001,
      "loss": 3.1044,
      "step": 18000
    },
    {
      "epoch": 3.46,
      "learning_rate": 0.001,
      "loss": 3.1164,
      "step": 18012
    },
    {
      "epoch": 3.46,
      "learning_rate": 0.001,
      "loss": 3.1045,
      "step": 18024
    },
    {
      "epoch": 3.46,
      "learning_rate": 0.001,
      "loss": 3.1194,
      "step": 18036
    },
    {
      "epoch": 3.47,
      "learning_rate": 0.001,
      "loss": 3.1175,
      "step": 18048
    },
    {
      "epoch": 3.47,
      "learning_rate": 0.001,
      "loss": 3.1066,
      "step": 18060
    },
    {
      "epoch": 3.47,
      "learning_rate": 0.001,
      "loss": 3.1193,
      "step": 18072
    },
    {
      "epoch": 3.47,
      "learning_rate": 0.001,
      "loss": 3.1077,
      "step": 18084
    },
    {
      "epoch": 3.47,
      "learning_rate": 0.001,
      "loss": 3.1064,
      "step": 18096
    },
    {
      "epoch": 3.48,
      "learning_rate": 0.001,
      "loss": 3.1213,
      "step": 18108
    },
    {
      "epoch": 3.48,
      "learning_rate": 0.001,
      "loss": 3.1185,
      "step": 18120
    },
    {
      "epoch": 3.48,
      "eval_ag_news_accuracy": 0.26609375,
      "eval_ag_news_bleu_score": 3.3768854625270333,
      "eval_ag_news_bleu_score_sem": 0.12308929967473654,
      "eval_ag_news_emb_cos_sim": 0.6711300611495972,
      "eval_ag_news_emb_cos_sim_sem": 0.011022524362529406,
      "eval_ag_news_emb_top1_equal": 0.1328125,
      "eval_ag_news_emb_top1_equal_sem": 0.030114394778901498,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 4.211260795593262,
      "eval_ag_news_n_ngrams_match_1": 10.72,
      "eval_ag_news_n_ngrams_match_2": 1.94,
      "eval_ag_news_n_ngrams_match_3": 0.488,
      "eval_ag_news_num_pred_words": 45.73,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 67.4415161963566,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.25827675854082854,
      "eval_ag_news_runtime": 10.5988,
      "eval_ag_news_samples_per_second": 47.175,
      "eval_ag_news_steps_per_second": 0.094,
      "eval_ag_news_token_set_f1": 0.28390773708987077,
      "eval_ag_news_token_set_f1_sem": 0.004377102377260949,
      "eval_ag_news_token_set_precision": 0.24888274827597226,
      "eval_ag_news_token_set_recall": 0.352436476930113,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 18125
    },
    {
      "epoch": 3.48,
      "eval_anthropic_toxic_prompts_accuracy": 0.086375,
      "eval_anthropic_toxic_prompts_bleu_score": 1.9462429816587172,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.0691554359334003,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5473330020904541,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.01091542461065167,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.046875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01875615101164758,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.9248125553131104,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.124,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.918,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.248,
      "eval_anthropic_toxic_prompts_num_pred_words": 45.424,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 50.64358429828718,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.1488285552175686,
      "eval_anthropic_toxic_prompts_runtime": 10.3674,
      "eval_anthropic_toxic_prompts_samples_per_second": 48.228,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.096,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.2584258301129104,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005599863647917523,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.28120697096026415,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.27877339063580747,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 18125
    },
    {
      "epoch": 3.48,
      "eval_arxiv_accuracy": 0.29428125,
      "eval_arxiv_bleu_score": 2.8244090290872426,
      "eval_arxiv_bleu_score_sem": 0.08591733016095368,
      "eval_arxiv_emb_cos_sim": 0.5704166293144226,
      "eval_arxiv_emb_cos_sim_sem": 0.00989456801216759,
      "eval_arxiv_emb_top1_equal": 0.171875,
      "eval_arxiv_emb_top1_equal_sem": 0.03347745514062371,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 4.084709167480469,
      "eval_arxiv_n_ngrams_match_1": 10.464,
      "eval_arxiv_n_ngrams_match_2": 1.658,
      "eval_arxiv_n_ngrams_match_3": 0.294,
      "eval_arxiv_num_pred_words": 37.148,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 59.42465261638336,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.24941656184494082,
      "eval_arxiv_runtime": 10.5458,
      "eval_arxiv_samples_per_second": 47.412,
      "eval_arxiv_steps_per_second": 0.095,
      "eval_arxiv_token_set_f1": 0.2517879611899787,
      "eval_arxiv_token_set_f1_sem": 0.004217687107659366,
      "eval_arxiv_token_set_precision": 0.1921424543103267,
      "eval_arxiv_token_set_recall": 0.40228131402033,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 18125
    },
    {
      "epoch": 3.48,
      "eval_python_code_alpaca_accuracy": 0.12159375,
      "eval_python_code_alpaca_bleu_score": 2.831516167722937,
      "eval_python_code_alpaca_bleu_score_sem": 0.08430262502229126,
      "eval_python_code_alpaca_emb_cos_sim": 0.5020995140075684,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.011259268478971162,
      "eval_python_code_alpaca_emb_top1_equal": 0.015625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.011004959004867984,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.7325639724731445,
      "eval_python_code_alpaca_n_ngrams_match_1": 6.062,
      "eval_python_code_alpaca_n_ngrams_match_2": 1.206,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.294,
      "eval_python_code_alpaca_num_pred_words": 36.598,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 41.78610936551627,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.21191340305227177,
      "eval_python_code_alpaca_runtime": 10.8731,
      "eval_python_code_alpaca_samples_per_second": 45.985,
      "eval_python_code_alpaca_steps_per_second": 0.092,
      "eval_python_code_alpaca_token_set_f1": 0.32598592610018756,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005098146004097936,
      "eval_python_code_alpaca_token_set_precision": 0.3087060931733973,
      "eval_python_code_alpaca_token_set_recall": 0.3889249388468435,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 18125
    },
    {
      "epoch": 3.48,
      "eval_wikibio_accuracy": 0.27421875,
      "eval_wikibio_bleu_score": 4.816185673964495,
      "eval_wikibio_bleu_score_sem": 0.17921660270328887,
      "eval_wikibio_emb_cos_sim": 0.648404598236084,
      "eval_wikibio_emb_cos_sim_sem": 0.012460465515332786,
      "eval_wikibio_emb_top1_equal": 0.1015625,
      "eval_wikibio_emb_top1_equal_sem": 0.026804565886848545,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.521198749542236,
      "eval_wikibio_n_ngrams_match_1": 9.024,
      "eval_wikibio_n_ngrams_match_2": 2.806,
      "eval_wikibio_n_ngrams_match_3": 0.932,
      "eval_wikibio_num_pred_words": 36.616,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 91.94575186946655,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.29600679232278354,
      "eval_wikibio_runtime": 10.3942,
      "eval_wikibio_samples_per_second": 48.104,
      "eval_wikibio_steps_per_second": 0.096,
      "eval_wikibio_token_set_f1": 0.286489916066079,
      "eval_wikibio_token_set_f1_sem": 0.005601602836704596,
      "eval_wikibio_token_set_precision": 0.2882670778193052,
      "eval_wikibio_token_set_recall": 0.2990335197270945,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 18125
    },
    {
      "epoch": 3.48,
      "eval_nq_accuracy": 0.455,
      "eval_nq_bleu_score": 7.91302147799092,
      "eval_nq_bleu_score_sem": 0.34844504143596244,
      "eval_nq_emb_cos_sim": 0.7135587334632874,
      "eval_nq_emb_cos_sim_sem": 0.010083692116692607,
      "eval_nq_emb_top1_equal": 0.1796875,
      "eval_nq_emb_top1_equal_sem": 0.034068008879424266,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.752685785293579,
      "eval_nq_n_ngrams_match_1": 19.176,
      "eval_nq_n_ngrams_match_2": 5.974,
      "eval_nq_n_ngrams_match_3": 2.362,
      "eval_nq_num_pred_words": 48.264,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 15.684701104012898,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.360912961980277,
      "eval_nq_runtime": 10.7574,
      "eval_nq_samples_per_second": 46.48,
      "eval_nq_steps_per_second": 0.093,
      "eval_nq_token_set_f1": 0.38839248311068214,
      "eval_nq_token_set_f1_sem": 0.004686219499329099,
      "eval_nq_token_set_precision": 0.3303860384492778,
      "eval_nq_token_set_recall": 0.48861715152053414,
      "eval_nq_true_num_tokens": 64.0,
      "step": 18125
    },
    {
      "epoch": 3.48,
      "learning_rate": 0.001,
      "loss": 3.1112,
      "step": 18132
    },
    {
      "epoch": 3.48,
      "learning_rate": 0.001,
      "loss": 3.1104,
      "step": 18144
    },
    {
      "epoch": 3.49,
      "learning_rate": 0.001,
      "loss": 3.1109,
      "step": 18156
    },
    {
      "epoch": 3.49,
      "learning_rate": 0.001,
      "loss": 3.1119,
      "step": 18168
    },
    {
      "epoch": 3.49,
      "learning_rate": 0.001,
      "loss": 3.1135,
      "step": 18180
    },
    {
      "epoch": 3.49,
      "learning_rate": 0.001,
      "loss": 3.1212,
      "step": 18192
    },
    {
      "epoch": 3.5,
      "learning_rate": 0.001,
      "loss": 3.1073,
      "step": 18204
    },
    {
      "epoch": 3.5,
      "learning_rate": 0.001,
      "loss": 3.099,
      "step": 18216
    },
    {
      "epoch": 3.5,
      "learning_rate": 0.001,
      "loss": 3.1044,
      "step": 18228
    },
    {
      "epoch": 3.5,
      "learning_rate": 0.001,
      "loss": 3.1012,
      "step": 18240
    },
    {
      "epoch": 3.5,
      "learning_rate": 0.001,
      "loss": 3.1142,
      "step": 18252
    },
    {
      "epoch": 3.51,
      "learning_rate": 0.001,
      "loss": 3.1098,
      "step": 18264
    },
    {
      "epoch": 3.51,
      "learning_rate": 0.001,
      "loss": 3.1116,
      "step": 18276
    },
    {
      "epoch": 3.51,
      "learning_rate": 0.001,
      "loss": 3.1114,
      "step": 18288
    },
    {
      "epoch": 3.51,
      "learning_rate": 0.001,
      "loss": 3.1079,
      "step": 18300
    },
    {
      "epoch": 3.52,
      "learning_rate": 0.001,
      "loss": 3.1098,
      "step": 18312
    },
    {
      "epoch": 3.52,
      "learning_rate": 0.001,
      "loss": 3.1181,
      "step": 18324
    },
    {
      "epoch": 3.52,
      "learning_rate": 0.001,
      "loss": 3.1233,
      "step": 18336
    },
    {
      "epoch": 3.52,
      "learning_rate": 0.001,
      "loss": 3.1056,
      "step": 18348
    },
    {
      "epoch": 3.53,
      "learning_rate": 0.001,
      "loss": 3.1071,
      "step": 18360
    },
    {
      "epoch": 3.53,
      "learning_rate": 0.001,
      "loss": 3.102,
      "step": 18372
    },
    {
      "epoch": 3.53,
      "learning_rate": 0.001,
      "loss": 3.1087,
      "step": 18384
    },
    {
      "epoch": 3.53,
      "learning_rate": 0.001,
      "loss": 3.1061,
      "step": 18396
    },
    {
      "epoch": 3.53,
      "learning_rate": 0.001,
      "loss": 3.1102,
      "step": 18408
    },
    {
      "epoch": 3.54,
      "learning_rate": 0.001,
      "loss": 3.1061,
      "step": 18420
    },
    {
      "epoch": 3.54,
      "learning_rate": 0.001,
      "loss": 3.105,
      "step": 18432
    },
    {
      "epoch": 3.54,
      "learning_rate": 0.001,
      "loss": 3.1028,
      "step": 18444
    },
    {
      "epoch": 3.54,
      "learning_rate": 0.001,
      "loss": 3.1017,
      "step": 18456
    },
    {
      "epoch": 3.55,
      "learning_rate": 0.001,
      "loss": 3.0994,
      "step": 18468
    },
    {
      "epoch": 3.55,
      "learning_rate": 0.001,
      "loss": 3.1058,
      "step": 18480
    },
    {
      "epoch": 3.55,
      "learning_rate": 0.001,
      "loss": 3.1029,
      "step": 18492
    },
    {
      "epoch": 3.55,
      "learning_rate": 0.001,
      "loss": 3.1074,
      "step": 18504
    },
    {
      "epoch": 3.56,
      "learning_rate": 0.001,
      "loss": 3.101,
      "step": 18516
    },
    {
      "epoch": 3.56,
      "learning_rate": 0.001,
      "loss": 3.1063,
      "step": 18528
    },
    {
      "epoch": 3.56,
      "learning_rate": 0.001,
      "loss": 3.0956,
      "step": 18540
    },
    {
      "epoch": 3.56,
      "learning_rate": 0.001,
      "loss": 3.1052,
      "step": 18552
    },
    {
      "epoch": 3.56,
      "learning_rate": 0.001,
      "loss": 3.0998,
      "step": 18564
    },
    {
      "epoch": 3.57,
      "learning_rate": 0.001,
      "loss": 3.0962,
      "step": 18576
    },
    {
      "epoch": 3.57,
      "learning_rate": 0.001,
      "loss": 3.1042,
      "step": 18588
    },
    {
      "epoch": 3.57,
      "learning_rate": 0.001,
      "loss": 3.0996,
      "step": 18600
    },
    {
      "epoch": 3.57,
      "learning_rate": 0.001,
      "loss": 3.0973,
      "step": 18612
    },
    {
      "epoch": 3.58,
      "learning_rate": 0.001,
      "loss": 3.1108,
      "step": 18624
    },
    {
      "epoch": 3.58,
      "learning_rate": 0.001,
      "loss": 3.1146,
      "step": 18636
    },
    {
      "epoch": 3.58,
      "learning_rate": 0.001,
      "loss": 3.1076,
      "step": 18648
    },
    {
      "epoch": 3.58,
      "learning_rate": 0.001,
      "loss": 3.1092,
      "step": 18660
    },
    {
      "epoch": 3.59,
      "learning_rate": 0.001,
      "loss": 3.111,
      "step": 18672
    },
    {
      "epoch": 3.59,
      "learning_rate": 0.001,
      "loss": 3.1088,
      "step": 18684
    },
    {
      "epoch": 3.59,
      "learning_rate": 0.001,
      "loss": 3.104,
      "step": 18696
    },
    {
      "epoch": 3.59,
      "learning_rate": 0.001,
      "loss": 3.1016,
      "step": 18708
    },
    {
      "epoch": 3.59,
      "learning_rate": 0.001,
      "loss": 3.096,
      "step": 18720
    },
    {
      "epoch": 3.6,
      "learning_rate": 0.001,
      "loss": 3.1003,
      "step": 18732
    },
    {
      "epoch": 3.6,
      "learning_rate": 0.001,
      "loss": 3.0904,
      "step": 18744
    },
    {
      "epoch": 3.6,
      "eval_ag_news_accuracy": 0.26890625,
      "eval_ag_news_bleu_score": 3.3306737212340427,
      "eval_ag_news_bleu_score_sem": 0.11451415513508868,
      "eval_ag_news_emb_cos_sim": 0.6727374792098999,
      "eval_ag_news_emb_cos_sim_sem": 0.01068550433240163,
      "eval_ag_news_emb_top1_equal": 0.203125,
      "eval_ag_news_emb_top1_equal_sem": 0.03570055125142555,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 4.192138195037842,
      "eval_ag_news_n_ngrams_match_1": 10.45,
      "eval_ag_news_n_ngrams_match_2": 1.886,
      "eval_ag_news_n_ngrams_match_3": 0.448,
      "eval_ag_news_num_pred_words": 45.028,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 66.16411159612522,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.2512756847015737,
      "eval_ag_news_runtime": 10.668,
      "eval_ag_news_samples_per_second": 46.869,
      "eval_ag_news_steps_per_second": 0.094,
      "eval_ag_news_token_set_f1": 0.27799062226419174,
      "eval_ag_news_token_set_f1_sem": 0.004415429101864606,
      "eval_ag_news_token_set_precision": 0.24184646106291946,
      "eval_ag_news_token_set_recall": 0.3554571349590701,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 18750
    },
    {
      "epoch": 3.6,
      "eval_anthropic_toxic_prompts_accuracy": 0.088625,
      "eval_anthropic_toxic_prompts_bleu_score": 2.0829403759575826,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.08500098511040192,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5523356795310974,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011747851869066654,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02147948148198014,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.874500036239624,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.236,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.994,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.314,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.062,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 48.15861470903693,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.14965602792635907,
      "eval_anthropic_toxic_prompts_runtime": 10.0725,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.64,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.099,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.2711838520748361,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005941723493908647,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.294925893721296,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.2954815093920474,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 18750
    },
    {
      "epoch": 3.6,
      "eval_arxiv_accuracy": 0.29665625,
      "eval_arxiv_bleu_score": 2.902288499661674,
      "eval_arxiv_bleu_score_sem": 0.08611618824157984,
      "eval_arxiv_emb_cos_sim": 0.5800829529762268,
      "eval_arxiv_emb_cos_sim_sem": 0.008218194234803987,
      "eval_arxiv_emb_top1_equal": 0.15625,
      "eval_arxiv_emb_top1_equal_sem": 0.03221922156442571,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 4.063896656036377,
      "eval_arxiv_n_ngrams_match_1": 10.776,
      "eval_arxiv_n_ngrams_match_2": 1.746,
      "eval_arxiv_n_ngrams_match_3": 0.302,
      "eval_arxiv_num_pred_words": 38.522,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 58.20065773931756,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.254511440871516,
      "eval_arxiv_runtime": 10.3749,
      "eval_arxiv_samples_per_second": 48.193,
      "eval_arxiv_steps_per_second": 0.096,
      "eval_arxiv_token_set_f1": 0.2594643321632246,
      "eval_arxiv_token_set_f1_sem": 0.0039419367881206435,
      "eval_arxiv_token_set_precision": 0.19696856759428902,
      "eval_arxiv_token_set_recall": 0.4105121111739601,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 18750
    },
    {
      "epoch": 3.6,
      "eval_python_code_alpaca_accuracy": 0.12234375,
      "eval_python_code_alpaca_bleu_score": 3.0402250384275593,
      "eval_python_code_alpaca_bleu_score_sem": 0.09149842023625508,
      "eval_python_code_alpaca_emb_cos_sim": 0.5129662752151489,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010910445355626812,
      "eval_python_code_alpaca_emb_top1_equal": 0.0234375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.013424676090873717,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.719010829925537,
      "eval_python_code_alpaca_n_ngrams_match_1": 6.404,
      "eval_python_code_alpaca_n_ngrams_match_2": 1.368,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.362,
      "eval_python_code_alpaca_num_pred_words": 38.562,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 41.223596785889626,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.21990430883874268,
      "eval_python_code_alpaca_runtime": 10.3607,
      "eval_python_code_alpaca_samples_per_second": 48.259,
      "eval_python_code_alpaca_steps_per_second": 0.097,
      "eval_python_code_alpaca_token_set_f1": 0.3434977363611162,
      "eval_python_code_alpaca_token_set_f1_sem": 0.00533002193842187,
      "eval_python_code_alpaca_token_set_precision": 0.332952346210531,
      "eval_python_code_alpaca_token_set_recall": 0.3985187483902321,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 18750
    },
    {
      "epoch": 3.6,
      "eval_wikibio_accuracy": 0.28053125,
      "eval_wikibio_bleu_score": 4.86568482678552,
      "eval_wikibio_bleu_score_sem": 0.19821865716439607,
      "eval_wikibio_emb_cos_sim": 0.6397950649261475,
      "eval_wikibio_emb_cos_sim_sem": 0.011672202227256647,
      "eval_wikibio_emb_top1_equal": 0.1015625,
      "eval_wikibio_emb_top1_equal_sem": 0.026804565886848545,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.44827127456665,
      "eval_wikibio_n_ngrams_match_1": 8.874,
      "eval_wikibio_n_ngrams_match_2": 2.726,
      "eval_wikibio_n_ngrams_match_3": 0.912,
      "eval_wikibio_num_pred_words": 36.294,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 85.47904640032947,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.29585587354965814,
      "eval_wikibio_runtime": 10.0232,
      "eval_wikibio_samples_per_second": 49.884,
      "eval_wikibio_steps_per_second": 0.1,
      "eval_wikibio_token_set_f1": 0.2848981071731547,
      "eval_wikibio_token_set_f1_sem": 0.005397483888067579,
      "eval_wikibio_token_set_precision": 0.28393067120324256,
      "eval_wikibio_token_set_recall": 0.30555215692346377,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 18750
    },
    {
      "epoch": 3.6,
      "eval_nq_accuracy": 0.457375,
      "eval_nq_bleu_score": 7.63792443550545,
      "eval_nq_bleu_score_sem": 0.3387526478308195,
      "eval_nq_emb_cos_sim": 0.7198264598846436,
      "eval_nq_emb_cos_sim_sem": 0.009967450449955053,
      "eval_nq_emb_top1_equal": 0.1796875,
      "eval_nq_emb_top1_equal_sem": 0.034068008879424266,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.742191791534424,
      "eval_nq_n_ngrams_match_1": 19.086,
      "eval_nq_n_ngrams_match_2": 5.89,
      "eval_nq_n_ngrams_match_3": 2.254,
      "eval_nq_num_pred_words": 48.316,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 15.520966565712516,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.36122749924215347,
      "eval_nq_runtime": 10.5748,
      "eval_nq_samples_per_second": 47.282,
      "eval_nq_steps_per_second": 0.095,
      "eval_nq_token_set_f1": 0.3885224715429432,
      "eval_nq_token_set_f1_sem": 0.004676462222611935,
      "eval_nq_token_set_precision": 0.3311734521359436,
      "eval_nq_token_set_recall": 0.4873702363118083,
      "eval_nq_true_num_tokens": 64.0,
      "step": 18750
    },
    {
      "epoch": 3.6,
      "learning_rate": 0.001,
      "loss": 3.0942,
      "step": 18756
    },
    {
      "epoch": 3.6,
      "learning_rate": 0.001,
      "loss": 3.1,
      "step": 18768
    },
    {
      "epoch": 3.61,
      "learning_rate": 0.001,
      "loss": 3.0996,
      "step": 18780
    },
    {
      "epoch": 3.61,
      "learning_rate": 0.001,
      "loss": 3.1098,
      "step": 18792
    },
    {
      "epoch": 3.61,
      "learning_rate": 0.001,
      "loss": 3.1113,
      "step": 18804
    },
    {
      "epoch": 3.61,
      "learning_rate": 0.001,
      "loss": 3.0989,
      "step": 18816
    },
    {
      "epoch": 3.62,
      "learning_rate": 0.001,
      "loss": 3.1004,
      "step": 18828
    },
    {
      "epoch": 3.62,
      "learning_rate": 0.001,
      "loss": 3.0931,
      "step": 18840
    },
    {
      "epoch": 3.62,
      "learning_rate": 0.001,
      "loss": 3.0959,
      "step": 18852
    },
    {
      "epoch": 3.62,
      "learning_rate": 0.001,
      "loss": 3.1116,
      "step": 18864
    },
    {
      "epoch": 3.62,
      "learning_rate": 0.001,
      "loss": 3.0983,
      "step": 18876
    },
    {
      "epoch": 3.63,
      "learning_rate": 0.001,
      "loss": 3.1,
      "step": 18888
    },
    {
      "epoch": 3.63,
      "learning_rate": 0.001,
      "loss": 3.1089,
      "step": 18900
    },
    {
      "epoch": 3.63,
      "learning_rate": 0.001,
      "loss": 3.0966,
      "step": 18912
    },
    {
      "epoch": 3.63,
      "learning_rate": 0.001,
      "loss": 3.1052,
      "step": 18924
    },
    {
      "epoch": 3.64,
      "learning_rate": 0.001,
      "loss": 3.0952,
      "step": 18936
    },
    {
      "epoch": 3.64,
      "learning_rate": 0.001,
      "loss": 3.1044,
      "step": 18948
    },
    {
      "epoch": 3.64,
      "learning_rate": 0.001,
      "loss": 3.0945,
      "step": 18960
    },
    {
      "epoch": 3.64,
      "learning_rate": 0.001,
      "loss": 3.096,
      "step": 18972
    },
    {
      "epoch": 3.65,
      "learning_rate": 0.001,
      "loss": 3.1093,
      "step": 18984
    },
    {
      "epoch": 3.65,
      "learning_rate": 0.001,
      "loss": 3.1089,
      "step": 18996
    },
    {
      "epoch": 3.65,
      "learning_rate": 0.001,
      "loss": 3.0969,
      "step": 19008
    },
    {
      "epoch": 3.65,
      "learning_rate": 0.001,
      "loss": 3.0936,
      "step": 19020
    },
    {
      "epoch": 3.65,
      "learning_rate": 0.001,
      "loss": 3.0923,
      "step": 19032
    },
    {
      "epoch": 3.66,
      "learning_rate": 0.001,
      "loss": 3.0923,
      "step": 19044
    },
    {
      "epoch": 3.66,
      "learning_rate": 0.001,
      "loss": 3.0878,
      "step": 19056
    },
    {
      "epoch": 3.66,
      "learning_rate": 0.001,
      "loss": 3.0953,
      "step": 19068
    },
    {
      "epoch": 3.66,
      "learning_rate": 0.001,
      "loss": 3.0806,
      "step": 19080
    },
    {
      "epoch": 3.67,
      "learning_rate": 0.001,
      "loss": 3.0987,
      "step": 19092
    },
    {
      "epoch": 3.67,
      "learning_rate": 0.001,
      "loss": 3.0967,
      "step": 19104
    },
    {
      "epoch": 3.67,
      "learning_rate": 0.001,
      "loss": 3.1029,
      "step": 19116
    },
    {
      "epoch": 3.67,
      "learning_rate": 0.001,
      "loss": 3.0916,
      "step": 19128
    },
    {
      "epoch": 3.68,
      "learning_rate": 0.001,
      "loss": 3.1063,
      "step": 19140
    },
    {
      "epoch": 3.68,
      "learning_rate": 0.001,
      "loss": 3.089,
      "step": 19152
    },
    {
      "epoch": 3.68,
      "learning_rate": 0.001,
      "loss": 3.0841,
      "step": 19164
    },
    {
      "epoch": 3.68,
      "learning_rate": 0.001,
      "loss": 3.1062,
      "step": 19176
    },
    {
      "epoch": 3.68,
      "learning_rate": 0.001,
      "loss": 3.102,
      "step": 19188
    },
    {
      "epoch": 3.69,
      "learning_rate": 0.001,
      "loss": 3.0927,
      "step": 19200
    },
    {
      "epoch": 3.69,
      "learning_rate": 0.001,
      "loss": 3.0947,
      "step": 19212
    },
    {
      "epoch": 3.69,
      "learning_rate": 0.001,
      "loss": 3.0963,
      "step": 19224
    },
    {
      "epoch": 3.69,
      "learning_rate": 0.001,
      "loss": 3.0898,
      "step": 19236
    },
    {
      "epoch": 3.7,
      "learning_rate": 0.001,
      "loss": 3.0965,
      "step": 19248
    },
    {
      "epoch": 3.7,
      "learning_rate": 0.001,
      "loss": 3.0831,
      "step": 19260
    },
    {
      "epoch": 3.7,
      "learning_rate": 0.001,
      "loss": 3.089,
      "step": 19272
    },
    {
      "epoch": 3.7,
      "learning_rate": 0.001,
      "loss": 3.0967,
      "step": 19284
    },
    {
      "epoch": 3.71,
      "learning_rate": 0.001,
      "loss": 3.0961,
      "step": 19296
    },
    {
      "epoch": 3.71,
      "learning_rate": 0.001,
      "loss": 3.0953,
      "step": 19308
    },
    {
      "epoch": 3.71,
      "learning_rate": 0.001,
      "loss": 3.0798,
      "step": 19320
    },
    {
      "epoch": 3.71,
      "learning_rate": 0.001,
      "loss": 3.0965,
      "step": 19332
    },
    {
      "epoch": 3.71,
      "learning_rate": 0.001,
      "loss": 3.0928,
      "step": 19344
    },
    {
      "epoch": 3.72,
      "learning_rate": 0.001,
      "loss": 3.0933,
      "step": 19356
    },
    {
      "epoch": 3.72,
      "learning_rate": 0.001,
      "loss": 3.0874,
      "step": 19368
    },
    {
      "epoch": 3.72,
      "eval_ag_news_accuracy": 0.27,
      "eval_ag_news_bleu_score": 3.472754228337938,
      "eval_ag_news_bleu_score_sem": 0.12049368215361671,
      "eval_ag_news_emb_cos_sim": 0.6796576976776123,
      "eval_ag_news_emb_cos_sim_sem": 0.010592004183158244,
      "eval_ag_news_emb_top1_equal": 0.1328125,
      "eval_ag_news_emb_top1_equal_sem": 0.030114394778901498,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 4.175224781036377,
      "eval_ag_news_n_ngrams_match_1": 10.728,
      "eval_ag_news_n_ngrams_match_2": 1.962,
      "eval_ag_news_n_ngrams_match_3": 0.46,
      "eval_ag_news_num_pred_words": 44.62,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 65.05446102682308,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.25913545264538174,
      "eval_ag_news_runtime": 17.3813,
      "eval_ag_news_samples_per_second": 28.766,
      "eval_ag_news_steps_per_second": 0.058,
      "eval_ag_news_token_set_f1": 0.2834739402798457,
      "eval_ag_news_token_set_f1_sem": 0.00435891938822785,
      "eval_ag_news_token_set_precision": 0.2487709706036216,
      "eval_ag_news_token_set_recall": 0.35297619122472856,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 19375
    },
    {
      "epoch": 3.72,
      "eval_anthropic_toxic_prompts_accuracy": 0.0891875,
      "eval_anthropic_toxic_prompts_bleu_score": 2.0192076639250716,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.07216344694835221,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5528053641319275,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.012621590283011902,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02147948148198014,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.8718910217285156,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.31,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.992,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.28,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.138,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 48.03313194877702,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.15187383470242657,
      "eval_anthropic_toxic_prompts_runtime": 9.9967,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.016,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.1,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.26705965718786157,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005534154285558849,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.296262196589119,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.28415050166917083,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 19375
    },
    {
      "epoch": 3.72,
      "eval_arxiv_accuracy": 0.2963125,
      "eval_arxiv_bleu_score": 2.972708753679175,
      "eval_arxiv_bleu_score_sem": 0.0993640178338775,
      "eval_arxiv_emb_cos_sim": 0.5856300592422485,
      "eval_arxiv_emb_cos_sim_sem": 0.009791579553653187,
      "eval_arxiv_emb_top1_equal": 0.2109375,
      "eval_arxiv_emb_top1_equal_sem": 0.03620184850179216,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 4.035984516143799,
      "eval_arxiv_n_ngrams_match_1": 10.79,
      "eval_arxiv_n_ngrams_match_2": 1.808,
      "eval_arxiv_n_ngrams_match_3": 0.338,
      "eval_arxiv_num_pred_words": 37.514,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 56.59861506784591,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.2537807955642004,
      "eval_arxiv_runtime": 14.7005,
      "eval_arxiv_samples_per_second": 34.012,
      "eval_arxiv_steps_per_second": 0.068,
      "eval_arxiv_token_set_f1": 0.25880794735075296,
      "eval_arxiv_token_set_f1_sem": 0.0043315438403489295,
      "eval_arxiv_token_set_precision": 0.1972017494362223,
      "eval_arxiv_token_set_recall": 0.40974570893421125,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 19375
    },
    {
      "epoch": 3.72,
      "eval_python_code_alpaca_accuracy": 0.1211875,
      "eval_python_code_alpaca_bleu_score": 2.908157101103175,
      "eval_python_code_alpaca_bleu_score_sem": 0.09723201187971059,
      "eval_python_code_alpaca_emb_cos_sim": 0.5246033668518066,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.01038970721284005,
      "eval_python_code_alpaca_emb_top1_equal": 0.03125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.015439349450344106,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.6962592601776123,
      "eval_python_code_alpaca_n_ngrams_match_1": 6.342,
      "eval_python_code_alpaca_n_ngrams_match_2": 1.296,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.358,
      "eval_python_code_alpaca_num_pred_words": 38.294,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 40.29628415768576,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.2123891805818689,
      "eval_python_code_alpaca_runtime": 11.0532,
      "eval_python_code_alpaca_samples_per_second": 45.236,
      "eval_python_code_alpaca_steps_per_second": 0.09,
      "eval_python_code_alpaca_token_set_f1": 0.3382246492097026,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005487002634017304,
      "eval_python_code_alpaca_token_set_precision": 0.3245677435905774,
      "eval_python_code_alpaca_token_set_recall": 0.39618136763514733,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 19375
    },
    {
      "epoch": 3.72,
      "eval_wikibio_accuracy": 0.28075,
      "eval_wikibio_bleu_score": 4.855825708587543,
      "eval_wikibio_bleu_score_sem": 0.1795522937569043,
      "eval_wikibio_emb_cos_sim": 0.6441939473152161,
      "eval_wikibio_emb_cos_sim_sem": 0.011822260813023622,
      "eval_wikibio_emb_top1_equal": 0.09375,
      "eval_wikibio_emb_top1_equal_sem": 0.025864720141013958,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.401398658752441,
      "eval_wikibio_n_ngrams_match_1": 8.954,
      "eval_wikibio_n_ngrams_match_2": 2.746,
      "eval_wikibio_n_ngrams_match_3": 0.91,
      "eval_wikibio_num_pred_words": 36.108,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 81.56487034145702,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.296123596479039,
      "eval_wikibio_runtime": 10.173,
      "eval_wikibio_samples_per_second": 49.15,
      "eval_wikibio_steps_per_second": 0.098,
      "eval_wikibio_token_set_f1": 0.28835538818950723,
      "eval_wikibio_token_set_f1_sem": 0.0057115422911795475,
      "eval_wikibio_token_set_precision": 0.2871581305250753,
      "eval_wikibio_token_set_recall": 0.304197334168239,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 19375
    },
    {
      "epoch": 3.72,
      "eval_nq_accuracy": 0.4594375,
      "eval_nq_bleu_score": 8.039485433057248,
      "eval_nq_bleu_score_sem": 0.35879975248025403,
      "eval_nq_emb_cos_sim": 0.7219055891036987,
      "eval_nq_emb_cos_sim_sem": 0.010759631419309182,
      "eval_nq_emb_top1_equal": 0.1875,
      "eval_nq_emb_top1_equal_sem": 0.034634623208270626,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.72248911857605,
      "eval_nq_n_ngrams_match_1": 19.16,
      "eval_nq_n_ngrams_match_2": 5.964,
      "eval_nq_n_ngrams_match_3": 2.44,
      "eval_nq_num_pred_words": 48.076,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 15.218154932533585,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.36304378926611514,
      "eval_nq_runtime": 10.4358,
      "eval_nq_samples_per_second": 47.912,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.39067846873807627,
      "eval_nq_token_set_f1_sem": 0.00483756581787002,
      "eval_nq_token_set_precision": 0.3321573251141496,
      "eval_nq_token_set_recall": 0.4914988579958725,
      "eval_nq_true_num_tokens": 64.0,
      "step": 19375
    },
    {
      "epoch": 3.72,
      "learning_rate": 0.001,
      "loss": 3.0911,
      "step": 19380
    },
    {
      "epoch": 3.72,
      "learning_rate": 0.001,
      "loss": 3.0954,
      "step": 19392
    },
    {
      "epoch": 3.73,
      "learning_rate": 0.001,
      "loss": 3.0698,
      "step": 19404
    },
    {
      "epoch": 3.73,
      "learning_rate": 0.001,
      "loss": 3.0931,
      "step": 19416
    },
    {
      "epoch": 3.73,
      "learning_rate": 0.001,
      "loss": 3.0916,
      "step": 19428
    },
    {
      "epoch": 3.73,
      "learning_rate": 0.001,
      "loss": 3.0962,
      "step": 19440
    },
    {
      "epoch": 3.74,
      "learning_rate": 0.001,
      "loss": 3.0803,
      "step": 19452
    },
    {
      "epoch": 3.74,
      "learning_rate": 0.001,
      "loss": 3.0783,
      "step": 19464
    },
    {
      "epoch": 3.74,
      "learning_rate": 0.001,
      "loss": 3.09,
      "step": 19476
    },
    {
      "epoch": 3.74,
      "learning_rate": 0.001,
      "loss": 3.0775,
      "step": 19488
    },
    {
      "epoch": 3.74,
      "learning_rate": 0.001,
      "loss": 3.0802,
      "step": 19500
    },
    {
      "epoch": 3.75,
      "learning_rate": 0.001,
      "loss": 3.0825,
      "step": 19512
    },
    {
      "epoch": 3.75,
      "learning_rate": 0.001,
      "loss": 3.0802,
      "step": 19524
    },
    {
      "epoch": 3.75,
      "learning_rate": 0.001,
      "loss": 3.0916,
      "step": 19536
    },
    {
      "epoch": 3.75,
      "learning_rate": 0.001,
      "loss": 3.0741,
      "step": 19548
    },
    {
      "epoch": 3.76,
      "learning_rate": 0.001,
      "loss": 3.0918,
      "step": 19560
    },
    {
      "epoch": 3.76,
      "learning_rate": 0.001,
      "loss": 3.0813,
      "step": 19572
    },
    {
      "epoch": 3.76,
      "learning_rate": 0.001,
      "loss": 3.0862,
      "step": 19584
    },
    {
      "epoch": 3.76,
      "learning_rate": 0.001,
      "loss": 3.0845,
      "step": 19596
    },
    {
      "epoch": 3.76,
      "learning_rate": 0.001,
      "loss": 3.0903,
      "step": 19608
    },
    {
      "epoch": 3.77,
      "learning_rate": 0.001,
      "loss": 3.0822,
      "step": 19620
    },
    {
      "epoch": 3.77,
      "learning_rate": 0.001,
      "loss": 3.0755,
      "step": 19632
    },
    {
      "epoch": 3.77,
      "learning_rate": 0.001,
      "loss": 3.0845,
      "step": 19644
    },
    {
      "epoch": 3.77,
      "learning_rate": 0.001,
      "loss": 3.0773,
      "step": 19656
    },
    {
      "epoch": 3.78,
      "learning_rate": 0.001,
      "loss": 3.0736,
      "step": 19668
    },
    {
      "epoch": 3.78,
      "learning_rate": 0.001,
      "loss": 3.0945,
      "step": 19680
    },
    {
      "epoch": 3.78,
      "learning_rate": 0.001,
      "loss": 3.0816,
      "step": 19692
    },
    {
      "epoch": 3.78,
      "learning_rate": 0.001,
      "loss": 3.0707,
      "step": 19704
    },
    {
      "epoch": 3.79,
      "learning_rate": 0.001,
      "loss": 3.0762,
      "step": 19716
    },
    {
      "epoch": 3.79,
      "learning_rate": 0.001,
      "loss": 3.0924,
      "step": 19728
    },
    {
      "epoch": 3.79,
      "learning_rate": 0.001,
      "loss": 3.0716,
      "step": 19740
    },
    {
      "epoch": 3.79,
      "learning_rate": 0.001,
      "loss": 3.0803,
      "step": 19752
    },
    {
      "epoch": 3.79,
      "learning_rate": 0.001,
      "loss": 3.0682,
      "step": 19764
    },
    {
      "epoch": 3.8,
      "learning_rate": 0.001,
      "loss": 3.0855,
      "step": 19776
    },
    {
      "epoch": 3.8,
      "learning_rate": 0.001,
      "loss": 3.0878,
      "step": 19788
    },
    {
      "epoch": 3.8,
      "learning_rate": 0.001,
      "loss": 3.0884,
      "step": 19800
    },
    {
      "epoch": 3.8,
      "learning_rate": 0.001,
      "loss": 3.0944,
      "step": 19812
    },
    {
      "epoch": 3.81,
      "learning_rate": 0.001,
      "loss": 3.0813,
      "step": 19824
    },
    {
      "epoch": 3.81,
      "learning_rate": 0.001,
      "loss": 3.0714,
      "step": 19836
    },
    {
      "epoch": 3.81,
      "learning_rate": 0.001,
      "loss": 3.0757,
      "step": 19848
    },
    {
      "epoch": 3.81,
      "learning_rate": 0.001,
      "loss": 3.0772,
      "step": 19860
    },
    {
      "epoch": 3.82,
      "learning_rate": 0.001,
      "loss": 3.0861,
      "step": 19872
    },
    {
      "epoch": 3.82,
      "learning_rate": 0.001,
      "loss": 3.0902,
      "step": 19884
    },
    {
      "epoch": 3.82,
      "learning_rate": 0.001,
      "loss": 3.0751,
      "step": 19896
    },
    {
      "epoch": 3.82,
      "learning_rate": 0.001,
      "loss": 3.0858,
      "step": 19908
    },
    {
      "epoch": 3.82,
      "learning_rate": 0.001,
      "loss": 3.0863,
      "step": 19920
    },
    {
      "epoch": 3.83,
      "learning_rate": 0.001,
      "loss": 3.0842,
      "step": 19932
    },
    {
      "epoch": 3.83,
      "learning_rate": 0.001,
      "loss": 3.0762,
      "step": 19944
    },
    {
      "epoch": 3.83,
      "learning_rate": 0.001,
      "loss": 3.0898,
      "step": 19956
    },
    {
      "epoch": 3.83,
      "learning_rate": 0.001,
      "loss": 3.0837,
      "step": 19968
    },
    {
      "epoch": 3.84,
      "learning_rate": 0.001,
      "loss": 3.0808,
      "step": 19980
    },
    {
      "epoch": 3.84,
      "learning_rate": 0.001,
      "loss": 3.0948,
      "step": 19992
    },
    {
      "epoch": 3.84,
      "eval_ag_news_accuracy": 0.269125,
      "eval_ag_news_bleu_score": 3.5205719450799524,
      "eval_ag_news_bleu_score_sem": 0.13084593860321023,
      "eval_ag_news_emb_cos_sim": 0.6838691830635071,
      "eval_ag_news_emb_cos_sim_sem": 0.011532360300986517,
      "eval_ag_news_emb_top1_equal": 0.1484375,
      "eval_ag_news_emb_top1_equal_sem": 0.031548465007086954,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 4.1688456535339355,
      "eval_ag_news_n_ngrams_match_1": 10.702,
      "eval_ag_news_n_ngrams_match_2": 2.006,
      "eval_ag_news_n_ngrams_match_3": 0.534,
      "eval_ag_news_num_pred_words": 45.61,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 64.6407911545572,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.2573117956030651,
      "eval_ag_news_runtime": 10.481,
      "eval_ag_news_samples_per_second": 47.705,
      "eval_ag_news_steps_per_second": 0.095,
      "eval_ag_news_token_set_f1": 0.2833631714930698,
      "eval_ag_news_token_set_f1_sem": 0.004298555133459661,
      "eval_ag_news_token_set_precision": 0.25019726274739207,
      "eval_ag_news_token_set_recall": 0.34880132857872753,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 20000
    },
    {
      "epoch": 3.84,
      "eval_anthropic_toxic_prompts_accuracy": 0.0874375,
      "eval_anthropic_toxic_prompts_bleu_score": 2.082524207080151,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.07931001353876281,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5490853190422058,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011788019132786551,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0546875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.020175758285348722,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.8917834758758545,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.336,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.032,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.292,
      "eval_anthropic_toxic_prompts_num_pred_words": 45.46,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 48.998195743714746,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.1545424205150261,
      "eval_anthropic_toxic_prompts_runtime": 10.2736,
      "eval_anthropic_toxic_prompts_samples_per_second": 48.668,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.097,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.26496288095514636,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005541832229746886,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.2988874207066744,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.27338358910120303,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 20000
    },
    {
      "epoch": 3.84,
      "eval_arxiv_accuracy": 0.29815625,
      "eval_arxiv_bleu_score": 2.983439874000031,
      "eval_arxiv_bleu_score_sem": 0.08992601673457186,
      "eval_arxiv_emb_cos_sim": 0.5861841440200806,
      "eval_arxiv_emb_cos_sim_sem": 0.008722870477041051,
      "eval_arxiv_emb_top1_equal": 0.1875,
      "eval_arxiv_emb_top1_equal_sem": 0.034634623208270626,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 4.020208835601807,
      "eval_arxiv_n_ngrams_match_1": 11.05,
      "eval_arxiv_n_ngrams_match_2": 1.818,
      "eval_arxiv_n_ngrams_match_3": 0.316,
      "eval_arxiv_num_pred_words": 38.414,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 55.712739415463716,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.26075130061568297,
      "eval_arxiv_runtime": 10.1994,
      "eval_arxiv_samples_per_second": 49.022,
      "eval_arxiv_steps_per_second": 0.098,
      "eval_arxiv_token_set_f1": 0.26344510050212405,
      "eval_arxiv_token_set_f1_sem": 0.004076579819875716,
      "eval_arxiv_token_set_precision": 0.2033983566688797,
      "eval_arxiv_token_set_recall": 0.4034139858147007,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 20000
    },
    {
      "epoch": 3.84,
      "eval_python_code_alpaca_accuracy": 0.1219375,
      "eval_python_code_alpaca_bleu_score": 2.788880747633829,
      "eval_python_code_alpaca_bleu_score_sem": 0.0865622969943088,
      "eval_python_code_alpaca_emb_cos_sim": 0.49001336097717285,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.01068028470863136,
      "eval_python_code_alpaca_emb_top1_equal": 0.0546875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.020175758285348722,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.738424777984619,
      "eval_python_code_alpaca_n_ngrams_match_1": 6.16,
      "eval_python_code_alpaca_n_ngrams_match_2": 1.172,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.31,
      "eval_python_code_alpaca_num_pred_words": 37.482,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 42.031728686057264,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.2096404841062586,
      "eval_python_code_alpaca_runtime": 10.1415,
      "eval_python_code_alpaca_samples_per_second": 49.303,
      "eval_python_code_alpaca_steps_per_second": 0.099,
      "eval_python_code_alpaca_token_set_f1": 0.3266004332652199,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005286487401659509,
      "eval_python_code_alpaca_token_set_precision": 0.3146256880859559,
      "eval_python_code_alpaca_token_set_recall": 0.3794747305124554,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 20000
    },
    {
      "epoch": 3.84,
      "eval_wikibio_accuracy": 0.27809375,
      "eval_wikibio_bleu_score": 4.852052903618418,
      "eval_wikibio_bleu_score_sem": 0.17969183085346155,
      "eval_wikibio_emb_cos_sim": 0.6578832268714905,
      "eval_wikibio_emb_cos_sim_sem": 0.010880642923918054,
      "eval_wikibio_emb_top1_equal": 0.125,
      "eval_wikibio_emb_top1_equal_sem": 0.02934655822437397,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.403044700622559,
      "eval_wikibio_n_ngrams_match_1": 9.276,
      "eval_wikibio_n_ngrams_match_2": 2.822,
      "eval_wikibio_n_ngrams_match_3": 0.9,
      "eval_wikibio_num_pred_words": 37.478,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 81.69924009194838,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3046706090166992,
      "eval_wikibio_runtime": 14.0655,
      "eval_wikibio_samples_per_second": 35.548,
      "eval_wikibio_steps_per_second": 0.071,
      "eval_wikibio_token_set_f1": 0.29629518121620035,
      "eval_wikibio_token_set_f1_sem": 0.005372514249732754,
      "eval_wikibio_token_set_precision": 0.30051227063985675,
      "eval_wikibio_token_set_recall": 0.30685222842469106,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 20000
    },
    {
      "epoch": 3.84,
      "eval_nq_accuracy": 0.45965625,
      "eval_nq_bleu_score": 8.207424029190614,
      "eval_nq_bleu_score_sem": 0.3686875235712443,
      "eval_nq_emb_cos_sim": 0.7135041952133179,
      "eval_nq_emb_cos_sim_sem": 0.011079902028420926,
      "eval_nq_emb_top1_equal": 0.15625,
      "eval_nq_emb_top1_equal_sem": 0.03221922156442571,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.712360143661499,
      "eval_nq_n_ngrams_match_1": 19.526,
      "eval_nq_n_ngrams_match_2": 6.134,
      "eval_nq_n_ngrams_match_3": 2.472,
      "eval_nq_num_pred_words": 47.752,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 15.064788655791137,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.3689419126746766,
      "eval_nq_runtime": 10.6154,
      "eval_nq_samples_per_second": 47.101,
      "eval_nq_steps_per_second": 0.094,
      "eval_nq_token_set_f1": 0.39564260143520746,
      "eval_nq_token_set_f1_sem": 0.004959118859659147,
      "eval_nq_token_set_precision": 0.33987234351582124,
      "eval_nq_token_set_recall": 0.49062797710110184,
      "eval_nq_true_num_tokens": 64.0,
      "step": 20000
    },
    {
      "epoch": 3.84,
      "learning_rate": 0.001,
      "loss": 3.0772,
      "step": 20004
    },
    {
      "epoch": 3.84,
      "learning_rate": 0.001,
      "loss": 3.0713,
      "step": 20016
    },
    {
      "epoch": 3.85,
      "learning_rate": 0.001,
      "loss": 3.0829,
      "step": 20028
    },
    {
      "epoch": 3.85,
      "learning_rate": 0.001,
      "loss": 3.0653,
      "step": 20040
    },
    {
      "epoch": 3.85,
      "learning_rate": 0.001,
      "loss": 3.0761,
      "step": 20052
    },
    {
      "epoch": 3.85,
      "learning_rate": 0.001,
      "loss": 3.0795,
      "step": 20064
    },
    {
      "epoch": 3.85,
      "learning_rate": 0.001,
      "loss": 3.0683,
      "step": 20076
    },
    {
      "epoch": 3.86,
      "learning_rate": 0.001,
      "loss": 3.0669,
      "step": 20088
    },
    {
      "epoch": 3.86,
      "learning_rate": 0.001,
      "loss": 3.0715,
      "step": 20100
    },
    {
      "epoch": 3.86,
      "learning_rate": 0.001,
      "loss": 3.0731,
      "step": 20112
    },
    {
      "epoch": 3.86,
      "learning_rate": 0.001,
      "loss": 3.0724,
      "step": 20124
    },
    {
      "epoch": 3.87,
      "learning_rate": 0.001,
      "loss": 3.0855,
      "step": 20136
    },
    {
      "epoch": 3.87,
      "learning_rate": 0.001,
      "loss": 3.0827,
      "step": 20148
    },
    {
      "epoch": 3.87,
      "learning_rate": 0.001,
      "loss": 3.0745,
      "step": 20160
    },
    {
      "epoch": 3.87,
      "learning_rate": 0.001,
      "loss": 3.0708,
      "step": 20172
    },
    {
      "epoch": 3.88,
      "learning_rate": 0.001,
      "loss": 3.0722,
      "step": 20184
    },
    {
      "epoch": 3.88,
      "learning_rate": 0.001,
      "loss": 3.0702,
      "step": 20196
    },
    {
      "epoch": 3.88,
      "learning_rate": 0.001,
      "loss": 3.0823,
      "step": 20208
    },
    {
      "epoch": 3.88,
      "learning_rate": 0.001,
      "loss": 3.0812,
      "step": 20220
    },
    {
      "epoch": 3.88,
      "learning_rate": 0.001,
      "loss": 3.0858,
      "step": 20232
    },
    {
      "epoch": 3.89,
      "learning_rate": 0.001,
      "loss": 3.0743,
      "step": 20244
    },
    {
      "epoch": 3.89,
      "learning_rate": 0.001,
      "loss": 3.08,
      "step": 20256
    },
    {
      "epoch": 3.89,
      "learning_rate": 0.001,
      "loss": 3.0741,
      "step": 20268
    },
    {
      "epoch": 3.89,
      "learning_rate": 0.001,
      "loss": 3.0641,
      "step": 20280
    },
    {
      "epoch": 3.9,
      "learning_rate": 0.001,
      "loss": 3.0639,
      "step": 20292
    },
    {
      "epoch": 3.9,
      "learning_rate": 0.001,
      "loss": 3.0685,
      "step": 20304
    },
    {
      "epoch": 3.9,
      "learning_rate": 0.001,
      "loss": 3.0775,
      "step": 20316
    },
    {
      "epoch": 3.9,
      "learning_rate": 0.001,
      "loss": 3.0615,
      "step": 20328
    },
    {
      "epoch": 3.91,
      "learning_rate": 0.001,
      "loss": 3.0698,
      "step": 20340
    },
    {
      "epoch": 3.91,
      "learning_rate": 0.001,
      "loss": 3.0754,
      "step": 20352
    },
    {
      "epoch": 3.91,
      "learning_rate": 0.001,
      "loss": 3.0719,
      "step": 20364
    },
    {
      "epoch": 3.91,
      "learning_rate": 0.001,
      "loss": 3.0746,
      "step": 20376
    },
    {
      "epoch": 3.91,
      "learning_rate": 0.001,
      "loss": 3.0733,
      "step": 20388
    },
    {
      "epoch": 3.92,
      "learning_rate": 0.001,
      "loss": 3.0721,
      "step": 20400
    },
    {
      "epoch": 3.92,
      "learning_rate": 0.001,
      "loss": 3.0735,
      "step": 20412
    },
    {
      "epoch": 3.92,
      "learning_rate": 0.001,
      "loss": 3.0711,
      "step": 20424
    },
    {
      "epoch": 3.92,
      "learning_rate": 0.001,
      "loss": 3.0725,
      "step": 20436
    },
    {
      "epoch": 3.93,
      "learning_rate": 0.001,
      "loss": 3.0776,
      "step": 20448
    },
    {
      "epoch": 3.93,
      "learning_rate": 0.001,
      "loss": 3.0676,
      "step": 20460
    },
    {
      "epoch": 3.93,
      "learning_rate": 0.001,
      "loss": 3.0667,
      "step": 20472
    },
    {
      "epoch": 3.93,
      "learning_rate": 0.001,
      "loss": 3.0786,
      "step": 20484
    },
    {
      "epoch": 3.94,
      "learning_rate": 0.001,
      "loss": 3.0771,
      "step": 20496
    },
    {
      "epoch": 3.94,
      "learning_rate": 0.001,
      "loss": 3.0751,
      "step": 20508
    },
    {
      "epoch": 3.94,
      "learning_rate": 0.001,
      "loss": 3.0708,
      "step": 20520
    },
    {
      "epoch": 3.94,
      "learning_rate": 0.001,
      "loss": 3.0639,
      "step": 20532
    },
    {
      "epoch": 3.94,
      "learning_rate": 0.001,
      "loss": 3.0663,
      "step": 20544
    },
    {
      "epoch": 3.95,
      "learning_rate": 0.001,
      "loss": 3.0689,
      "step": 20556
    },
    {
      "epoch": 3.95,
      "learning_rate": 0.001,
      "loss": 3.0638,
      "step": 20568
    },
    {
      "epoch": 3.95,
      "learning_rate": 0.001,
      "loss": 3.0769,
      "step": 20580
    },
    {
      "epoch": 3.95,
      "learning_rate": 0.001,
      "loss": 3.0643,
      "step": 20592
    },
    {
      "epoch": 3.96,
      "learning_rate": 0.001,
      "loss": 3.0708,
      "step": 20604
    },
    {
      "epoch": 3.96,
      "learning_rate": 0.001,
      "loss": 3.0661,
      "step": 20616
    },
    {
      "epoch": 3.96,
      "eval_ag_news_accuracy": 0.2705625,
      "eval_ag_news_bleu_score": 3.497625776562606,
      "eval_ag_news_bleu_score_sem": 0.1243416779453758,
      "eval_ag_news_emb_cos_sim": 0.6854905486106873,
      "eval_ag_news_emb_cos_sim_sem": 0.011165756475926632,
      "eval_ag_news_emb_top1_equal": 0.1875,
      "eval_ag_news_emb_top1_equal_sem": 0.034634623208270626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 4.142734527587891,
      "eval_ag_news_n_ngrams_match_1": 10.81,
      "eval_ag_news_n_ngrams_match_2": 2.016,
      "eval_ag_news_n_ngrams_match_3": 0.508,
      "eval_ag_news_num_pred_words": 45.486,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 62.974792519708714,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.25981317909377943,
      "eval_ag_news_runtime": 11.3131,
      "eval_ag_news_samples_per_second": 44.196,
      "eval_ag_news_steps_per_second": 0.088,
      "eval_ag_news_token_set_f1": 0.28457443987735725,
      "eval_ag_news_token_set_f1_sem": 0.004563032961620619,
      "eval_ag_news_token_set_precision": 0.2500429050994227,
      "eval_ag_news_token_set_recall": 0.3547567739726648,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 20625
    },
    {
      "epoch": 3.96,
      "eval_anthropic_toxic_prompts_accuracy": 0.0889375,
      "eval_anthropic_toxic_prompts_bleu_score": 1.940812447781033,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.07052932065667471,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5502721071243286,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010820657421774925,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.8554270267486572,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.244,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.97,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.276,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.462,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 47.24878913073644,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.14700750479400432,
      "eval_anthropic_toxic_prompts_runtime": 9.7977,
      "eval_anthropic_toxic_prompts_samples_per_second": 51.032,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.102,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.2650050236180496,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006014103829789257,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.29226082959480526,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.28099879207721473,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 20625
    },
    {
      "epoch": 3.96,
      "eval_arxiv_accuracy": 0.2989375,
      "eval_arxiv_bleu_score": 3.0788719675519833,
      "eval_arxiv_bleu_score_sem": 0.08817320989733096,
      "eval_arxiv_emb_cos_sim": 0.6014991998672485,
      "eval_arxiv_emb_cos_sim_sem": 0.0078075713191970144,
      "eval_arxiv_emb_top1_equal": 0.1875,
      "eval_arxiv_emb_top1_equal_sem": 0.034634623208270626,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 4.004692077636719,
      "eval_arxiv_n_ngrams_match_1": 11.292,
      "eval_arxiv_n_ngrams_match_2": 1.916,
      "eval_arxiv_n_ngrams_match_3": 0.336,
      "eval_arxiv_num_pred_words": 38.952,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 54.854930738325024,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.26215141159546584,
      "eval_arxiv_runtime": 10.1275,
      "eval_arxiv_samples_per_second": 49.371,
      "eval_arxiv_steps_per_second": 0.099,
      "eval_arxiv_token_set_f1": 0.26786712182741457,
      "eval_arxiv_token_set_f1_sem": 0.004083838112782947,
      "eval_arxiv_token_set_precision": 0.20782117795380609,
      "eval_arxiv_token_set_recall": 0.40398350550246026,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 20625
    },
    {
      "epoch": 3.96,
      "eval_python_code_alpaca_accuracy": 0.1240625,
      "eval_python_code_alpaca_bleu_score": 2.9143636046107377,
      "eval_python_code_alpaca_bleu_score_sem": 0.08848970243259717,
      "eval_python_code_alpaca_emb_cos_sim": 0.5072420239448547,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.01087854545966013,
      "eval_python_code_alpaca_emb_top1_equal": 0.015625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.011004959004867984,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.7139439582824707,
      "eval_python_code_alpaca_n_ngrams_match_1": 6.358,
      "eval_python_code_alpaca_n_ngrams_match_2": 1.294,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.318,
      "eval_python_code_alpaca_num_pred_words": 37.594,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 41.015250390274765,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.21857121612457775,
      "eval_python_code_alpaca_runtime": 9.9235,
      "eval_python_code_alpaca_samples_per_second": 50.385,
      "eval_python_code_alpaca_steps_per_second": 0.101,
      "eval_python_code_alpaca_token_set_f1": 0.3449749959334582,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005456353292137309,
      "eval_python_code_alpaca_token_set_precision": 0.3258423302136996,
      "eval_python_code_alpaca_token_set_recall": 0.4124755843303544,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 20625
    },
    {
      "epoch": 3.96,
      "eval_wikibio_accuracy": 0.27915625,
      "eval_wikibio_bleu_score": 4.900959285731773,
      "eval_wikibio_bleu_score_sem": 0.17996779787220374,
      "eval_wikibio_emb_cos_sim": 0.6767225861549377,
      "eval_wikibio_emb_cos_sim_sem": 0.010583971257249872,
      "eval_wikibio_emb_top1_equal": 0.125,
      "eval_wikibio_emb_top1_equal_sem": 0.02934655822437397,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.434642314910889,
      "eval_wikibio_n_ngrams_match_1": 9.508,
      "eval_wikibio_n_ngrams_match_2": 2.93,
      "eval_wikibio_n_ngrams_match_3": 0.972,
      "eval_wikibio_num_pred_words": 38.054,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 84.3219587862873,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3075396036762009,
      "eval_wikibio_runtime": 10.0767,
      "eval_wikibio_samples_per_second": 49.62,
      "eval_wikibio_steps_per_second": 0.099,
      "eval_wikibio_token_set_f1": 0.29780042871675627,
      "eval_wikibio_token_set_f1_sem": 0.005478605226638085,
      "eval_wikibio_token_set_precision": 0.3042194005019633,
      "eval_wikibio_token_set_recall": 0.30874025411839556,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 20625
    },
    {
      "epoch": 3.96,
      "eval_nq_accuracy": 0.46215625,
      "eval_nq_bleu_score": 8.28533813478615,
      "eval_nq_bleu_score_sem": 0.3660077465844488,
      "eval_nq_emb_cos_sim": 0.7317532896995544,
      "eval_nq_emb_cos_sim_sem": 0.009859092878067069,
      "eval_nq_emb_top1_equal": 0.1796875,
      "eval_nq_emb_top1_equal_sem": 0.034068008879424266,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.6991474628448486,
      "eval_nq_n_ngrams_match_1": 19.582,
      "eval_nq_n_ngrams_match_2": 6.286,
      "eval_nq_n_ngrams_match_3": 2.528,
      "eval_nq_num_pred_words": 48.442,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 14.86705160662268,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.3683165596709803,
      "eval_nq_runtime": 10.9532,
      "eval_nq_samples_per_second": 45.649,
      "eval_nq_steps_per_second": 0.091,
      "eval_nq_token_set_f1": 0.39445822839836675,
      "eval_nq_token_set_f1_sem": 0.004759952568872269,
      "eval_nq_token_set_precision": 0.3384423186274052,
      "eval_nq_token_set_recall": 0.4880352088176547,
      "eval_nq_true_num_tokens": 64.0,
      "step": 20625
    },
    {
      "epoch": 3.96,
      "learning_rate": 0.001,
      "loss": 3.0717,
      "step": 20628
    },
    {
      "epoch": 3.96,
      "learning_rate": 0.001,
      "loss": 3.0771,
      "step": 20640
    },
    {
      "epoch": 3.97,
      "learning_rate": 0.001,
      "loss": 3.0727,
      "step": 20652
    },
    {
      "epoch": 3.97,
      "learning_rate": 0.001,
      "loss": 3.0653,
      "step": 20664
    },
    {
      "epoch": 3.97,
      "learning_rate": 0.001,
      "loss": 3.0626,
      "step": 20676
    },
    {
      "epoch": 3.97,
      "learning_rate": 0.001,
      "loss": 3.0562,
      "step": 20688
    },
    {
      "epoch": 3.97,
      "learning_rate": 0.001,
      "loss": 3.0685,
      "step": 20700
    },
    {
      "epoch": 3.98,
      "learning_rate": 0.001,
      "loss": 3.066,
      "step": 20712
    },
    {
      "epoch": 3.98,
      "learning_rate": 0.001,
      "loss": 3.073,
      "step": 20724
    },
    {
      "epoch": 3.98,
      "learning_rate": 0.001,
      "loss": 3.0674,
      "step": 20736
    },
    {
      "epoch": 3.98,
      "learning_rate": 0.001,
      "loss": 3.0645,
      "step": 20748
    },
    {
      "epoch": 3.99,
      "learning_rate": 0.001,
      "loss": 3.066,
      "step": 20760
    },
    {
      "epoch": 3.99,
      "learning_rate": 0.001,
      "loss": 3.0712,
      "step": 20772
    },
    {
      "epoch": 3.99,
      "learning_rate": 0.001,
      "loss": 3.0664,
      "step": 20784
    },
    {
      "epoch": 3.99,
      "learning_rate": 0.001,
      "loss": 3.0487,
      "step": 20796
    },
    {
      "epoch": 4.0,
      "learning_rate": 0.001,
      "loss": 3.0565,
      "step": 20808
    },
    {
      "epoch": 4.0,
      "learning_rate": 0.001,
      "loss": 3.07,
      "step": 20820
    },
    {
      "epoch": 4.0,
      "learning_rate": 0.001,
      "loss": 3.065,
      "step": 20832
    },
    {
      "epoch": 4.0,
      "learning_rate": 0.001,
      "loss": 3.0418,
      "step": 20844
    },
    {
      "epoch": 4.0,
      "learning_rate": 0.001,
      "loss": 3.0483,
      "step": 20856
    },
    {
      "epoch": 4.01,
      "learning_rate": 0.001,
      "loss": 3.0469,
      "step": 20868
    },
    {
      "epoch": 4.01,
      "learning_rate": 0.001,
      "loss": 3.0389,
      "step": 20880
    },
    {
      "epoch": 4.01,
      "learning_rate": 0.001,
      "loss": 3.0522,
      "step": 20892
    },
    {
      "epoch": 4.01,
      "learning_rate": 0.001,
      "loss": 3.0505,
      "step": 20904
    },
    {
      "epoch": 4.02,
      "learning_rate": 0.001,
      "loss": 3.0453,
      "step": 20916
    },
    {
      "epoch": 4.02,
      "learning_rate": 0.001,
      "loss": 3.039,
      "step": 20928
    },
    {
      "epoch": 4.02,
      "learning_rate": 0.001,
      "loss": 3.0363,
      "step": 20940
    },
    {
      "epoch": 4.02,
      "learning_rate": 0.001,
      "loss": 3.0476,
      "step": 20952
    },
    {
      "epoch": 4.03,
      "learning_rate": 0.001,
      "loss": 3.0522,
      "step": 20964
    },
    {
      "epoch": 4.03,
      "learning_rate": 0.001,
      "loss": 3.0349,
      "step": 20976
    },
    {
      "epoch": 4.03,
      "learning_rate": 0.001,
      "loss": 3.045,
      "step": 20988
    },
    {
      "epoch": 4.03,
      "learning_rate": 0.001,
      "loss": 3.0515,
      "step": 21000
    },
    {
      "epoch": 4.03,
      "learning_rate": 0.001,
      "loss": 3.0399,
      "step": 21012
    },
    {
      "epoch": 4.04,
      "learning_rate": 0.001,
      "loss": 3.0417,
      "step": 21024
    },
    {
      "epoch": 4.04,
      "learning_rate": 0.001,
      "loss": 3.0497,
      "step": 21036
    },
    {
      "epoch": 4.04,
      "learning_rate": 0.001,
      "loss": 3.038,
      "step": 21048
    },
    {
      "epoch": 4.04,
      "learning_rate": 0.001,
      "loss": 3.0283,
      "step": 21060
    },
    {
      "epoch": 4.05,
      "learning_rate": 0.001,
      "loss": 3.0494,
      "step": 21072
    },
    {
      "epoch": 4.05,
      "learning_rate": 0.001,
      "loss": 3.0447,
      "step": 21084
    },
    {
      "epoch": 4.05,
      "learning_rate": 0.001,
      "loss": 3.0374,
      "step": 21096
    },
    {
      "epoch": 4.05,
      "learning_rate": 0.001,
      "loss": 3.0452,
      "step": 21108
    },
    {
      "epoch": 4.06,
      "learning_rate": 0.001,
      "loss": 3.0368,
      "step": 21120
    },
    {
      "epoch": 4.06,
      "learning_rate": 0.001,
      "loss": 3.0386,
      "step": 21132
    },
    {
      "epoch": 4.06,
      "learning_rate": 0.001,
      "loss": 3.0436,
      "step": 21144
    },
    {
      "epoch": 4.06,
      "learning_rate": 0.001,
      "loss": 3.0521,
      "step": 21156
    },
    {
      "epoch": 4.06,
      "learning_rate": 0.001,
      "loss": 3.0326,
      "step": 21168
    },
    {
      "epoch": 4.07,
      "learning_rate": 0.001,
      "loss": 3.0423,
      "step": 21180
    },
    {
      "epoch": 4.07,
      "learning_rate": 0.001,
      "loss": 3.0463,
      "step": 21192
    },
    {
      "epoch": 4.07,
      "learning_rate": 0.001,
      "loss": 3.0457,
      "step": 21204
    },
    {
      "epoch": 4.07,
      "learning_rate": 0.001,
      "loss": 3.036,
      "step": 21216
    },
    {
      "epoch": 4.08,
      "learning_rate": 0.001,
      "loss": 3.0324,
      "step": 21228
    },
    {
      "epoch": 4.08,
      "learning_rate": 0.001,
      "loss": 3.0401,
      "step": 21240
    },
    {
      "epoch": 4.08,
      "eval_ag_news_accuracy": 0.27059375,
      "eval_ag_news_bleu_score": 3.4759116114975583,
      "eval_ag_news_bleu_score_sem": 0.12353977729368248,
      "eval_ag_news_emb_cos_sim": 0.6965416669845581,
      "eval_ag_news_emb_cos_sim_sem": 0.010154137939800535,
      "eval_ag_news_emb_top1_equal": 0.125,
      "eval_ag_news_emb_top1_equal_sem": 0.02934655822437397,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 4.144755840301514,
      "eval_ag_news_n_ngrams_match_1": 10.956,
      "eval_ag_news_n_ngrams_match_2": 2.052,
      "eval_ag_news_n_ngrams_match_3": 0.496,
      "eval_ag_news_num_pred_words": 45.744,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 63.10221300340487,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.2660516703895089,
      "eval_ag_news_runtime": 10.3868,
      "eval_ag_news_samples_per_second": 48.138,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.2865905061593258,
      "eval_ag_news_token_set_f1_sem": 0.004343457012985784,
      "eval_ag_news_token_set_precision": 0.25481817659442135,
      "eval_ag_news_token_set_recall": 0.3522420521165349,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 21250
    },
    {
      "epoch": 4.08,
      "eval_anthropic_toxic_prompts_accuracy": 0.08903125,
      "eval_anthropic_toxic_prompts_bleu_score": 1.9651381662392637,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.07138236548140361,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.565031886100769,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011376758087551344,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.046875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01875615101164758,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.8321046829223633,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.342,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.992,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.268,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.912,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 46.15958736479582,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.15272833744161227,
      "eval_anthropic_toxic_prompts_runtime": 9.9557,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.222,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.1,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.273758563134554,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005968473785356642,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.29467451304701114,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.29285949299857483,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 21250
    },
    {
      "epoch": 4.08,
      "eval_arxiv_accuracy": 0.29784375,
      "eval_arxiv_bleu_score": 2.98095021356775,
      "eval_arxiv_bleu_score_sem": 0.08693911428074183,
      "eval_arxiv_emb_cos_sim": 0.6017891764640808,
      "eval_arxiv_emb_cos_sim_sem": 0.008590137538734595,
      "eval_arxiv_emb_top1_equal": 0.171875,
      "eval_arxiv_emb_top1_equal_sem": 0.03347745514062371,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.9887657165527344,
      "eval_arxiv_n_ngrams_match_1": 11.018,
      "eval_arxiv_n_ngrams_match_2": 1.814,
      "eval_arxiv_n_ngrams_match_3": 0.312,
      "eval_arxiv_num_pred_words": 38.388,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 53.988211466319015,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.2594239905479414,
      "eval_arxiv_runtime": 10.2082,
      "eval_arxiv_samples_per_second": 48.98,
      "eval_arxiv_steps_per_second": 0.098,
      "eval_arxiv_token_set_f1": 0.2642451292254428,
      "eval_arxiv_token_set_f1_sem": 0.0038802082355823574,
      "eval_arxiv_token_set_precision": 0.2039543512501798,
      "eval_arxiv_token_set_recall": 0.408623030396844,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 21250
    },
    {
      "epoch": 4.08,
      "eval_python_code_alpaca_accuracy": 0.125,
      "eval_python_code_alpaca_bleu_score": 2.8776236589230293,
      "eval_python_code_alpaca_bleu_score_sem": 0.0943106397693027,
      "eval_python_code_alpaca_emb_cos_sim": 0.5189061164855957,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009752914429956,
      "eval_python_code_alpaca_emb_top1_equal": 0.046875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.01875615101164758,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.6420161724090576,
      "eval_python_code_alpaca_n_ngrams_match_1": 6.198,
      "eval_python_code_alpaca_n_ngrams_match_2": 1.266,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.334,
      "eval_python_code_alpaca_num_pred_words": 37.466,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 38.16871390840089,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.21139995276143378,
      "eval_python_code_alpaca_runtime": 10.5356,
      "eval_python_code_alpaca_samples_per_second": 47.458,
      "eval_python_code_alpaca_steps_per_second": 0.095,
      "eval_python_code_alpaca_token_set_f1": 0.3317504191173044,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0054573483567321,
      "eval_python_code_alpaca_token_set_precision": 0.31876378129823757,
      "eval_python_code_alpaca_token_set_recall": 0.3905314766738178,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 21250
    },
    {
      "epoch": 4.08,
      "eval_wikibio_accuracy": 0.27596875,
      "eval_wikibio_bleu_score": 4.723717154191338,
      "eval_wikibio_bleu_score_sem": 0.1826744504726285,
      "eval_wikibio_emb_cos_sim": 0.6692360639572144,
      "eval_wikibio_emb_cos_sim_sem": 0.011303136762825924,
      "eval_wikibio_emb_top1_equal": 0.140625,
      "eval_wikibio_emb_top1_equal_sem": 0.030847557647994725,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.416079521179199,
      "eval_wikibio_n_ngrams_match_1": 8.93,
      "eval_wikibio_n_ngrams_match_2": 2.736,
      "eval_wikibio_n_ngrams_match_3": 0.892,
      "eval_wikibio_num_pred_words": 37.176,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 82.77114589914449,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.2953368064200208,
      "eval_wikibio_runtime": 10.2446,
      "eval_wikibio_samples_per_second": 48.806,
      "eval_wikibio_steps_per_second": 0.098,
      "eval_wikibio_token_set_f1": 0.2822498487406837,
      "eval_wikibio_token_set_f1_sem": 0.005807926081839543,
      "eval_wikibio_token_set_precision": 0.28459061256356083,
      "eval_wikibio_token_set_recall": 0.29633119494668225,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 21250
    },
    {
      "epoch": 4.08,
      "eval_nq_accuracy": 0.464875,
      "eval_nq_bleu_score": 8.341417445829022,
      "eval_nq_bleu_score_sem": 0.3837464849029816,
      "eval_nq_emb_cos_sim": 0.7356172800064087,
      "eval_nq_emb_cos_sim_sem": 0.010324239327667963,
      "eval_nq_emb_top1_equal": 0.203125,
      "eval_nq_emb_top1_equal_sem": 0.03570055125142555,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.688774347305298,
      "eval_nq_n_ngrams_match_1": 19.506,
      "eval_nq_n_ngrams_match_2": 6.232,
      "eval_nq_n_ngrams_match_3": 2.568,
      "eval_nq_num_pred_words": 48.698,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 14.713631062776367,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.3659405393239539,
      "eval_nq_runtime": 10.5198,
      "eval_nq_samples_per_second": 47.53,
      "eval_nq_steps_per_second": 0.095,
      "eval_nq_token_set_f1": 0.39202691823958985,
      "eval_nq_token_set_f1_sem": 0.005032378170226777,
      "eval_nq_token_set_precision": 0.33906263724145974,
      "eval_nq_token_set_recall": 0.48259763210815115,
      "eval_nq_true_num_tokens": 64.0,
      "step": 21250
    },
    {
      "epoch": 4.08,
      "learning_rate": 0.001,
      "loss": 3.0532,
      "step": 21252
    },
    {
      "epoch": 4.08,
      "learning_rate": 0.001,
      "loss": 3.0389,
      "step": 21264
    },
    {
      "epoch": 4.09,
      "learning_rate": 0.001,
      "loss": 3.036,
      "step": 21276
    },
    {
      "epoch": 4.09,
      "learning_rate": 0.001,
      "loss": 3.0452,
      "step": 21288
    },
    {
      "epoch": 4.09,
      "learning_rate": 0.001,
      "loss": 3.0483,
      "step": 21300
    },
    {
      "epoch": 4.09,
      "learning_rate": 0.001,
      "loss": 3.0275,
      "step": 21312
    },
    {
      "epoch": 4.09,
      "learning_rate": 0.001,
      "loss": 3.0406,
      "step": 21324
    },
    {
      "epoch": 4.1,
      "learning_rate": 0.001,
      "loss": 3.0345,
      "step": 21336
    },
    {
      "epoch": 4.1,
      "learning_rate": 0.001,
      "loss": 3.0377,
      "step": 21348
    },
    {
      "epoch": 4.1,
      "learning_rate": 0.001,
      "loss": 3.0301,
      "step": 21360
    },
    {
      "epoch": 4.1,
      "learning_rate": 0.001,
      "loss": 3.0438,
      "step": 21372
    },
    {
      "epoch": 4.11,
      "learning_rate": 0.001,
      "loss": 3.0442,
      "step": 21384
    },
    {
      "epoch": 4.11,
      "learning_rate": 0.001,
      "loss": 3.0437,
      "step": 21396
    },
    {
      "epoch": 4.11,
      "learning_rate": 0.001,
      "loss": 3.0391,
      "step": 21408
    },
    {
      "epoch": 4.11,
      "learning_rate": 0.001,
      "loss": 3.0375,
      "step": 21420
    },
    {
      "epoch": 4.12,
      "learning_rate": 0.001,
      "loss": 3.0438,
      "step": 21432
    },
    {
      "epoch": 4.12,
      "learning_rate": 0.001,
      "loss": 3.0337,
      "step": 21444
    },
    {
      "epoch": 4.12,
      "learning_rate": 0.001,
      "loss": 3.0285,
      "step": 21456
    },
    {
      "epoch": 4.12,
      "learning_rate": 0.001,
      "loss": 3.0454,
      "step": 21468
    },
    {
      "epoch": 4.12,
      "learning_rate": 0.001,
      "loss": 3.0475,
      "step": 21480
    },
    {
      "epoch": 4.13,
      "learning_rate": 0.001,
      "loss": 3.0364,
      "step": 21492
    },
    {
      "epoch": 4.13,
      "learning_rate": 0.001,
      "loss": 3.0506,
      "step": 21504
    },
    {
      "epoch": 4.13,
      "learning_rate": 0.001,
      "loss": 3.0315,
      "step": 21516
    },
    {
      "epoch": 4.13,
      "learning_rate": 0.001,
      "loss": 3.0295,
      "step": 21528
    },
    {
      "epoch": 4.14,
      "learning_rate": 0.001,
      "loss": 3.0315,
      "step": 21540
    },
    {
      "epoch": 4.14,
      "learning_rate": 0.001,
      "loss": 3.0351,
      "step": 21552
    },
    {
      "epoch": 4.14,
      "learning_rate": 0.001,
      "loss": 3.0351,
      "step": 21564
    },
    {
      "epoch": 4.14,
      "learning_rate": 0.001,
      "loss": 3.0465,
      "step": 21576
    },
    {
      "epoch": 4.15,
      "learning_rate": 0.001,
      "loss": 3.0398,
      "step": 21588
    },
    {
      "epoch": 4.15,
      "learning_rate": 0.001,
      "loss": 3.028,
      "step": 21600
    },
    {
      "epoch": 4.15,
      "learning_rate": 0.001,
      "loss": 3.0413,
      "step": 21612
    },
    {
      "epoch": 4.15,
      "learning_rate": 0.001,
      "loss": 3.0389,
      "step": 21624
    },
    {
      "epoch": 4.15,
      "learning_rate": 0.001,
      "loss": 3.0374,
      "step": 21636
    },
    {
      "epoch": 4.16,
      "learning_rate": 0.001,
      "loss": 3.0321,
      "step": 21648
    },
    {
      "epoch": 4.16,
      "learning_rate": 0.001,
      "loss": 3.0385,
      "step": 21660
    },
    {
      "epoch": 4.16,
      "learning_rate": 0.001,
      "loss": 3.0372,
      "step": 21672
    },
    {
      "epoch": 4.16,
      "learning_rate": 0.001,
      "loss": 3.0316,
      "step": 21684
    },
    {
      "epoch": 4.17,
      "learning_rate": 0.001,
      "loss": 3.0331,
      "step": 21696
    },
    {
      "epoch": 4.17,
      "learning_rate": 0.001,
      "loss": 3.0242,
      "step": 21708
    },
    {
      "epoch": 4.17,
      "learning_rate": 0.001,
      "loss": 3.0321,
      "step": 21720
    },
    {
      "epoch": 4.17,
      "learning_rate": 0.001,
      "loss": 3.0334,
      "step": 21732
    },
    {
      "epoch": 4.18,
      "learning_rate": 0.001,
      "loss": 3.0271,
      "step": 21744
    },
    {
      "epoch": 4.18,
      "learning_rate": 0.001,
      "loss": 3.036,
      "step": 21756
    },
    {
      "epoch": 4.18,
      "learning_rate": 0.001,
      "loss": 3.0285,
      "step": 21768
    },
    {
      "epoch": 4.18,
      "learning_rate": 0.001,
      "loss": 3.0339,
      "step": 21780
    },
    {
      "epoch": 4.18,
      "learning_rate": 0.001,
      "loss": 3.0349,
      "step": 21792
    },
    {
      "epoch": 4.19,
      "learning_rate": 0.001,
      "loss": 3.0377,
      "step": 21804
    },
    {
      "epoch": 4.19,
      "learning_rate": 0.001,
      "loss": 3.0341,
      "step": 21816
    },
    {
      "epoch": 4.19,
      "learning_rate": 0.001,
      "loss": 3.0324,
      "step": 21828
    },
    {
      "epoch": 4.19,
      "learning_rate": 0.001,
      "loss": 3.0356,
      "step": 21840
    },
    {
      "epoch": 4.2,
      "learning_rate": 0.001,
      "loss": 3.0243,
      "step": 21852
    },
    {
      "epoch": 4.2,
      "learning_rate": 0.001,
      "loss": 3.0221,
      "step": 21864
    },
    {
      "epoch": 4.2,
      "eval_ag_news_accuracy": 0.27284375,
      "eval_ag_news_bleu_score": 3.4996654310486885,
      "eval_ag_news_bleu_score_sem": 0.12056780449144557,
      "eval_ag_news_emb_cos_sim": 0.7136399149894714,
      "eval_ag_news_emb_cos_sim_sem": 0.008702499061231115,
      "eval_ag_news_emb_top1_equal": 0.1875,
      "eval_ag_news_emb_top1_equal_sem": 0.034634623208270626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 4.116077899932861,
      "eval_ag_news_n_ngrams_match_1": 11.198,
      "eval_ag_news_n_ngrams_match_2": 2.082,
      "eval_ag_news_n_ngrams_match_3": 0.51,
      "eval_ag_news_num_pred_words": 46.234,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 61.31827361668396,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.26791186121739585,
      "eval_ag_news_runtime": 11.0522,
      "eval_ag_news_samples_per_second": 45.24,
      "eval_ag_news_steps_per_second": 0.09,
      "eval_ag_news_token_set_f1": 0.2911938956083088,
      "eval_ag_news_token_set_f1_sem": 0.004323212925036001,
      "eval_ag_news_token_set_precision": 0.25939685552279124,
      "eval_ag_news_token_set_recall": 0.3509315539008144,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 21875
    },
    {
      "epoch": 4.2,
      "eval_anthropic_toxic_prompts_accuracy": 0.09128125,
      "eval_anthropic_toxic_prompts_bleu_score": 1.9570257695194715,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.07294093462571513,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.561536431312561,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011565628652440787,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.046875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01875615101164758,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.8141582012176514,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.43,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.014,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.272,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.624,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 45.338574357723054,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.15432672773448186,
      "eval_anthropic_toxic_prompts_runtime": 10.1394,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.313,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.099,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.2672176719490324,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005943945345494677,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.29800353577576116,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.28088067934522803,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 21875
    },
    {
      "epoch": 4.2,
      "eval_arxiv_accuracy": 0.301,
      "eval_arxiv_bleu_score": 3.1479586906083488,
      "eval_arxiv_bleu_score_sem": 0.0944101131232911,
      "eval_arxiv_emb_cos_sim": 0.6080456376075745,
      "eval_arxiv_emb_cos_sim_sem": 0.008475128008771477,
      "eval_arxiv_emb_top1_equal": 0.2109375,
      "eval_arxiv_emb_top1_equal_sem": 0.03620184850179216,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.970264196395874,
      "eval_arxiv_n_ngrams_match_1": 11.418,
      "eval_arxiv_n_ngrams_match_2": 1.94,
      "eval_arxiv_n_ngrams_match_3": 0.346,
      "eval_arxiv_num_pred_words": 39.672,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 52.998531011077056,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.26787391631613255,
      "eval_arxiv_runtime": 10.4506,
      "eval_arxiv_samples_per_second": 47.844,
      "eval_arxiv_steps_per_second": 0.096,
      "eval_arxiv_token_set_f1": 0.2688418724035355,
      "eval_arxiv_token_set_f1_sem": 0.003939178956185688,
      "eval_arxiv_token_set_precision": 0.21069150806535167,
      "eval_arxiv_token_set_recall": 0.39311801139098385,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 21875
    },
    {
      "epoch": 4.2,
      "eval_python_code_alpaca_accuracy": 0.12534375,
      "eval_python_code_alpaca_bleu_score": 2.896494337010263,
      "eval_python_code_alpaca_bleu_score_sem": 0.09235661841466093,
      "eval_python_code_alpaca_emb_cos_sim": 0.540421187877655,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.01026512968972045,
      "eval_python_code_alpaca_emb_top1_equal": 0.0234375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.013424676090873717,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.6180341243743896,
      "eval_python_code_alpaca_n_ngrams_match_1": 6.56,
      "eval_python_code_alpaca_n_ngrams_match_2": 1.306,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.336,
      "eval_python_code_alpaca_num_pred_words": 38.536,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 37.26423890893017,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.22576526748087372,
      "eval_python_code_alpaca_runtime": 11.6631,
      "eval_python_code_alpaca_samples_per_second": 42.87,
      "eval_python_code_alpaca_steps_per_second": 0.086,
      "eval_python_code_alpaca_token_set_f1": 0.3462301535756837,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005242461543095605,
      "eval_python_code_alpaca_token_set_precision": 0.3430515376847243,
      "eval_python_code_alpaca_token_set_recall": 0.3867008615388679,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 21875
    },
    {
      "epoch": 4.2,
      "eval_wikibio_accuracy": 0.27775,
      "eval_wikibio_bleu_score": 4.848883945765031,
      "eval_wikibio_bleu_score_sem": 0.17057304405972518,
      "eval_wikibio_emb_cos_sim": 0.687816321849823,
      "eval_wikibio_emb_cos_sim_sem": 0.009560151867024377,
      "eval_wikibio_emb_top1_equal": 0.109375,
      "eval_wikibio_emb_top1_equal_sem": 0.027695207821224692,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.434406757354736,
      "eval_wikibio_n_ngrams_match_1": 9.464,
      "eval_wikibio_n_ngrams_match_2": 2.87,
      "eval_wikibio_n_ngrams_match_3": 0.928,
      "eval_wikibio_num_pred_words": 38.342,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 84.30209845096348,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.31197704945570526,
      "eval_wikibio_runtime": 10.0717,
      "eval_wikibio_samples_per_second": 49.644,
      "eval_wikibio_steps_per_second": 0.099,
      "eval_wikibio_token_set_f1": 0.2942061068802919,
      "eval_wikibio_token_set_f1_sem": 0.0053270369418450285,
      "eval_wikibio_token_set_precision": 0.30313770407388707,
      "eval_wikibio_token_set_recall": 0.2981083597781698,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 21875
    },
    {
      "epoch": 4.2,
      "eval_nq_accuracy": 0.46346875,
      "eval_nq_bleu_score": 8.24216455551241,
      "eval_nq_bleu_score_sem": 0.3651753215587689,
      "eval_nq_emb_cos_sim": 0.7345483303070068,
      "eval_nq_emb_cos_sim_sem": 0.011004716660489047,
      "eval_nq_emb_top1_equal": 0.171875,
      "eval_nq_emb_top1_equal_sem": 0.03347745514062371,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.680889129638672,
      "eval_nq_n_ngrams_match_1": 19.792,
      "eval_nq_n_ngrams_match_2": 6.292,
      "eval_nq_n_ngrams_match_3": 2.508,
      "eval_nq_num_pred_words": 48.768,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 14.598067101455777,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.3747689920413043,
      "eval_nq_runtime": 10.5887,
      "eval_nq_samples_per_second": 47.22,
      "eval_nq_steps_per_second": 0.094,
      "eval_nq_token_set_f1": 0.398663761084407,
      "eval_nq_token_set_f1_sem": 0.004841490243342143,
      "eval_nq_token_set_precision": 0.34388651679642374,
      "eval_nq_token_set_recall": 0.48906836026594014,
      "eval_nq_true_num_tokens": 64.0,
      "step": 21875
    },
    {
      "epoch": 4.2,
      "learning_rate": 0.001,
      "loss": 3.0364,
      "step": 21876
    },
    {
      "epoch": 4.2,
      "learning_rate": 0.001,
      "loss": 3.0372,
      "step": 21888
    },
    {
      "epoch": 4.21,
      "learning_rate": 0.001,
      "loss": 3.0341,
      "step": 21900
    },
    {
      "epoch": 4.21,
      "learning_rate": 0.001,
      "loss": 3.038,
      "step": 21912
    },
    {
      "epoch": 4.21,
      "learning_rate": 0.001,
      "loss": 3.0426,
      "step": 21924
    },
    {
      "epoch": 4.21,
      "learning_rate": 0.001,
      "loss": 3.0261,
      "step": 21936
    },
    {
      "epoch": 4.21,
      "learning_rate": 0.001,
      "loss": 3.0327,
      "step": 21948
    },
    {
      "epoch": 4.22,
      "learning_rate": 0.001,
      "loss": 3.0332,
      "step": 21960
    },
    {
      "epoch": 4.22,
      "learning_rate": 0.001,
      "loss": 3.023,
      "step": 21972
    },
    {
      "epoch": 4.22,
      "learning_rate": 0.001,
      "loss": 3.0322,
      "step": 21984
    },
    {
      "epoch": 4.22,
      "learning_rate": 0.001,
      "loss": 3.0373,
      "step": 21996
    },
    {
      "epoch": 4.23,
      "learning_rate": 0.001,
      "loss": 3.0294,
      "step": 22008
    },
    {
      "epoch": 4.23,
      "learning_rate": 0.001,
      "loss": 3.042,
      "step": 22020
    },
    {
      "epoch": 4.23,
      "learning_rate": 0.001,
      "loss": 3.0404,
      "step": 22032
    },
    {
      "epoch": 4.23,
      "learning_rate": 0.001,
      "loss": 3.0186,
      "step": 22044
    },
    {
      "epoch": 4.24,
      "learning_rate": 0.001,
      "loss": 3.0258,
      "step": 22056
    },
    {
      "epoch": 4.24,
      "learning_rate": 0.001,
      "loss": 3.0196,
      "step": 22068
    },
    {
      "epoch": 4.24,
      "learning_rate": 0.001,
      "loss": 3.0265,
      "step": 22080
    },
    {
      "epoch": 4.24,
      "learning_rate": 0.001,
      "loss": 3.0331,
      "step": 22092
    },
    {
      "epoch": 4.24,
      "learning_rate": 0.001,
      "loss": 3.017,
      "step": 22104
    },
    {
      "epoch": 4.25,
      "learning_rate": 0.001,
      "loss": 3.0206,
      "step": 22116
    },
    {
      "epoch": 4.25,
      "learning_rate": 0.001,
      "loss": 3.0167,
      "step": 22128
    },
    {
      "epoch": 4.25,
      "learning_rate": 0.001,
      "loss": 3.0315,
      "step": 22140
    },
    {
      "epoch": 4.25,
      "learning_rate": 0.001,
      "loss": 3.0302,
      "step": 22152
    },
    {
      "epoch": 4.26,
      "learning_rate": 0.001,
      "loss": 3.0257,
      "step": 22164
    },
    {
      "epoch": 4.26,
      "learning_rate": 0.001,
      "loss": 3.0379,
      "step": 22176
    },
    {
      "epoch": 4.26,
      "learning_rate": 0.001,
      "loss": 3.0312,
      "step": 22188
    },
    {
      "epoch": 4.26,
      "learning_rate": 0.001,
      "loss": 3.0286,
      "step": 22200
    },
    {
      "epoch": 4.26,
      "learning_rate": 0.001,
      "loss": 3.0219,
      "step": 22212
    },
    {
      "epoch": 4.27,
      "learning_rate": 0.001,
      "loss": 3.0249,
      "step": 22224
    },
    {
      "epoch": 4.27,
      "learning_rate": 0.001,
      "loss": 3.0273,
      "step": 22236
    },
    {
      "epoch": 4.27,
      "learning_rate": 0.001,
      "loss": 3.0318,
      "step": 22248
    },
    {
      "epoch": 4.27,
      "learning_rate": 0.001,
      "loss": 3.0245,
      "step": 22260
    },
    {
      "epoch": 4.28,
      "learning_rate": 0.001,
      "loss": 3.0199,
      "step": 22272
    },
    {
      "epoch": 4.28,
      "learning_rate": 0.001,
      "loss": 3.0305,
      "step": 22284
    },
    {
      "epoch": 4.28,
      "learning_rate": 0.001,
      "loss": 3.0338,
      "step": 22296
    },
    {
      "epoch": 4.28,
      "learning_rate": 0.001,
      "loss": 3.0244,
      "step": 22308
    },
    {
      "epoch": 4.29,
      "learning_rate": 0.001,
      "loss": 3.0136,
      "step": 22320
    },
    {
      "epoch": 4.29,
      "learning_rate": 0.001,
      "loss": 3.0282,
      "step": 22332
    },
    {
      "epoch": 4.29,
      "learning_rate": 0.001,
      "loss": 3.0258,
      "step": 22344
    },
    {
      "epoch": 4.29,
      "learning_rate": 0.001,
      "loss": 3.028,
      "step": 22356
    },
    {
      "epoch": 4.29,
      "learning_rate": 0.001,
      "loss": 3.0239,
      "step": 22368
    },
    {
      "epoch": 4.3,
      "learning_rate": 0.001,
      "loss": 3.0295,
      "step": 22380
    },
    {
      "epoch": 4.3,
      "learning_rate": 0.001,
      "loss": 3.0172,
      "step": 22392
    },
    {
      "epoch": 4.3,
      "learning_rate": 0.001,
      "loss": 3.0118,
      "step": 22404
    },
    {
      "epoch": 4.3,
      "learning_rate": 0.001,
      "loss": 3.0217,
      "step": 22416
    },
    {
      "epoch": 4.31,
      "learning_rate": 0.001,
      "loss": 3.0167,
      "step": 22428
    },
    {
      "epoch": 4.31,
      "learning_rate": 0.001,
      "loss": 3.0256,
      "step": 22440
    },
    {
      "epoch": 4.31,
      "learning_rate": 0.001,
      "loss": 3.0411,
      "step": 22452
    },
    {
      "epoch": 4.31,
      "learning_rate": 0.001,
      "loss": 3.0303,
      "step": 22464
    },
    {
      "epoch": 4.32,
      "learning_rate": 0.001,
      "loss": 3.0146,
      "step": 22476
    },
    {
      "epoch": 4.32,
      "learning_rate": 0.001,
      "loss": 3.0345,
      "step": 22488
    },
    {
      "epoch": 4.32,
      "learning_rate": 0.001,
      "loss": 3.0236,
      "step": 22500
    },
    {
      "epoch": 4.32,
      "eval_ag_news_accuracy": 0.27275,
      "eval_ag_news_bleu_score": 3.479657594119364,
      "eval_ag_news_bleu_score_sem": 0.10953250121045172,
      "eval_ag_news_emb_cos_sim": 0.6979045867919922,
      "eval_ag_news_emb_cos_sim_sem": 0.010230112244052977,
      "eval_ag_news_emb_top1_equal": 0.203125,
      "eval_ag_news_emb_top1_equal_sem": 0.03570055125142555,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 4.114940166473389,
      "eval_ag_news_n_ngrams_match_1": 11.128,
      "eval_ag_news_n_ngrams_match_2": 2.028,
      "eval_ag_news_n_ngrams_match_3": 0.48,
      "eval_ag_news_num_pred_words": 45.164,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 61.24854943640071,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.26991540463157826,
      "eval_ag_news_runtime": 10.5735,
      "eval_ag_news_samples_per_second": 47.288,
      "eval_ag_news_steps_per_second": 0.095,
      "eval_ag_news_token_set_f1": 0.29149963746302726,
      "eval_ag_news_token_set_f1_sem": 0.004084346860474833,
      "eval_ag_news_token_set_precision": 0.2607596166810238,
      "eval_ag_news_token_set_recall": 0.35559723609736654,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 22500
    },
    {
      "epoch": 4.32,
      "eval_anthropic_toxic_prompts_accuracy": 0.091,
      "eval_anthropic_toxic_prompts_bleu_score": 2.1467354899679276,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.08375725211606826,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5755442380905151,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.0104705165508701,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02147948148198014,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.811479091644287,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.412,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.066,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.318,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.702,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 45.217269915621166,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.15658175970509802,
      "eval_anthropic_toxic_prompts_runtime": 10.7134,
      "eval_anthropic_toxic_prompts_samples_per_second": 46.671,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.093,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.2766725885325113,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005507780084356059,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.3088017380334575,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.29196960139901595,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 22500
    },
    {
      "epoch": 4.32,
      "eval_arxiv_accuracy": 0.29853125,
      "eval_arxiv_bleu_score": 3.0582062513732997,
      "eval_arxiv_bleu_score_sem": 0.09108078413523835,
      "eval_arxiv_emb_cos_sim": 0.6037775278091431,
      "eval_arxiv_emb_cos_sim_sem": 0.008646464826135881,
      "eval_arxiv_emb_top1_equal": 0.1640625,
      "eval_arxiv_emb_top1_equal_sem": 0.03286167651298939,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.9810023307800293,
      "eval_arxiv_n_ngrams_match_1": 11.232,
      "eval_arxiv_n_ngrams_match_2": 1.84,
      "eval_arxiv_n_ngrams_match_3": 0.334,
      "eval_arxiv_num_pred_words": 37.932,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 53.570702890536445,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.2680710291792464,
      "eval_arxiv_runtime": 10.3676,
      "eval_arxiv_samples_per_second": 48.227,
      "eval_arxiv_steps_per_second": 0.096,
      "eval_arxiv_token_set_f1": 0.26446141481814944,
      "eval_arxiv_token_set_f1_sem": 0.003932504418393081,
      "eval_arxiv_token_set_precision": 0.20663165098964145,
      "eval_arxiv_token_set_recall": 0.39558942508157624,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 22500
    },
    {
      "epoch": 4.32,
      "eval_python_code_alpaca_accuracy": 0.12871875,
      "eval_python_code_alpaca_bleu_score": 3.0923369481222225,
      "eval_python_code_alpaca_bleu_score_sem": 0.10502875370452475,
      "eval_python_code_alpaca_emb_cos_sim": 0.5569471120834351,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010260473121504503,
      "eval_python_code_alpaca_emb_top1_equal": 0.046875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.01875615101164758,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.567882776260376,
      "eval_python_code_alpaca_n_ngrams_match_1": 6.786,
      "eval_python_code_alpaca_n_ngrams_match_2": 1.412,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.396,
      "eval_python_code_alpaca_num_pred_words": 38.674,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 35.44147612529317,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.232692482621829,
      "eval_python_code_alpaca_runtime": 10.2045,
      "eval_python_code_alpaca_samples_per_second": 48.998,
      "eval_python_code_alpaca_steps_per_second": 0.098,
      "eval_python_code_alpaca_token_set_f1": 0.3519745599321546,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005175825417515168,
      "eval_python_code_alpaca_token_set_precision": 0.35034291983209737,
      "eval_python_code_alpaca_token_set_recall": 0.38106866854812355,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 22500
    },
    {
      "epoch": 4.32,
      "eval_wikibio_accuracy": 0.2784375,
      "eval_wikibio_bleu_score": 4.851032126013448,
      "eval_wikibio_bleu_score_sem": 0.16954693795931916,
      "eval_wikibio_emb_cos_sim": 0.6652008891105652,
      "eval_wikibio_emb_cos_sim_sem": 0.010656725278858277,
      "eval_wikibio_emb_top1_equal": 0.125,
      "eval_wikibio_emb_top1_equal_sem": 0.02934655822437397,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.453147888183594,
      "eval_wikibio_n_ngrams_match_1": 9.05,
      "eval_wikibio_n_ngrams_match_2": 2.796,
      "eval_wikibio_n_ngrams_match_3": 0.932,
      "eval_wikibio_num_pred_words": 36.74,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 85.89691274019133,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3030333520574503,
      "eval_wikibio_runtime": 10.2186,
      "eval_wikibio_samples_per_second": 48.93,
      "eval_wikibio_steps_per_second": 0.098,
      "eval_wikibio_token_set_f1": 0.2890650687520406,
      "eval_wikibio_token_set_f1_sem": 0.005533575726017881,
      "eval_wikibio_token_set_precision": 0.2931095583767006,
      "eval_wikibio_token_set_recall": 0.30019058243190055,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 22500
    },
    {
      "epoch": 4.32,
      "eval_nq_accuracy": 0.46415625,
      "eval_nq_bleu_score": 8.012690074462503,
      "eval_nq_bleu_score_sem": 0.36003050024183963,
      "eval_nq_emb_cos_sim": 0.7436613440513611,
      "eval_nq_emb_cos_sim_sem": 0.009756727402657218,
      "eval_nq_emb_top1_equal": 0.140625,
      "eval_nq_emb_top1_equal_sem": 0.030847557647994725,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.6690025329589844,
      "eval_nq_n_ngrams_match_1": 19.476,
      "eval_nq_n_ngrams_match_2": 6.082,
      "eval_nq_n_ngrams_match_3": 2.396,
      "eval_nq_num_pred_words": 48.134,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 14.425572980527024,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.3703957760453091,
      "eval_nq_runtime": 10.4749,
      "eval_nq_samples_per_second": 47.733,
      "eval_nq_steps_per_second": 0.095,
      "eval_nq_token_set_f1": 0.39134056474466017,
      "eval_nq_token_set_f1_sem": 0.00496001197015542,
      "eval_nq_token_set_precision": 0.33770094633683134,
      "eval_nq_token_set_recall": 0.4800431626741018,
      "eval_nq_true_num_tokens": 64.0,
      "step": 22500
    },
    {
      "epoch": 4.32,
      "learning_rate": 0.001,
      "loss": 3.0399,
      "step": 22512
    },
    {
      "epoch": 4.32,
      "learning_rate": 0.001,
      "loss": 3.0207,
      "step": 22524
    },
    {
      "epoch": 4.33,
      "learning_rate": 0.001,
      "loss": 3.0294,
      "step": 22536
    },
    {
      "epoch": 4.33,
      "learning_rate": 0.001,
      "loss": 3.0149,
      "step": 22548
    },
    {
      "epoch": 4.33,
      "learning_rate": 0.001,
      "loss": 3.032,
      "step": 22560
    },
    {
      "epoch": 4.33,
      "learning_rate": 0.001,
      "loss": 3.0277,
      "step": 22572
    },
    {
      "epoch": 4.34,
      "learning_rate": 0.001,
      "loss": 3.0223,
      "step": 22584
    },
    {
      "epoch": 4.34,
      "learning_rate": 0.001,
      "loss": 3.026,
      "step": 22596
    },
    {
      "epoch": 4.34,
      "learning_rate": 0.001,
      "loss": 3.0252,
      "step": 22608
    },
    {
      "epoch": 4.34,
      "learning_rate": 0.001,
      "loss": 3.0221,
      "step": 22620
    },
    {
      "epoch": 4.35,
      "learning_rate": 0.001,
      "loss": 3.0218,
      "step": 22632
    },
    {
      "epoch": 4.35,
      "learning_rate": 0.001,
      "loss": 3.029,
      "step": 22644
    },
    {
      "epoch": 4.35,
      "learning_rate": 0.001,
      "loss": 3.0148,
      "step": 22656
    },
    {
      "epoch": 4.35,
      "learning_rate": 0.001,
      "loss": 3.0192,
      "step": 22668
    },
    {
      "epoch": 4.35,
      "learning_rate": 0.001,
      "loss": 3.0275,
      "step": 22680
    },
    {
      "epoch": 4.36,
      "learning_rate": 0.001,
      "loss": 3.0207,
      "step": 22692
    },
    {
      "epoch": 4.36,
      "learning_rate": 0.001,
      "loss": 3.0241,
      "step": 22704
    },
    {
      "epoch": 4.36,
      "learning_rate": 0.001,
      "loss": 3.0322,
      "step": 22716
    },
    {
      "epoch": 4.36,
      "learning_rate": 0.001,
      "loss": 3.0211,
      "step": 22728
    },
    {
      "epoch": 4.37,
      "learning_rate": 0.001,
      "loss": 3.0285,
      "step": 22740
    },
    {
      "epoch": 4.37,
      "learning_rate": 0.001,
      "loss": 3.0168,
      "step": 22752
    },
    {
      "epoch": 4.37,
      "learning_rate": 0.001,
      "loss": 3.0208,
      "step": 22764
    },
    {
      "epoch": 4.37,
      "learning_rate": 0.001,
      "loss": 3.0253,
      "step": 22776
    },
    {
      "epoch": 4.38,
      "learning_rate": 0.001,
      "loss": 3.0054,
      "step": 22788
    },
    {
      "epoch": 4.38,
      "learning_rate": 0.001,
      "loss": 3.0273,
      "step": 22800
    },
    {
      "epoch": 4.38,
      "learning_rate": 0.001,
      "loss": 3.0118,
      "step": 22812
    },
    {
      "epoch": 4.38,
      "learning_rate": 0.001,
      "loss": 3.0144,
      "step": 22824
    },
    {
      "epoch": 4.38,
      "learning_rate": 0.001,
      "loss": 3.0147,
      "step": 22836
    },
    {
      "epoch": 4.39,
      "learning_rate": 0.001,
      "loss": 3.0187,
      "step": 22848
    },
    {
      "epoch": 4.39,
      "learning_rate": 0.001,
      "loss": 3.0337,
      "step": 22860
    },
    {
      "epoch": 4.39,
      "learning_rate": 0.001,
      "loss": 3.0271,
      "step": 22872
    },
    {
      "epoch": 4.39,
      "learning_rate": 0.001,
      "loss": 3.0217,
      "step": 22884
    },
    {
      "epoch": 4.4,
      "learning_rate": 0.001,
      "loss": 3.0146,
      "step": 22896
    },
    {
      "epoch": 4.4,
      "learning_rate": 0.001,
      "loss": 3.0197,
      "step": 22908
    },
    {
      "epoch": 4.4,
      "learning_rate": 0.001,
      "loss": 3.0147,
      "step": 22920
    },
    {
      "epoch": 4.4,
      "learning_rate": 0.001,
      "loss": 3.0297,
      "step": 22932
    },
    {
      "epoch": 4.41,
      "learning_rate": 0.001,
      "loss": 3.0136,
      "step": 22944
    },
    {
      "epoch": 4.41,
      "learning_rate": 0.001,
      "loss": 3.0322,
      "step": 22956
    },
    {
      "epoch": 4.41,
      "learning_rate": 0.001,
      "loss": 3.0137,
      "step": 22968
    },
    {
      "epoch": 4.41,
      "learning_rate": 0.001,
      "loss": 3.0146,
      "step": 22980
    },
    {
      "epoch": 4.41,
      "learning_rate": 0.001,
      "loss": 3.0311,
      "step": 22992
    },
    {
      "epoch": 4.42,
      "learning_rate": 0.001,
      "loss": 3.0246,
      "step": 23004
    },
    {
      "epoch": 4.42,
      "learning_rate": 0.001,
      "loss": 3.0264,
      "step": 23016
    },
    {
      "epoch": 4.42,
      "learning_rate": 0.001,
      "loss": 3.0253,
      "step": 23028
    },
    {
      "epoch": 4.42,
      "learning_rate": 0.001,
      "loss": 3.0061,
      "step": 23040
    },
    {
      "epoch": 4.43,
      "learning_rate": 0.001,
      "loss": 3.0181,
      "step": 23052
    },
    {
      "epoch": 4.43,
      "learning_rate": 0.001,
      "loss": 3.0234,
      "step": 23064
    },
    {
      "epoch": 4.43,
      "learning_rate": 0.001,
      "loss": 3.0178,
      "step": 23076
    },
    {
      "epoch": 4.43,
      "learning_rate": 0.001,
      "loss": 3.0152,
      "step": 23088
    },
    {
      "epoch": 4.44,
      "learning_rate": 0.001,
      "loss": 3.0074,
      "step": 23100
    },
    {
      "epoch": 4.44,
      "learning_rate": 0.001,
      "loss": 3.0077,
      "step": 23112
    },
    {
      "epoch": 4.44,
      "learning_rate": 0.001,
      "loss": 3.0109,
      "step": 23124
    },
    {
      "epoch": 4.44,
      "eval_ag_news_accuracy": 0.27584375,
      "eval_ag_news_bleu_score": 3.5716758476163055,
      "eval_ag_news_bleu_score_sem": 0.11865045689571037,
      "eval_ag_news_emb_cos_sim": 0.7046859860420227,
      "eval_ag_news_emb_cos_sim_sem": 0.010416290678993786,
      "eval_ag_news_emb_top1_equal": 0.2109375,
      "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 4.088851451873779,
      "eval_ag_news_n_ngrams_match_1": 11.364,
      "eval_ag_news_n_ngrams_match_2": 2.094,
      "eval_ag_news_n_ngrams_match_3": 0.508,
      "eval_ag_news_num_pred_words": 45.782,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 59.67131695170793,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.2746066976798046,
      "eval_ag_news_runtime": 10.4302,
      "eval_ag_news_samples_per_second": 47.938,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.2950589386254385,
      "eval_ag_news_token_set_f1_sem": 0.004184535708399377,
      "eval_ag_news_token_set_precision": 0.2623980523801081,
      "eval_ag_news_token_set_recall": 0.3553461423306421,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 23125
    },
    {
      "epoch": 4.44,
      "eval_anthropic_toxic_prompts_accuracy": 0.09171875,
      "eval_anthropic_toxic_prompts_bleu_score": 2.041813598444441,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.07717302756597874,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5595167875289917,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011354339915410596,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0546875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.020175758285348722,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.78218936920166,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.55,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.098,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.296,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.714,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 43.91207631710553,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.1570886418900885,
      "eval_anthropic_toxic_prompts_runtime": 10.181,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.111,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.098,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.28102924922991435,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005959128838424839,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.31281680572408405,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.29645718755402817,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 23125
    },
    {
      "epoch": 4.44,
      "eval_arxiv_accuracy": 0.30075,
      "eval_arxiv_bleu_score": 3.040229451995313,
      "eval_arxiv_bleu_score_sem": 0.0907469743646014,
      "eval_arxiv_emb_cos_sim": 0.5961027145385742,
      "eval_arxiv_emb_cos_sim_sem": 0.00931088223404079,
      "eval_arxiv_emb_top1_equal": 0.2421875,
      "eval_arxiv_emb_top1_equal_sem": 0.038014990119662626,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.9455220699310303,
      "eval_arxiv_n_ngrams_match_1": 11.344,
      "eval_arxiv_n_ngrams_match_2": 1.888,
      "eval_arxiv_n_ngrams_match_3": 0.322,
      "eval_arxiv_num_pred_words": 38.826,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 51.703323817980994,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.2663048317626505,
      "eval_arxiv_runtime": 13.7247,
      "eval_arxiv_samples_per_second": 36.431,
      "eval_arxiv_steps_per_second": 0.073,
      "eval_arxiv_token_set_f1": 0.27179114807822197,
      "eval_arxiv_token_set_f1_sem": 0.004030174693569544,
      "eval_arxiv_token_set_precision": 0.21050964701086558,
      "eval_arxiv_token_set_recall": 0.4164400239696585,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 23125
    },
    {
      "epoch": 4.44,
      "eval_python_code_alpaca_accuracy": 0.126625,
      "eval_python_code_alpaca_bleu_score": 3.100755500958702,
      "eval_python_code_alpaca_bleu_score_sem": 0.09742404613250283,
      "eval_python_code_alpaca_emb_cos_sim": 0.5470049381256104,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009650884153700712,
      "eval_python_code_alpaca_emb_top1_equal": 0.0625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02147948148198014,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.6031405925750732,
      "eval_python_code_alpaca_n_ngrams_match_1": 6.7,
      "eval_python_code_alpaca_n_ngrams_match_2": 1.444,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.382,
      "eval_python_code_alpaca_num_pred_words": 38.786,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 36.71335526621004,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.2311987525560811,
      "eval_python_code_alpaca_runtime": 9.9326,
      "eval_python_code_alpaca_samples_per_second": 50.339,
      "eval_python_code_alpaca_steps_per_second": 0.101,
      "eval_python_code_alpaca_token_set_f1": 0.35653511157681234,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005007514582943103,
      "eval_python_code_alpaca_token_set_precision": 0.3500110018012393,
      "eval_python_code_alpaca_token_set_recall": 0.3983983015540674,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 23125
    },
    {
      "epoch": 4.44,
      "eval_wikibio_accuracy": 0.2800625,
      "eval_wikibio_bleu_score": 5.066281975791866,
      "eval_wikibio_bleu_score_sem": 0.19559487236539871,
      "eval_wikibio_emb_cos_sim": 0.6641095280647278,
      "eval_wikibio_emb_cos_sim_sem": 0.011152282655293307,
      "eval_wikibio_emb_top1_equal": 0.0703125,
      "eval_wikibio_emb_top1_equal_sem": 0.022687306110270106,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.403389930725098,
      "eval_wikibio_n_ngrams_match_1": 9.242,
      "eval_wikibio_n_ngrams_match_2": 2.836,
      "eval_wikibio_n_ngrams_match_3": 0.962,
      "eval_wikibio_num_pred_words": 36.508,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 81.72744999815691,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.31267778558798526,
      "eval_wikibio_runtime": 10.3121,
      "eval_wikibio_samples_per_second": 48.487,
      "eval_wikibio_steps_per_second": 0.097,
      "eval_wikibio_token_set_f1": 0.29345869824051796,
      "eval_wikibio_token_set_f1_sem": 0.005511322827209962,
      "eval_wikibio_token_set_precision": 0.29730328149697105,
      "eval_wikibio_token_set_recall": 0.305075295519826,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 23125
    },
    {
      "epoch": 4.44,
      "eval_nq_accuracy": 0.46690625,
      "eval_nq_bleu_score": 8.289383059373723,
      "eval_nq_bleu_score_sem": 0.3546366116227681,
      "eval_nq_emb_cos_sim": 0.7408619523048401,
      "eval_nq_emb_cos_sim_sem": 0.01016624396349078,
      "eval_nq_emb_top1_equal": 0.2421875,
      "eval_nq_emb_top1_equal_sem": 0.038014990119662626,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.6497559547424316,
      "eval_nq_n_ngrams_match_1": 19.884,
      "eval_nq_n_ngrams_match_2": 6.282,
      "eval_nq_n_ngrams_match_3": 2.506,
      "eval_nq_num_pred_words": 48.782,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 14.150584840828412,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.37610081675690465,
      "eval_nq_runtime": 10.4976,
      "eval_nq_samples_per_second": 47.63,
      "eval_nq_steps_per_second": 0.095,
      "eval_nq_token_set_f1": 0.4011719137086193,
      "eval_nq_token_set_f1_sem": 0.0048304294110281375,
      "eval_nq_token_set_precision": 0.3474386134157962,
      "eval_nq_token_set_recall": 0.4929625483803897,
      "eval_nq_true_num_tokens": 64.0,
      "step": 23125
    },
    {
      "epoch": 4.44,
      "learning_rate": 0.001,
      "loss": 3.0158,
      "step": 23136
    },
    {
      "epoch": 4.44,
      "learning_rate": 0.001,
      "loss": 3.0107,
      "step": 23148
    },
    {
      "epoch": 4.45,
      "learning_rate": 0.001,
      "loss": 3.0186,
      "step": 23160
    },
    {
      "epoch": 4.45,
      "learning_rate": 0.001,
      "loss": 3.018,
      "step": 23172
    },
    {
      "epoch": 4.45,
      "learning_rate": 0.001,
      "loss": 3.0184,
      "step": 23184
    },
    {
      "epoch": 4.45,
      "learning_rate": 0.001,
      "loss": 3.0122,
      "step": 23196
    },
    {
      "epoch": 4.46,
      "learning_rate": 0.001,
      "loss": 3.0046,
      "step": 23208
    },
    {
      "epoch": 4.46,
      "learning_rate": 0.001,
      "loss": 3.0081,
      "step": 23220
    },
    {
      "epoch": 4.46,
      "learning_rate": 0.001,
      "loss": 3.0036,
      "step": 23232
    },
    {
      "epoch": 4.46,
      "learning_rate": 0.001,
      "loss": 2.9979,
      "step": 23244
    },
    {
      "epoch": 4.47,
      "learning_rate": 0.001,
      "loss": 3.0157,
      "step": 23256
    },
    {
      "epoch": 4.47,
      "learning_rate": 0.001,
      "loss": 2.995,
      "step": 23268
    },
    {
      "epoch": 4.47,
      "learning_rate": 0.001,
      "loss": 3.0125,
      "step": 23280
    },
    {
      "epoch": 4.47,
      "learning_rate": 0.001,
      "loss": 3.0147,
      "step": 23292
    },
    {
      "epoch": 4.47,
      "learning_rate": 0.001,
      "loss": 3.0167,
      "step": 23304
    },
    {
      "epoch": 4.48,
      "learning_rate": 0.001,
      "loss": 3.0188,
      "step": 23316
    },
    {
      "epoch": 4.48,
      "learning_rate": 0.001,
      "loss": 3.024,
      "step": 23328
    },
    {
      "epoch": 4.48,
      "learning_rate": 0.001,
      "loss": 3.0122,
      "step": 23340
    },
    {
      "epoch": 4.48,
      "learning_rate": 0.001,
      "loss": 3.0071,
      "step": 23352
    },
    {
      "epoch": 4.49,
      "learning_rate": 0.001,
      "loss": 3.0132,
      "step": 23364
    },
    {
      "epoch": 4.49,
      "learning_rate": 0.001,
      "loss": 3.013,
      "step": 23376
    },
    {
      "epoch": 4.49,
      "learning_rate": 0.001,
      "loss": 3.0137,
      "step": 23388
    },
    {
      "epoch": 4.49,
      "learning_rate": 0.001,
      "loss": 3.0051,
      "step": 23400
    },
    {
      "epoch": 4.5,
      "learning_rate": 0.001,
      "loss": 3.0003,
      "step": 23412
    },
    {
      "epoch": 4.5,
      "learning_rate": 0.001,
      "loss": 3.0071,
      "step": 23424
    },
    {
      "epoch": 4.5,
      "learning_rate": 0.001,
      "loss": 3.0141,
      "step": 23436
    },
    {
      "epoch": 4.5,
      "learning_rate": 0.001,
      "loss": 3.0149,
      "step": 23448
    },
    {
      "epoch": 4.5,
      "learning_rate": 0.001,
      "loss": 3.0186,
      "step": 23460
    },
    {
      "epoch": 4.51,
      "learning_rate": 0.001,
      "loss": 3.0047,
      "step": 23472
    },
    {
      "epoch": 4.51,
      "learning_rate": 0.001,
      "loss": 3.012,
      "step": 23484
    },
    {
      "epoch": 4.51,
      "learning_rate": 0.001,
      "loss": 3.0037,
      "step": 23496
    },
    {
      "epoch": 4.51,
      "learning_rate": 0.001,
      "loss": 3.0127,
      "step": 23508
    },
    {
      "epoch": 4.52,
      "learning_rate": 0.001,
      "loss": 3.0153,
      "step": 23520
    },
    {
      "epoch": 4.52,
      "learning_rate": 0.001,
      "loss": 2.9896,
      "step": 23532
    },
    {
      "epoch": 4.52,
      "learning_rate": 0.001,
      "loss": 3.0096,
      "step": 23544
    },
    {
      "epoch": 4.52,
      "learning_rate": 0.001,
      "loss": 3.0086,
      "step": 23556
    },
    {
      "epoch": 4.53,
      "learning_rate": 0.001,
      "loss": 3.0142,
      "step": 23568
    },
    {
      "epoch": 4.53,
      "learning_rate": 0.001,
      "loss": 3.0054,
      "step": 23580
    },
    {
      "epoch": 4.53,
      "learning_rate": 0.001,
      "loss": 3.0078,
      "step": 23592
    },
    {
      "epoch": 4.53,
      "learning_rate": 0.001,
      "loss": 3.0056,
      "step": 23604
    },
    {
      "epoch": 4.53,
      "learning_rate": 0.001,
      "loss": 3.0137,
      "step": 23616
    },
    {
      "epoch": 4.54,
      "learning_rate": 0.001,
      "loss": 3.01,
      "step": 23628
    },
    {
      "epoch": 4.54,
      "learning_rate": 0.001,
      "loss": 3.0035,
      "step": 23640
    },
    {
      "epoch": 4.54,
      "learning_rate": 0.001,
      "loss": 3.0047,
      "step": 23652
    },
    {
      "epoch": 4.54,
      "learning_rate": 0.001,
      "loss": 2.9867,
      "step": 23664
    },
    {
      "epoch": 4.55,
      "learning_rate": 0.001,
      "loss": 3.0107,
      "step": 23676
    },
    {
      "epoch": 4.55,
      "learning_rate": 0.001,
      "loss": 3.0122,
      "step": 23688
    },
    {
      "epoch": 4.55,
      "learning_rate": 0.001,
      "loss": 3.0137,
      "step": 23700
    },
    {
      "epoch": 4.55,
      "learning_rate": 0.001,
      "loss": 3.0156,
      "step": 23712
    },
    {
      "epoch": 4.56,
      "learning_rate": 0.001,
      "loss": 3.0039,
      "step": 23724
    },
    {
      "epoch": 4.56,
      "learning_rate": 0.001,
      "loss": 3.0087,
      "step": 23736
    },
    {
      "epoch": 4.56,
      "learning_rate": 0.001,
      "loss": 3.0042,
      "step": 23748
    },
    {
      "epoch": 4.56,
      "eval_ag_news_accuracy": 0.275875,
      "eval_ag_news_bleu_score": 3.5397518539873793,
      "eval_ag_news_bleu_score_sem": 0.12067641621710735,
      "eval_ag_news_emb_cos_sim": 0.6891865134239197,
      "eval_ag_news_emb_cos_sim_sem": 0.012085329587627478,
      "eval_ag_news_emb_top1_equal": 0.15625,
      "eval_ag_news_emb_top1_equal_sem": 0.03221922156442571,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 4.0836076736450195,
      "eval_ag_news_n_ngrams_match_1": 11.06,
      "eval_ag_news_n_ngrams_match_2": 2.022,
      "eval_ag_news_n_ngrams_match_3": 0.504,
      "eval_ag_news_num_pred_words": 45.242,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 59.35923276424905,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.26684539389394224,
      "eval_ag_news_runtime": 10.4582,
      "eval_ag_news_samples_per_second": 47.809,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.2902814941135996,
      "eval_ag_news_token_set_f1_sem": 0.0043703756993541,
      "eval_ag_news_token_set_precision": 0.25647585702989034,
      "eval_ag_news_token_set_recall": 0.3601458055025252,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 23750
    },
    {
      "epoch": 4.56,
      "eval_anthropic_toxic_prompts_accuracy": 0.094,
      "eval_anthropic_toxic_prompts_bleu_score": 2.193721829876199,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.08145666558247794,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5685354471206665,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011214567794858311,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02147948148198014,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.7131049633026123,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.572,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.132,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.344,
      "eval_anthropic_toxic_prompts_num_pred_words": 45.96,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 40.980853232638694,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.16141705535505432,
      "eval_anthropic_toxic_prompts_runtime": 9.789,
      "eval_anthropic_toxic_prompts_samples_per_second": 51.078,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.102,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.28932568749256143,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00625910267907374,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.314695707907464,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.30503461660418046,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 23750
    },
    {
      "epoch": 4.56,
      "eval_arxiv_accuracy": 0.3011875,
      "eval_arxiv_bleu_score": 3.011845975896167,
      "eval_arxiv_bleu_score_sem": 0.08987892206460524,
      "eval_arxiv_emb_cos_sim": 0.613021969795227,
      "eval_arxiv_emb_cos_sim_sem": 0.008598133586150521,
      "eval_arxiv_emb_top1_equal": 0.171875,
      "eval_arxiv_emb_top1_equal_sem": 0.03347745514062371,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.9512102603912354,
      "eval_arxiv_n_ngrams_match_1": 11.304,
      "eval_arxiv_n_ngrams_match_2": 1.814,
      "eval_arxiv_n_ngrams_match_3": 0.324,
      "eval_arxiv_num_pred_words": 38.038,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 51.9982602032154,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.2663286013534136,
      "eval_arxiv_runtime": 10.6499,
      "eval_arxiv_samples_per_second": 46.949,
      "eval_arxiv_steps_per_second": 0.094,
      "eval_arxiv_token_set_f1": 0.2689519282328701,
      "eval_arxiv_token_set_f1_sem": 0.00411081549883377,
      "eval_arxiv_token_set_precision": 0.21111625711114063,
      "eval_arxiv_token_set_recall": 0.39833791665110174,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 23750
    },
    {
      "epoch": 4.56,
      "eval_python_code_alpaca_accuracy": 0.12703125,
      "eval_python_code_alpaca_bleu_score": 3.0343890236566384,
      "eval_python_code_alpaca_bleu_score_sem": 0.09849310413209293,
      "eval_python_code_alpaca_emb_cos_sim": 0.54156094789505,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010395470794373914,
      "eval_python_code_alpaca_emb_top1_equal": 0.0390625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.017191973462108996,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.565164089202881,
      "eval_python_code_alpaca_n_ngrams_match_1": 6.524,
      "eval_python_code_alpaca_n_ngrams_match_2": 1.414,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.394,
      "eval_python_code_alpaca_num_pred_words": 38.316,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 35.345252702807166,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.22337682857220353,
      "eval_python_code_alpaca_runtime": 9.6829,
      "eval_python_code_alpaca_samples_per_second": 51.638,
      "eval_python_code_alpaca_steps_per_second": 0.103,
      "eval_python_code_alpaca_token_set_f1": 0.3470054609207911,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005621856247773359,
      "eval_python_code_alpaca_token_set_precision": 0.33591384926898127,
      "eval_python_code_alpaca_token_set_recall": 0.3999205686455455,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 23750
    },
    {
      "epoch": 4.56,
      "eval_wikibio_accuracy": 0.279125,
      "eval_wikibio_bleu_score": 4.8945065719545875,
      "eval_wikibio_bleu_score_sem": 0.19082108832708544,
      "eval_wikibio_emb_cos_sim": 0.6529830694198608,
      "eval_wikibio_emb_cos_sim_sem": 0.01165056377181335,
      "eval_wikibio_emb_top1_equal": 0.1171875,
      "eval_wikibio_emb_top1_equal_sem": 0.02854125312152025,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.377610206604004,
      "eval_wikibio_n_ngrams_match_1": 8.778,
      "eval_wikibio_n_ngrams_match_2": 2.776,
      "eval_wikibio_n_ngrams_match_3": 0.95,
      "eval_wikibio_num_pred_words": 35.324,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 79.64746480059652,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.2988186803748457,
      "eval_wikibio_runtime": 9.9426,
      "eval_wikibio_samples_per_second": 50.289,
      "eval_wikibio_steps_per_second": 0.101,
      "eval_wikibio_token_set_f1": 0.28439968007820415,
      "eval_wikibio_token_set_f1_sem": 0.005827573427971639,
      "eval_wikibio_token_set_precision": 0.28420933076874544,
      "eval_wikibio_token_set_recall": 0.3034326327683142,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 23750
    },
    {
      "epoch": 4.56,
      "eval_nq_accuracy": 0.46890625,
      "eval_nq_bleu_score": 8.609746563204862,
      "eval_nq_bleu_score_sem": 0.38085086835226667,
      "eval_nq_emb_cos_sim": 0.739669919013977,
      "eval_nq_emb_cos_sim_sem": 0.00985908234135494,
      "eval_nq_emb_top1_equal": 0.2265625,
      "eval_nq_emb_top1_equal_sem": 0.03714537682851538,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.642179250717163,
      "eval_nq_n_ngrams_match_1": 19.79,
      "eval_nq_n_ngrams_match_2": 6.48,
      "eval_nq_n_ngrams_match_3": 2.642,
      "eval_nq_num_pred_words": 48.38,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 14.043775191219092,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.37494113202813445,
      "eval_nq_runtime": 10.3068,
      "eval_nq_samples_per_second": 48.512,
      "eval_nq_steps_per_second": 0.097,
      "eval_nq_token_set_f1": 0.40122709431653997,
      "eval_nq_token_set_f1_sem": 0.0048887649521433645,
      "eval_nq_token_set_precision": 0.3444154276079394,
      "eval_nq_token_set_recall": 0.4984478763313491,
      "eval_nq_true_num_tokens": 64.0,
      "step": 23750
    },
    {
      "epoch": 4.56,
      "learning_rate": 0.001,
      "loss": 3.0077,
      "step": 23760
    },
    {
      "epoch": 4.56,
      "learning_rate": 0.001,
      "loss": 3.0059,
      "step": 23772
    },
    {
      "epoch": 4.57,
      "learning_rate": 0.001,
      "loss": 3.0079,
      "step": 23784
    },
    {
      "epoch": 4.57,
      "learning_rate": 0.001,
      "loss": 2.998,
      "step": 23796
    },
    {
      "epoch": 4.57,
      "learning_rate": 0.001,
      "loss": 2.9974,
      "step": 23808
    },
    {
      "epoch": 4.57,
      "learning_rate": 0.001,
      "loss": 2.9964,
      "step": 23820
    },
    {
      "epoch": 4.58,
      "learning_rate": 0.001,
      "loss": 3.0073,
      "step": 23832
    },
    {
      "epoch": 4.58,
      "learning_rate": 0.001,
      "loss": 3.0082,
      "step": 23844
    },
    {
      "epoch": 4.58,
      "learning_rate": 0.001,
      "loss": 3.0082,
      "step": 23856
    },
    {
      "epoch": 4.58,
      "learning_rate": 0.001,
      "loss": 3.0003,
      "step": 23868
    },
    {
      "epoch": 4.59,
      "learning_rate": 0.001,
      "loss": 2.9925,
      "step": 23880
    },
    {
      "epoch": 4.59,
      "learning_rate": 0.001,
      "loss": 2.9918,
      "step": 23892
    },
    {
      "epoch": 4.59,
      "learning_rate": 0.001,
      "loss": 2.9889,
      "step": 23904
    },
    {
      "epoch": 4.59,
      "learning_rate": 0.001,
      "loss": 3.0067,
      "step": 23916
    },
    {
      "epoch": 4.59,
      "learning_rate": 0.001,
      "loss": 2.9942,
      "step": 23928
    },
    {
      "epoch": 4.6,
      "learning_rate": 0.001,
      "loss": 3.0069,
      "step": 23940
    },
    {
      "epoch": 4.6,
      "learning_rate": 0.001,
      "loss": 3.0039,
      "step": 23952
    },
    {
      "epoch": 4.6,
      "learning_rate": 0.001,
      "loss": 2.9926,
      "step": 23964
    },
    {
      "epoch": 4.6,
      "learning_rate": 0.001,
      "loss": 3.0109,
      "step": 23976
    },
    {
      "epoch": 4.61,
      "learning_rate": 0.001,
      "loss": 3.0086,
      "step": 23988
    },
    {
      "epoch": 4.61,
      "learning_rate": 0.001,
      "loss": 2.994,
      "step": 24000
    },
    {
      "epoch": 4.61,
      "learning_rate": 0.001,
      "loss": 3.0047,
      "step": 24012
    },
    {
      "epoch": 4.61,
      "learning_rate": 0.001,
      "loss": 2.9932,
      "step": 24024
    },
    {
      "epoch": 4.62,
      "learning_rate": 0.001,
      "loss": 3.0083,
      "step": 24036
    },
    {
      "epoch": 4.62,
      "learning_rate": 0.001,
      "loss": 3.0119,
      "step": 24048
    },
    {
      "epoch": 4.62,
      "learning_rate": 0.001,
      "loss": 3.0039,
      "step": 24060
    },
    {
      "epoch": 4.62,
      "learning_rate": 0.001,
      "loss": 3.0,
      "step": 24072
    },
    {
      "epoch": 4.62,
      "learning_rate": 0.001,
      "loss": 3.0063,
      "step": 24084
    },
    {
      "epoch": 4.63,
      "learning_rate": 0.001,
      "loss": 2.9986,
      "step": 24096
    },
    {
      "epoch": 4.63,
      "learning_rate": 0.001,
      "loss": 3.003,
      "step": 24108
    },
    {
      "epoch": 4.63,
      "learning_rate": 0.001,
      "loss": 2.9984,
      "step": 24120
    },
    {
      "epoch": 4.63,
      "learning_rate": 0.001,
      "loss": 2.9971,
      "step": 24132
    },
    {
      "epoch": 4.64,
      "learning_rate": 0.001,
      "loss": 2.9948,
      "step": 24144
    },
    {
      "epoch": 4.64,
      "learning_rate": 0.001,
      "loss": 2.988,
      "step": 24156
    },
    {
      "epoch": 4.64,
      "learning_rate": 0.001,
      "loss": 2.9984,
      "step": 24168
    },
    {
      "epoch": 4.64,
      "learning_rate": 0.001,
      "loss": 3.0062,
      "step": 24180
    },
    {
      "epoch": 4.65,
      "learning_rate": 0.001,
      "loss": 3.0138,
      "step": 24192
    },
    {
      "epoch": 4.65,
      "learning_rate": 0.001,
      "loss": 3.0036,
      "step": 24204
    },
    {
      "epoch": 4.65,
      "learning_rate": 0.001,
      "loss": 2.9957,
      "step": 24216
    },
    {
      "epoch": 4.65,
      "learning_rate": 0.001,
      "loss": 2.9934,
      "step": 24228
    },
    {
      "epoch": 4.65,
      "learning_rate": 0.001,
      "loss": 2.99,
      "step": 24240
    },
    {
      "epoch": 4.66,
      "learning_rate": 0.001,
      "loss": 3.0084,
      "step": 24252
    },
    {
      "epoch": 4.66,
      "learning_rate": 0.001,
      "loss": 2.9995,
      "step": 24264
    },
    {
      "epoch": 4.66,
      "learning_rate": 0.001,
      "loss": 2.9944,
      "step": 24276
    },
    {
      "epoch": 4.66,
      "learning_rate": 0.001,
      "loss": 3.003,
      "step": 24288
    },
    {
      "epoch": 4.67,
      "learning_rate": 0.001,
      "loss": 2.9999,
      "step": 24300
    },
    {
      "epoch": 4.67,
      "learning_rate": 0.001,
      "loss": 2.9963,
      "step": 24312
    },
    {
      "epoch": 4.67,
      "learning_rate": 0.001,
      "loss": 2.9861,
      "step": 24324
    },
    {
      "epoch": 4.67,
      "learning_rate": 0.001,
      "loss": 2.9948,
      "step": 24336
    },
    {
      "epoch": 4.68,
      "learning_rate": 0.001,
      "loss": 2.9936,
      "step": 24348
    },
    {
      "epoch": 4.68,
      "learning_rate": 0.001,
      "loss": 3.0048,
      "step": 24360
    },
    {
      "epoch": 4.68,
      "learning_rate": 0.001,
      "loss": 2.9855,
      "step": 24372
    },
    {
      "epoch": 4.68,
      "eval_ag_news_accuracy": 0.27665625,
      "eval_ag_news_bleu_score": 3.644798243689116,
      "eval_ag_news_bleu_score_sem": 0.1299041355323708,
      "eval_ag_news_emb_cos_sim": 0.6989701986312866,
      "eval_ag_news_emb_cos_sim_sem": 0.010000469213040798,
      "eval_ag_news_emb_top1_equal": 0.1171875,
      "eval_ag_news_emb_top1_equal_sem": 0.02854125312152025,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 4.0787811279296875,
      "eval_ag_news_n_ngrams_match_1": 11.234,
      "eval_ag_news_n_ngrams_match_2": 2.106,
      "eval_ag_news_n_ngrams_match_3": 0.546,
      "eval_ag_news_num_pred_words": 44.924,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 59.07342300545964,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.27277121295924545,
      "eval_ag_news_runtime": 10.424,
      "eval_ag_news_samples_per_second": 47.966,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.29059431810262737,
      "eval_ag_news_token_set_f1_sem": 0.004478048907414098,
      "eval_ag_news_token_set_precision": 0.2588923037101447,
      "eval_ag_news_token_set_recall": 0.35042289837613494,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 24375
    },
    {
      "epoch": 4.68,
      "eval_anthropic_toxic_prompts_accuracy": 0.09340625,
      "eval_anthropic_toxic_prompts_bleu_score": 2.1054566808337136,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.08245272589551196,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5708047747612,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011311717597764939,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02147948148198014,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.742683172225952,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.5,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.12,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.326,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.772,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 42.21109799901625,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.1562143835309157,
      "eval_anthropic_toxic_prompts_runtime": 10.9041,
      "eval_anthropic_toxic_prompts_samples_per_second": 45.854,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.092,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.2862036345018115,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005856753564749602,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.3087704328041896,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.30892651621646117,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 24375
    },
    {
      "epoch": 4.68,
      "eval_arxiv_accuracy": 0.30275,
      "eval_arxiv_bleu_score": 3.1402423639370554,
      "eval_arxiv_bleu_score_sem": 0.09006107100832386,
      "eval_arxiv_emb_cos_sim": 0.619462251663208,
      "eval_arxiv_emb_cos_sim_sem": 0.008330594611426477,
      "eval_arxiv_emb_top1_equal": 0.1953125,
      "eval_arxiv_emb_top1_equal_sem": 0.035178457165496856,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.937114953994751,
      "eval_arxiv_n_ngrams_match_1": 11.604,
      "eval_arxiv_n_ngrams_match_2": 1.886,
      "eval_arxiv_n_ngrams_match_3": 0.356,
      "eval_arxiv_num_pred_words": 39.162,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 51.27047005572478,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.2718459092513821,
      "eval_arxiv_runtime": 10.1367,
      "eval_arxiv_samples_per_second": 49.326,
      "eval_arxiv_steps_per_second": 0.099,
      "eval_arxiv_token_set_f1": 0.2743544716040028,
      "eval_arxiv_token_set_f1_sem": 0.003996136267723946,
      "eval_arxiv_token_set_precision": 0.21419747038897358,
      "eval_arxiv_token_set_recall": 0.40853309748221917,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 24375
    },
    {
      "epoch": 4.68,
      "eval_python_code_alpaca_accuracy": 0.12659375,
      "eval_python_code_alpaca_bleu_score": 3.072292444778623,
      "eval_python_code_alpaca_bleu_score_sem": 0.1001348948944544,
      "eval_python_code_alpaca_emb_cos_sim": 0.5532311201095581,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010581854036656707,
      "eval_python_code_alpaca_emb_top1_equal": 0.046875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.01875615101164758,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.5399537086486816,
      "eval_python_code_alpaca_n_ngrams_match_1": 6.662,
      "eval_python_code_alpaca_n_ngrams_match_2": 1.446,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.408,
      "eval_python_code_alpaca_num_pred_words": 38.43,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 34.46532370752109,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.22826250412062343,
      "eval_python_code_alpaca_runtime": 9.9313,
      "eval_python_code_alpaca_samples_per_second": 50.346,
      "eval_python_code_alpaca_steps_per_second": 0.101,
      "eval_python_code_alpaca_token_set_f1": 0.35498431158216953,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005443706258915265,
      "eval_python_code_alpaca_token_set_precision": 0.3443360342008259,
      "eval_python_code_alpaca_token_set_recall": 0.40491970019595763,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 24375
    },
    {
      "epoch": 4.68,
      "eval_wikibio_accuracy": 0.284375,
      "eval_wikibio_bleu_score": 5.154139234359491,
      "eval_wikibio_bleu_score_sem": 0.1850111456141721,
      "eval_wikibio_emb_cos_sim": 0.6732481718063354,
      "eval_wikibio_emb_cos_sim_sem": 0.010341499779222189,
      "eval_wikibio_emb_top1_equal": 0.125,
      "eval_wikibio_emb_top1_equal_sem": 0.02934655822437397,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.316174030303955,
      "eval_wikibio_n_ngrams_match_1": 9.688,
      "eval_wikibio_n_ngrams_match_2": 3.034,
      "eval_wikibio_n_ngrams_match_3": 1.022,
      "eval_wikibio_num_pred_words": 37.874,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 74.90150848477074,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.31958321186210603,
      "eval_wikibio_runtime": 10.0591,
      "eval_wikibio_samples_per_second": 49.706,
      "eval_wikibio_steps_per_second": 0.099,
      "eval_wikibio_token_set_f1": 0.30491646176297693,
      "eval_wikibio_token_set_f1_sem": 0.005066329455645657,
      "eval_wikibio_token_set_precision": 0.3104134109672024,
      "eval_wikibio_token_set_recall": 0.31529480944426247,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 24375
    },
    {
      "epoch": 4.68,
      "eval_nq_accuracy": 0.4685625,
      "eval_nq_bleu_score": 8.24652177095543,
      "eval_nq_bleu_score_sem": 0.3709734844268983,
      "eval_nq_emb_cos_sim": 0.7405206561088562,
      "eval_nq_emb_cos_sim_sem": 0.010417774379770272,
      "eval_nq_emb_top1_equal": 0.2265625,
      "eval_nq_emb_top1_equal_sem": 0.03714537682851538,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.6306822299957275,
      "eval_nq_n_ngrams_match_1": 19.568,
      "eval_nq_n_ngrams_match_2": 6.206,
      "eval_nq_n_ngrams_match_3": 2.514,
      "eval_nq_num_pred_words": 47.938,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 13.883238233533564,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.3706542074447209,
      "eval_nq_runtime": 10.4574,
      "eval_nq_samples_per_second": 47.813,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.39825322857668644,
      "eval_nq_token_set_f1_sem": 0.004999985940615028,
      "eval_nq_token_set_precision": 0.3407341508509416,
      "eval_nq_token_set_recall": 0.4980007119901405,
      "eval_nq_true_num_tokens": 64.0,
      "step": 24375
    },
    {
      "epoch": 4.68,
      "learning_rate": 0.001,
      "loss": 2.9834,
      "step": 24384
    },
    {
      "epoch": 4.68,
      "learning_rate": 0.001,
      "loss": 2.9895,
      "step": 24396
    },
    {
      "epoch": 4.69,
      "learning_rate": 0.001,
      "loss": 3.003,
      "step": 24408
    },
    {
      "epoch": 4.69,
      "learning_rate": 0.001,
      "loss": 2.9974,
      "step": 24420
    },
    {
      "epoch": 4.69,
      "learning_rate": 0.001,
      "loss": 3.0005,
      "step": 24432
    },
    {
      "epoch": 4.69,
      "learning_rate": 0.001,
      "loss": 2.9975,
      "step": 24444
    },
    {
      "epoch": 4.7,
      "learning_rate": 0.001,
      "loss": 2.9831,
      "step": 24456
    },
    {
      "epoch": 4.7,
      "learning_rate": 0.001,
      "loss": 2.9955,
      "step": 24468
    },
    {
      "epoch": 4.7,
      "learning_rate": 0.001,
      "loss": 2.9911,
      "step": 24480
    },
    {
      "epoch": 4.7,
      "learning_rate": 0.001,
      "loss": 2.9894,
      "step": 24492
    },
    {
      "epoch": 4.71,
      "learning_rate": 0.001,
      "loss": 2.9917,
      "step": 24504
    },
    {
      "epoch": 4.71,
      "learning_rate": 0.001,
      "loss": 2.9945,
      "step": 24516
    },
    {
      "epoch": 4.71,
      "learning_rate": 0.001,
      "loss": 2.9969,
      "step": 24528
    },
    {
      "epoch": 4.71,
      "learning_rate": 0.001,
      "loss": 2.9967,
      "step": 24540
    },
    {
      "epoch": 4.71,
      "learning_rate": 0.001,
      "loss": 2.9911,
      "step": 24552
    },
    {
      "epoch": 4.72,
      "learning_rate": 0.001,
      "loss": 3.0025,
      "step": 24564
    },
    {
      "epoch": 4.72,
      "learning_rate": 0.001,
      "loss": 2.9916,
      "step": 24576
    },
    {
      "epoch": 4.72,
      "learning_rate": 0.001,
      "loss": 2.9874,
      "step": 24588
    },
    {
      "epoch": 4.72,
      "learning_rate": 0.001,
      "loss": 3.0062,
      "step": 24600
    },
    {
      "epoch": 4.73,
      "learning_rate": 0.001,
      "loss": 2.9975,
      "step": 24612
    },
    {
      "epoch": 4.73,
      "learning_rate": 0.001,
      "loss": 2.996,
      "step": 24624
    },
    {
      "epoch": 4.73,
      "learning_rate": 0.001,
      "loss": 2.9879,
      "step": 24636
    },
    {
      "epoch": 4.73,
      "learning_rate": 0.001,
      "loss": 2.9809,
      "step": 24648
    },
    {
      "epoch": 4.74,
      "learning_rate": 0.001,
      "loss": 2.9995,
      "step": 24660
    },
    {
      "epoch": 4.74,
      "learning_rate": 0.001,
      "loss": 2.993,
      "step": 24672
    },
    {
      "epoch": 4.74,
      "learning_rate": 0.001,
      "loss": 2.9959,
      "step": 24684
    },
    {
      "epoch": 4.74,
      "learning_rate": 0.001,
      "loss": 2.9916,
      "step": 24696
    },
    {
      "epoch": 4.74,
      "learning_rate": 0.001,
      "loss": 2.9916,
      "step": 24708
    },
    {
      "epoch": 4.75,
      "learning_rate": 0.001,
      "loss": 2.9848,
      "step": 24720
    },
    {
      "epoch": 4.75,
      "learning_rate": 0.001,
      "loss": 2.988,
      "step": 24732
    },
    {
      "epoch": 4.75,
      "learning_rate": 0.001,
      "loss": 2.9829,
      "step": 24744
    },
    {
      "epoch": 4.75,
      "learning_rate": 0.001,
      "loss": 2.9838,
      "step": 24756
    },
    {
      "epoch": 4.76,
      "learning_rate": 0.001,
      "loss": 2.9878,
      "step": 24768
    },
    {
      "epoch": 4.76,
      "learning_rate": 0.001,
      "loss": 3.0004,
      "step": 24780
    },
    {
      "epoch": 4.76,
      "learning_rate": 0.001,
      "loss": 2.9915,
      "step": 24792
    },
    {
      "epoch": 4.76,
      "learning_rate": 0.001,
      "loss": 2.9997,
      "step": 24804
    },
    {
      "epoch": 4.76,
      "learning_rate": 0.001,
      "loss": 2.9891,
      "step": 24816
    },
    {
      "epoch": 4.77,
      "learning_rate": 0.001,
      "loss": 2.988,
      "step": 24828
    },
    {
      "epoch": 4.77,
      "learning_rate": 0.001,
      "loss": 2.9948,
      "step": 24840
    },
    {
      "epoch": 4.77,
      "learning_rate": 0.001,
      "loss": 2.9882,
      "step": 24852
    },
    {
      "epoch": 4.77,
      "learning_rate": 0.001,
      "loss": 2.9821,
      "step": 24864
    },
    {
      "epoch": 4.78,
      "learning_rate": 0.001,
      "loss": 2.9825,
      "step": 24876
    },
    {
      "epoch": 4.78,
      "learning_rate": 0.001,
      "loss": 2.9981,
      "step": 24888
    },
    {
      "epoch": 4.78,
      "learning_rate": 0.001,
      "loss": 2.9728,
      "step": 24900
    },
    {
      "epoch": 4.78,
      "learning_rate": 0.001,
      "loss": 2.981,
      "step": 24912
    },
    {
      "epoch": 4.79,
      "learning_rate": 0.001,
      "loss": 2.9841,
      "step": 24924
    },
    {
      "epoch": 4.79,
      "learning_rate": 0.001,
      "loss": 2.9882,
      "step": 24936
    },
    {
      "epoch": 4.79,
      "learning_rate": 0.001,
      "loss": 2.9811,
      "step": 24948
    },
    {
      "epoch": 4.79,
      "learning_rate": 0.001,
      "loss": 2.9894,
      "step": 24960
    },
    {
      "epoch": 4.79,
      "learning_rate": 0.001,
      "loss": 2.981,
      "step": 24972
    },
    {
      "epoch": 4.8,
      "learning_rate": 0.001,
      "loss": 2.9852,
      "step": 24984
    },
    {
      "epoch": 4.8,
      "learning_rate": 0.001,
      "loss": 2.9903,
      "step": 24996
    },
    {
      "epoch": 4.8,
      "eval_ag_news_accuracy": 0.27865625,
      "eval_ag_news_bleu_score": 3.747678119171919,
      "eval_ag_news_bleu_score_sem": 0.13037092337861464,
      "eval_ag_news_emb_cos_sim": 0.7191460132598877,
      "eval_ag_news_emb_cos_sim_sem": 0.009588980969950336,
      "eval_ag_news_emb_top1_equal": 0.2109375,
      "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 4.050067901611328,
      "eval_ag_news_n_ngrams_match_1": 11.504,
      "eval_ag_news_n_ngrams_match_2": 2.24,
      "eval_ag_news_n_ngrams_match_3": 0.59,
      "eval_ag_news_num_pred_words": 45.704,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 57.4013545575879,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.2807045959733674,
      "eval_ag_news_runtime": 10.3313,
      "eval_ag_news_samples_per_second": 48.397,
      "eval_ag_news_steps_per_second": 0.097,
      "eval_ag_news_token_set_f1": 0.30009010921783813,
      "eval_ag_news_token_set_f1_sem": 0.004246243981141805,
      "eval_ag_news_token_set_precision": 0.26924921208357205,
      "eval_ag_news_token_set_recall": 0.3620972837105612,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 25000
    },
    {
      "epoch": 4.8,
      "eval_anthropic_toxic_prompts_accuracy": 0.093375,
      "eval_anthropic_toxic_prompts_bleu_score": 2.0508207428638174,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.07412518839495048,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.570429801940918,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011918807389160936,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.046875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01875615101164758,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.737182378768921,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.434,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.064,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.292,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.018,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 41.979540925032836,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.15830046017744343,
      "eval_anthropic_toxic_prompts_runtime": 9.9591,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.206,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.1,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.27491261021400826,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005592796108221736,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.3121017692273368,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.28282212773580107,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 25000
    },
    {
      "epoch": 4.8,
      "eval_arxiv_accuracy": 0.3051875,
      "eval_arxiv_bleu_score": 3.1548723493531314,
      "eval_arxiv_bleu_score_sem": 0.09704557643732598,
      "eval_arxiv_emb_cos_sim": 0.6206685304641724,
      "eval_arxiv_emb_cos_sim_sem": 0.009327608605999044,
      "eval_arxiv_emb_top1_equal": 0.1953125,
      "eval_arxiv_emb_top1_equal_sem": 0.035178457165496856,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.902249574661255,
      "eval_arxiv_n_ngrams_match_1": 11.626,
      "eval_arxiv_n_ngrams_match_2": 1.958,
      "eval_arxiv_n_ngrams_match_3": 0.356,
      "eval_arxiv_num_pred_words": 39.038,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 49.513708699703955,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.27366911104957703,
      "eval_arxiv_runtime": 10.4192,
      "eval_arxiv_samples_per_second": 47.988,
      "eval_arxiv_steps_per_second": 0.096,
      "eval_arxiv_token_set_f1": 0.27777530300356346,
      "eval_arxiv_token_set_f1_sem": 0.004012616347432226,
      "eval_arxiv_token_set_precision": 0.21788463919753517,
      "eval_arxiv_token_set_recall": 0.41003887398809874,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 25000
    },
    {
      "epoch": 4.8,
      "eval_python_code_alpaca_accuracy": 0.12925,
      "eval_python_code_alpaca_bleu_score": 3.2897174079593485,
      "eval_python_code_alpaca_bleu_score_sem": 0.11101639920129172,
      "eval_python_code_alpaca_emb_cos_sim": 0.5781171917915344,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.011459346154297457,
      "eval_python_code_alpaca_emb_top1_equal": 0.0390625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.017191973462108996,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.478679895401001,
      "eval_python_code_alpaca_n_ngrams_match_1": 6.912,
      "eval_python_code_alpaca_n_ngrams_match_2": 1.528,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.434,
      "eval_python_code_alpaca_num_pred_words": 38.444,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 32.416900118419434,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.24115608277986825,
      "eval_python_code_alpaca_runtime": 10.0311,
      "eval_python_code_alpaca_samples_per_second": 49.845,
      "eval_python_code_alpaca_steps_per_second": 0.1,
      "eval_python_code_alpaca_token_set_f1": 0.364292714766359,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005946908469769408,
      "eval_python_code_alpaca_token_set_precision": 0.35964730621929325,
      "eval_python_code_alpaca_token_set_recall": 0.40374706398445964,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 25000
    },
    {
      "epoch": 4.8,
      "eval_wikibio_accuracy": 0.2849375,
      "eval_wikibio_bleu_score": 5.055257940902527,
      "eval_wikibio_bleu_score_sem": 0.18018427425628075,
      "eval_wikibio_emb_cos_sim": 0.6791350245475769,
      "eval_wikibio_emb_cos_sim_sem": 0.010853030811242847,
      "eval_wikibio_emb_top1_equal": 0.15625,
      "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.277873992919922,
      "eval_wikibio_n_ngrams_match_1": 9.44,
      "eval_wikibio_n_ngrams_match_2": 3.014,
      "eval_wikibio_n_ngrams_match_3": 0.97,
      "eval_wikibio_num_pred_words": 37.816,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 72.08701946476889,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3152798645478886,
      "eval_wikibio_runtime": 10.3973,
      "eval_wikibio_samples_per_second": 48.089,
      "eval_wikibio_steps_per_second": 0.096,
      "eval_wikibio_token_set_f1": 0.29810819329785637,
      "eval_wikibio_token_set_f1_sem": 0.005348598253871104,
      "eval_wikibio_token_set_precision": 0.3038744686734838,
      "eval_wikibio_token_set_recall": 0.3066718056248075,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 25000
    },
    {
      "epoch": 4.8,
      "eval_nq_accuracy": 0.4716875,
      "eval_nq_bleu_score": 8.662555415990717,
      "eval_nq_bleu_score_sem": 0.38224303326992015,
      "eval_nq_emb_cos_sim": 0.7517282962799072,
      "eval_nq_emb_cos_sim_sem": 0.010072708252843962,
      "eval_nq_emb_top1_equal": 0.203125,
      "eval_nq_emb_top1_equal_sem": 0.03570055125142555,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.6186509132385254,
      "eval_nq_n_ngrams_match_1": 20.014,
      "eval_nq_n_ngrams_match_2": 6.522,
      "eval_nq_n_ngrams_match_3": 2.694,
      "eval_nq_num_pred_words": 47.998,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 13.71720539636029,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.3819900413593659,
      "eval_nq_runtime": 11.0052,
      "eval_nq_samples_per_second": 45.433,
      "eval_nq_steps_per_second": 0.091,
      "eval_nq_token_set_f1": 0.4037235175729149,
      "eval_nq_token_set_f1_sem": 0.004969695235582259,
      "eval_nq_token_set_precision": 0.3498406145879189,
      "eval_nq_token_set_recall": 0.49303426319603655,
      "eval_nq_true_num_tokens": 64.0,
      "step": 25000
    },
    {
      "epoch": 4.8,
      "learning_rate": 0.001,
      "loss": 2.9872,
      "step": 25008
    },
    {
      "epoch": 4.8,
      "learning_rate": 0.001,
      "loss": 2.9775,
      "step": 25020
    },
    {
      "epoch": 4.81,
      "learning_rate": 0.001,
      "loss": 2.9838,
      "step": 25032
    },
    {
      "epoch": 4.81,
      "learning_rate": 0.001,
      "loss": 2.9922,
      "step": 25044
    },
    {
      "epoch": 4.81,
      "learning_rate": 0.001,
      "loss": 2.9837,
      "step": 25056
    },
    {
      "epoch": 4.81,
      "learning_rate": 0.001,
      "loss": 2.9925,
      "step": 25068
    },
    {
      "epoch": 4.82,
      "learning_rate": 0.001,
      "loss": 2.9751,
      "step": 25080
    },
    {
      "epoch": 4.82,
      "learning_rate": 0.001,
      "loss": 2.9843,
      "step": 25092
    },
    {
      "epoch": 4.82,
      "learning_rate": 0.001,
      "loss": 2.97,
      "step": 25104
    },
    {
      "epoch": 4.82,
      "learning_rate": 0.001,
      "loss": 2.9744,
      "step": 25116
    },
    {
      "epoch": 4.82,
      "learning_rate": 0.001,
      "loss": 2.9828,
      "step": 25128
    },
    {
      "epoch": 4.83,
      "learning_rate": 0.001,
      "loss": 2.9771,
      "step": 25140
    },
    {
      "epoch": 4.83,
      "learning_rate": 0.001,
      "loss": 2.9746,
      "step": 25152
    },
    {
      "epoch": 4.83,
      "learning_rate": 0.001,
      "loss": 2.9837,
      "step": 25164
    },
    {
      "epoch": 4.83,
      "learning_rate": 0.001,
      "loss": 2.9931,
      "step": 25176
    },
    {
      "epoch": 4.84,
      "learning_rate": 0.001,
      "loss": 2.9874,
      "step": 25188
    },
    {
      "epoch": 4.84,
      "learning_rate": 0.001,
      "loss": 2.9817,
      "step": 25200
    },
    {
      "epoch": 4.84,
      "learning_rate": 0.001,
      "loss": 2.9874,
      "step": 25212
    },
    {
      "epoch": 4.84,
      "learning_rate": 0.001,
      "loss": 2.9911,
      "step": 25224
    },
    {
      "epoch": 4.85,
      "learning_rate": 0.001,
      "loss": 2.9712,
      "step": 25236
    },
    {
      "epoch": 4.85,
      "learning_rate": 0.001,
      "loss": 2.9869,
      "step": 25248
    },
    {
      "epoch": 4.85,
      "learning_rate": 0.001,
      "loss": 2.9764,
      "step": 25260
    },
    {
      "epoch": 4.85,
      "learning_rate": 0.001,
      "loss": 2.9789,
      "step": 25272
    },
    {
      "epoch": 4.85,
      "learning_rate": 0.001,
      "loss": 2.9787,
      "step": 25284
    },
    {
      "epoch": 4.86,
      "learning_rate": 0.001,
      "loss": 2.9921,
      "step": 25296
    },
    {
      "epoch": 4.86,
      "learning_rate": 0.001,
      "loss": 2.9915,
      "step": 25308
    },
    {
      "epoch": 4.86,
      "learning_rate": 0.001,
      "loss": 2.9751,
      "step": 25320
    },
    {
      "epoch": 4.86,
      "learning_rate": 0.001,
      "loss": 2.975,
      "step": 25332
    },
    {
      "epoch": 4.87,
      "learning_rate": 0.001,
      "loss": 2.9876,
      "step": 25344
    },
    {
      "epoch": 4.87,
      "learning_rate": 0.001,
      "loss": 2.9802,
      "step": 25356
    },
    {
      "epoch": 4.87,
      "learning_rate": 0.001,
      "loss": 2.9779,
      "step": 25368
    },
    {
      "epoch": 4.87,
      "learning_rate": 0.001,
      "loss": 2.9722,
      "step": 25380
    },
    {
      "epoch": 4.88,
      "learning_rate": 0.001,
      "loss": 2.9733,
      "step": 25392
    },
    {
      "epoch": 4.88,
      "learning_rate": 0.001,
      "loss": 2.9747,
      "step": 25404
    },
    {
      "epoch": 4.88,
      "learning_rate": 0.001,
      "loss": 2.9793,
      "step": 25416
    },
    {
      "epoch": 4.88,
      "learning_rate": 0.001,
      "loss": 2.9686,
      "step": 25428
    },
    {
      "epoch": 4.88,
      "learning_rate": 0.001,
      "loss": 2.9787,
      "step": 25440
    },
    {
      "epoch": 4.89,
      "learning_rate": 0.001,
      "loss": 2.9734,
      "step": 25452
    },
    {
      "epoch": 4.89,
      "learning_rate": 0.001,
      "loss": 2.9644,
      "step": 25464
    },
    {
      "epoch": 4.89,
      "learning_rate": 0.001,
      "loss": 2.9722,
      "step": 25476
    },
    {
      "epoch": 4.89,
      "learning_rate": 0.001,
      "loss": 2.9885,
      "step": 25488
    },
    {
      "epoch": 4.9,
      "learning_rate": 0.001,
      "loss": 2.9745,
      "step": 25500
    },
    {
      "epoch": 4.9,
      "learning_rate": 0.001,
      "loss": 2.9847,
      "step": 25512
    },
    {
      "epoch": 4.9,
      "learning_rate": 0.001,
      "loss": 2.9844,
      "step": 25524
    },
    {
      "epoch": 4.9,
      "learning_rate": 0.001,
      "loss": 2.9834,
      "step": 25536
    },
    {
      "epoch": 4.91,
      "learning_rate": 0.001,
      "loss": 2.9788,
      "step": 25548
    },
    {
      "epoch": 4.91,
      "learning_rate": 0.001,
      "loss": 2.9791,
      "step": 25560
    },
    {
      "epoch": 4.91,
      "learning_rate": 0.001,
      "loss": 2.9729,
      "step": 25572
    },
    {
      "epoch": 4.91,
      "learning_rate": 0.001,
      "loss": 2.9709,
      "step": 25584
    },
    {
      "epoch": 4.91,
      "learning_rate": 0.001,
      "loss": 2.9661,
      "step": 25596
    },
    {
      "epoch": 4.92,
      "learning_rate": 0.001,
      "loss": 2.9617,
      "step": 25608
    },
    {
      "epoch": 4.92,
      "learning_rate": 0.001,
      "loss": 2.9788,
      "step": 25620
    },
    {
      "epoch": 4.92,
      "eval_ag_news_accuracy": 0.27896875,
      "eval_ag_news_bleu_score": 3.7022313908477926,
      "eval_ag_news_bleu_score_sem": 0.12702515343757315,
      "eval_ag_news_emb_cos_sim": 0.7072823643684387,
      "eval_ag_news_emb_cos_sim_sem": 0.01164344753986009,
      "eval_ag_news_emb_top1_equal": 0.1640625,
      "eval_ag_news_emb_top1_equal_sem": 0.03286167651298939,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 4.034488201141357,
      "eval_ag_news_n_ngrams_match_1": 11.304,
      "eval_ag_news_n_ngrams_match_2": 2.178,
      "eval_ag_news_n_ngrams_match_3": 0.58,
      "eval_ag_news_num_pred_words": 45.416,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 56.51398904038924,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.2743784357785968,
      "eval_ag_news_runtime": 11.0411,
      "eval_ag_news_samples_per_second": 45.285,
      "eval_ag_news_steps_per_second": 0.091,
      "eval_ag_news_token_set_f1": 0.2949216520780335,
      "eval_ag_news_token_set_f1_sem": 0.004517282991213544,
      "eval_ag_news_token_set_precision": 0.26115752691664934,
      "eval_ag_news_token_set_recall": 0.36121393688133735,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 25625
    },
    {
      "epoch": 4.92,
      "eval_anthropic_toxic_prompts_accuracy": 0.0935625,
      "eval_anthropic_toxic_prompts_bleu_score": 2.1606053533585676,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.07841458245842338,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5771173238754272,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010939913905270023,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.7294044494628906,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.628,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.184,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.344,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.612,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 41.65429353869113,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.16121912497112084,
      "eval_anthropic_toxic_prompts_runtime": 10.0604,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.7,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.099,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.2870803417654264,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006006123260005836,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.3190048185939165,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.298282717306593,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 25625
    },
    {
      "epoch": 4.92,
      "eval_arxiv_accuracy": 0.3046875,
      "eval_arxiv_bleu_score": 3.2052067354864637,
      "eval_arxiv_bleu_score_sem": 0.09327914875363522,
      "eval_arxiv_emb_cos_sim": 0.6312928795814514,
      "eval_arxiv_emb_cos_sim_sem": 0.008048950930430461,
      "eval_arxiv_emb_top1_equal": 0.171875,
      "eval_arxiv_emb_top1_equal_sem": 0.03347745514062371,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.9027645587921143,
      "eval_arxiv_n_ngrams_match_1": 11.724,
      "eval_arxiv_n_ngrams_match_2": 1.978,
      "eval_arxiv_n_ngrams_match_3": 0.37,
      "eval_arxiv_num_pred_words": 38.498,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 49.539214040803564,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.27526463518161226,
      "eval_arxiv_runtime": 10.4375,
      "eval_arxiv_samples_per_second": 47.904,
      "eval_arxiv_steps_per_second": 0.096,
      "eval_arxiv_token_set_f1": 0.2777970846073231,
      "eval_arxiv_token_set_f1_sem": 0.0041335483794326805,
      "eval_arxiv_token_set_precision": 0.218081399165317,
      "eval_arxiv_token_set_recall": 0.40698031821118064,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 25625
    },
    {
      "epoch": 4.92,
      "eval_python_code_alpaca_accuracy": 0.13034375,
      "eval_python_code_alpaca_bleu_score": 3.1045425556691137,
      "eval_python_code_alpaca_bleu_score_sem": 0.0982175518651635,
      "eval_python_code_alpaca_emb_cos_sim": 0.5631523132324219,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.012085411247146467,
      "eval_python_code_alpaca_emb_top1_equal": 0.046875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.01875615101164758,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.4732348918914795,
      "eval_python_code_alpaca_n_ngrams_match_1": 6.852,
      "eval_python_code_alpaca_n_ngrams_match_2": 1.516,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.398,
      "eval_python_code_alpaca_num_pred_words": 38.776,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 32.24086966164758,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.23800618869223422,
      "eval_python_code_alpaca_runtime": 10.3898,
      "eval_python_code_alpaca_samples_per_second": 48.124,
      "eval_python_code_alpaca_steps_per_second": 0.096,
      "eval_python_code_alpaca_token_set_f1": 0.36076042420434024,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005528680813819971,
      "eval_python_code_alpaca_token_set_precision": 0.35556375357612696,
      "eval_python_code_alpaca_token_set_recall": 0.39794591627232334,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 25625
    },
    {
      "epoch": 4.92,
      "eval_wikibio_accuracy": 0.2818125,
      "eval_wikibio_bleu_score": 5.2276160731931,
      "eval_wikibio_bleu_score_sem": 0.17925560898671766,
      "eval_wikibio_emb_cos_sim": 0.7021856307983398,
      "eval_wikibio_emb_cos_sim_sem": 0.008435994659929112,
      "eval_wikibio_emb_top1_equal": 0.1015625,
      "eval_wikibio_emb_top1_equal_sem": 0.026804565886848545,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.346080303192139,
      "eval_wikibio_n_ngrams_match_1": 9.65,
      "eval_wikibio_n_ngrams_match_2": 3.01,
      "eval_wikibio_n_ngrams_match_3": 1.014,
      "eval_wikibio_num_pred_words": 37.714,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 77.17536525500653,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.324683110435965,
      "eval_wikibio_runtime": 9.9746,
      "eval_wikibio_samples_per_second": 50.127,
      "eval_wikibio_steps_per_second": 0.1,
      "eval_wikibio_token_set_f1": 0.3040230656977922,
      "eval_wikibio_token_set_f1_sem": 0.005016679036750332,
      "eval_wikibio_token_set_precision": 0.3101032576976257,
      "eval_wikibio_token_set_recall": 0.3109929570823593,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 25625
    },
    {
      "epoch": 4.92,
      "eval_nq_accuracy": 0.472125,
      "eval_nq_bleu_score": 8.57318964466987,
      "eval_nq_bleu_score_sem": 0.3817501462323276,
      "eval_nq_emb_cos_sim": 0.7569206357002258,
      "eval_nq_emb_cos_sim_sem": 0.009972043797898233,
      "eval_nq_emb_top1_equal": 0.25,
      "eval_nq_emb_top1_equal_sem": 0.03842366440207048,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.6137852668762207,
      "eval_nq_n_ngrams_match_1": 19.866,
      "eval_nq_n_ngrams_match_2": 6.468,
      "eval_nq_n_ngrams_match_3": 2.628,
      "eval_nq_num_pred_words": 48.162,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 13.65062443688009,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.37813734112369435,
      "eval_nq_runtime": 10.4245,
      "eval_nq_samples_per_second": 47.964,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.4038984316695101,
      "eval_nq_token_set_f1_sem": 0.0048201729970428365,
      "eval_nq_token_set_precision": 0.34842539672564526,
      "eval_nq_token_set_recall": 0.4960380513072947,
      "eval_nq_true_num_tokens": 64.0,
      "step": 25625
    },
    {
      "epoch": 4.92,
      "learning_rate": 0.001,
      "loss": 2.9764,
      "step": 25632
    },
    {
      "epoch": 4.92,
      "learning_rate": 0.001,
      "loss": 2.9652,
      "step": 25644
    },
    {
      "epoch": 4.93,
      "learning_rate": 0.001,
      "loss": 2.9755,
      "step": 25656
    },
    {
      "epoch": 4.93,
      "learning_rate": 0.001,
      "loss": 2.9691,
      "step": 25668
    },
    {
      "epoch": 4.93,
      "learning_rate": 0.001,
      "loss": 2.9727,
      "step": 25680
    },
    {
      "epoch": 4.93,
      "learning_rate": 0.001,
      "loss": 2.9732,
      "step": 25692
    },
    {
      "epoch": 4.94,
      "learning_rate": 0.001,
      "loss": 2.9714,
      "step": 25704
    },
    {
      "epoch": 4.94,
      "learning_rate": 0.001,
      "loss": 2.9798,
      "step": 25716
    },
    {
      "epoch": 4.94,
      "learning_rate": 0.001,
      "loss": 2.9718,
      "step": 25728
    },
    {
      "epoch": 4.94,
      "learning_rate": 0.001,
      "loss": 2.9744,
      "step": 25740
    },
    {
      "epoch": 4.94,
      "learning_rate": 0.001,
      "loss": 2.9746,
      "step": 25752
    },
    {
      "epoch": 4.95,
      "learning_rate": 0.001,
      "loss": 2.9751,
      "step": 25764
    },
    {
      "epoch": 4.95,
      "learning_rate": 0.001,
      "loss": 2.9773,
      "step": 25776
    },
    {
      "epoch": 4.95,
      "learning_rate": 0.001,
      "loss": 2.9768,
      "step": 25788
    },
    {
      "epoch": 4.95,
      "learning_rate": 0.001,
      "loss": 2.9673,
      "step": 25800
    },
    {
      "epoch": 4.96,
      "learning_rate": 0.001,
      "loss": 2.9802,
      "step": 25812
    },
    {
      "epoch": 4.96,
      "learning_rate": 0.001,
      "loss": 2.9793,
      "step": 25824
    },
    {
      "epoch": 4.96,
      "learning_rate": 0.001,
      "loss": 2.9696,
      "step": 25836
    },
    {
      "epoch": 4.96,
      "learning_rate": 0.001,
      "loss": 2.9765,
      "step": 25848
    },
    {
      "epoch": 4.97,
      "learning_rate": 0.001,
      "loss": 2.9648,
      "step": 25860
    },
    {
      "epoch": 4.97,
      "learning_rate": 0.001,
      "loss": 2.9728,
      "step": 25872
    },
    {
      "epoch": 4.97,
      "learning_rate": 0.001,
      "loss": 2.9693,
      "step": 25884
    },
    {
      "epoch": 4.97,
      "learning_rate": 0.001,
      "loss": 2.9768,
      "step": 25896
    },
    {
      "epoch": 4.97,
      "learning_rate": 0.001,
      "loss": 2.9594,
      "step": 25908
    },
    {
      "epoch": 4.98,
      "learning_rate": 0.001,
      "loss": 2.9719,
      "step": 25920
    },
    {
      "epoch": 4.98,
      "learning_rate": 0.001,
      "loss": 2.9769,
      "step": 25932
    },
    {
      "epoch": 4.98,
      "learning_rate": 0.001,
      "loss": 2.9644,
      "step": 25944
    },
    {
      "epoch": 4.98,
      "learning_rate": 0.001,
      "loss": 2.9695,
      "step": 25956
    },
    {
      "epoch": 4.99,
      "learning_rate": 0.001,
      "loss": 2.9589,
      "step": 25968
    },
    {
      "epoch": 4.99,
      "learning_rate": 0.001,
      "loss": 2.9766,
      "step": 25980
    },
    {
      "epoch": 4.99,
      "learning_rate": 0.001,
      "loss": 2.9715,
      "step": 25992
    },
    {
      "epoch": 4.99,
      "learning_rate": 0.001,
      "loss": 2.9821,
      "step": 26004
    },
    {
      "epoch": 5.0,
      "learning_rate": 0.001,
      "loss": 2.9693,
      "step": 26016
    },
    {
      "epoch": 5.0,
      "learning_rate": 0.001,
      "loss": 2.9704,
      "step": 26028
    },
    {
      "epoch": 5.0,
      "learning_rate": 0.001,
      "loss": 2.987,
      "step": 26040
    },
    {
      "epoch": 5.0,
      "learning_rate": 0.001,
      "loss": 2.9567,
      "step": 26052
    },
    {
      "epoch": 5.0,
      "learning_rate": 0.001,
      "loss": 2.9465,
      "step": 26064
    },
    {
      "epoch": 5.01,
      "learning_rate": 0.001,
      "loss": 2.9494,
      "step": 26076
    },
    {
      "epoch": 5.01,
      "learning_rate": 0.001,
      "loss": 2.9596,
      "step": 26088
    },
    {
      "epoch": 5.01,
      "learning_rate": 0.001,
      "loss": 2.9562,
      "step": 26100
    },
    {
      "epoch": 5.01,
      "learning_rate": 0.001,
      "loss": 2.9507,
      "step": 26112
    },
    {
      "epoch": 5.02,
      "learning_rate": 0.001,
      "loss": 2.9448,
      "step": 26124
    },
    {
      "epoch": 5.02,
      "learning_rate": 0.001,
      "loss": 2.9441,
      "step": 26136
    },
    {
      "epoch": 5.02,
      "learning_rate": 0.001,
      "loss": 2.9503,
      "step": 26148
    },
    {
      "epoch": 5.02,
      "learning_rate": 0.001,
      "loss": 2.9529,
      "step": 26160
    },
    {
      "epoch": 5.03,
      "learning_rate": 0.001,
      "loss": 2.9468,
      "step": 26172
    },
    {
      "epoch": 5.03,
      "learning_rate": 0.001,
      "loss": 2.9545,
      "step": 26184
    },
    {
      "epoch": 5.03,
      "learning_rate": 0.001,
      "loss": 2.9493,
      "step": 26196
    },
    {
      "epoch": 5.03,
      "learning_rate": 0.001,
      "loss": 2.9541,
      "step": 26208
    },
    {
      "epoch": 5.03,
      "learning_rate": 0.001,
      "loss": 2.9392,
      "step": 26220
    },
    {
      "epoch": 5.04,
      "learning_rate": 0.001,
      "loss": 2.943,
      "step": 26232
    },
    {
      "epoch": 5.04,
      "learning_rate": 0.001,
      "loss": 2.9556,
      "step": 26244
    },
    {
      "epoch": 5.04,
      "eval_ag_news_accuracy": 0.28015625,
      "eval_ag_news_bleu_score": 3.736718599536026,
      "eval_ag_news_bleu_score_sem": 0.1368179259557509,
      "eval_ag_news_emb_cos_sim": 0.7063626050949097,
      "eval_ag_news_emb_cos_sim_sem": 0.01123980322040421,
      "eval_ag_news_emb_top1_equal": 0.1640625,
      "eval_ag_news_emb_top1_equal_sem": 0.03286167651298939,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 4.027685642242432,
      "eval_ag_news_n_ngrams_match_1": 11.524,
      "eval_ag_news_n_ngrams_match_2": 2.202,
      "eval_ag_news_n_ngrams_match_3": 0.56,
      "eval_ag_news_num_pred_words": 45.846,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 56.13085392836912,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.27783993693430153,
      "eval_ag_news_runtime": 10.1232,
      "eval_ag_news_samples_per_second": 49.392,
      "eval_ag_news_steps_per_second": 0.099,
      "eval_ag_news_token_set_f1": 0.29647039914663714,
      "eval_ag_news_token_set_f1_sem": 0.004571673438081544,
      "eval_ag_news_token_set_precision": 0.2680941037969084,
      "eval_ag_news_token_set_recall": 0.35446756968113474,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 26250
    },
    {
      "epoch": 5.04,
      "eval_anthropic_toxic_prompts_accuracy": 0.0965,
      "eval_anthropic_toxic_prompts_bleu_score": 2.147582886640655,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.07966992712992176,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5788730382919312,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010722127309579567,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.7207133769989014,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.706,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.176,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.328,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.68,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 41.29384168060291,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.1654803392623658,
      "eval_anthropic_toxic_prompts_runtime": 9.2523,
      "eval_anthropic_toxic_prompts_samples_per_second": 54.041,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.108,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.2937395978643475,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0061159346555026035,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.3280652517015705,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3076924694441835,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 26250
    },
    {
      "epoch": 5.04,
      "eval_arxiv_accuracy": 0.3036875,
      "eval_arxiv_bleu_score": 3.2569561892362464,
      "eval_arxiv_bleu_score_sem": 0.0994423042261703,
      "eval_arxiv_emb_cos_sim": 0.6460261344909668,
      "eval_arxiv_emb_cos_sim_sem": 0.008178726345351568,
      "eval_arxiv_emb_top1_equal": 0.1875,
      "eval_arxiv_emb_top1_equal_sem": 0.034634623208270626,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.8972249031066895,
      "eval_arxiv_n_ngrams_match_1": 11.862,
      "eval_arxiv_n_ngrams_match_2": 2.13,
      "eval_arxiv_n_ngrams_match_3": 0.39,
      "eval_arxiv_num_pred_words": 39.218,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 49.265542574801145,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.2814910108717771,
      "eval_arxiv_runtime": 9.74,
      "eval_arxiv_samples_per_second": 51.335,
      "eval_arxiv_steps_per_second": 0.103,
      "eval_arxiv_token_set_f1": 0.2803151791516185,
      "eval_arxiv_token_set_f1_sem": 0.004112967583742836,
      "eval_arxiv_token_set_precision": 0.22161137838009956,
      "eval_arxiv_token_set_recall": 0.4078977490691022,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 26250
    },
    {
      "epoch": 5.04,
      "eval_python_code_alpaca_accuracy": 0.12959375,
      "eval_python_code_alpaca_bleu_score": 3.0913667214150835,
      "eval_python_code_alpaca_bleu_score_sem": 0.10517243681692368,
      "eval_python_code_alpaca_emb_cos_sim": 0.5842911005020142,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.011832732987789562,
      "eval_python_code_alpaca_emb_top1_equal": 0.015625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.011004959004867984,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.462963342666626,
      "eval_python_code_alpaca_n_ngrams_match_1": 6.734,
      "eval_python_code_alpaca_n_ngrams_match_2": 1.478,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.408,
      "eval_python_code_alpaca_num_pred_words": 38.17,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 31.911400955585695,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.23211424000125105,
      "eval_python_code_alpaca_runtime": 10.5042,
      "eval_python_code_alpaca_samples_per_second": 47.6,
      "eval_python_code_alpaca_steps_per_second": 0.095,
      "eval_python_code_alpaca_token_set_f1": 0.3534046559038142,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005877893579494761,
      "eval_python_code_alpaca_token_set_precision": 0.3486447101786397,
      "eval_python_code_alpaca_token_set_recall": 0.39743856447173775,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 26250
    },
    {
      "epoch": 5.04,
      "eval_wikibio_accuracy": 0.28428125,
      "eval_wikibio_bleu_score": 5.136802617005691,
      "eval_wikibio_bleu_score_sem": 0.1775725642513426,
      "eval_wikibio_emb_cos_sim": 0.6929802894592285,
      "eval_wikibio_emb_cos_sim_sem": 0.008306888326228116,
      "eval_wikibio_emb_top1_equal": 0.1484375,
      "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.312255859375,
      "eval_wikibio_n_ngrams_match_1": 9.482,
      "eval_wikibio_n_ngrams_match_2": 3.032,
      "eval_wikibio_n_ngrams_match_3": 1.018,
      "eval_wikibio_num_pred_words": 37.272,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 74.60860576786769,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3142983480408808,
      "eval_wikibio_runtime": 9.4538,
      "eval_wikibio_samples_per_second": 52.889,
      "eval_wikibio_steps_per_second": 0.106,
      "eval_wikibio_token_set_f1": 0.29996201287412866,
      "eval_wikibio_token_set_f1_sem": 0.005286970493581309,
      "eval_wikibio_token_set_precision": 0.30351962058207854,
      "eval_wikibio_token_set_recall": 0.3143049085469827,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 26250
    },
    {
      "epoch": 5.04,
      "eval_nq_accuracy": 0.4734375,
      "eval_nq_bleu_score": 8.626090223326957,
      "eval_nq_bleu_score_sem": 0.3803047162378017,
      "eval_nq_emb_cos_sim": 0.7519087195396423,
      "eval_nq_emb_cos_sim_sem": 0.009378460095816454,
      "eval_nq_emb_top1_equal": 0.15625,
      "eval_nq_emb_top1_equal_sem": 0.03221922156442571,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.5983827114105225,
      "eval_nq_n_ngrams_match_1": 20.114,
      "eval_nq_n_ngrams_match_2": 6.486,
      "eval_nq_n_ngrams_match_3": 2.66,
      "eval_nq_num_pred_words": 48.74,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 13.441980883644657,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.3812398567184436,
      "eval_nq_runtime": 9.9152,
      "eval_nq_samples_per_second": 50.428,
      "eval_nq_steps_per_second": 0.101,
      "eval_nq_token_set_f1": 0.4052964394282942,
      "eval_nq_token_set_f1_sem": 0.0048446113674483204,
      "eval_nq_token_set_precision": 0.3521700946763356,
      "eval_nq_token_set_recall": 0.493707980858585,
      "eval_nq_true_num_tokens": 64.0,
      "step": 26250
    },
    {
      "epoch": 5.04,
      "learning_rate": 0.001,
      "loss": 2.9539,
      "step": 26256
    },
    {
      "epoch": 5.04,
      "learning_rate": 0.001,
      "loss": 2.9453,
      "step": 26268
    },
    {
      "epoch": 5.05,
      "learning_rate": 0.001,
      "loss": 2.9525,
      "step": 26280
    },
    {
      "epoch": 5.05,
      "learning_rate": 0.001,
      "loss": 2.9562,
      "step": 26292
    },
    {
      "epoch": 5.05,
      "learning_rate": 0.001,
      "loss": 2.9446,
      "step": 26304
    },
    {
      "epoch": 5.05,
      "learning_rate": 0.001,
      "loss": 2.9547,
      "step": 26316
    },
    {
      "epoch": 5.06,
      "learning_rate": 0.001,
      "loss": 2.9443,
      "step": 26328
    },
    {
      "epoch": 5.06,
      "learning_rate": 0.001,
      "loss": 2.9527,
      "step": 26340
    },
    {
      "epoch": 5.06,
      "learning_rate": 0.001,
      "loss": 2.9498,
      "step": 26352
    },
    {
      "epoch": 5.06,
      "learning_rate": 0.001,
      "loss": 2.9438,
      "step": 26364
    },
    {
      "epoch": 5.06,
      "learning_rate": 0.001,
      "loss": 2.9558,
      "step": 26376
    },
    {
      "epoch": 5.07,
      "learning_rate": 0.001,
      "loss": 2.9402,
      "step": 26388
    },
    {
      "epoch": 5.07,
      "learning_rate": 0.001,
      "loss": 2.9529,
      "step": 26400
    },
    {
      "epoch": 5.07,
      "learning_rate": 0.001,
      "loss": 2.9486,
      "step": 26412
    },
    {
      "epoch": 5.07,
      "learning_rate": 0.001,
      "loss": 2.9571,
      "step": 26424
    },
    {
      "epoch": 5.08,
      "learning_rate": 0.001,
      "loss": 2.9421,
      "step": 26436
    },
    {
      "epoch": 5.08,
      "learning_rate": 0.001,
      "loss": 2.9374,
      "step": 26448
    },
    {
      "epoch": 5.08,
      "learning_rate": 0.001,
      "loss": 2.9444,
      "step": 26460
    },
    {
      "epoch": 5.08,
      "learning_rate": 0.001,
      "loss": 2.9479,
      "step": 26472
    },
    {
      "epoch": 5.09,
      "learning_rate": 0.001,
      "loss": 2.9473,
      "step": 26484
    },
    {
      "epoch": 5.09,
      "learning_rate": 0.001,
      "loss": 2.9517,
      "step": 26496
    },
    {
      "epoch": 5.09,
      "learning_rate": 0.001,
      "loss": 2.9535,
      "step": 26508
    },
    {
      "epoch": 5.09,
      "learning_rate": 0.001,
      "loss": 2.9437,
      "step": 26520
    },
    {
      "epoch": 5.09,
      "learning_rate": 0.001,
      "loss": 2.942,
      "step": 26532
    },
    {
      "epoch": 5.1,
      "learning_rate": 0.001,
      "loss": 2.9387,
      "step": 26544
    },
    {
      "epoch": 5.1,
      "learning_rate": 0.001,
      "loss": 2.95,
      "step": 26556
    },
    {
      "epoch": 5.1,
      "learning_rate": 0.001,
      "loss": 2.9463,
      "step": 26568
    },
    {
      "epoch": 5.1,
      "learning_rate": 0.001,
      "loss": 2.9495,
      "step": 26580
    },
    {
      "epoch": 5.11,
      "learning_rate": 0.001,
      "loss": 2.9408,
      "step": 26592
    },
    {
      "epoch": 5.11,
      "learning_rate": 0.001,
      "loss": 2.9454,
      "step": 26604
    },
    {
      "epoch": 5.11,
      "learning_rate": 0.001,
      "loss": 2.9522,
      "step": 26616
    },
    {
      "epoch": 5.11,
      "learning_rate": 0.001,
      "loss": 2.9466,
      "step": 26628
    },
    {
      "epoch": 5.12,
      "learning_rate": 0.001,
      "loss": 2.9369,
      "step": 26640
    },
    {
      "epoch": 5.12,
      "learning_rate": 0.001,
      "loss": 2.9425,
      "step": 26652
    },
    {
      "epoch": 5.12,
      "learning_rate": 0.001,
      "loss": 2.9396,
      "step": 26664
    },
    {
      "epoch": 5.12,
      "learning_rate": 0.001,
      "loss": 2.9392,
      "step": 26676
    },
    {
      "epoch": 5.12,
      "learning_rate": 0.001,
      "loss": 2.9503,
      "step": 26688
    },
    {
      "epoch": 5.13,
      "learning_rate": 0.001,
      "loss": 2.9481,
      "step": 26700
    },
    {
      "epoch": 5.13,
      "learning_rate": 0.001,
      "loss": 2.93,
      "step": 26712
    },
    {
      "epoch": 5.13,
      "learning_rate": 0.001,
      "loss": 2.949,
      "step": 26724
    },
    {
      "epoch": 5.13,
      "learning_rate": 0.001,
      "loss": 2.9597,
      "step": 26736
    },
    {
      "epoch": 5.14,
      "learning_rate": 0.001,
      "loss": 2.9528,
      "step": 26748
    },
    {
      "epoch": 5.14,
      "learning_rate": 0.001,
      "loss": 2.9501,
      "step": 26760
    },
    {
      "epoch": 5.14,
      "learning_rate": 0.001,
      "loss": 2.9405,
      "step": 26772
    },
    {
      "epoch": 5.14,
      "learning_rate": 0.001,
      "loss": 2.9482,
      "step": 26784
    },
    {
      "epoch": 5.15,
      "learning_rate": 0.001,
      "loss": 2.9616,
      "step": 26796
    },
    {
      "epoch": 5.15,
      "learning_rate": 0.001,
      "loss": 2.946,
      "step": 26808
    },
    {
      "epoch": 5.15,
      "learning_rate": 0.001,
      "loss": 2.9434,
      "step": 26820
    },
    {
      "epoch": 5.15,
      "learning_rate": 0.001,
      "loss": 2.9392,
      "step": 26832
    },
    {
      "epoch": 5.15,
      "learning_rate": 0.001,
      "loss": 2.9417,
      "step": 26844
    },
    {
      "epoch": 5.16,
      "learning_rate": 0.001,
      "loss": 2.94,
      "step": 26856
    },
    {
      "epoch": 5.16,
      "learning_rate": 0.001,
      "loss": 2.9375,
      "step": 26868
    },
    {
      "epoch": 5.16,
      "eval_ag_news_accuracy": 0.282875,
      "eval_ag_news_bleu_score": 3.7854512005663534,
      "eval_ag_news_bleu_score_sem": 0.13142873040472827,
      "eval_ag_news_emb_cos_sim": 0.7158944010734558,
      "eval_ag_news_emb_cos_sim_sem": 0.009918688521861473,
      "eval_ag_news_emb_top1_equal": 0.1484375,
      "eval_ag_news_emb_top1_equal_sem": 0.031548465007086954,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 4.006141662597656,
      "eval_ag_news_n_ngrams_match_1": 11.738,
      "eval_ag_news_n_ngrams_match_2": 2.338,
      "eval_ag_news_n_ngrams_match_3": 0.622,
      "eval_ag_news_num_pred_words": 46.208,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 54.93450528205335,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.28279509556964266,
      "eval_ag_news_runtime": 11.4196,
      "eval_ag_news_samples_per_second": 43.785,
      "eval_ag_news_steps_per_second": 0.088,
      "eval_ag_news_token_set_f1": 0.3045864210040348,
      "eval_ag_news_token_set_f1_sem": 0.004338228889314767,
      "eval_ag_news_token_set_precision": 0.2740803309127095,
      "eval_ag_news_token_set_recall": 0.3651097856435764,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 26875
    },
    {
      "epoch": 5.16,
      "eval_anthropic_toxic_prompts_accuracy": 0.0950625,
      "eval_anthropic_toxic_prompts_bleu_score": 2.241674797912559,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.0798695328560625,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5853935480117798,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010477638051179426,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02147948148198014,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.7078540325164795,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.76,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.228,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.356,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.1,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 40.76622958884503,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.1654613509662934,
      "eval_anthropic_toxic_prompts_runtime": 11.2034,
      "eval_anthropic_toxic_prompts_samples_per_second": 44.629,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.089,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.295427675294669,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005867996618701394,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.32898140873572207,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3047311345214721,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 26875
    },
    {
      "epoch": 5.16,
      "eval_arxiv_accuracy": 0.30940625,
      "eval_arxiv_bleu_score": 3.5055841830513903,
      "eval_arxiv_bleu_score_sem": 0.11004740666993268,
      "eval_arxiv_emb_cos_sim": 0.6475298404693604,
      "eval_arxiv_emb_cos_sim_sem": 0.00764625030525452,
      "eval_arxiv_emb_top1_equal": 0.234375,
      "eval_arxiv_emb_top1_equal_sem": 0.03758909358128201,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.8715243339538574,
      "eval_arxiv_n_ngrams_match_1": 12.398,
      "eval_arxiv_n_ngrams_match_2": 2.192,
      "eval_arxiv_n_ngrams_match_3": 0.466,
      "eval_arxiv_num_pred_words": 40.926,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 48.015522015383816,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.2873588664592228,
      "eval_arxiv_runtime": 20.6966,
      "eval_arxiv_samples_per_second": 24.159,
      "eval_arxiv_steps_per_second": 0.048,
      "eval_arxiv_token_set_f1": 0.29173504762864994,
      "eval_arxiv_token_set_f1_sem": 0.00396961779486314,
      "eval_arxiv_token_set_precision": 0.23397454634483217,
      "eval_arxiv_token_set_recall": 0.40993250286747596,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 26875
    },
    {
      "epoch": 5.16,
      "eval_python_code_alpaca_accuracy": 0.13178125,
      "eval_python_code_alpaca_bleu_score": 3.2753286120583023,
      "eval_python_code_alpaca_bleu_score_sem": 0.12241459065912691,
      "eval_python_code_alpaca_emb_cos_sim": 0.5975684523582458,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009792527199200176,
      "eval_python_code_alpaca_emb_top1_equal": 0.0625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02147948148198014,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.4491353034973145,
      "eval_python_code_alpaca_n_ngrams_match_1": 7.064,
      "eval_python_code_alpaca_n_ngrams_match_2": 1.632,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.466,
      "eval_python_code_alpaca_num_pred_words": 40.74,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 31.473165802714046,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.24194486437238982,
      "eval_python_code_alpaca_runtime": 11.0812,
      "eval_python_code_alpaca_samples_per_second": 45.122,
      "eval_python_code_alpaca_steps_per_second": 0.09,
      "eval_python_code_alpaca_token_set_f1": 0.37393461236903536,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005537714648176672,
      "eval_python_code_alpaca_token_set_precision": 0.37571808813384855,
      "eval_python_code_alpaca_token_set_recall": 0.4066883117045732,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 26875
    },
    {
      "epoch": 5.16,
      "eval_wikibio_accuracy": 0.28740625,
      "eval_wikibio_bleu_score": 5.197628431999734,
      "eval_wikibio_bleu_score_sem": 0.19350876333660505,
      "eval_wikibio_emb_cos_sim": 0.678260087966919,
      "eval_wikibio_emb_cos_sim_sem": 0.010448878753971312,
      "eval_wikibio_emb_top1_equal": 0.0625,
      "eval_wikibio_emb_top1_equal_sem": 0.02147948148198014,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.2829084396362305,
      "eval_wikibio_n_ngrams_match_1": 9.4,
      "eval_wikibio_n_ngrams_match_2": 3.022,
      "eval_wikibio_n_ngrams_match_3": 1.024,
      "eval_wikibio_num_pred_words": 37.286,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 72.45085280451723,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.31443435401668496,
      "eval_wikibio_runtime": 9.7185,
      "eval_wikibio_samples_per_second": 51.448,
      "eval_wikibio_steps_per_second": 0.103,
      "eval_wikibio_token_set_f1": 0.29930695997135986,
      "eval_wikibio_token_set_f1_sem": 0.0054978596287111586,
      "eval_wikibio_token_set_precision": 0.3017414676425142,
      "eval_wikibio_token_set_recall": 0.3126191063497086,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 26875
    },
    {
      "epoch": 5.16,
      "eval_nq_accuracy": 0.4753125,
      "eval_nq_bleu_score": 8.761326230204498,
      "eval_nq_bleu_score_sem": 0.3924102676552468,
      "eval_nq_emb_cos_sim": 0.7646273374557495,
      "eval_nq_emb_cos_sim_sem": 0.009928361223594723,
      "eval_nq_emb_top1_equal": 0.234375,
      "eval_nq_emb_top1_equal_sem": 0.03758909358128201,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.5821120738983154,
      "eval_nq_n_ngrams_match_1": 20.428,
      "eval_nq_n_ngrams_match_2": 6.58,
      "eval_nq_n_ngrams_match_3": 2.728,
      "eval_nq_num_pred_words": 48.832,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 13.225040946699409,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.3861617532351824,
      "eval_nq_runtime": 10.9479,
      "eval_nq_samples_per_second": 45.671,
      "eval_nq_steps_per_second": 0.091,
      "eval_nq_token_set_f1": 0.4097102558091227,
      "eval_nq_token_set_f1_sem": 0.00494036544186437,
      "eval_nq_token_set_precision": 0.35675489572426566,
      "eval_nq_token_set_recall": 0.49628606792205227,
      "eval_nq_true_num_tokens": 64.0,
      "step": 26875
    },
    {
      "epoch": 5.16,
      "learning_rate": 0.001,
      "loss": 2.9374,
      "step": 26880
    },
    {
      "epoch": 5.16,
      "learning_rate": 0.001,
      "loss": 2.9462,
      "step": 26892
    },
    {
      "epoch": 5.17,
      "learning_rate": 0.001,
      "loss": 2.9399,
      "step": 26904
    },
    {
      "epoch": 5.17,
      "learning_rate": 0.001,
      "loss": 2.9268,
      "step": 26916
    },
    {
      "epoch": 5.17,
      "learning_rate": 0.001,
      "loss": 2.9466,
      "step": 26928
    },
    {
      "epoch": 5.17,
      "learning_rate": 0.001,
      "loss": 2.9406,
      "step": 26940
    },
    {
      "epoch": 5.18,
      "learning_rate": 0.001,
      "loss": 2.9349,
      "step": 26952
    },
    {
      "epoch": 5.18,
      "learning_rate": 0.001,
      "loss": 2.9415,
      "step": 26964
    },
    {
      "epoch": 5.18,
      "learning_rate": 0.001,
      "loss": 2.9368,
      "step": 26976
    },
    {
      "epoch": 5.18,
      "learning_rate": 0.001,
      "loss": 2.9476,
      "step": 26988
    },
    {
      "epoch": 5.18,
      "learning_rate": 0.001,
      "loss": 2.9419,
      "step": 27000
    },
    {
      "epoch": 5.19,
      "learning_rate": 0.001,
      "loss": 2.9713,
      "step": 27012
    },
    {
      "epoch": 5.19,
      "learning_rate": 0.001,
      "loss": 2.9454,
      "step": 27024
    },
    {
      "epoch": 5.19,
      "learning_rate": 0.001,
      "loss": 2.9415,
      "step": 27036
    },
    {
      "epoch": 5.19,
      "learning_rate": 0.001,
      "loss": 2.9344,
      "step": 27048
    },
    {
      "epoch": 5.2,
      "learning_rate": 0.001,
      "loss": 2.9366,
      "step": 27060
    },
    {
      "epoch": 5.2,
      "learning_rate": 0.001,
      "loss": 2.9298,
      "step": 27072
    },
    {
      "epoch": 5.2,
      "learning_rate": 0.001,
      "loss": 2.9431,
      "step": 27084
    },
    {
      "epoch": 5.2,
      "learning_rate": 0.001,
      "loss": 2.9377,
      "step": 27096
    },
    {
      "epoch": 5.21,
      "learning_rate": 0.001,
      "loss": 2.9373,
      "step": 27108
    },
    {
      "epoch": 5.21,
      "learning_rate": 0.001,
      "loss": 2.937,
      "step": 27120
    },
    {
      "epoch": 5.21,
      "learning_rate": 0.001,
      "loss": 2.926,
      "step": 27132
    },
    {
      "epoch": 5.21,
      "learning_rate": 0.001,
      "loss": 2.9423,
      "step": 27144
    },
    {
      "epoch": 5.21,
      "learning_rate": 0.001,
      "loss": 2.9383,
      "step": 27156
    },
    {
      "epoch": 5.22,
      "learning_rate": 0.001,
      "loss": 2.9441,
      "step": 27168
    },
    {
      "epoch": 5.22,
      "learning_rate": 0.001,
      "loss": 2.9487,
      "step": 27180
    },
    {
      "epoch": 5.22,
      "learning_rate": 0.001,
      "loss": 2.9374,
      "step": 27192
    },
    {
      "epoch": 5.22,
      "learning_rate": 0.001,
      "loss": 2.9409,
      "step": 27204
    },
    {
      "epoch": 5.23,
      "learning_rate": 0.001,
      "loss": 2.9464,
      "step": 27216
    },
    {
      "epoch": 5.23,
      "learning_rate": 0.001,
      "loss": 2.9389,
      "step": 27228
    },
    {
      "epoch": 5.23,
      "learning_rate": 0.001,
      "loss": 2.9467,
      "step": 27240
    },
    {
      "epoch": 5.23,
      "learning_rate": 0.001,
      "loss": 2.9469,
      "step": 27252
    },
    {
      "epoch": 5.24,
      "learning_rate": 0.001,
      "loss": 2.9504,
      "step": 27264
    },
    {
      "epoch": 5.24,
      "learning_rate": 0.001,
      "loss": 2.945,
      "step": 27276
    },
    {
      "epoch": 5.24,
      "learning_rate": 0.001,
      "loss": 2.9417,
      "step": 27288
    },
    {
      "epoch": 5.24,
      "learning_rate": 0.001,
      "loss": 2.9406,
      "step": 27300
    },
    {
      "epoch": 5.24,
      "learning_rate": 0.001,
      "loss": 2.9359,
      "step": 27312
    },
    {
      "epoch": 5.25,
      "learning_rate": 0.001,
      "loss": 2.9336,
      "step": 27324
    },
    {
      "epoch": 5.25,
      "learning_rate": 0.001,
      "loss": 2.9484,
      "step": 27336
    },
    {
      "epoch": 5.25,
      "learning_rate": 0.001,
      "loss": 2.9372,
      "step": 27348
    },
    {
      "epoch": 5.25,
      "learning_rate": 0.001,
      "loss": 2.9401,
      "step": 27360
    },
    {
      "epoch": 5.26,
      "learning_rate": 0.001,
      "loss": 2.9363,
      "step": 27372
    },
    {
      "epoch": 5.26,
      "learning_rate": 0.001,
      "loss": 2.9451,
      "step": 27384
    },
    {
      "epoch": 5.26,
      "learning_rate": 0.001,
      "loss": 2.939,
      "step": 27396
    },
    {
      "epoch": 5.26,
      "learning_rate": 0.001,
      "loss": 2.9267,
      "step": 27408
    },
    {
      "epoch": 5.26,
      "learning_rate": 0.001,
      "loss": 2.9365,
      "step": 27420
    },
    {
      "epoch": 5.27,
      "learning_rate": 0.001,
      "loss": 2.9334,
      "step": 27432
    },
    {
      "epoch": 5.27,
      "learning_rate": 0.001,
      "loss": 2.9472,
      "step": 27444
    },
    {
      "epoch": 5.27,
      "learning_rate": 0.001,
      "loss": 2.9322,
      "step": 27456
    },
    {
      "epoch": 5.27,
      "learning_rate": 0.001,
      "loss": 2.9478,
      "step": 27468
    },
    {
      "epoch": 5.28,
      "learning_rate": 0.001,
      "loss": 2.9494,
      "step": 27480
    },
    {
      "epoch": 5.28,
      "learning_rate": 0.001,
      "loss": 2.9527,
      "step": 27492
    },
    {
      "epoch": 5.28,
      "eval_ag_news_accuracy": 0.28053125,
      "eval_ag_news_bleu_score": 3.7578612855779228,
      "eval_ag_news_bleu_score_sem": 0.13059503646846266,
      "eval_ag_news_emb_cos_sim": 0.7259774804115295,
      "eval_ag_news_emb_cos_sim_sem": 0.009987638131847264,
      "eval_ag_news_emb_top1_equal": 0.1953125,
      "eval_ag_news_emb_top1_equal_sem": 0.035178457165496856,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 4.006907939910889,
      "eval_ag_news_n_ngrams_match_1": 11.738,
      "eval_ag_news_n_ngrams_match_2": 2.22,
      "eval_ag_news_n_ngrams_match_3": 0.59,
      "eval_ag_news_num_pred_words": 46.208,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 54.97661647953169,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.2849091899999439,
      "eval_ag_news_runtime": 9.5209,
      "eval_ag_news_samples_per_second": 52.516,
      "eval_ag_news_steps_per_second": 0.105,
      "eval_ag_news_token_set_f1": 0.3037773517598862,
      "eval_ag_news_token_set_f1_sem": 0.004323399708916598,
      "eval_ag_news_token_set_precision": 0.27500233176900535,
      "eval_ag_news_token_set_recall": 0.3585740470239835,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 27500
    },
    {
      "epoch": 5.28,
      "eval_anthropic_toxic_prompts_accuracy": 0.095875,
      "eval_anthropic_toxic_prompts_bleu_score": 2.3345396780786647,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.09228593706829934,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5953093767166138,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010548440146866171,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.7097744941711426,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.944,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.296,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.4,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.87,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 40.844594794176174,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.1721896052541996,
      "eval_anthropic_toxic_prompts_runtime": 9.4444,
      "eval_anthropic_toxic_prompts_samples_per_second": 52.942,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.106,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3030983429661299,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006125489978078192,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.3411348033054161,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.305753402274649,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 27500
    },
    {
      "epoch": 5.28,
      "eval_arxiv_accuracy": 0.3085625,
      "eval_arxiv_bleu_score": 3.296061585608656,
      "eval_arxiv_bleu_score_sem": 0.10416942048217424,
      "eval_arxiv_emb_cos_sim": 0.6408599615097046,
      "eval_arxiv_emb_cos_sim_sem": 0.008817862888139528,
      "eval_arxiv_emb_top1_equal": 0.2109375,
      "eval_arxiv_emb_top1_equal_sem": 0.03620184850179216,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.8604612350463867,
      "eval_arxiv_n_ngrams_match_1": 11.928,
      "eval_arxiv_n_ngrams_match_2": 2.114,
      "eval_arxiv_n_ngrams_match_3": 0.392,
      "eval_arxiv_num_pred_words": 38.774,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 47.48724910200668,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.279966132452408,
      "eval_arxiv_runtime": 10.0851,
      "eval_arxiv_samples_per_second": 49.578,
      "eval_arxiv_steps_per_second": 0.099,
      "eval_arxiv_token_set_f1": 0.28102966511696453,
      "eval_arxiv_token_set_f1_sem": 0.004155880176126974,
      "eval_arxiv_token_set_precision": 0.22268511062260807,
      "eval_arxiv_token_set_recall": 0.4107942109840077,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 27500
    },
    {
      "epoch": 5.28,
      "eval_python_code_alpaca_accuracy": 0.13365625,
      "eval_python_code_alpaca_bleu_score": 3.2589135948120718,
      "eval_python_code_alpaca_bleu_score_sem": 0.11047396042084712,
      "eval_python_code_alpaca_emb_cos_sim": 0.6059686541557312,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010050184713581939,
      "eval_python_code_alpaca_emb_top1_equal": 0.0625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02147948148198014,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.441763162612915,
      "eval_python_code_alpaca_n_ngrams_match_1": 7.272,
      "eval_python_code_alpaca_n_ngrams_match_2": 1.61,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.454,
      "eval_python_code_alpaca_num_pred_words": 39.792,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 31.24199435157494,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.25177699078380644,
      "eval_python_code_alpaca_runtime": 9.2088,
      "eval_python_code_alpaca_samples_per_second": 54.296,
      "eval_python_code_alpaca_steps_per_second": 0.109,
      "eval_python_code_alpaca_token_set_f1": 0.3723886277441521,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005290559946533025,
      "eval_python_code_alpaca_token_set_precision": 0.3768749211123057,
      "eval_python_code_alpaca_token_set_recall": 0.39575625410252074,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 27500
    },
    {
      "epoch": 5.28,
      "eval_wikibio_accuracy": 0.2888125,
      "eval_wikibio_bleu_score": 5.139496459021352,
      "eval_wikibio_bleu_score_sem": 0.1920110596782717,
      "eval_wikibio_emb_cos_sim": 0.6766979694366455,
      "eval_wikibio_emb_cos_sim_sem": 0.011211206583689569,
      "eval_wikibio_emb_top1_equal": 0.1015625,
      "eval_wikibio_emb_top1_equal_sem": 0.026804565886848545,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.278515815734863,
      "eval_wikibio_n_ngrams_match_1": 9.132,
      "eval_wikibio_n_ngrams_match_2": 2.938,
      "eval_wikibio_n_ngrams_match_3": 1.04,
      "eval_wikibio_num_pred_words": 36.41,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 72.1333014093377,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.30869399813293014,
      "eval_wikibio_runtime": 11.6577,
      "eval_wikibio_samples_per_second": 42.89,
      "eval_wikibio_steps_per_second": 0.086,
      "eval_wikibio_token_set_f1": 0.29523833946066613,
      "eval_wikibio_token_set_f1_sem": 0.005688691490455236,
      "eval_wikibio_token_set_precision": 0.29625627495775986,
      "eval_wikibio_token_set_recall": 0.31488517577506525,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 27500
    },
    {
      "epoch": 5.28,
      "eval_nq_accuracy": 0.47734375,
      "eval_nq_bleu_score": 8.98719360816301,
      "eval_nq_bleu_score_sem": 0.3950689751367678,
      "eval_nq_emb_cos_sim": 0.7688852548599243,
      "eval_nq_emb_cos_sim_sem": 0.009140463427720513,
      "eval_nq_emb_top1_equal": 0.265625,
      "eval_nq_emb_top1_equal_sem": 0.03919146934646163,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.576362371444702,
      "eval_nq_n_ngrams_match_1": 20.522,
      "eval_nq_n_ngrams_match_2": 6.728,
      "eval_nq_n_ngrams_match_3": 2.844,
      "eval_nq_num_pred_words": 48.832,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 13.149219081783706,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.3879656762261471,
      "eval_nq_runtime": 9.8247,
      "eval_nq_samples_per_second": 50.892,
      "eval_nq_steps_per_second": 0.102,
      "eval_nq_token_set_f1": 0.4120591427760328,
      "eval_nq_token_set_f1_sem": 0.004981493214156046,
      "eval_nq_token_set_precision": 0.3583314366492254,
      "eval_nq_token_set_recall": 0.49836094384856133,
      "eval_nq_true_num_tokens": 64.0,
      "step": 27500
    },
    {
      "epoch": 5.28,
      "learning_rate": 0.001,
      "loss": 2.9319,
      "step": 27504
    },
    {
      "epoch": 5.28,
      "learning_rate": 0.001,
      "loss": 2.9308,
      "step": 27516
    },
    {
      "epoch": 5.29,
      "learning_rate": 0.001,
      "loss": 2.9335,
      "step": 27528
    },
    {
      "epoch": 5.29,
      "learning_rate": 0.001,
      "loss": 2.9443,
      "step": 27540
    },
    {
      "epoch": 5.29,
      "learning_rate": 0.001,
      "loss": 2.9353,
      "step": 27552
    },
    {
      "epoch": 5.29,
      "learning_rate": 0.001,
      "loss": 2.9288,
      "step": 27564
    },
    {
      "epoch": 5.29,
      "learning_rate": 0.001,
      "loss": 2.9284,
      "step": 27576
    },
    {
      "epoch": 5.3,
      "learning_rate": 0.001,
      "loss": 2.9443,
      "step": 27588
    },
    {
      "epoch": 5.3,
      "learning_rate": 0.001,
      "loss": 2.9342,
      "step": 27600
    },
    {
      "epoch": 5.3,
      "learning_rate": 0.001,
      "loss": 2.9305,
      "step": 27612
    },
    {
      "epoch": 5.3,
      "learning_rate": 0.001,
      "loss": 2.9397,
      "step": 27624
    },
    {
      "epoch": 5.31,
      "learning_rate": 0.001,
      "loss": 2.9419,
      "step": 27636
    },
    {
      "epoch": 5.31,
      "learning_rate": 0.001,
      "loss": 2.9427,
      "step": 27648
    },
    {
      "epoch": 5.31,
      "learning_rate": 0.001,
      "loss": 2.9476,
      "step": 27660
    },
    {
      "epoch": 5.31,
      "learning_rate": 0.001,
      "loss": 2.9398,
      "step": 27672
    },
    {
      "epoch": 5.32,
      "learning_rate": 0.001,
      "loss": 2.941,
      "step": 27684
    },
    {
      "epoch": 5.32,
      "learning_rate": 0.001,
      "loss": 2.9441,
      "step": 27696
    },
    {
      "epoch": 5.32,
      "learning_rate": 0.001,
      "loss": 2.9441,
      "step": 27708
    },
    {
      "epoch": 5.32,
      "learning_rate": 0.001,
      "loss": 2.943,
      "step": 27720
    },
    {
      "epoch": 5.32,
      "learning_rate": 0.001,
      "loss": 2.9356,
      "step": 27732
    },
    {
      "epoch": 5.33,
      "learning_rate": 0.001,
      "loss": 2.937,
      "step": 27744
    },
    {
      "epoch": 5.33,
      "learning_rate": 0.001,
      "loss": 2.931,
      "step": 27756
    },
    {
      "epoch": 5.33,
      "learning_rate": 0.001,
      "loss": 2.9459,
      "step": 27768
    },
    {
      "epoch": 5.33,
      "learning_rate": 0.001,
      "loss": 2.9297,
      "step": 27780
    },
    {
      "epoch": 5.34,
      "learning_rate": 0.001,
      "loss": 2.9302,
      "step": 27792
    },
    {
      "epoch": 5.34,
      "learning_rate": 0.001,
      "loss": 2.931,
      "step": 27804
    },
    {
      "epoch": 5.34,
      "learning_rate": 0.001,
      "loss": 2.9434,
      "step": 27816
    },
    {
      "epoch": 5.34,
      "learning_rate": 0.001,
      "loss": 2.9378,
      "step": 27828
    },
    {
      "epoch": 5.35,
      "learning_rate": 0.001,
      "loss": 2.9262,
      "step": 27840
    },
    {
      "epoch": 5.35,
      "learning_rate": 0.001,
      "loss": 2.9415,
      "step": 27852
    },
    {
      "epoch": 5.35,
      "learning_rate": 0.001,
      "loss": 2.9476,
      "step": 27864
    },
    {
      "epoch": 5.35,
      "learning_rate": 0.001,
      "loss": 2.94,
      "step": 27876
    },
    {
      "epoch": 5.35,
      "learning_rate": 0.001,
      "loss": 2.9443,
      "step": 27888
    },
    {
      "epoch": 5.36,
      "learning_rate": 0.001,
      "loss": 2.9338,
      "step": 27900
    },
    {
      "epoch": 5.36,
      "learning_rate": 0.001,
      "loss": 2.9523,
      "step": 27912
    },
    {
      "epoch": 5.36,
      "learning_rate": 0.001,
      "loss": 2.9416,
      "step": 27924
    },
    {
      "epoch": 5.36,
      "learning_rate": 0.001,
      "loss": 2.9283,
      "step": 27936
    },
    {
      "epoch": 5.37,
      "learning_rate": 0.001,
      "loss": 2.9286,
      "step": 27948
    },
    {
      "epoch": 5.37,
      "learning_rate": 0.001,
      "loss": 2.9437,
      "step": 27960
    },
    {
      "epoch": 5.37,
      "learning_rate": 0.001,
      "loss": 2.9341,
      "step": 27972
    },
    {
      "epoch": 5.37,
      "learning_rate": 0.001,
      "loss": 2.9415,
      "step": 27984
    },
    {
      "epoch": 5.38,
      "learning_rate": 0.001,
      "loss": 2.9261,
      "step": 27996
    },
    {
      "epoch": 5.38,
      "learning_rate": 0.001,
      "loss": 2.9347,
      "step": 28008
    },
    {
      "epoch": 5.38,
      "learning_rate": 0.001,
      "loss": 2.9229,
      "step": 28020
    },
    {
      "epoch": 5.38,
      "learning_rate": 0.001,
      "loss": 2.9346,
      "step": 28032
    },
    {
      "epoch": 5.38,
      "learning_rate": 0.001,
      "loss": 2.9342,
      "step": 28044
    },
    {
      "epoch": 5.39,
      "learning_rate": 0.001,
      "loss": 2.931,
      "step": 28056
    },
    {
      "epoch": 5.39,
      "learning_rate": 0.001,
      "loss": 2.9315,
      "step": 28068
    },
    {
      "epoch": 5.39,
      "learning_rate": 0.001,
      "loss": 2.9283,
      "step": 28080
    },
    {
      "epoch": 5.39,
      "learning_rate": 0.001,
      "loss": 2.9333,
      "step": 28092
    },
    {
      "epoch": 5.4,
      "learning_rate": 0.001,
      "loss": 2.9353,
      "step": 28104
    },
    {
      "epoch": 5.4,
      "learning_rate": 0.001,
      "loss": 2.9419,
      "step": 28116
    },
    {
      "epoch": 5.4,
      "eval_ag_news_accuracy": 0.28425,
      "eval_ag_news_bleu_score": 3.8629165277375987,
      "eval_ag_news_bleu_score_sem": 0.135544131175173,
      "eval_ag_news_emb_cos_sim": 0.7188982963562012,
      "eval_ag_news_emb_cos_sim_sem": 0.010305849472282545,
      "eval_ag_news_emb_top1_equal": 0.1796875,
      "eval_ag_news_emb_top1_equal_sem": 0.034068008879424266,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.989623546600342,
      "eval_ag_news_n_ngrams_match_1": 11.71,
      "eval_ag_news_n_ngrams_match_2": 2.3,
      "eval_ag_news_n_ngrams_match_3": 0.602,
      "eval_ag_news_num_pred_words": 45.456,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 54.03454404622978,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.2857413940456188,
      "eval_ag_news_runtime": 9.5872,
      "eval_ag_news_samples_per_second": 52.153,
      "eval_ag_news_steps_per_second": 0.104,
      "eval_ag_news_token_set_f1": 0.30337213456634515,
      "eval_ag_news_token_set_f1_sem": 0.004370975610579565,
      "eval_ag_news_token_set_precision": 0.2725525747100057,
      "eval_ag_news_token_set_recall": 0.3613898275473111,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 28125
    },
    {
      "epoch": 5.4,
      "eval_anthropic_toxic_prompts_accuracy": 0.0959375,
      "eval_anthropic_toxic_prompts_bleu_score": 2.272102842149566,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.08801381308167477,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.594087541103363,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.01074972098300855,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.046875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01875615101164758,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.6989173889160156,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.822,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.218,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.368,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.744,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 40.403539354564465,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.16950995038576921,
      "eval_anthropic_toxic_prompts_runtime": 9.5821,
      "eval_anthropic_toxic_prompts_samples_per_second": 52.181,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.104,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.29779781201804956,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005960438905574659,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.33424624499404704,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3051423563684294,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 28125
    },
    {
      "epoch": 5.4,
      "eval_arxiv_accuracy": 0.30684375,
      "eval_arxiv_bleu_score": 3.457059980301291,
      "eval_arxiv_bleu_score_sem": 0.11260979517869693,
      "eval_arxiv_emb_cos_sim": 0.6441875100135803,
      "eval_arxiv_emb_cos_sim_sem": 0.007731428427550528,
      "eval_arxiv_emb_top1_equal": 0.1796875,
      "eval_arxiv_emb_top1_equal_sem": 0.034068008879424266,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.8626210689544678,
      "eval_arxiv_n_ngrams_match_1": 12.296,
      "eval_arxiv_n_ngrams_match_2": 2.186,
      "eval_arxiv_n_ngrams_match_3": 0.424,
      "eval_arxiv_num_pred_words": 39.466,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 47.58992451382261,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.28924850025282833,
      "eval_arxiv_runtime": 10.2322,
      "eval_arxiv_samples_per_second": 48.865,
      "eval_arxiv_steps_per_second": 0.098,
      "eval_arxiv_token_set_f1": 0.29142292421585997,
      "eval_arxiv_token_set_f1_sem": 0.004075060979354117,
      "eval_arxiv_token_set_precision": 0.23226119987236554,
      "eval_arxiv_token_set_recall": 0.41309108685453116,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 28125
    },
    {
      "epoch": 5.4,
      "eval_python_code_alpaca_accuracy": 0.1331875,
      "eval_python_code_alpaca_bleu_score": 3.276823387450839,
      "eval_python_code_alpaca_bleu_score_sem": 0.1289571953592232,
      "eval_python_code_alpaca_emb_cos_sim": 0.5843392610549927,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010902956387482033,
      "eval_python_code_alpaca_emb_top1_equal": 0.046875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.01875615101164758,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.4225106239318848,
      "eval_python_code_alpaca_n_ngrams_match_1": 6.978,
      "eval_python_code_alpaca_n_ngrams_match_2": 1.594,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.48,
      "eval_python_code_alpaca_num_pred_words": 39.45,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 30.64625974972903,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.2434943104821939,
      "eval_python_code_alpaca_runtime": 10.6502,
      "eval_python_code_alpaca_samples_per_second": 46.947,
      "eval_python_code_alpaca_steps_per_second": 0.094,
      "eval_python_code_alpaca_token_set_f1": 0.36531147315177526,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005839595746561747,
      "eval_python_code_alpaca_token_set_precision": 0.36269220036771904,
      "eval_python_code_alpaca_token_set_recall": 0.40091619267996637,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 28125
    },
    {
      "epoch": 5.4,
      "eval_wikibio_accuracy": 0.28459375,
      "eval_wikibio_bleu_score": 5.13638619529026,
      "eval_wikibio_bleu_score_sem": 0.18125277923423516,
      "eval_wikibio_emb_cos_sim": 0.697567880153656,
      "eval_wikibio_emb_cos_sim_sem": 0.010435320639640963,
      "eval_wikibio_emb_top1_equal": 0.15625,
      "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.319770812988281,
      "eval_wikibio_n_ngrams_match_1": 9.674,
      "eval_wikibio_n_ngrams_match_2": 3.024,
      "eval_wikibio_n_ngrams_match_3": 1.014,
      "eval_wikibio_num_pred_words": 38.196,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 75.17139800954351,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.323949620156584,
      "eval_wikibio_runtime": 9.7845,
      "eval_wikibio_samples_per_second": 51.101,
      "eval_wikibio_steps_per_second": 0.102,
      "eval_wikibio_token_set_f1": 0.3041517611296937,
      "eval_wikibio_token_set_f1_sem": 0.0048940121649151186,
      "eval_wikibio_token_set_precision": 0.3106342098196182,
      "eval_wikibio_token_set_recall": 0.31110706844695096,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 28125
    },
    {
      "epoch": 5.4,
      "eval_nq_accuracy": 0.477125,
      "eval_nq_bleu_score": 8.996300433262643,
      "eval_nq_bleu_score_sem": 0.39888254366499315,
      "eval_nq_emb_cos_sim": 0.763920783996582,
      "eval_nq_emb_cos_sim_sem": 0.009703978646112311,
      "eval_nq_emb_top1_equal": 0.2109375,
      "eval_nq_emb_top1_equal_sem": 0.03620184850179216,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.5666260719299316,
      "eval_nq_n_ngrams_match_1": 20.432,
      "eval_nq_n_ngrams_match_2": 6.794,
      "eval_nq_n_ngrams_match_3": 2.856,
      "eval_nq_num_pred_words": 48.59,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 13.02181557222095,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.3883234856283586,
      "eval_nq_runtime": 11.6579,
      "eval_nq_samples_per_second": 42.889,
      "eval_nq_steps_per_second": 0.086,
      "eval_nq_token_set_f1": 0.40987566302541584,
      "eval_nq_token_set_f1_sem": 0.005033940333738851,
      "eval_nq_token_set_precision": 0.3554135430985488,
      "eval_nq_token_set_recall": 0.5004185330412072,
      "eval_nq_true_num_tokens": 64.0,
      "step": 28125
    },
    {
      "epoch": 5.4,
      "learning_rate": 0.001,
      "loss": 2.9311,
      "step": 28128
    },
    {
      "epoch": 5.4,
      "learning_rate": 0.001,
      "loss": 2.9289,
      "step": 28140
    },
    {
      "epoch": 5.41,
      "learning_rate": 0.001,
      "loss": 2.9335,
      "step": 28152
    },
    {
      "epoch": 5.41,
      "learning_rate": 0.001,
      "loss": 2.9314,
      "step": 28164
    },
    {
      "epoch": 5.41,
      "learning_rate": 0.001,
      "loss": 2.9216,
      "step": 28176
    },
    {
      "epoch": 5.41,
      "learning_rate": 0.001,
      "loss": 2.9297,
      "step": 28188
    },
    {
      "epoch": 5.41,
      "learning_rate": 0.001,
      "loss": 2.9393,
      "step": 28200
    },
    {
      "epoch": 5.42,
      "learning_rate": 0.001,
      "loss": 2.9256,
      "step": 28212
    },
    {
      "epoch": 5.42,
      "learning_rate": 0.001,
      "loss": 2.9268,
      "step": 28224
    },
    {
      "epoch": 5.42,
      "learning_rate": 0.001,
      "loss": 2.9334,
      "step": 28236
    },
    {
      "epoch": 5.42,
      "learning_rate": 0.001,
      "loss": 2.9322,
      "step": 28248
    },
    {
      "epoch": 5.43,
      "learning_rate": 0.001,
      "loss": 2.9424,
      "step": 28260
    },
    {
      "epoch": 5.43,
      "learning_rate": 0.001,
      "loss": 2.9244,
      "step": 28272
    },
    {
      "epoch": 5.43,
      "learning_rate": 0.001,
      "loss": 2.9343,
      "step": 28284
    },
    {
      "epoch": 5.43,
      "learning_rate": 0.001,
      "loss": 2.9333,
      "step": 28296
    },
    {
      "epoch": 5.44,
      "learning_rate": 0.001,
      "loss": 2.9402,
      "step": 28308
    },
    {
      "epoch": 5.44,
      "learning_rate": 0.001,
      "loss": 2.931,
      "step": 28320
    },
    {
      "epoch": 5.44,
      "learning_rate": 0.001,
      "loss": 2.931,
      "step": 28332
    },
    {
      "epoch": 5.44,
      "learning_rate": 0.001,
      "loss": 2.929,
      "step": 28344
    },
    {
      "epoch": 5.44,
      "learning_rate": 0.001,
      "loss": 2.9327,
      "step": 28356
    },
    {
      "epoch": 5.45,
      "learning_rate": 0.001,
      "loss": 2.9416,
      "step": 28368
    },
    {
      "epoch": 5.45,
      "learning_rate": 0.001,
      "loss": 2.9364,
      "step": 28380
    },
    {
      "epoch": 5.45,
      "learning_rate": 0.001,
      "loss": 2.922,
      "step": 28392
    },
    {
      "epoch": 5.45,
      "learning_rate": 0.001,
      "loss": 2.9244,
      "step": 28404
    },
    {
      "epoch": 5.46,
      "learning_rate": 0.001,
      "loss": 2.9267,
      "step": 28416
    },
    {
      "epoch": 5.46,
      "learning_rate": 0.001,
      "loss": 2.939,
      "step": 28428
    },
    {
      "epoch": 5.46,
      "learning_rate": 0.001,
      "loss": 2.9327,
      "step": 28440
    },
    {
      "epoch": 5.46,
      "learning_rate": 0.001,
      "loss": 2.9235,
      "step": 28452
    },
    {
      "epoch": 5.47,
      "learning_rate": 0.001,
      "loss": 2.9168,
      "step": 28464
    },
    {
      "epoch": 5.47,
      "learning_rate": 0.001,
      "loss": 2.9222,
      "step": 28476
    },
    {
      "epoch": 5.47,
      "learning_rate": 0.001,
      "loss": 2.9095,
      "step": 28488
    },
    {
      "epoch": 5.47,
      "learning_rate": 0.001,
      "loss": 2.923,
      "step": 28500
    },
    {
      "epoch": 5.47,
      "learning_rate": 0.001,
      "loss": 2.9406,
      "step": 28512
    },
    {
      "epoch": 5.48,
      "learning_rate": 0.001,
      "loss": 2.9307,
      "step": 28524
    },
    {
      "epoch": 5.48,
      "learning_rate": 0.001,
      "loss": 2.9327,
      "step": 28536
    },
    {
      "epoch": 5.48,
      "learning_rate": 0.001,
      "loss": 2.9293,
      "step": 28548
    },
    {
      "epoch": 5.48,
      "learning_rate": 0.001,
      "loss": 2.927,
      "step": 28560
    },
    {
      "epoch": 5.49,
      "learning_rate": 0.001,
      "loss": 2.9261,
      "step": 28572
    },
    {
      "epoch": 5.49,
      "learning_rate": 0.001,
      "loss": 2.9292,
      "step": 28584
    },
    {
      "epoch": 5.49,
      "learning_rate": 0.001,
      "loss": 2.9231,
      "step": 28596
    },
    {
      "epoch": 5.49,
      "learning_rate": 0.001,
      "loss": 2.9258,
      "step": 28608
    },
    {
      "epoch": 5.5,
      "learning_rate": 0.001,
      "loss": 2.9275,
      "step": 28620
    },
    {
      "epoch": 5.5,
      "learning_rate": 0.001,
      "loss": 2.9292,
      "step": 28632
    },
    {
      "epoch": 5.5,
      "learning_rate": 0.001,
      "loss": 2.9178,
      "step": 28644
    },
    {
      "epoch": 5.5,
      "learning_rate": 0.001,
      "loss": 2.9305,
      "step": 28656
    },
    {
      "epoch": 5.5,
      "learning_rate": 0.001,
      "loss": 2.9237,
      "step": 28668
    },
    {
      "epoch": 5.51,
      "learning_rate": 0.001,
      "loss": 2.9202,
      "step": 28680
    },
    {
      "epoch": 5.51,
      "learning_rate": 0.001,
      "loss": 2.9267,
      "step": 28692
    },
    {
      "epoch": 5.51,
      "learning_rate": 0.001,
      "loss": 2.9315,
      "step": 28704
    },
    {
      "epoch": 5.51,
      "learning_rate": 0.001,
      "loss": 2.9206,
      "step": 28716
    },
    {
      "epoch": 5.52,
      "learning_rate": 0.001,
      "loss": 2.9317,
      "step": 28728
    },
    {
      "epoch": 5.52,
      "learning_rate": 0.001,
      "loss": 2.9168,
      "step": 28740
    },
    {
      "epoch": 5.52,
      "eval_ag_news_accuracy": 0.28525,
      "eval_ag_news_bleu_score": 3.683319881216632,
      "eval_ag_news_bleu_score_sem": 0.11641768842346403,
      "eval_ag_news_emb_cos_sim": 0.7261709570884705,
      "eval_ag_news_emb_cos_sim_sem": 0.009650001045515508,
      "eval_ag_news_emb_top1_equal": 0.1796875,
      "eval_ag_news_emb_top1_equal_sem": 0.034068008879424266,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.9693117141723633,
      "eval_ag_news_n_ngrams_match_1": 11.778,
      "eval_ag_news_n_ngrams_match_2": 2.164,
      "eval_ag_news_n_ngrams_match_3": 0.538,
      "eval_ag_news_num_pred_words": 46.07,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 52.94807488551273,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.28490252400450233,
      "eval_ag_news_runtime": 10.2211,
      "eval_ag_news_samples_per_second": 48.919,
      "eval_ag_news_steps_per_second": 0.098,
      "eval_ag_news_token_set_f1": 0.3019860572832703,
      "eval_ag_news_token_set_f1_sem": 0.00422515325440724,
      "eval_ag_news_token_set_precision": 0.27553396990373064,
      "eval_ag_news_token_set_recall": 0.3536215365054163,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 28750
    },
    {
      "epoch": 5.52,
      "eval_anthropic_toxic_prompts_accuracy": 0.09525,
      "eval_anthropic_toxic_prompts_bleu_score": 2.160806075862903,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.07906417799213382,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5775938034057617,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.01143050980738191,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.673187017440796,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.756,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.144,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.326,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.534,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 39.3772019231811,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.16762825525763436,
      "eval_anthropic_toxic_prompts_runtime": 9.6452,
      "eval_anthropic_toxic_prompts_samples_per_second": 51.84,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.104,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.2896135354073873,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005875932793963504,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.32533301138436854,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.2946068753251194,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 28750
    },
    {
      "epoch": 5.52,
      "eval_arxiv_accuracy": 0.3083125,
      "eval_arxiv_bleu_score": 3.268154741507437,
      "eval_arxiv_bleu_score_sem": 0.09972210165521798,
      "eval_arxiv_emb_cos_sim": 0.6481302976608276,
      "eval_arxiv_emb_cos_sim_sem": 0.007643326367639076,
      "eval_arxiv_emb_top1_equal": 0.1640625,
      "eval_arxiv_emb_top1_equal_sem": 0.03286167651298939,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.8440616130828857,
      "eval_arxiv_n_ngrams_match_1": 11.992,
      "eval_arxiv_n_ngrams_match_2": 2.088,
      "eval_arxiv_n_ngrams_match_3": 0.396,
      "eval_arxiv_num_pred_words": 39.278,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 46.714827193965235,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.281924942172997,
      "eval_arxiv_runtime": 9.6802,
      "eval_arxiv_samples_per_second": 51.652,
      "eval_arxiv_steps_per_second": 0.103,
      "eval_arxiv_token_set_f1": 0.286338715088182,
      "eval_arxiv_token_set_f1_sem": 0.003952879797160271,
      "eval_arxiv_token_set_precision": 0.22698345522828195,
      "eval_arxiv_token_set_recall": 0.4138662418310259,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 28750
    },
    {
      "epoch": 5.52,
      "eval_python_code_alpaca_accuracy": 0.132,
      "eval_python_code_alpaca_bleu_score": 3.0746062585888287,
      "eval_python_code_alpaca_bleu_score_sem": 0.09891558940506559,
      "eval_python_code_alpaca_emb_cos_sim": 0.5892761945724487,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009927602580321528,
      "eval_python_code_alpaca_emb_top1_equal": 0.0546875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.020175758285348722,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.415736198425293,
      "eval_python_code_alpaca_n_ngrams_match_1": 6.894,
      "eval_python_code_alpaca_n_ngrams_match_2": 1.476,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.412,
      "eval_python_code_alpaca_num_pred_words": 39.508,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 30.439350582671775,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.23782295399497383,
      "eval_python_code_alpaca_runtime": 10.2993,
      "eval_python_code_alpaca_samples_per_second": 48.547,
      "eval_python_code_alpaca_steps_per_second": 0.097,
      "eval_python_code_alpaca_token_set_f1": 0.36354813563273164,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005754133766519511,
      "eval_python_code_alpaca_token_set_precision": 0.357568283734813,
      "eval_python_code_alpaca_token_set_recall": 0.4071109379454599,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 28750
    },
    {
      "epoch": 5.52,
      "eval_wikibio_accuracy": 0.2851875,
      "eval_wikibio_bleu_score": 5.14171844989698,
      "eval_wikibio_bleu_score_sem": 0.1790106479977739,
      "eval_wikibio_emb_cos_sim": 0.6940916776657104,
      "eval_wikibio_emb_cos_sim_sem": 0.009865538711711204,
      "eval_wikibio_emb_top1_equal": 0.125,
      "eval_wikibio_emb_top1_equal_sem": 0.02934655822437397,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.292008876800537,
      "eval_wikibio_n_ngrams_match_1": 9.63,
      "eval_wikibio_n_ngrams_match_2": 2.984,
      "eval_wikibio_n_ngrams_match_3": 1.006,
      "eval_wikibio_num_pred_words": 37.524,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 73.11319647528754,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3223504903817959,
      "eval_wikibio_runtime": 9.6048,
      "eval_wikibio_samples_per_second": 52.057,
      "eval_wikibio_steps_per_second": 0.104,
      "eval_wikibio_token_set_f1": 0.304034879370388,
      "eval_wikibio_token_set_f1_sem": 0.005097293072236239,
      "eval_wikibio_token_set_precision": 0.3097706740080534,
      "eval_wikibio_token_set_recall": 0.3143658159191039,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 28750
    },
    {
      "epoch": 5.52,
      "eval_nq_accuracy": 0.47884375,
      "eval_nq_bleu_score": 9.092205620452065,
      "eval_nq_bleu_score_sem": 0.3947123207494208,
      "eval_nq_emb_cos_sim": 0.7664515972137451,
      "eval_nq_emb_cos_sim_sem": 0.00981405831188867,
      "eval_nq_emb_top1_equal": 0.203125,
      "eval_nq_emb_top1_equal_sem": 0.03570055125142555,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.5566017627716064,
      "eval_nq_n_ngrams_match_1": 20.568,
      "eval_nq_n_ngrams_match_2": 6.798,
      "eval_nq_n_ngrams_match_3": 2.828,
      "eval_nq_num_pred_words": 48.934,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 12.891932946541385,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.3882656244447178,
      "eval_nq_runtime": 10.0125,
      "eval_nq_samples_per_second": 49.938,
      "eval_nq_steps_per_second": 0.1,
      "eval_nq_token_set_f1": 0.41441957291226117,
      "eval_nq_token_set_f1_sem": 0.004839709257070892,
      "eval_nq_token_set_precision": 0.3624421831091439,
      "eval_nq_token_set_recall": 0.4976973300610982,
      "eval_nq_true_num_tokens": 64.0,
      "step": 28750
    },
    {
      "epoch": 5.52,
      "learning_rate": 0.001,
      "loss": 2.9225,
      "step": 28752
    },
    {
      "epoch": 5.52,
      "learning_rate": 0.001,
      "loss": 2.9271,
      "step": 28764
    },
    {
      "epoch": 5.53,
      "learning_rate": 0.001,
      "loss": 2.9154,
      "step": 28776
    },
    {
      "epoch": 5.53,
      "learning_rate": 0.001,
      "loss": 2.925,
      "step": 28788
    },
    {
      "epoch": 5.53,
      "learning_rate": 0.001,
      "loss": 2.924,
      "step": 28800
    },
    {
      "epoch": 5.53,
      "learning_rate": 0.001,
      "loss": 2.9294,
      "step": 28812
    },
    {
      "epoch": 5.53,
      "learning_rate": 0.001,
      "loss": 2.9207,
      "step": 28824
    },
    {
      "epoch": 5.54,
      "learning_rate": 0.001,
      "loss": 2.9265,
      "step": 28836
    },
    {
      "epoch": 5.54,
      "learning_rate": 0.001,
      "loss": 2.93,
      "step": 28848
    },
    {
      "epoch": 5.54,
      "learning_rate": 0.001,
      "loss": 2.9272,
      "step": 28860
    },
    {
      "epoch": 5.54,
      "learning_rate": 0.001,
      "loss": 2.9112,
      "step": 28872
    },
    {
      "epoch": 5.55,
      "learning_rate": 0.001,
      "loss": 2.9346,
      "step": 28884
    },
    {
      "epoch": 5.55,
      "learning_rate": 0.001,
      "loss": 2.9242,
      "step": 28896
    },
    {
      "epoch": 5.55,
      "learning_rate": 0.001,
      "loss": 2.9212,
      "step": 28908
    },
    {
      "epoch": 5.55,
      "learning_rate": 0.001,
      "loss": 2.9067,
      "step": 28920
    },
    {
      "epoch": 5.56,
      "learning_rate": 0.001,
      "loss": 2.9176,
      "step": 28932
    },
    {
      "epoch": 5.56,
      "learning_rate": 0.001,
      "loss": 2.9117,
      "step": 28944
    },
    {
      "epoch": 5.56,
      "learning_rate": 0.001,
      "loss": 2.9186,
      "step": 28956
    },
    {
      "epoch": 5.56,
      "learning_rate": 0.001,
      "loss": 2.9141,
      "step": 28968
    },
    {
      "epoch": 5.56,
      "learning_rate": 0.001,
      "loss": 2.9255,
      "step": 28980
    },
    {
      "epoch": 5.57,
      "learning_rate": 0.001,
      "loss": 2.9271,
      "step": 28992
    },
    {
      "epoch": 5.57,
      "learning_rate": 0.001,
      "loss": 2.9274,
      "step": 29004
    },
    {
      "epoch": 5.57,
      "learning_rate": 0.001,
      "loss": 2.9314,
      "step": 29016
    },
    {
      "epoch": 5.57,
      "learning_rate": 0.001,
      "loss": 2.9287,
      "step": 29028
    },
    {
      "epoch": 5.58,
      "learning_rate": 0.001,
      "loss": 2.9224,
      "step": 29040
    },
    {
      "epoch": 5.58,
      "learning_rate": 0.001,
      "loss": 2.9253,
      "step": 29052
    },
    {
      "epoch": 5.58,
      "learning_rate": 0.001,
      "loss": 2.9302,
      "step": 29064
    },
    {
      "epoch": 5.58,
      "learning_rate": 0.001,
      "loss": 2.9212,
      "step": 29076
    },
    {
      "epoch": 5.59,
      "learning_rate": 0.001,
      "loss": 2.9161,
      "step": 29088
    },
    {
      "epoch": 5.59,
      "learning_rate": 0.001,
      "loss": 2.9241,
      "step": 29100
    },
    {
      "epoch": 5.59,
      "learning_rate": 0.001,
      "loss": 2.9111,
      "step": 29112
    },
    {
      "epoch": 5.59,
      "learning_rate": 0.001,
      "loss": 2.9269,
      "step": 29124
    },
    {
      "epoch": 5.59,
      "learning_rate": 0.001,
      "loss": 2.9249,
      "step": 29136
    },
    {
      "epoch": 5.6,
      "learning_rate": 0.001,
      "loss": 2.9227,
      "step": 29148
    },
    {
      "epoch": 5.6,
      "learning_rate": 0.001,
      "loss": 2.9246,
      "step": 29160
    },
    {
      "epoch": 5.6,
      "learning_rate": 0.001,
      "loss": 2.9241,
      "step": 29172
    },
    {
      "epoch": 5.6,
      "learning_rate": 0.001,
      "loss": 2.9187,
      "step": 29184
    },
    {
      "epoch": 5.61,
      "learning_rate": 0.001,
      "loss": 2.9121,
      "step": 29196
    },
    {
      "epoch": 5.61,
      "learning_rate": 0.001,
      "loss": 2.9212,
      "step": 29208
    },
    {
      "epoch": 5.61,
      "learning_rate": 0.001,
      "loss": 2.9234,
      "step": 29220
    },
    {
      "epoch": 5.61,
      "learning_rate": 0.001,
      "loss": 2.9263,
      "step": 29232
    },
    {
      "epoch": 5.62,
      "learning_rate": 0.001,
      "loss": 2.9193,
      "step": 29244
    },
    {
      "epoch": 5.62,
      "learning_rate": 0.001,
      "loss": 2.9246,
      "step": 29256
    },
    {
      "epoch": 5.62,
      "learning_rate": 0.001,
      "loss": 2.9231,
      "step": 29268
    },
    {
      "epoch": 5.62,
      "learning_rate": 0.001,
      "loss": 2.9165,
      "step": 29280
    },
    {
      "epoch": 5.62,
      "learning_rate": 0.001,
      "loss": 2.9112,
      "step": 29292
    },
    {
      "epoch": 5.63,
      "learning_rate": 0.001,
      "loss": 2.913,
      "step": 29304
    },
    {
      "epoch": 5.63,
      "learning_rate": 0.001,
      "loss": 2.9225,
      "step": 29316
    },
    {
      "epoch": 5.63,
      "learning_rate": 0.001,
      "loss": 2.9135,
      "step": 29328
    },
    {
      "epoch": 5.63,
      "learning_rate": 0.001,
      "loss": 2.9195,
      "step": 29340
    },
    {
      "epoch": 5.64,
      "learning_rate": 0.001,
      "loss": 2.919,
      "step": 29352
    },
    {
      "epoch": 5.64,
      "learning_rate": 0.001,
      "loss": 2.9097,
      "step": 29364
    },
    {
      "epoch": 5.64,
      "eval_ag_news_accuracy": 0.2815625,
      "eval_ag_news_bleu_score": 3.9579431453791933,
      "eval_ag_news_bleu_score_sem": 0.13389890050609876,
      "eval_ag_news_emb_cos_sim": 0.7305126190185547,
      "eval_ag_news_emb_cos_sim_sem": 0.010065528142073428,
      "eval_ag_news_emb_top1_equal": 0.1796875,
      "eval_ag_news_emb_top1_equal_sem": 0.034068008879424266,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.980177402496338,
      "eval_ag_news_n_ngrams_match_1": 11.824,
      "eval_ag_news_n_ngrams_match_2": 2.32,
      "eval_ag_news_n_ngrams_match_3": 0.654,
      "eval_ag_news_num_pred_words": 46.228,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 53.52652912514409,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.2884148738540912,
      "eval_ag_news_runtime": 10.1631,
      "eval_ag_news_samples_per_second": 49.197,
      "eval_ag_news_steps_per_second": 0.098,
      "eval_ag_news_token_set_f1": 0.3045808051396171,
      "eval_ag_news_token_set_f1_sem": 0.004275929817397152,
      "eval_ag_news_token_set_precision": 0.2761385247378778,
      "eval_ag_news_token_set_recall": 0.3577790649932207,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 29375
    },
    {
      "epoch": 5.64,
      "eval_anthropic_toxic_prompts_accuracy": 0.0966875,
      "eval_anthropic_toxic_prompts_bleu_score": 2.3350248656909875,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.09176338011601559,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5957716703414917,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010789873100204765,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0546875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.020175758285348722,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.6640102863311768,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.94,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.266,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.392,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.534,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 39.0175008933683,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.1730207063385469,
      "eval_anthropic_toxic_prompts_runtime": 9.8831,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.592,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.101,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3000346491804135,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006012415945277402,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.3420610200801772,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.30261754564838533,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 29375
    },
    {
      "epoch": 5.64,
      "eval_arxiv_accuracy": 0.3090625,
      "eval_arxiv_bleu_score": 3.3241359711659437,
      "eval_arxiv_bleu_score_sem": 0.09375160570800184,
      "eval_arxiv_emb_cos_sim": 0.6541392207145691,
      "eval_arxiv_emb_cos_sim_sem": 0.007641927619104121,
      "eval_arxiv_emb_top1_equal": 0.1953125,
      "eval_arxiv_emb_top1_equal_sem": 0.035178457165496856,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.8257458209991455,
      "eval_arxiv_n_ngrams_match_1": 12.044,
      "eval_arxiv_n_ngrams_match_2": 2.128,
      "eval_arxiv_n_ngrams_match_3": 0.404,
      "eval_arxiv_num_pred_words": 39.116,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 45.86699618168979,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.2852597968181938,
      "eval_arxiv_runtime": 22.9602,
      "eval_arxiv_samples_per_second": 21.777,
      "eval_arxiv_steps_per_second": 0.044,
      "eval_arxiv_token_set_f1": 0.2857373098850007,
      "eval_arxiv_token_set_f1_sem": 0.003933038607794231,
      "eval_arxiv_token_set_precision": 0.22609456688552154,
      "eval_arxiv_token_set_recall": 0.41236478347392447,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 29375
    },
    {
      "epoch": 5.64,
      "eval_python_code_alpaca_accuracy": 0.13390625,
      "eval_python_code_alpaca_bleu_score": 3.185916377392192,
      "eval_python_code_alpaca_bleu_score_sem": 0.11558272745725323,
      "eval_python_code_alpaca_emb_cos_sim": 0.5900557637214661,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.011087127578762512,
      "eval_python_code_alpaca_emb_top1_equal": 0.0625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02147948148198014,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.3717613220214844,
      "eval_python_code_alpaca_n_ngrams_match_1": 6.822,
      "eval_python_code_alpaca_n_ngrams_match_2": 1.536,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.42,
      "eval_python_code_alpaca_num_pred_words": 39.574,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 29.129788838855962,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.23834039010553257,
      "eval_python_code_alpaca_runtime": 12.6156,
      "eval_python_code_alpaca_samples_per_second": 39.633,
      "eval_python_code_alpaca_steps_per_second": 0.079,
      "eval_python_code_alpaca_token_set_f1": 0.36887512618713925,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005777866137227637,
      "eval_python_code_alpaca_token_set_precision": 0.35829297488772005,
      "eval_python_code_alpaca_token_set_recall": 0.42257935245215933,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 29375
    },
    {
      "epoch": 5.64,
      "eval_wikibio_accuracy": 0.28859375,
      "eval_wikibio_bleu_score": 5.232629256479571,
      "eval_wikibio_bleu_score_sem": 0.19667584747879158,
      "eval_wikibio_emb_cos_sim": 0.69016432762146,
      "eval_wikibio_emb_cos_sim_sem": 0.01163198227997611,
      "eval_wikibio_emb_top1_equal": 0.125,
      "eval_wikibio_emb_top1_equal_sem": 0.02934655822437397,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.284221172332764,
      "eval_wikibio_n_ngrams_match_1": 9.578,
      "eval_wikibio_n_ngrams_match_2": 3.032,
      "eval_wikibio_n_ngrams_match_3": 1.038,
      "eval_wikibio_num_pred_words": 37.286,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 72.54602386129733,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3205587357177153,
      "eval_wikibio_runtime": 14.9378,
      "eval_wikibio_samples_per_second": 33.472,
      "eval_wikibio_steps_per_second": 0.067,
      "eval_wikibio_token_set_f1": 0.2997864642925253,
      "eval_wikibio_token_set_f1_sem": 0.005517867437531372,
      "eval_wikibio_token_set_precision": 0.30791202680347074,
      "eval_wikibio_token_set_recall": 0.3069112164459001,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 29375
    },
    {
      "epoch": 5.64,
      "eval_nq_accuracy": 0.47859375,
      "eval_nq_bleu_score": 9.07752341736059,
      "eval_nq_bleu_score_sem": 0.41199354295420804,
      "eval_nq_emb_cos_sim": 0.7646245956420898,
      "eval_nq_emb_cos_sim_sem": 0.009229337301795322,
      "eval_nq_emb_top1_equal": 0.21875,
      "eval_nq_emb_top1_equal_sem": 0.03668319712192295,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.5586891174316406,
      "eval_nq_n_ngrams_match_1": 20.232,
      "eval_nq_n_ngrams_match_2": 6.75,
      "eval_nq_n_ngrams_match_3": 2.904,
      "eval_nq_num_pred_words": 48.412,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 12.91887108780064,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.3843453631454201,
      "eval_nq_runtime": 9.7747,
      "eval_nq_samples_per_second": 51.152,
      "eval_nq_steps_per_second": 0.102,
      "eval_nq_token_set_f1": 0.4079419735728884,
      "eval_nq_token_set_f1_sem": 0.004967891975882009,
      "eval_nq_token_set_precision": 0.35603870422141043,
      "eval_nq_token_set_recall": 0.49267351774513696,
      "eval_nq_true_num_tokens": 64.0,
      "step": 29375
    },
    {
      "epoch": 5.64,
      "learning_rate": 0.001,
      "loss": 2.9148,
      "step": 29376
    },
    {
      "epoch": 5.64,
      "learning_rate": 0.001,
      "loss": 2.9313,
      "step": 29388
    },
    {
      "epoch": 5.65,
      "learning_rate": 0.001,
      "loss": 2.9231,
      "step": 29400
    },
    {
      "epoch": 5.65,
      "learning_rate": 0.001,
      "loss": 2.9201,
      "step": 29412
    },
    {
      "epoch": 5.65,
      "learning_rate": 0.001,
      "loss": 2.9194,
      "step": 29424
    },
    {
      "epoch": 5.65,
      "learning_rate": 0.001,
      "loss": 2.9141,
      "step": 29436
    },
    {
      "epoch": 5.65,
      "learning_rate": 0.001,
      "loss": 2.9279,
      "step": 29448
    },
    {
      "epoch": 5.66,
      "learning_rate": 0.001,
      "loss": 2.9105,
      "step": 29460
    },
    {
      "epoch": 5.66,
      "learning_rate": 0.001,
      "loss": 2.9191,
      "step": 29472
    },
    {
      "epoch": 5.66,
      "learning_rate": 0.001,
      "loss": 2.9161,
      "step": 29484
    },
    {
      "epoch": 5.66,
      "learning_rate": 0.001,
      "loss": 2.9192,
      "step": 29496
    },
    {
      "epoch": 5.67,
      "learning_rate": 0.001,
      "loss": 2.915,
      "step": 29508
    },
    {
      "epoch": 5.67,
      "learning_rate": 0.001,
      "loss": 2.9162,
      "step": 29520
    },
    {
      "epoch": 5.67,
      "learning_rate": 0.001,
      "loss": 2.9178,
      "step": 29532
    },
    {
      "epoch": 5.67,
      "learning_rate": 0.001,
      "loss": 2.922,
      "step": 29544
    },
    {
      "epoch": 5.68,
      "learning_rate": 0.001,
      "loss": 2.9292,
      "step": 29556
    },
    {
      "epoch": 5.68,
      "learning_rate": 0.001,
      "loss": 2.9285,
      "step": 29568
    },
    {
      "epoch": 5.68,
      "learning_rate": 0.001,
      "loss": 2.9153,
      "step": 29580
    },
    {
      "epoch": 5.68,
      "learning_rate": 0.001,
      "loss": 2.9175,
      "step": 29592
    },
    {
      "epoch": 5.68,
      "learning_rate": 0.001,
      "loss": 2.9274,
      "step": 29604
    },
    {
      "epoch": 5.69,
      "learning_rate": 0.001,
      "loss": 2.92,
      "step": 29616
    },
    {
      "epoch": 5.69,
      "learning_rate": 0.001,
      "loss": 2.9218,
      "step": 29628
    },
    {
      "epoch": 5.69,
      "learning_rate": 0.001,
      "loss": 2.9184,
      "step": 29640
    },
    {
      "epoch": 5.69,
      "learning_rate": 0.001,
      "loss": 2.9218,
      "step": 29652
    },
    {
      "epoch": 5.7,
      "learning_rate": 0.001,
      "loss": 2.9128,
      "step": 29664
    },
    {
      "epoch": 5.7,
      "learning_rate": 0.001,
      "loss": 2.9165,
      "step": 29676
    },
    {
      "epoch": 5.7,
      "learning_rate": 0.001,
      "loss": 2.919,
      "step": 29688
    },
    {
      "epoch": 5.7,
      "learning_rate": 0.001,
      "loss": 2.9148,
      "step": 29700
    },
    {
      "epoch": 5.71,
      "learning_rate": 0.001,
      "loss": 2.9225,
      "step": 29712
    },
    {
      "epoch": 5.71,
      "learning_rate": 0.001,
      "loss": 2.9328,
      "step": 29724
    },
    {
      "epoch": 5.71,
      "learning_rate": 0.001,
      "loss": 2.9174,
      "step": 29736
    },
    {
      "epoch": 5.71,
      "learning_rate": 0.001,
      "loss": 2.9262,
      "step": 29748
    },
    {
      "epoch": 5.71,
      "learning_rate": 0.001,
      "loss": 2.91,
      "step": 29760
    },
    {
      "epoch": 5.72,
      "learning_rate": 0.001,
      "loss": 2.9191,
      "step": 29772
    },
    {
      "epoch": 5.72,
      "learning_rate": 0.001,
      "loss": 2.9118,
      "step": 29784
    },
    {
      "epoch": 5.72,
      "learning_rate": 0.001,
      "loss": 2.9152,
      "step": 29796
    },
    {
      "epoch": 5.72,
      "learning_rate": 0.001,
      "loss": 2.9168,
      "step": 29808
    },
    {
      "epoch": 5.73,
      "learning_rate": 0.001,
      "loss": 2.9185,
      "step": 29820
    },
    {
      "epoch": 5.73,
      "learning_rate": 0.001,
      "loss": 2.9227,
      "step": 29832
    },
    {
      "epoch": 5.73,
      "learning_rate": 0.001,
      "loss": 2.9149,
      "step": 29844
    },
    {
      "epoch": 5.73,
      "learning_rate": 0.001,
      "loss": 2.9134,
      "step": 29856
    },
    {
      "epoch": 5.74,
      "learning_rate": 0.001,
      "loss": 2.9189,
      "step": 29868
    },
    {
      "epoch": 5.74,
      "learning_rate": 0.001,
      "loss": 2.9203,
      "step": 29880
    },
    {
      "epoch": 5.74,
      "learning_rate": 0.001,
      "loss": 2.9142,
      "step": 29892
    },
    {
      "epoch": 5.74,
      "learning_rate": 0.001,
      "loss": 2.9146,
      "step": 29904
    },
    {
      "epoch": 5.74,
      "learning_rate": 0.001,
      "loss": 2.9058,
      "step": 29916
    },
    {
      "epoch": 5.75,
      "learning_rate": 0.001,
      "loss": 2.9221,
      "step": 29928
    },
    {
      "epoch": 5.75,
      "learning_rate": 0.001,
      "loss": 2.9152,
      "step": 29940
    },
    {
      "epoch": 5.75,
      "learning_rate": 0.001,
      "loss": 2.9109,
      "step": 29952
    },
    {
      "epoch": 5.75,
      "learning_rate": 0.001,
      "loss": 2.9112,
      "step": 29964
    },
    {
      "epoch": 5.76,
      "learning_rate": 0.001,
      "loss": 2.9201,
      "step": 29976
    },
    {
      "epoch": 5.76,
      "learning_rate": 0.001,
      "loss": 2.9164,
      "step": 29988
    },
    {
      "epoch": 5.76,
      "learning_rate": 0.001,
      "loss": 2.9188,
      "step": 30000
    },
    {
      "epoch": 5.76,
      "eval_ag_news_accuracy": 0.285875,
      "eval_ag_news_bleu_score": 3.850783328454288,
      "eval_ag_news_bleu_score_sem": 0.1372927895962863,
      "eval_ag_news_emb_cos_sim": 0.7276172637939453,
      "eval_ag_news_emb_cos_sim_sem": 0.009851425444360625,
      "eval_ag_news_emb_top1_equal": 0.15625,
      "eval_ag_news_emb_top1_equal_sem": 0.03221922156442571,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.966392755508423,
      "eval_ag_news_n_ngrams_match_1": 11.838,
      "eval_ag_news_n_ngrams_match_2": 2.302,
      "eval_ag_news_n_ngrams_match_3": 0.596,
      "eval_ag_news_num_pred_words": 46.148,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 52.79374699153521,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.2835993467716257,
      "eval_ag_news_runtime": 15.8892,
      "eval_ag_news_samples_per_second": 31.468,
      "eval_ag_news_steps_per_second": 0.063,
      "eval_ag_news_token_set_f1": 0.30344832396101123,
      "eval_ag_news_token_set_f1_sem": 0.004250525713913587,
      "eval_ag_news_token_set_precision": 0.27299063247100686,
      "eval_ag_news_token_set_recall": 0.361079217786275,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 30000
    },
    {
      "epoch": 5.76,
      "eval_anthropic_toxic_prompts_accuracy": 0.095625,
      "eval_anthropic_toxic_prompts_bleu_score": 2.2058590011185744,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.08472565620356692,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.595095694065094,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010127160005486514,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.011004959004867984,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.6490745544433594,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.784,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.194,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.352,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.61,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 38.439076311016336,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.16773877446726976,
      "eval_anthropic_toxic_prompts_runtime": 10.4948,
      "eval_anthropic_toxic_prompts_samples_per_second": 47.643,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.095,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3010833694682913,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006084173852136998,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.32939871473377075,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.318107955362561,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 30000
    },
    {
      "epoch": 5.76,
      "eval_arxiv_accuracy": 0.310375,
      "eval_arxiv_bleu_score": 3.3752250293476336,
      "eval_arxiv_bleu_score_sem": 0.09475203164372994,
      "eval_arxiv_emb_cos_sim": 0.6450604200363159,
      "eval_arxiv_emb_cos_sim_sem": 0.008361617326108527,
      "eval_arxiv_emb_top1_equal": 0.25,
      "eval_arxiv_emb_top1_equal_sem": 0.03842366440207048,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.827465295791626,
      "eval_arxiv_n_ngrams_match_1": 12.068,
      "eval_arxiv_n_ngrams_match_2": 2.108,
      "eval_arxiv_n_ngrams_match_3": 0.416,
      "eval_arxiv_num_pred_words": 38.628,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 45.94593116934354,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.2873292248246817,
      "eval_arxiv_runtime": 15.7135,
      "eval_arxiv_samples_per_second": 31.82,
      "eval_arxiv_steps_per_second": 0.064,
      "eval_arxiv_token_set_f1": 0.2873084298308554,
      "eval_arxiv_token_set_f1_sem": 0.0037490863754161387,
      "eval_arxiv_token_set_precision": 0.22817390153959008,
      "eval_arxiv_token_set_recall": 0.41772290533826184,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 30000
    },
    {
      "epoch": 5.76,
      "eval_python_code_alpaca_accuracy": 0.1338125,
      "eval_python_code_alpaca_bleu_score": 3.073501627024064,
      "eval_python_code_alpaca_bleu_score_sem": 0.1004260312872625,
      "eval_python_code_alpaca_emb_cos_sim": 0.6021091938018799,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.012731274822083471,
      "eval_python_code_alpaca_emb_top1_equal": 0.046875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.01875615101164758,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.3815906047821045,
      "eval_python_code_alpaca_n_ngrams_match_1": 6.786,
      "eval_python_code_alpaca_n_ngrams_match_2": 1.546,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.428,
      "eval_python_code_alpaca_num_pred_words": 38.752,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 29.417525576355107,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.23933823569072102,
      "eval_python_code_alpaca_runtime": 12.0021,
      "eval_python_code_alpaca_samples_per_second": 41.659,
      "eval_python_code_alpaca_steps_per_second": 0.083,
      "eval_python_code_alpaca_token_set_f1": 0.3658373813590061,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0062407786743706685,
      "eval_python_code_alpaca_token_set_precision": 0.3516124488730524,
      "eval_python_code_alpaca_token_set_recall": 0.423038221603183,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 30000
    },
    {
      "epoch": 5.76,
      "eval_wikibio_accuracy": 0.29065625,
      "eval_wikibio_bleu_score": 5.053948308197059,
      "eval_wikibio_bleu_score_sem": 0.18603968757630915,
      "eval_wikibio_emb_cos_sim": 0.6840104460716248,
      "eval_wikibio_emb_cos_sim_sem": 0.01135399615517743,
      "eval_wikibio_emb_top1_equal": 0.15625,
      "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.2231764793396,
      "eval_wikibio_n_ngrams_match_1": 9.4,
      "eval_wikibio_n_ngrams_match_2": 2.918,
      "eval_wikibio_n_ngrams_match_3": 0.982,
      "eval_wikibio_num_pred_words": 36.798,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 68.24993484001781,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3166582288314724,
      "eval_wikibio_runtime": 15.6901,
      "eval_wikibio_samples_per_second": 31.867,
      "eval_wikibio_steps_per_second": 0.064,
      "eval_wikibio_token_set_f1": 0.29972557652174886,
      "eval_wikibio_token_set_f1_sem": 0.005375044090514448,
      "eval_wikibio_token_set_precision": 0.3021887502153504,
      "eval_wikibio_token_set_recall": 0.3136608932700702,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 30000
    },
    {
      "epoch": 5.76,
      "eval_nq_accuracy": 0.48146875,
      "eval_nq_bleu_score": 8.78585342678472,
      "eval_nq_bleu_score_sem": 0.3673154230053644,
      "eval_nq_emb_cos_sim": 0.7678723335266113,
      "eval_nq_emb_cos_sim_sem": 0.010203222554703026,
      "eval_nq_emb_top1_equal": 0.296875,
      "eval_nq_emb_top1_equal_sem": 0.04054163310179599,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.5455660820007324,
      "eval_nq_n_ngrams_match_1": 20.458,
      "eval_nq_n_ngrams_match_2": 6.692,
      "eval_nq_n_ngrams_match_3": 2.742,
      "eval_nq_num_pred_words": 48.502,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 12.750443840277873,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.3889194328154036,
      "eval_nq_runtime": 20.9512,
      "eval_nq_samples_per_second": 23.865,
      "eval_nq_steps_per_second": 0.048,
      "eval_nq_token_set_f1": 0.40984881096717857,
      "eval_nq_token_set_f1_sem": 0.004945079188842102,
      "eval_nq_token_set_precision": 0.3578919668831182,
      "eval_nq_token_set_recall": 0.49188674622319023,
      "eval_nq_true_num_tokens": 64.0,
      "step": 30000
    },
    {
      "epoch": 5.76,
      "learning_rate": 0.001,
      "loss": 2.9134,
      "step": 30012
    },
    {
      "epoch": 5.76,
      "learning_rate": 0.001,
      "loss": 2.9031,
      "step": 30024
    },
    {
      "epoch": 5.77,
      "learning_rate": 0.001,
      "loss": 2.9133,
      "step": 30036
    },
    {
      "epoch": 5.77,
      "learning_rate": 0.001,
      "loss": 2.8988,
      "step": 30048
    },
    {
      "epoch": 5.77,
      "learning_rate": 0.001,
      "loss": 2.9129,
      "step": 30060
    },
    {
      "epoch": 5.77,
      "learning_rate": 0.001,
      "loss": 2.9092,
      "step": 30072
    },
    {
      "epoch": 5.78,
      "learning_rate": 0.001,
      "loss": 2.9179,
      "step": 30084
    },
    {
      "epoch": 5.78,
      "learning_rate": 0.001,
      "loss": 2.9137,
      "step": 30096
    },
    {
      "epoch": 5.78,
      "learning_rate": 0.001,
      "loss": 2.9185,
      "step": 30108
    },
    {
      "epoch": 5.78,
      "learning_rate": 0.001,
      "loss": 2.9126,
      "step": 30120
    },
    {
      "epoch": 5.79,
      "learning_rate": 0.001,
      "loss": 2.9077,
      "step": 30132
    },
    {
      "epoch": 5.79,
      "learning_rate": 0.001,
      "loss": 2.9208,
      "step": 30144
    },
    {
      "epoch": 5.79,
      "learning_rate": 0.001,
      "loss": 2.9097,
      "step": 30156
    },
    {
      "epoch": 5.79,
      "learning_rate": 0.001,
      "loss": 2.9093,
      "step": 30168
    },
    {
      "epoch": 5.79,
      "learning_rate": 0.001,
      "loss": 2.9049,
      "step": 30180
    },
    {
      "epoch": 5.8,
      "learning_rate": 0.001,
      "loss": 2.9035,
      "step": 30192
    },
    {
      "epoch": 5.8,
      "learning_rate": 0.001,
      "loss": 2.904,
      "step": 30204
    },
    {
      "epoch": 5.8,
      "learning_rate": 0.001,
      "loss": 2.9195,
      "step": 30216
    },
    {
      "epoch": 5.8,
      "learning_rate": 0.001,
      "loss": 2.913,
      "step": 30228
    },
    {
      "epoch": 5.81,
      "learning_rate": 0.001,
      "loss": 2.9113,
      "step": 30240
    },
    {
      "epoch": 5.81,
      "learning_rate": 0.001,
      "loss": 2.913,
      "step": 30252
    },
    {
      "epoch": 5.81,
      "learning_rate": 0.001,
      "loss": 2.9228,
      "step": 30264
    },
    {
      "epoch": 5.81,
      "learning_rate": 0.001,
      "loss": 2.9073,
      "step": 30276
    },
    {
      "epoch": 5.82,
      "learning_rate": 0.001,
      "loss": 2.9143,
      "step": 30288
    },
    {
      "epoch": 5.82,
      "learning_rate": 0.001,
      "loss": 2.9178,
      "step": 30300
    },
    {
      "epoch": 5.82,
      "learning_rate": 0.001,
      "loss": 2.9136,
      "step": 30312
    },
    {
      "epoch": 5.82,
      "learning_rate": 0.001,
      "loss": 2.9236,
      "step": 30324
    },
    {
      "epoch": 5.82,
      "learning_rate": 0.001,
      "loss": 2.9106,
      "step": 30336
    },
    {
      "epoch": 5.83,
      "learning_rate": 0.001,
      "loss": 2.9091,
      "step": 30348
    },
    {
      "epoch": 5.83,
      "learning_rate": 0.001,
      "loss": 2.9061,
      "step": 30360
    },
    {
      "epoch": 5.83,
      "learning_rate": 0.001,
      "loss": 2.9258,
      "step": 30372
    },
    {
      "epoch": 5.83,
      "learning_rate": 0.001,
      "loss": 2.9205,
      "step": 30384
    },
    {
      "epoch": 5.84,
      "learning_rate": 0.001,
      "loss": 2.9164,
      "step": 30396
    },
    {
      "epoch": 5.84,
      "learning_rate": 0.001,
      "loss": 2.9256,
      "step": 30408
    },
    {
      "epoch": 5.84,
      "learning_rate": 0.001,
      "loss": 2.9129,
      "step": 30420
    },
    {
      "epoch": 5.84,
      "learning_rate": 0.001,
      "loss": 2.9132,
      "step": 30432
    },
    {
      "epoch": 5.85,
      "learning_rate": 0.001,
      "loss": 2.9119,
      "step": 30444
    },
    {
      "epoch": 5.85,
      "learning_rate": 0.001,
      "loss": 2.9142,
      "step": 30456
    },
    {
      "epoch": 5.85,
      "learning_rate": 0.001,
      "loss": 2.9021,
      "step": 30468
    },
    {
      "epoch": 5.85,
      "learning_rate": 0.001,
      "loss": 2.9111,
      "step": 30480
    },
    {
      "epoch": 5.85,
      "learning_rate": 0.001,
      "loss": 2.9121,
      "step": 30492
    },
    {
      "epoch": 5.86,
      "learning_rate": 0.001,
      "loss": 2.914,
      "step": 30504
    },
    {
      "epoch": 5.86,
      "learning_rate": 0.001,
      "loss": 2.9219,
      "step": 30516
    },
    {
      "epoch": 5.86,
      "learning_rate": 0.001,
      "loss": 2.9169,
      "step": 30528
    },
    {
      "epoch": 5.86,
      "learning_rate": 0.001,
      "loss": 2.9133,
      "step": 30540
    },
    {
      "epoch": 5.87,
      "learning_rate": 0.001,
      "loss": 2.9096,
      "step": 30552
    },
    {
      "epoch": 5.87,
      "learning_rate": 0.001,
      "loss": 2.9112,
      "step": 30564
    },
    {
      "epoch": 5.87,
      "learning_rate": 0.001,
      "loss": 2.9115,
      "step": 30576
    },
    {
      "epoch": 5.87,
      "learning_rate": 0.001,
      "loss": 2.9114,
      "step": 30588
    },
    {
      "epoch": 5.88,
      "learning_rate": 0.001,
      "loss": 2.9059,
      "step": 30600
    },
    {
      "epoch": 5.88,
      "learning_rate": 0.001,
      "loss": 2.909,
      "step": 30612
    },
    {
      "epoch": 5.88,
      "learning_rate": 0.001,
      "loss": 2.9161,
      "step": 30624
    },
    {
      "epoch": 5.88,
      "eval_ag_news_accuracy": 0.28559375,
      "eval_ag_news_bleu_score": 3.92340353353268,
      "eval_ag_news_bleu_score_sem": 0.13422552780712915,
      "eval_ag_news_emb_cos_sim": 0.7396842241287231,
      "eval_ag_news_emb_cos_sim_sem": 0.00864020997039906,
      "eval_ag_news_emb_top1_equal": 0.140625,
      "eval_ag_news_emb_top1_equal_sem": 0.030847557647994725,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.9616646766662598,
      "eval_ag_news_n_ngrams_match_1": 11.986,
      "eval_ag_news_n_ngrams_match_2": 2.32,
      "eval_ag_news_n_ngrams_match_3": 0.646,
      "eval_ag_news_num_pred_words": 45.83,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 52.544723159446725,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.29149405296365205,
      "eval_ag_news_runtime": 16.0074,
      "eval_ag_news_samples_per_second": 31.236,
      "eval_ag_news_steps_per_second": 0.062,
      "eval_ag_news_token_set_f1": 0.3067500767208229,
      "eval_ag_news_token_set_f1_sem": 0.004254976760109095,
      "eval_ag_news_token_set_precision": 0.27931568804659523,
      "eval_ag_news_token_set_recall": 0.35896683671270074,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 30625
    },
    {
      "epoch": 5.88,
      "eval_anthropic_toxic_prompts_accuracy": 0.09690625,
      "eval_anthropic_toxic_prompts_bleu_score": 2.317182031116432,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.08863940473307765,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5967903137207031,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010474420402713422,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.641948938369751,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.986,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.29,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.404,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.492,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 38.16614775785699,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.17395441823891908,
      "eval_anthropic_toxic_prompts_runtime": 21.7978,
      "eval_anthropic_toxic_prompts_samples_per_second": 22.938,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.046,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3030149576520227,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006170249046331457,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.34521743389186765,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3060803273150024,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 30625
    },
    {
      "epoch": 5.88,
      "eval_arxiv_accuracy": 0.30934375,
      "eval_arxiv_bleu_score": 3.382515072277503,
      "eval_arxiv_bleu_score_sem": 0.09941382190161986,
      "eval_arxiv_emb_cos_sim": 0.6513580083847046,
      "eval_arxiv_emb_cos_sim_sem": 0.00900234952223933,
      "eval_arxiv_emb_top1_equal": 0.2109375,
      "eval_arxiv_emb_top1_equal_sem": 0.03620184850179216,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.8381197452545166,
      "eval_arxiv_n_ngrams_match_1": 12.278,
      "eval_arxiv_n_ngrams_match_2": 2.202,
      "eval_arxiv_n_ngrams_match_3": 0.412,
      "eval_arxiv_num_pred_words": 39.182,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 46.438076886268775,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.2883738238974642,
      "eval_arxiv_runtime": 17.7678,
      "eval_arxiv_samples_per_second": 28.141,
      "eval_arxiv_steps_per_second": 0.056,
      "eval_arxiv_token_set_f1": 0.28930532566938244,
      "eval_arxiv_token_set_f1_sem": 0.0039930677042840125,
      "eval_arxiv_token_set_precision": 0.2311305139145135,
      "eval_arxiv_token_set_recall": 0.416346233017776,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 30625
    },
    {
      "epoch": 5.88,
      "eval_python_code_alpaca_accuracy": 0.13021875,
      "eval_python_code_alpaca_bleu_score": 3.0414240146457576,
      "eval_python_code_alpaca_bleu_score_sem": 0.0941243423484182,
      "eval_python_code_alpaca_emb_cos_sim": 0.6037212014198303,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.01196555351251558,
      "eval_python_code_alpaca_emb_top1_equal": 0.046875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.01875615101164758,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.404144048690796,
      "eval_python_code_alpaca_n_ngrams_match_1": 7.038,
      "eval_python_code_alpaca_n_ngrams_match_2": 1.604,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.4,
      "eval_python_code_alpaca_num_pred_words": 40.216,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 30.088530381579464,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.2401659567842701,
      "eval_python_code_alpaca_runtime": 15.2337,
      "eval_python_code_alpaca_samples_per_second": 32.822,
      "eval_python_code_alpaca_steps_per_second": 0.066,
      "eval_python_code_alpaca_token_set_f1": 0.36518958850939326,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005729664256662593,
      "eval_python_code_alpaca_token_set_precision": 0.36651890213393223,
      "eval_python_code_alpaca_token_set_recall": 0.40220446678496957,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 30625
    },
    {
      "epoch": 5.88,
      "eval_wikibio_accuracy": 0.29025,
      "eval_wikibio_bleu_score": 5.304119152730881,
      "eval_wikibio_bleu_score_sem": 0.1965021409657196,
      "eval_wikibio_emb_cos_sim": 0.687311053276062,
      "eval_wikibio_emb_cos_sim_sem": 0.010802350542997512,
      "eval_wikibio_emb_top1_equal": 0.1171875,
      "eval_wikibio_emb_top1_equal_sem": 0.02854125312152025,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.245105743408203,
      "eval_wikibio_n_ngrams_match_1": 9.61,
      "eval_wikibio_n_ngrams_match_2": 3.176,
      "eval_wikibio_n_ngrams_match_3": 1.132,
      "eval_wikibio_num_pred_words": 37.752,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 69.76313674572921,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3214556022208892,
      "eval_wikibio_runtime": 18.8061,
      "eval_wikibio_samples_per_second": 26.587,
      "eval_wikibio_steps_per_second": 0.053,
      "eval_wikibio_token_set_f1": 0.29773290452739015,
      "eval_wikibio_token_set_f1_sem": 0.005731029723955615,
      "eval_wikibio_token_set_precision": 0.3073059385941136,
      "eval_wikibio_token_set_recall": 0.29928461317683497,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 30625
    },
    {
      "epoch": 5.88,
      "eval_nq_accuracy": 0.48153125,
      "eval_nq_bleu_score": 9.108636317570715,
      "eval_nq_bleu_score_sem": 0.41082468716068044,
      "eval_nq_emb_cos_sim": 0.77602618932724,
      "eval_nq_emb_cos_sim_sem": 0.008655139832939538,
      "eval_nq_emb_top1_equal": 0.2578125,
      "eval_nq_emb_top1_equal_sem": 0.038815656435002115,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.5364458560943604,
      "eval_nq_n_ngrams_match_1": 20.486,
      "eval_nq_n_ngrams_match_2": 6.876,
      "eval_nq_n_ngrams_match_3": 2.902,
      "eval_nq_num_pred_words": 48.736,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 12.634685585148935,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.3876309017159371,
      "eval_nq_runtime": 20.303,
      "eval_nq_samples_per_second": 24.627,
      "eval_nq_steps_per_second": 0.049,
      "eval_nq_token_set_f1": 0.4124639328938928,
      "eval_nq_token_set_f1_sem": 0.005092074732061953,
      "eval_nq_token_set_precision": 0.36078636749553916,
      "eval_nq_token_set_recall": 0.4959744441039201,
      "eval_nq_true_num_tokens": 64.0,
      "step": 30625
    },
    {
      "epoch": 5.88,
      "learning_rate": 0.001,
      "loss": 2.9095,
      "step": 30636
    },
    {
      "epoch": 5.88,
      "learning_rate": 0.001,
      "loss": 2.9053,
      "step": 30648
    },
    {
      "epoch": 5.89,
      "learning_rate": 0.001,
      "loss": 2.916,
      "step": 30660
    },
    {
      "epoch": 5.89,
      "learning_rate": 0.001,
      "loss": 2.9189,
      "step": 30672
    },
    {
      "epoch": 5.89,
      "learning_rate": 0.001,
      "loss": 2.9144,
      "step": 30684
    },
    {
      "epoch": 5.89,
      "learning_rate": 0.001,
      "loss": 2.9069,
      "step": 30696
    },
    {
      "epoch": 5.9,
      "learning_rate": 0.001,
      "loss": 2.9071,
      "step": 30708
    },
    {
      "epoch": 5.9,
      "learning_rate": 0.001,
      "loss": 2.9133,
      "step": 30720
    },
    {
      "epoch": 5.9,
      "learning_rate": 0.001,
      "loss": 2.8953,
      "step": 30732
    },
    {
      "epoch": 5.9,
      "learning_rate": 0.001,
      "loss": 2.9122,
      "step": 30744
    },
    {
      "epoch": 5.91,
      "learning_rate": 0.001,
      "loss": 2.9116,
      "step": 30756
    },
    {
      "epoch": 5.91,
      "learning_rate": 0.001,
      "loss": 2.9072,
      "step": 30768
    },
    {
      "epoch": 5.91,
      "learning_rate": 0.001,
      "loss": 2.913,
      "step": 30780
    },
    {
      "epoch": 5.91,
      "learning_rate": 0.001,
      "loss": 2.8922,
      "step": 30792
    },
    {
      "epoch": 5.91,
      "learning_rate": 0.001,
      "loss": 2.914,
      "step": 30804
    },
    {
      "epoch": 5.92,
      "learning_rate": 0.001,
      "loss": 2.9051,
      "step": 30816
    },
    {
      "epoch": 5.92,
      "learning_rate": 0.001,
      "loss": 2.9131,
      "step": 30828
    },
    {
      "epoch": 5.92,
      "learning_rate": 0.001,
      "loss": 2.9065,
      "step": 30840
    },
    {
      "epoch": 5.92,
      "learning_rate": 0.001,
      "loss": 2.8955,
      "step": 30852
    },
    {
      "epoch": 5.93,
      "learning_rate": 0.001,
      "loss": 2.9023,
      "step": 30864
    },
    {
      "epoch": 5.93,
      "learning_rate": 0.001,
      "loss": 2.9079,
      "step": 30876
    },
    {
      "epoch": 5.93,
      "learning_rate": 0.001,
      "loss": 2.9058,
      "step": 30888
    },
    {
      "epoch": 5.93,
      "learning_rate": 0.001,
      "loss": 2.8973,
      "step": 30900
    },
    {
      "epoch": 5.94,
      "learning_rate": 0.001,
      "loss": 2.9036,
      "step": 30912
    },
    {
      "epoch": 5.94,
      "learning_rate": 0.001,
      "loss": 2.9067,
      "step": 30924
    },
    {
      "epoch": 5.94,
      "learning_rate": 0.001,
      "loss": 2.9141,
      "step": 30936
    },
    {
      "epoch": 5.94,
      "learning_rate": 0.001,
      "loss": 2.9116,
      "step": 30948
    },
    {
      "epoch": 5.94,
      "learning_rate": 0.001,
      "loss": 2.8932,
      "step": 30960
    },
    {
      "epoch": 5.95,
      "learning_rate": 0.001,
      "loss": 2.8937,
      "step": 30972
    },
    {
      "epoch": 5.95,
      "learning_rate": 0.001,
      "loss": 2.8956,
      "step": 30984
    },
    {
      "epoch": 5.95,
      "learning_rate": 0.001,
      "loss": 2.911,
      "step": 30996
    },
    {
      "epoch": 5.95,
      "learning_rate": 0.001,
      "loss": 2.9071,
      "step": 31008
    },
    {
      "epoch": 5.96,
      "learning_rate": 0.001,
      "loss": 2.9092,
      "step": 31020
    },
    {
      "epoch": 5.96,
      "learning_rate": 0.001,
      "loss": 2.8976,
      "step": 31032
    },
    {
      "epoch": 5.96,
      "learning_rate": 0.001,
      "loss": 2.9127,
      "step": 31044
    },
    {
      "epoch": 5.96,
      "learning_rate": 0.001,
      "loss": 2.9024,
      "step": 31056
    },
    {
      "epoch": 5.97,
      "learning_rate": 0.001,
      "loss": 2.9064,
      "step": 31068
    },
    {
      "epoch": 5.97,
      "learning_rate": 0.001,
      "loss": 2.9061,
      "step": 31080
    },
    {
      "epoch": 5.97,
      "learning_rate": 0.001,
      "loss": 2.895,
      "step": 31092
    },
    {
      "epoch": 5.97,
      "learning_rate": 0.001,
      "loss": 2.9064,
      "step": 31104
    },
    {
      "epoch": 5.97,
      "learning_rate": 0.001,
      "loss": 2.9021,
      "step": 31116
    },
    {
      "epoch": 5.98,
      "learning_rate": 0.001,
      "loss": 2.9177,
      "step": 31128
    },
    {
      "epoch": 5.98,
      "learning_rate": 0.001,
      "loss": 2.9069,
      "step": 31140
    },
    {
      "epoch": 5.98,
      "learning_rate": 0.001,
      "loss": 2.9006,
      "step": 31152
    },
    {
      "epoch": 5.98,
      "learning_rate": 0.001,
      "loss": 2.9117,
      "step": 31164
    },
    {
      "epoch": 5.99,
      "learning_rate": 0.001,
      "loss": 2.9087,
      "step": 31176
    },
    {
      "epoch": 5.99,
      "learning_rate": 0.001,
      "loss": 2.9145,
      "step": 31188
    },
    {
      "epoch": 5.99,
      "learning_rate": 0.001,
      "loss": 2.9111,
      "step": 31200
    },
    {
      "epoch": 5.99,
      "learning_rate": 0.001,
      "loss": 2.9092,
      "step": 31212
    },
    {
      "epoch": 6.0,
      "learning_rate": 0.001,
      "loss": 2.9017,
      "step": 31224
    },
    {
      "epoch": 6.0,
      "learning_rate": 0.001,
      "loss": 2.902,
      "step": 31236
    },
    {
      "epoch": 6.0,
      "learning_rate": 0.001,
      "loss": 2.8876,
      "step": 31248
    },
    {
      "epoch": 6.0,
      "eval_ag_news_accuracy": 0.287375,
      "eval_ag_news_bleu_score": 4.015214203939312,
      "eval_ag_news_bleu_score_sem": 0.1424946105711094,
      "eval_ag_news_emb_cos_sim": 0.7467055916786194,
      "eval_ag_news_emb_cos_sim_sem": 0.00897354346837115,
      "eval_ag_news_emb_top1_equal": 0.1796875,
      "eval_ag_news_emb_top1_equal_sem": 0.034068008879424266,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.961060047149658,
      "eval_ag_news_n_ngrams_match_1": 12.14,
      "eval_ag_news_n_ngrams_match_2": 2.428,
      "eval_ag_news_n_ngrams_match_3": 0.664,
      "eval_ag_news_num_pred_words": 46.146,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 52.51296267151468,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.291699403650167,
      "eval_ag_news_runtime": 26.2348,
      "eval_ag_news_samples_per_second": 19.059,
      "eval_ag_news_steps_per_second": 0.038,
      "eval_ag_news_token_set_f1": 0.308974160282909,
      "eval_ag_news_token_set_f1_sem": 0.004320376120830538,
      "eval_ag_news_token_set_precision": 0.2823875616511164,
      "eval_ag_news_token_set_recall": 0.35966361874241987,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 31250
    },
    {
      "epoch": 6.0,
      "eval_anthropic_toxic_prompts_accuracy": 0.096,
      "eval_anthropic_toxic_prompts_bleu_score": 2.3739915493739954,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10047672394127616,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6032194495201111,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009325521678453247,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.6662943363189697,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.04,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.306,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.43,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.864,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 39.106720668232924,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.17207862529830276,
      "eval_anthropic_toxic_prompts_runtime": 15.1592,
      "eval_anthropic_toxic_prompts_samples_per_second": 32.983,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.066,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.31302152092834934,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006202876001572878,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.3572100630860854,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3188142744818477,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 31250
    },
    {
      "epoch": 6.0,
      "eval_arxiv_accuracy": 0.31265625,
      "eval_arxiv_bleu_score": 3.3270061688436345,
      "eval_arxiv_bleu_score_sem": 0.09410719973132245,
      "eval_arxiv_emb_cos_sim": 0.6667373180389404,
      "eval_arxiv_emb_cos_sim_sem": 0.007759442910920113,
      "eval_arxiv_emb_top1_equal": 0.2109375,
      "eval_arxiv_emb_top1_equal_sem": 0.03620184850179216,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.8215150833129883,
      "eval_arxiv_n_ngrams_match_1": 12.286,
      "eval_arxiv_n_ngrams_match_2": 2.14,
      "eval_arxiv_n_ngrams_match_3": 0.404,
      "eval_arxiv_num_pred_words": 39.652,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 45.673354864037535,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.28890175618743996,
      "eval_arxiv_runtime": 13.9317,
      "eval_arxiv_samples_per_second": 35.889,
      "eval_arxiv_steps_per_second": 0.072,
      "eval_arxiv_token_set_f1": 0.29112317015796885,
      "eval_arxiv_token_set_f1_sem": 0.00383495199730816,
      "eval_arxiv_token_set_precision": 0.23451290562972754,
      "eval_arxiv_token_set_recall": 0.410144685003836,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 31250
    },
    {
      "epoch": 6.0,
      "eval_python_code_alpaca_accuracy": 0.13575,
      "eval_python_code_alpaca_bleu_score": 3.12962272204674,
      "eval_python_code_alpaca_bleu_score_sem": 0.09627211251719704,
      "eval_python_code_alpaca_emb_cos_sim": 0.629444420337677,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009285553953719267,
      "eval_python_code_alpaca_emb_top1_equal": 0.09375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.025864720141013958,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.366333484649658,
      "eval_python_code_alpaca_n_ngrams_match_1": 7.464,
      "eval_python_code_alpaca_n_ngrams_match_2": 1.708,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.394,
      "eval_python_code_alpaca_num_pred_words": 40.52,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 28.972105409499672,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.25402462240221274,
      "eval_python_code_alpaca_runtime": 12.9798,
      "eval_python_code_alpaca_samples_per_second": 38.521,
      "eval_python_code_alpaca_steps_per_second": 0.077,
      "eval_python_code_alpaca_token_set_f1": 0.3870647442475046,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005686533325171371,
      "eval_python_code_alpaca_token_set_precision": 0.3932321531850331,
      "eval_python_code_alpaca_token_set_recall": 0.41167386461257605,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 31250
    },
    {
      "epoch": 6.0,
      "eval_wikibio_accuracy": 0.28753125,
      "eval_wikibio_bleu_score": 5.232131776967655,
      "eval_wikibio_bleu_score_sem": 0.1854721115056108,
      "eval_wikibio_emb_cos_sim": 0.7211657166481018,
      "eval_wikibio_emb_cos_sim_sem": 0.009413412345577635,
      "eval_wikibio_emb_top1_equal": 0.140625,
      "eval_wikibio_emb_top1_equal_sem": 0.030847557647994725,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.320174694061279,
      "eval_wikibio_n_ngrams_match_1": 10.026,
      "eval_wikibio_n_ngrams_match_2": 3.172,
      "eval_wikibio_n_ngrams_match_3": 1.074,
      "eval_wikibio_num_pred_words": 39.128,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 75.20176444623209,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.33436423551524785,
      "eval_wikibio_runtime": 17.6294,
      "eval_wikibio_samples_per_second": 28.362,
      "eval_wikibio_steps_per_second": 0.057,
      "eval_wikibio_token_set_f1": 0.3076759294899552,
      "eval_wikibio_token_set_f1_sem": 0.004869346963903943,
      "eval_wikibio_token_set_precision": 0.321417208075223,
      "eval_wikibio_token_set_recall": 0.30604146929154535,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 31250
    },
    {
      "epoch": 6.0,
      "eval_nq_accuracy": 0.480625,
      "eval_nq_bleu_score": 9.321623981279211,
      "eval_nq_bleu_score_sem": 0.4011978620674311,
      "eval_nq_emb_cos_sim": 0.7748517990112305,
      "eval_nq_emb_cos_sim_sem": 0.009179478897095951,
      "eval_nq_emb_top1_equal": 0.1796875,
      "eval_nq_emb_top1_equal_sem": 0.034068008879424266,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.5365450382232666,
      "eval_nq_n_ngrams_match_1": 20.56,
      "eval_nq_n_ngrams_match_2": 6.96,
      "eval_nq_n_ngrams_match_3": 2.94,
      "eval_nq_num_pred_words": 49.014,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 12.635938782309685,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.3921534682185354,
      "eval_nq_runtime": 25.3882,
      "eval_nq_samples_per_second": 19.694,
      "eval_nq_steps_per_second": 0.039,
      "eval_nq_token_set_f1": 0.4133887882062554,
      "eval_nq_token_set_f1_sem": 0.004906133716717383,
      "eval_nq_token_set_precision": 0.3629658953597597,
      "eval_nq_token_set_recall": 0.49369864023304144,
      "eval_nq_true_num_tokens": 64.0,
      "step": 31250
    },
    {
      "epoch": 6.0,
      "learning_rate": 0.001,
      "loss": 2.8757,
      "step": 31260
    },
    {
      "epoch": 6.0,
      "learning_rate": 0.001,
      "loss": 2.8896,
      "step": 31272
    },
    {
      "epoch": 6.01,
      "learning_rate": 0.001,
      "loss": 2.8965,
      "step": 31284
    },
    {
      "epoch": 6.01,
      "learning_rate": 0.001,
      "loss": 2.8779,
      "step": 31296
    },
    {
      "epoch": 6.01,
      "learning_rate": 0.001,
      "loss": 2.8833,
      "step": 31308
    },
    {
      "epoch": 6.01,
      "learning_rate": 0.001,
      "loss": 2.8868,
      "step": 31320
    },
    {
      "epoch": 6.02,
      "learning_rate": 0.001,
      "loss": 2.8871,
      "step": 31332
    },
    {
      "epoch": 6.02,
      "learning_rate": 0.001,
      "loss": 2.8783,
      "step": 31344
    },
    {
      "epoch": 6.02,
      "learning_rate": 0.001,
      "loss": 2.8899,
      "step": 31356
    },
    {
      "epoch": 6.02,
      "learning_rate": 0.001,
      "loss": 2.886,
      "step": 31368
    },
    {
      "epoch": 6.03,
      "learning_rate": 0.001,
      "loss": 2.8927,
      "step": 31380
    },
    {
      "epoch": 6.03,
      "learning_rate": 0.001,
      "loss": 2.8869,
      "step": 31392
    },
    {
      "epoch": 6.03,
      "learning_rate": 0.001,
      "loss": 2.8895,
      "step": 31404
    },
    {
      "epoch": 6.03,
      "learning_rate": 0.001,
      "loss": 2.8845,
      "step": 31416
    },
    {
      "epoch": 6.03,
      "learning_rate": 0.001,
      "loss": 2.8887,
      "step": 31428
    },
    {
      "epoch": 6.04,
      "learning_rate": 0.001,
      "loss": 2.8739,
      "step": 31440
    },
    {
      "epoch": 6.04,
      "learning_rate": 0.001,
      "loss": 2.8859,
      "step": 31452
    },
    {
      "epoch": 6.04,
      "learning_rate": 0.001,
      "loss": 2.8845,
      "step": 31464
    },
    {
      "epoch": 6.04,
      "learning_rate": 0.001,
      "loss": 2.8819,
      "step": 31476
    },
    {
      "epoch": 6.05,
      "learning_rate": 0.001,
      "loss": 2.8898,
      "step": 31488
    },
    {
      "epoch": 6.05,
      "learning_rate": 0.001,
      "loss": 2.8784,
      "step": 31500
    },
    {
      "epoch": 6.05,
      "learning_rate": 0.001,
      "loss": 2.8811,
      "step": 31512
    },
    {
      "epoch": 6.05,
      "learning_rate": 0.001,
      "loss": 2.8754,
      "step": 31524
    },
    {
      "epoch": 6.06,
      "learning_rate": 0.001,
      "loss": 2.8832,
      "step": 31536
    },
    {
      "epoch": 6.06,
      "learning_rate": 0.001,
      "loss": 2.8815,
      "step": 31548
    },
    {
      "epoch": 6.06,
      "learning_rate": 0.001,
      "loss": 2.8804,
      "step": 31560
    },
    {
      "epoch": 6.06,
      "learning_rate": 0.001,
      "loss": 2.8885,
      "step": 31572
    },
    {
      "epoch": 6.06,
      "learning_rate": 0.001,
      "loss": 2.8886,
      "step": 31584
    },
    {
      "epoch": 6.07,
      "learning_rate": 0.001,
      "loss": 2.8744,
      "step": 31596
    },
    {
      "epoch": 6.07,
      "learning_rate": 0.001,
      "loss": 2.8763,
      "step": 31608
    },
    {
      "epoch": 6.07,
      "learning_rate": 0.001,
      "loss": 2.8784,
      "step": 31620
    },
    {
      "epoch": 6.07,
      "learning_rate": 0.001,
      "loss": 2.8879,
      "step": 31632
    },
    {
      "epoch": 6.08,
      "learning_rate": 0.001,
      "loss": 2.8826,
      "step": 31644
    },
    {
      "epoch": 6.08,
      "learning_rate": 0.001,
      "loss": 2.8887,
      "step": 31656
    },
    {
      "epoch": 6.08,
      "learning_rate": 0.001,
      "loss": 2.8835,
      "step": 31668
    },
    {
      "epoch": 6.08,
      "learning_rate": 0.001,
      "loss": 2.8862,
      "step": 31680
    },
    {
      "epoch": 6.09,
      "learning_rate": 0.001,
      "loss": 2.8757,
      "step": 31692
    },
    {
      "epoch": 6.09,
      "learning_rate": 0.001,
      "loss": 2.8834,
      "step": 31704
    },
    {
      "epoch": 6.09,
      "learning_rate": 0.001,
      "loss": 2.8781,
      "step": 31716
    },
    {
      "epoch": 6.09,
      "learning_rate": 0.001,
      "loss": 2.8809,
      "step": 31728
    },
    {
      "epoch": 6.09,
      "learning_rate": 0.001,
      "loss": 2.8739,
      "step": 31740
    },
    {
      "epoch": 6.1,
      "learning_rate": 0.001,
      "loss": 2.8733,
      "step": 31752
    },
    {
      "epoch": 6.1,
      "learning_rate": 0.001,
      "loss": 2.8898,
      "step": 31764
    },
    {
      "epoch": 6.1,
      "learning_rate": 0.001,
      "loss": 2.8733,
      "step": 31776
    },
    {
      "epoch": 6.1,
      "learning_rate": 0.001,
      "loss": 2.8903,
      "step": 31788
    },
    {
      "epoch": 6.11,
      "learning_rate": 0.001,
      "loss": 2.8926,
      "step": 31800
    },
    {
      "epoch": 6.11,
      "learning_rate": 0.001,
      "loss": 2.8678,
      "step": 31812
    },
    {
      "epoch": 6.11,
      "learning_rate": 0.001,
      "loss": 2.8809,
      "step": 31824
    },
    {
      "epoch": 6.11,
      "learning_rate": 0.001,
      "loss": 2.8914,
      "step": 31836
    },
    {
      "epoch": 6.12,
      "learning_rate": 0.001,
      "loss": 2.8796,
      "step": 31848
    },
    {
      "epoch": 6.12,
      "learning_rate": 0.001,
      "loss": 2.88,
      "step": 31860
    },
    {
      "epoch": 6.12,
      "learning_rate": 0.001,
      "loss": 2.8807,
      "step": 31872
    },
    {
      "epoch": 6.12,
      "eval_ag_news_accuracy": 0.28703125,
      "eval_ag_news_bleu_score": 3.9932676838373107,
      "eval_ag_news_bleu_score_sem": 0.13873922496043958,
      "eval_ag_news_emb_cos_sim": 0.7425938844680786,
      "eval_ag_news_emb_cos_sim_sem": 0.009404906584712531,
      "eval_ag_news_emb_top1_equal": 0.171875,
      "eval_ag_news_emb_top1_equal_sem": 0.03347745514062371,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.9517323970794678,
      "eval_ag_news_n_ngrams_match_1": 12.048,
      "eval_ag_news_n_ngrams_match_2": 2.434,
      "eval_ag_news_n_ngrams_match_3": 0.642,
      "eval_ag_news_num_pred_words": 45.244,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 52.02541749188316,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.29268099682497567,
      "eval_ag_news_runtime": 15.554,
      "eval_ag_news_samples_per_second": 32.146,
      "eval_ag_news_steps_per_second": 0.064,
      "eval_ag_news_token_set_f1": 0.3065164175911375,
      "eval_ag_news_token_set_f1_sem": 0.004548383426737349,
      "eval_ag_news_token_set_precision": 0.28022749987454626,
      "eval_ag_news_token_set_recall": 0.35378235643225114,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 31875
    },
    {
      "epoch": 6.12,
      "eval_anthropic_toxic_prompts_accuracy": 0.09775,
      "eval_anthropic_toxic_prompts_bleu_score": 2.3144127335439033,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.08886186451074533,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5984156131744385,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010762831945617978,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0390625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.017191973462108996,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.5901401042938232,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.006,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.284,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.398,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.342,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 36.23915283173629,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.1761338177152958,
      "eval_anthropic_toxic_prompts_runtime": 9.8608,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.706,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.101,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3076996036298104,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006039570351417131,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.35492655821637065,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3071782174691777,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 31875
    },
    {
      "epoch": 6.12,
      "eval_arxiv_accuracy": 0.31171875,
      "eval_arxiv_bleu_score": 3.6209685211712053,
      "eval_arxiv_bleu_score_sem": 0.10203255348629121,
      "eval_arxiv_emb_cos_sim": 0.6666562557220459,
      "eval_arxiv_emb_cos_sim_sem": 0.007851389553669168,
      "eval_arxiv_emb_top1_equal": 0.15625,
      "eval_arxiv_emb_top1_equal_sem": 0.03221922156442571,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.8036117553710938,
      "eval_arxiv_n_ngrams_match_1": 12.728,
      "eval_arxiv_n_ngrams_match_2": 2.322,
      "eval_arxiv_n_ngrams_match_3": 0.48,
      "eval_arxiv_num_pred_words": 39.948,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 44.86292614630713,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.29874082080978565,
      "eval_arxiv_runtime": 11.4394,
      "eval_arxiv_samples_per_second": 43.709,
      "eval_arxiv_steps_per_second": 0.087,
      "eval_arxiv_token_set_f1": 0.3005493610398855,
      "eval_arxiv_token_set_f1_sem": 0.004011125521915269,
      "eval_arxiv_token_set_precision": 0.23983913848504776,
      "eval_arxiv_token_set_recall": 0.42529561998233933,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 31875
    },
    {
      "epoch": 6.12,
      "eval_python_code_alpaca_accuracy": 0.13559375,
      "eval_python_code_alpaca_bleu_score": 3.2798934394768655,
      "eval_python_code_alpaca_bleu_score_sem": 0.11962363733603859,
      "eval_python_code_alpaca_emb_cos_sim": 0.6181508302688599,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.012074139599347836,
      "eval_python_code_alpaca_emb_top1_equal": 0.046875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.01875615101164758,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.3517544269561768,
      "eval_python_code_alpaca_n_ngrams_match_1": 7.464,
      "eval_python_code_alpaca_n_ngrams_match_2": 1.746,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.496,
      "eval_python_code_alpaca_num_pred_words": 40.594,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 28.552783499566868,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.25366845011818695,
      "eval_python_code_alpaca_runtime": 27.361,
      "eval_python_code_alpaca_samples_per_second": 18.274,
      "eval_python_code_alpaca_steps_per_second": 0.037,
      "eval_python_code_alpaca_token_set_f1": 0.382675939671801,
      "eval_python_code_alpaca_token_set_f1_sem": 0.00593786920103333,
      "eval_python_code_alpaca_token_set_precision": 0.38861998928986086,
      "eval_python_code_alpaca_token_set_recall": 0.4088522752550497,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 31875
    },
    {
      "epoch": 6.12,
      "eval_wikibio_accuracy": 0.287,
      "eval_wikibio_bleu_score": 5.426647334274031,
      "eval_wikibio_bleu_score_sem": 0.20950147700261437,
      "eval_wikibio_emb_cos_sim": 0.6828176975250244,
      "eval_wikibio_emb_cos_sim_sem": 0.01142746733175503,
      "eval_wikibio_emb_top1_equal": 0.109375,
      "eval_wikibio_emb_top1_equal_sem": 0.027695207821224692,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.277820110321045,
      "eval_wikibio_n_ngrams_match_1": 9.624,
      "eval_wikibio_n_ngrams_match_2": 3.098,
      "eval_wikibio_n_ngrams_match_3": 1.1,
      "eval_wikibio_num_pred_words": 37.146,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 72.08313533345931,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.323363437138928,
      "eval_wikibio_runtime": 18.8164,
      "eval_wikibio_samples_per_second": 26.573,
      "eval_wikibio_steps_per_second": 0.053,
      "eval_wikibio_token_set_f1": 0.301290889736359,
      "eval_wikibio_token_set_f1_sem": 0.005463492256677466,
      "eval_wikibio_token_set_precision": 0.30933469186489587,
      "eval_wikibio_token_set_recall": 0.31016110244566325,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 31875
    },
    {
      "epoch": 6.12,
      "eval_nq_accuracy": 0.484375,
      "eval_nq_bleu_score": 9.371793599953639,
      "eval_nq_bleu_score_sem": 0.41320039465612174,
      "eval_nq_emb_cos_sim": 0.7752724885940552,
      "eval_nq_emb_cos_sim_sem": 0.008654330481739227,
      "eval_nq_emb_top1_equal": 0.203125,
      "eval_nq_emb_top1_equal_sem": 0.03570055125142555,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.5239036083221436,
      "eval_nq_n_ngrams_match_1": 20.84,
      "eval_nq_n_ngrams_match_2": 6.944,
      "eval_nq_n_ngrams_match_3": 2.992,
      "eval_nq_num_pred_words": 48.744,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 12.477207854742506,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.3976274855456138,
      "eval_nq_runtime": 16.8623,
      "eval_nq_samples_per_second": 29.652,
      "eval_nq_steps_per_second": 0.059,
      "eval_nq_token_set_f1": 0.4178742295278702,
      "eval_nq_token_set_f1_sem": 0.005004144786754618,
      "eval_nq_token_set_precision": 0.3687919901897754,
      "eval_nq_token_set_recall": 0.49331111295269586,
      "eval_nq_true_num_tokens": 64.0,
      "step": 31875
    },
    {
      "epoch": 6.12,
      "learning_rate": 0.001,
      "loss": 2.8861,
      "step": 31884
    },
    {
      "epoch": 6.12,
      "learning_rate": 0.001,
      "loss": 2.8842,
      "step": 31896
    },
    {
      "epoch": 6.13,
      "learning_rate": 0.001,
      "loss": 2.8994,
      "step": 31908
    },
    {
      "epoch": 6.13,
      "learning_rate": 0.001,
      "loss": 2.8784,
      "step": 31920
    },
    {
      "epoch": 6.13,
      "learning_rate": 0.001,
      "loss": 2.8736,
      "step": 31932
    },
    {
      "epoch": 6.13,
      "learning_rate": 0.001,
      "loss": 2.8889,
      "step": 31944
    },
    {
      "epoch": 6.14,
      "learning_rate": 0.001,
      "loss": 2.8885,
      "step": 31956
    },
    {
      "epoch": 6.14,
      "learning_rate": 0.001,
      "loss": 2.8855,
      "step": 31968
    },
    {
      "epoch": 6.14,
      "learning_rate": 0.001,
      "loss": 2.8923,
      "step": 31980
    },
    {
      "epoch": 6.14,
      "learning_rate": 0.001,
      "loss": 2.885,
      "step": 31992
    },
    {
      "epoch": 6.15,
      "learning_rate": 0.001,
      "loss": 2.8806,
      "step": 32004
    },
    {
      "epoch": 6.15,
      "learning_rate": 0.001,
      "loss": 2.8674,
      "step": 32016
    },
    {
      "epoch": 6.15,
      "learning_rate": 0.001,
      "loss": 2.8798,
      "step": 32028
    },
    {
      "epoch": 6.15,
      "learning_rate": 0.001,
      "loss": 2.8716,
      "step": 32040
    },
    {
      "epoch": 6.15,
      "learning_rate": 0.001,
      "loss": 2.894,
      "step": 32052
    },
    {
      "epoch": 6.16,
      "learning_rate": 0.001,
      "loss": 2.8951,
      "step": 32064
    },
    {
      "epoch": 6.16,
      "learning_rate": 0.001,
      "loss": 2.8926,
      "step": 32076
    },
    {
      "epoch": 6.16,
      "learning_rate": 0.001,
      "loss": 2.8789,
      "step": 32088
    },
    {
      "epoch": 6.16,
      "learning_rate": 0.001,
      "loss": 2.8742,
      "step": 32100
    },
    {
      "epoch": 6.17,
      "learning_rate": 0.001,
      "loss": 2.8793,
      "step": 32112
    },
    {
      "epoch": 6.17,
      "learning_rate": 0.001,
      "loss": 2.8785,
      "step": 32124
    },
    {
      "epoch": 6.17,
      "learning_rate": 0.001,
      "loss": 2.8787,
      "step": 32136
    },
    {
      "epoch": 6.17,
      "learning_rate": 0.001,
      "loss": 2.8778,
      "step": 32148
    },
    {
      "epoch": 6.18,
      "learning_rate": 0.001,
      "loss": 2.8796,
      "step": 32160
    },
    {
      "epoch": 6.18,
      "learning_rate": 0.001,
      "loss": 2.8815,
      "step": 32172
    },
    {
      "epoch": 6.18,
      "learning_rate": 0.001,
      "loss": 2.8788,
      "step": 32184
    },
    {
      "epoch": 6.18,
      "learning_rate": 0.001,
      "loss": 2.8923,
      "step": 32196
    },
    {
      "epoch": 6.18,
      "learning_rate": 0.001,
      "loss": 2.8701,
      "step": 32208
    },
    {
      "epoch": 6.19,
      "learning_rate": 0.001,
      "loss": 2.8825,
      "step": 32220
    },
    {
      "epoch": 6.19,
      "learning_rate": 0.001,
      "loss": 2.8752,
      "step": 32232
    },
    {
      "epoch": 6.19,
      "learning_rate": 0.001,
      "loss": 2.8884,
      "step": 32244
    },
    {
      "epoch": 6.19,
      "learning_rate": 0.001,
      "loss": 2.8802,
      "step": 32256
    },
    {
      "epoch": 6.2,
      "learning_rate": 0.001,
      "loss": 2.8843,
      "step": 32268
    },
    {
      "epoch": 6.2,
      "learning_rate": 0.001,
      "loss": 2.8851,
      "step": 32280
    },
    {
      "epoch": 6.2,
      "learning_rate": 0.001,
      "loss": 2.8824,
      "step": 32292
    },
    {
      "epoch": 6.2,
      "learning_rate": 0.001,
      "loss": 2.8841,
      "step": 32304
    },
    {
      "epoch": 6.21,
      "learning_rate": 0.001,
      "loss": 2.8817,
      "step": 32316
    },
    {
      "epoch": 6.21,
      "learning_rate": 0.001,
      "loss": 2.8835,
      "step": 32328
    },
    {
      "epoch": 6.21,
      "learning_rate": 0.001,
      "loss": 2.8795,
      "step": 32340
    },
    {
      "epoch": 6.21,
      "learning_rate": 0.001,
      "loss": 2.8823,
      "step": 32352
    },
    {
      "epoch": 6.21,
      "learning_rate": 0.001,
      "loss": 2.8797,
      "step": 32364
    },
    {
      "epoch": 6.22,
      "learning_rate": 0.001,
      "loss": 2.8828,
      "step": 32376
    },
    {
      "epoch": 6.22,
      "learning_rate": 0.001,
      "loss": 2.865,
      "step": 32388
    },
    {
      "epoch": 6.22,
      "learning_rate": 0.001,
      "loss": 2.8777,
      "step": 32400
    },
    {
      "epoch": 6.22,
      "learning_rate": 0.001,
      "loss": 2.8828,
      "step": 32412
    },
    {
      "epoch": 6.23,
      "learning_rate": 0.001,
      "loss": 2.8739,
      "step": 32424
    },
    {
      "epoch": 6.23,
      "learning_rate": 0.001,
      "loss": 2.8788,
      "step": 32436
    },
    {
      "epoch": 6.23,
      "learning_rate": 0.001,
      "loss": 2.8715,
      "step": 32448
    },
    {
      "epoch": 6.23,
      "learning_rate": 0.001,
      "loss": 2.8719,
      "step": 32460
    },
    {
      "epoch": 6.24,
      "learning_rate": 0.001,
      "loss": 2.8832,
      "step": 32472
    },
    {
      "epoch": 6.24,
      "learning_rate": 0.001,
      "loss": 2.8856,
      "step": 32484
    },
    {
      "epoch": 6.24,
      "learning_rate": 0.001,
      "loss": 2.8796,
      "step": 32496
    },
    {
      "epoch": 6.24,
      "eval_ag_news_accuracy": 0.28634375,
      "eval_ag_news_bleu_score": 3.959102074807527,
      "eval_ag_news_bleu_score_sem": 0.14156623777629107,
      "eval_ag_news_emb_cos_sim": 0.7416963577270508,
      "eval_ag_news_emb_cos_sim_sem": 0.00971437969807139,
      "eval_ag_news_emb_top1_equal": 0.2109375,
      "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.943134069442749,
      "eval_ag_news_n_ngrams_match_1": 12.116,
      "eval_ag_news_n_ngrams_match_2": 2.384,
      "eval_ag_news_n_ngrams_match_3": 0.62,
      "eval_ag_news_num_pred_words": 45.854,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 51.580003558481124,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.2956122570199592,
      "eval_ag_news_runtime": 13.9485,
      "eval_ag_news_samples_per_second": 35.846,
      "eval_ag_news_steps_per_second": 0.072,
      "eval_ag_news_token_set_f1": 0.3102278457633787,
      "eval_ag_news_token_set_f1_sem": 0.0042975261292153245,
      "eval_ag_news_token_set_precision": 0.28516797454872583,
      "eval_ag_news_token_set_recall": 0.3568302725876157,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 32500
    },
    {
      "epoch": 6.24,
      "eval_anthropic_toxic_prompts_accuracy": 0.097375,
      "eval_anthropic_toxic_prompts_bleu_score": 2.346702232100953,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10244771733148358,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5916671752929688,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010358631156052853,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0546875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.020175758285348722,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.61450457572937,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.842,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.244,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.396,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.092,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 37.13294480574547,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.17110643622776062,
      "eval_anthropic_toxic_prompts_runtime": 14.0638,
      "eval_anthropic_toxic_prompts_samples_per_second": 35.552,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.071,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.2987623579083474,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006121050598138175,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.33639435889346403,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.304703249539111,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 32500
    },
    {
      "epoch": 6.24,
      "eval_arxiv_accuracy": 0.31234375,
      "eval_arxiv_bleu_score": 3.6087078264334114,
      "eval_arxiv_bleu_score_sem": 0.10859388044335394,
      "eval_arxiv_emb_cos_sim": 0.6700974106788635,
      "eval_arxiv_emb_cos_sim_sem": 0.008436426006581841,
      "eval_arxiv_emb_top1_equal": 0.1640625,
      "eval_arxiv_emb_top1_equal_sem": 0.03286167651298939,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.803508758544922,
      "eval_arxiv_n_ngrams_match_1": 12.746,
      "eval_arxiv_n_ngrams_match_2": 2.37,
      "eval_arxiv_n_ngrams_match_3": 0.5,
      "eval_arxiv_num_pred_words": 39.796,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 44.858305645253836,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.29815410898570505,
      "eval_arxiv_runtime": 16.1807,
      "eval_arxiv_samples_per_second": 30.901,
      "eval_arxiv_steps_per_second": 0.062,
      "eval_arxiv_token_set_f1": 0.2996501984122197,
      "eval_arxiv_token_set_f1_sem": 0.004016272096315681,
      "eval_arxiv_token_set_precision": 0.24099347320557093,
      "eval_arxiv_token_set_recall": 0.4225381584185373,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 32500
    },
    {
      "epoch": 6.24,
      "eval_python_code_alpaca_accuracy": 0.13284375,
      "eval_python_code_alpaca_bleu_score": 3.3482002984629546,
      "eval_python_code_alpaca_bleu_score_sem": 0.10628220002759486,
      "eval_python_code_alpaca_emb_cos_sim": 0.6326918601989746,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010186780015427714,
      "eval_python_code_alpaca_emb_top1_equal": 0.0546875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.020175758285348722,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.338104248046875,
      "eval_python_code_alpaca_n_ngrams_match_1": 7.432,
      "eval_python_code_alpaca_n_ngrams_match_2": 1.736,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.478,
      "eval_python_code_alpaca_num_pred_words": 40.242,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 28.165680916826854,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.2606050391084044,
      "eval_python_code_alpaca_runtime": 13.5828,
      "eval_python_code_alpaca_samples_per_second": 36.811,
      "eval_python_code_alpaca_steps_per_second": 0.074,
      "eval_python_code_alpaca_token_set_f1": 0.385296779102028,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005639627371203578,
      "eval_python_code_alpaca_token_set_precision": 0.3927205218850797,
      "eval_python_code_alpaca_token_set_recall": 0.41302414576091084,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 32500
    },
    {
      "epoch": 6.24,
      "eval_wikibio_accuracy": 0.2883125,
      "eval_wikibio_bleu_score": 5.321655166915569,
      "eval_wikibio_bleu_score_sem": 0.1951197750528287,
      "eval_wikibio_emb_cos_sim": 0.6970815062522888,
      "eval_wikibio_emb_cos_sim_sem": 0.010530089145606726,
      "eval_wikibio_emb_top1_equal": 0.140625,
      "eval_wikibio_emb_top1_equal_sem": 0.030847557647994725,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.244655132293701,
      "eval_wikibio_n_ngrams_match_1": 9.666,
      "eval_wikibio_n_ngrams_match_2": 3.104,
      "eval_wikibio_n_ngrams_match_3": 1.1,
      "eval_wikibio_num_pred_words": 36.992,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 69.73170778258094,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.32495916898779176,
      "eval_wikibio_runtime": 20.358,
      "eval_wikibio_samples_per_second": 24.56,
      "eval_wikibio_steps_per_second": 0.049,
      "eval_wikibio_token_set_f1": 0.30214246471603373,
      "eval_wikibio_token_set_f1_sem": 0.0056686680788928245,
      "eval_wikibio_token_set_precision": 0.3095661244852683,
      "eval_wikibio_token_set_recall": 0.309299235578218,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 32500
    },
    {
      "epoch": 6.24,
      "eval_nq_accuracy": 0.483125,
      "eval_nq_bleu_score": 9.238648708247194,
      "eval_nq_bleu_score_sem": 0.3943277651760115,
      "eval_nq_emb_cos_sim": 0.7835253477096558,
      "eval_nq_emb_cos_sim_sem": 0.008719733171005023,
      "eval_nq_emb_top1_equal": 0.2109375,
      "eval_nq_emb_top1_equal_sem": 0.03620184850179216,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.520721197128296,
      "eval_nq_n_ngrams_match_1": 20.984,
      "eval_nq_n_ngrams_match_2": 6.988,
      "eval_nq_n_ngrams_match_3": 2.914,
      "eval_nq_num_pred_words": 49.11,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 12.437563364790996,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.3980849746181533,
      "eval_nq_runtime": 15.8495,
      "eval_nq_samples_per_second": 31.547,
      "eval_nq_steps_per_second": 0.063,
      "eval_nq_token_set_f1": 0.42203291575286733,
      "eval_nq_token_set_f1_sem": 0.004843009113480223,
      "eval_nq_token_set_precision": 0.3713153285202159,
      "eval_nq_token_set_recall": 0.5011721417165745,
      "eval_nq_true_num_tokens": 64.0,
      "step": 32500
    },
    {
      "epoch": 6.24,
      "learning_rate": 0.001,
      "loss": 2.8826,
      "step": 32508
    },
    {
      "epoch": 6.24,
      "learning_rate": 0.001,
      "loss": 2.874,
      "step": 32520
    },
    {
      "epoch": 6.25,
      "learning_rate": 0.001,
      "loss": 2.874,
      "step": 32532
    },
    {
      "epoch": 6.25,
      "learning_rate": 0.001,
      "loss": 2.8716,
      "step": 32544
    },
    {
      "epoch": 6.25,
      "learning_rate": 0.001,
      "loss": 2.8831,
      "step": 32556
    },
    {
      "epoch": 6.25,
      "learning_rate": 0.001,
      "loss": 2.8823,
      "step": 32568
    },
    {
      "epoch": 6.26,
      "learning_rate": 0.001,
      "loss": 2.8761,
      "step": 32580
    },
    {
      "epoch": 6.26,
      "learning_rate": 0.001,
      "loss": 2.8757,
      "step": 32592
    },
    {
      "epoch": 6.26,
      "learning_rate": 0.001,
      "loss": 2.8733,
      "step": 32604
    },
    {
      "epoch": 6.26,
      "learning_rate": 0.001,
      "loss": 2.8778,
      "step": 32616
    },
    {
      "epoch": 6.26,
      "learning_rate": 0.001,
      "loss": 2.8672,
      "step": 32628
    },
    {
      "epoch": 6.27,
      "learning_rate": 0.001,
      "loss": 2.8822,
      "step": 32640
    },
    {
      "epoch": 6.27,
      "learning_rate": 0.001,
      "loss": 2.8829,
      "step": 32652
    },
    {
      "epoch": 6.27,
      "learning_rate": 0.001,
      "loss": 2.8925,
      "step": 32664
    },
    {
      "epoch": 6.27,
      "learning_rate": 0.001,
      "loss": 2.8867,
      "step": 32676
    },
    {
      "epoch": 6.28,
      "learning_rate": 0.001,
      "loss": 2.8827,
      "step": 32688
    },
    {
      "epoch": 6.28,
      "learning_rate": 0.001,
      "loss": 2.8769,
      "step": 32700
    },
    {
      "epoch": 6.28,
      "learning_rate": 0.001,
      "loss": 2.8743,
      "step": 32712
    },
    {
      "epoch": 6.28,
      "learning_rate": 0.001,
      "loss": 2.8834,
      "step": 32724
    },
    {
      "epoch": 6.29,
      "learning_rate": 0.001,
      "loss": 2.8744,
      "step": 32736
    },
    {
      "epoch": 6.29,
      "learning_rate": 0.001,
      "loss": 2.8808,
      "step": 32748
    },
    {
      "epoch": 6.29,
      "learning_rate": 0.001,
      "loss": 2.8799,
      "step": 32760
    },
    {
      "epoch": 6.29,
      "learning_rate": 0.001,
      "loss": 2.8685,
      "step": 32772
    },
    {
      "epoch": 6.29,
      "learning_rate": 0.001,
      "loss": 2.8736,
      "step": 32784
    },
    {
      "epoch": 6.3,
      "learning_rate": 0.001,
      "loss": 2.8736,
      "step": 32796
    },
    {
      "epoch": 6.3,
      "learning_rate": 0.001,
      "loss": 2.8609,
      "step": 32808
    },
    {
      "epoch": 6.3,
      "learning_rate": 0.001,
      "loss": 2.8627,
      "step": 32820
    },
    {
      "epoch": 6.3,
      "learning_rate": 0.001,
      "loss": 2.8806,
      "step": 32832
    },
    {
      "epoch": 6.31,
      "learning_rate": 0.001,
      "loss": 2.8693,
      "step": 32844
    },
    {
      "epoch": 6.31,
      "learning_rate": 0.001,
      "loss": 2.8781,
      "step": 32856
    },
    {
      "epoch": 6.31,
      "learning_rate": 0.001,
      "loss": 2.8852,
      "step": 32868
    },
    {
      "epoch": 6.31,
      "learning_rate": 0.001,
      "loss": 2.8699,
      "step": 32880
    },
    {
      "epoch": 6.32,
      "learning_rate": 0.001,
      "loss": 2.8728,
      "step": 32892
    },
    {
      "epoch": 6.32,
      "learning_rate": 0.001,
      "loss": 2.8764,
      "step": 32904
    },
    {
      "epoch": 6.32,
      "learning_rate": 0.001,
      "loss": 2.8722,
      "step": 32916
    },
    {
      "epoch": 6.32,
      "learning_rate": 0.001,
      "loss": 2.8667,
      "step": 32928
    },
    {
      "epoch": 6.32,
      "learning_rate": 0.001,
      "loss": 2.8789,
      "step": 32940
    },
    {
      "epoch": 6.33,
      "learning_rate": 0.001,
      "loss": 2.8793,
      "step": 32952
    },
    {
      "epoch": 6.33,
      "learning_rate": 0.001,
      "loss": 2.8865,
      "step": 32964
    },
    {
      "epoch": 6.33,
      "learning_rate": 0.001,
      "loss": 2.8736,
      "step": 32976
    },
    {
      "epoch": 6.33,
      "learning_rate": 0.001,
      "loss": 2.8707,
      "step": 32988
    },
    {
      "epoch": 6.34,
      "learning_rate": 0.001,
      "loss": 2.8649,
      "step": 33000
    },
    {
      "epoch": 6.34,
      "learning_rate": 0.001,
      "loss": 2.8785,
      "step": 33012
    },
    {
      "epoch": 6.34,
      "learning_rate": 0.001,
      "loss": 2.8749,
      "step": 33024
    },
    {
      "epoch": 6.34,
      "learning_rate": 0.001,
      "loss": 2.8682,
      "step": 33036
    },
    {
      "epoch": 6.35,
      "learning_rate": 0.001,
      "loss": 2.8829,
      "step": 33048
    },
    {
      "epoch": 6.35,
      "learning_rate": 0.001,
      "loss": 2.8848,
      "step": 33060
    },
    {
      "epoch": 6.35,
      "learning_rate": 0.001,
      "loss": 2.8784,
      "step": 33072
    },
    {
      "epoch": 6.35,
      "learning_rate": 0.001,
      "loss": 2.8633,
      "step": 33084
    },
    {
      "epoch": 6.35,
      "learning_rate": 0.001,
      "loss": 2.8725,
      "step": 33096
    },
    {
      "epoch": 6.36,
      "learning_rate": 0.001,
      "loss": 2.89,
      "step": 33108
    },
    {
      "epoch": 6.36,
      "learning_rate": 0.001,
      "loss": 2.8774,
      "step": 33120
    },
    {
      "epoch": 6.36,
      "eval_ag_news_accuracy": 0.287,
      "eval_ag_news_bleu_score": 3.950032735542012,
      "eval_ag_news_bleu_score_sem": 0.14019400129715076,
      "eval_ag_news_emb_cos_sim": 0.7462149858474731,
      "eval_ag_news_emb_cos_sim_sem": 0.008832162523585865,
      "eval_ag_news_emb_top1_equal": 0.1640625,
      "eval_ag_news_emb_top1_equal_sem": 0.03286167651298939,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.9389305114746094,
      "eval_ag_news_n_ngrams_match_1": 12.298,
      "eval_ag_news_n_ngrams_match_2": 2.374,
      "eval_ag_news_n_ngrams_match_3": 0.602,
      "eval_ag_news_num_pred_words": 46.454,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 51.36363909240867,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.2955832942289821,
      "eval_ag_news_runtime": 17.5499,
      "eval_ag_news_samples_per_second": 28.49,
      "eval_ag_news_steps_per_second": 0.057,
      "eval_ag_news_token_set_f1": 0.3136387395423549,
      "eval_ag_news_token_set_f1_sem": 0.00412849699900138,
      "eval_ag_news_token_set_precision": 0.2888232867182405,
      "eval_ag_news_token_set_recall": 0.36097510299699265,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 33125
    },
    {
      "epoch": 6.36,
      "eval_anthropic_toxic_prompts_accuracy": 0.09821875,
      "eval_anthropic_toxic_prompts_bleu_score": 2.371140635721898,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.09505355554870674,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6085380911827087,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009689355665312555,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.61575984954834,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.106,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.328,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.404,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.336,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 37.179586086846456,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.1765119750915582,
      "eval_anthropic_toxic_prompts_runtime": 10.3987,
      "eval_anthropic_toxic_prompts_samples_per_second": 48.083,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.096,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.31001100730730663,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006122938187370318,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.35626542884031015,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3126130971859074,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 33125
    },
    {
      "epoch": 6.36,
      "eval_arxiv_accuracy": 0.311375,
      "eval_arxiv_bleu_score": 3.6810036229380168,
      "eval_arxiv_bleu_score_sem": 0.10860359271281318,
      "eval_arxiv_emb_cos_sim": 0.6774710416793823,
      "eval_arxiv_emb_cos_sim_sem": 0.0073367804846182,
      "eval_arxiv_emb_top1_equal": 0.15625,
      "eval_arxiv_emb_top1_equal_sem": 0.03221922156442571,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.8185007572174072,
      "eval_arxiv_n_ngrams_match_1": 12.88,
      "eval_arxiv_n_ngrams_match_2": 2.358,
      "eval_arxiv_n_ngrams_match_3": 0.51,
      "eval_arxiv_num_pred_words": 40.502,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 45.53588776801289,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.29958586833396983,
      "eval_arxiv_runtime": 14.4801,
      "eval_arxiv_samples_per_second": 34.53,
      "eval_arxiv_steps_per_second": 0.069,
      "eval_arxiv_token_set_f1": 0.3002830128301934,
      "eval_arxiv_token_set_f1_sem": 0.004005425068673722,
      "eval_arxiv_token_set_precision": 0.24487552387867292,
      "eval_arxiv_token_set_recall": 0.41109997736572546,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 33125
    },
    {
      "epoch": 6.36,
      "eval_python_code_alpaca_accuracy": 0.13428125,
      "eval_python_code_alpaca_bleu_score": 3.228763862148937,
      "eval_python_code_alpaca_bleu_score_sem": 0.1005497722214129,
      "eval_python_code_alpaca_emb_cos_sim": 0.6419084072113037,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010345804026126365,
      "eval_python_code_alpaca_emb_top1_equal": 0.1015625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.026804565886848545,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.3029255867004395,
      "eval_python_code_alpaca_n_ngrams_match_1": 7.472,
      "eval_python_code_alpaca_n_ngrams_match_2": 1.722,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.456,
      "eval_python_code_alpaca_num_pred_words": 41.152,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 27.19207543905038,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.25525504929475035,
      "eval_python_code_alpaca_runtime": 13.8904,
      "eval_python_code_alpaca_samples_per_second": 35.996,
      "eval_python_code_alpaca_steps_per_second": 0.072,
      "eval_python_code_alpaca_token_set_f1": 0.3894889916093848,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005579658653125345,
      "eval_python_code_alpaca_token_set_precision": 0.3956317457741966,
      "eval_python_code_alpaca_token_set_recall": 0.4152763301239062,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 33125
    },
    {
      "epoch": 6.36,
      "eval_wikibio_accuracy": 0.28784375,
      "eval_wikibio_bleu_score": 5.389823621646703,
      "eval_wikibio_bleu_score_sem": 0.18975478186988518,
      "eval_wikibio_emb_cos_sim": 0.7103409767150879,
      "eval_wikibio_emb_cos_sim_sem": 0.008903025863911852,
      "eval_wikibio_emb_top1_equal": 0.1328125,
      "eval_wikibio_emb_top1_equal_sem": 0.030114394778901498,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.252353668212891,
      "eval_wikibio_n_ngrams_match_1": 10.014,
      "eval_wikibio_n_ngrams_match_2": 3.18,
      "eval_wikibio_n_ngrams_match_3": 1.116,
      "eval_wikibio_num_pred_words": 38.466,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 70.2706115630885,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.33469833254930753,
      "eval_wikibio_runtime": 15.3005,
      "eval_wikibio_samples_per_second": 32.679,
      "eval_wikibio_steps_per_second": 0.065,
      "eval_wikibio_token_set_f1": 0.3123403663021789,
      "eval_wikibio_token_set_f1_sem": 0.004990513917775753,
      "eval_wikibio_token_set_precision": 0.3243231676402658,
      "eval_wikibio_token_set_recall": 0.31240476052988475,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 33125
    },
    {
      "epoch": 6.36,
      "eval_nq_accuracy": 0.48215625,
      "eval_nq_bleu_score": 9.278246429298965,
      "eval_nq_bleu_score_sem": 0.3980576074068516,
      "eval_nq_emb_cos_sim": 0.7856423854827881,
      "eval_nq_emb_cos_sim_sem": 0.007734071825205571,
      "eval_nq_emb_top1_equal": 0.28125,
      "eval_nq_emb_top1_equal_sem": 0.039896367485272234,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.5158660411834717,
      "eval_nq_n_ngrams_match_1": 20.962,
      "eval_nq_n_ngrams_match_2": 6.952,
      "eval_nq_n_ngrams_match_3": 2.928,
      "eval_nq_num_pred_words": 49.082,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 12.377323410601164,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.39737245069653515,
      "eval_nq_runtime": 21.5111,
      "eval_nq_samples_per_second": 23.244,
      "eval_nq_steps_per_second": 0.046,
      "eval_nq_token_set_f1": 0.421570094993196,
      "eval_nq_token_set_f1_sem": 0.004781149110041133,
      "eval_nq_token_set_precision": 0.37095400610548196,
      "eval_nq_token_set_recall": 0.49802642997450525,
      "eval_nq_true_num_tokens": 64.0,
      "step": 33125
    },
    {
      "epoch": 6.36,
      "learning_rate": 0.001,
      "loss": 2.8703,
      "step": 33132
    },
    {
      "epoch": 6.36,
      "learning_rate": 0.001,
      "loss": 2.8646,
      "step": 33144
    },
    {
      "epoch": 6.37,
      "learning_rate": 0.001,
      "loss": 2.8706,
      "step": 33156
    },
    {
      "epoch": 6.37,
      "learning_rate": 0.001,
      "loss": 2.8871,
      "step": 33168
    },
    {
      "epoch": 6.37,
      "learning_rate": 0.001,
      "loss": 2.8742,
      "step": 33180
    },
    {
      "epoch": 6.37,
      "learning_rate": 0.001,
      "loss": 2.8735,
      "step": 33192
    },
    {
      "epoch": 6.38,
      "learning_rate": 0.001,
      "loss": 2.8754,
      "step": 33204
    },
    {
      "epoch": 6.38,
      "learning_rate": 0.001,
      "loss": 2.8793,
      "step": 33216
    },
    {
      "epoch": 6.38,
      "learning_rate": 0.001,
      "loss": 2.8842,
      "step": 33228
    },
    {
      "epoch": 6.38,
      "learning_rate": 0.001,
      "loss": 2.8782,
      "step": 33240
    },
    {
      "epoch": 6.38,
      "learning_rate": 0.001,
      "loss": 2.8842,
      "step": 33252
    },
    {
      "epoch": 6.39,
      "learning_rate": 0.001,
      "loss": 2.8792,
      "step": 33264
    },
    {
      "epoch": 6.39,
      "learning_rate": 0.001,
      "loss": 2.8858,
      "step": 33276
    },
    {
      "epoch": 6.39,
      "learning_rate": 0.001,
      "loss": 2.8721,
      "step": 33288
    },
    {
      "epoch": 6.39,
      "learning_rate": 0.001,
      "loss": 2.8655,
      "step": 33300
    },
    {
      "epoch": 6.4,
      "learning_rate": 0.001,
      "loss": 2.8731,
      "step": 33312
    },
    {
      "epoch": 6.4,
      "learning_rate": 0.001,
      "loss": 2.8652,
      "step": 33324
    },
    {
      "epoch": 6.4,
      "learning_rate": 0.001,
      "loss": 2.8749,
      "step": 33336
    },
    {
      "epoch": 6.4,
      "learning_rate": 0.001,
      "loss": 2.8709,
      "step": 33348
    },
    {
      "epoch": 6.41,
      "learning_rate": 0.001,
      "loss": 2.8792,
      "step": 33360
    },
    {
      "epoch": 6.41,
      "learning_rate": 0.001,
      "loss": 2.8707,
      "step": 33372
    },
    {
      "epoch": 6.41,
      "learning_rate": 0.001,
      "loss": 2.8802,
      "step": 33384
    },
    {
      "epoch": 6.41,
      "learning_rate": 0.001,
      "loss": 2.8873,
      "step": 33396
    },
    {
      "epoch": 6.41,
      "learning_rate": 0.001,
      "loss": 2.8721,
      "step": 33408
    },
    {
      "epoch": 6.42,
      "learning_rate": 0.001,
      "loss": 2.8604,
      "step": 33420
    },
    {
      "epoch": 6.42,
      "learning_rate": 0.001,
      "loss": 2.8753,
      "step": 33432
    },
    {
      "epoch": 6.42,
      "learning_rate": 0.001,
      "loss": 2.8706,
      "step": 33444
    },
    {
      "epoch": 6.42,
      "learning_rate": 0.001,
      "loss": 2.8678,
      "step": 33456
    },
    {
      "epoch": 6.43,
      "learning_rate": 0.001,
      "loss": 2.8655,
      "step": 33468
    },
    {
      "epoch": 6.43,
      "learning_rate": 0.001,
      "loss": 2.8677,
      "step": 33480
    },
    {
      "epoch": 6.43,
      "learning_rate": 0.001,
      "loss": 2.8738,
      "step": 33492
    },
    {
      "epoch": 6.43,
      "learning_rate": 0.001,
      "loss": 2.879,
      "step": 33504
    },
    {
      "epoch": 6.44,
      "learning_rate": 0.001,
      "loss": 2.8717,
      "step": 33516
    },
    {
      "epoch": 6.44,
      "learning_rate": 0.001,
      "loss": 2.8657,
      "step": 33528
    },
    {
      "epoch": 6.44,
      "learning_rate": 0.001,
      "loss": 2.8666,
      "step": 33540
    },
    {
      "epoch": 6.44,
      "learning_rate": 0.001,
      "loss": 2.8687,
      "step": 33552
    },
    {
      "epoch": 6.44,
      "learning_rate": 0.001,
      "loss": 2.8776,
      "step": 33564
    },
    {
      "epoch": 6.45,
      "learning_rate": 0.001,
      "loss": 2.8732,
      "step": 33576
    },
    {
      "epoch": 6.45,
      "learning_rate": 0.001,
      "loss": 2.8661,
      "step": 33588
    },
    {
      "epoch": 6.45,
      "learning_rate": 0.001,
      "loss": 2.8723,
      "step": 33600
    },
    {
      "epoch": 6.45,
      "learning_rate": 0.001,
      "loss": 2.8683,
      "step": 33612
    },
    {
      "epoch": 6.46,
      "learning_rate": 0.001,
      "loss": 2.8685,
      "step": 33624
    },
    {
      "epoch": 6.46,
      "learning_rate": 0.001,
      "loss": 2.8665,
      "step": 33636
    },
    {
      "epoch": 6.46,
      "learning_rate": 0.001,
      "loss": 2.8733,
      "step": 33648
    },
    {
      "epoch": 6.46,
      "learning_rate": 0.001,
      "loss": 2.868,
      "step": 33660
    },
    {
      "epoch": 6.47,
      "learning_rate": 0.001,
      "loss": 2.8701,
      "step": 33672
    },
    {
      "epoch": 6.47,
      "learning_rate": 0.001,
      "loss": 2.8698,
      "step": 33684
    },
    {
      "epoch": 6.47,
      "learning_rate": 0.001,
      "loss": 2.8715,
      "step": 33696
    },
    {
      "epoch": 6.47,
      "learning_rate": 0.001,
      "loss": 2.8769,
      "step": 33708
    },
    {
      "epoch": 6.47,
      "learning_rate": 0.001,
      "loss": 2.8681,
      "step": 33720
    },
    {
      "epoch": 6.48,
      "learning_rate": 0.001,
      "loss": 2.8633,
      "step": 33732
    },
    {
      "epoch": 6.48,
      "learning_rate": 0.001,
      "loss": 2.8674,
      "step": 33744
    },
    {
      "epoch": 6.48,
      "eval_ag_news_accuracy": 0.2890625,
      "eval_ag_news_bleu_score": 3.932410982518147,
      "eval_ag_news_bleu_score_sem": 0.1291998117660071,
      "eval_ag_news_emb_cos_sim": 0.7504019737243652,
      "eval_ag_news_emb_cos_sim_sem": 0.008021780701120109,
      "eval_ag_news_emb_top1_equal": 0.15625,
      "eval_ag_news_emb_top1_equal_sem": 0.03221922156442571,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.9291231632232666,
      "eval_ag_news_n_ngrams_match_1": 12.074,
      "eval_ag_news_n_ngrams_match_2": 2.352,
      "eval_ag_news_n_ngrams_match_3": 0.648,
      "eval_ag_news_num_pred_words": 46.18,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 50.86236012300009,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.29408939111489535,
      "eval_ag_news_runtime": 15.9685,
      "eval_ag_news_samples_per_second": 31.312,
      "eval_ag_news_steps_per_second": 0.063,
      "eval_ag_news_token_set_f1": 0.3085081850473356,
      "eval_ag_news_token_set_f1_sem": 0.004399674595433429,
      "eval_ag_news_token_set_precision": 0.2835485122791629,
      "eval_ag_news_token_set_recall": 0.3551056267540652,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 33750
    },
    {
      "epoch": 6.48,
      "eval_anthropic_toxic_prompts_accuracy": 0.09928125,
      "eval_anthropic_toxic_prompts_bleu_score": 2.5553378948791043,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.09940717692837908,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6188018321990967,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009696636533392812,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0546875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.020175758285348722,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.610792875289917,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.254,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.43,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.468,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.198,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 36.99537390711386,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.18254919545813222,
      "eval_anthropic_toxic_prompts_runtime": 10.184,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.097,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.098,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.31396583357836516,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006127741967123549,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.36607387202302716,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.31002572874258566,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 33750
    },
    {
      "epoch": 6.48,
      "eval_arxiv_accuracy": 0.312875,
      "eval_arxiv_bleu_score": 3.548344708985615,
      "eval_arxiv_bleu_score_sem": 0.11083964357254245,
      "eval_arxiv_emb_cos_sim": 0.6740222573280334,
      "eval_arxiv_emb_cos_sim_sem": 0.007520927309751884,
      "eval_arxiv_emb_top1_equal": 0.1875,
      "eval_arxiv_emb_top1_equal_sem": 0.034634623208270626,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.783830165863037,
      "eval_arxiv_n_ngrams_match_1": 12.43,
      "eval_arxiv_n_ngrams_match_2": 2.204,
      "eval_arxiv_n_ngrams_match_3": 0.472,
      "eval_arxiv_num_pred_words": 38.772,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 43.98418624800976,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.29352644525619054,
      "eval_arxiv_runtime": 10.6071,
      "eval_arxiv_samples_per_second": 47.138,
      "eval_arxiv_steps_per_second": 0.094,
      "eval_arxiv_token_set_f1": 0.29642046247294596,
      "eval_arxiv_token_set_f1_sem": 0.0040327139143596525,
      "eval_arxiv_token_set_precision": 0.2370876540307842,
      "eval_arxiv_token_set_recall": 0.4261550629059122,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 33750
    },
    {
      "epoch": 6.48,
      "eval_python_code_alpaca_accuracy": 0.13721875,
      "eval_python_code_alpaca_bleu_score": 3.2779011790289307,
      "eval_python_code_alpaca_bleu_score_sem": 0.11043417760361648,
      "eval_python_code_alpaca_emb_cos_sim": 0.642796516418457,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.011234746915671919,
      "eval_python_code_alpaca_emb_top1_equal": 0.078125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.023813825516515504,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.312835931777954,
      "eval_python_code_alpaca_n_ngrams_match_1": 7.644,
      "eval_python_code_alpaca_n_ngrams_match_2": 1.794,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.476,
      "eval_python_code_alpaca_num_pred_words": 41.902,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 27.46289804620583,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.2591009943649206,
      "eval_python_code_alpaca_runtime": 10.5335,
      "eval_python_code_alpaca_samples_per_second": 47.468,
      "eval_python_code_alpaca_steps_per_second": 0.095,
      "eval_python_code_alpaca_token_set_f1": 0.3986237679436292,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005568005774957073,
      "eval_python_code_alpaca_token_set_precision": 0.407473030960325,
      "eval_python_code_alpaca_token_set_recall": 0.42484056543566645,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 33750
    },
    {
      "epoch": 6.48,
      "eval_wikibio_accuracy": 0.28815625,
      "eval_wikibio_bleu_score": 5.297948565492029,
      "eval_wikibio_bleu_score_sem": 0.1816595475722539,
      "eval_wikibio_emb_cos_sim": 0.6929820775985718,
      "eval_wikibio_emb_cos_sim_sem": 0.010644598181743776,
      "eval_wikibio_emb_top1_equal": 0.140625,
      "eval_wikibio_emb_top1_equal_sem": 0.030847557647994725,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.286059856414795,
      "eval_wikibio_n_ngrams_match_1": 9.828,
      "eval_wikibio_n_ngrams_match_2": 3.126,
      "eval_wikibio_n_ngrams_match_3": 1.08,
      "eval_wikibio_num_pred_words": 37.772,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 72.67953578609689,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.32688126627313485,
      "eval_wikibio_runtime": 19.7744,
      "eval_wikibio_samples_per_second": 25.285,
      "eval_wikibio_steps_per_second": 0.051,
      "eval_wikibio_token_set_f1": 0.30820207438733105,
      "eval_wikibio_token_set_f1_sem": 0.0051921793765689825,
      "eval_wikibio_token_set_precision": 0.3166989989819676,
      "eval_wikibio_token_set_recall": 0.3150842318918094,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 33750
    },
    {
      "epoch": 6.48,
      "eval_nq_accuracy": 0.48315625,
      "eval_nq_bleu_score": 9.2809600316322,
      "eval_nq_bleu_score_sem": 0.4064657603452962,
      "eval_nq_emb_cos_sim": 0.7859359979629517,
      "eval_nq_emb_cos_sim_sem": 0.008534873142169193,
      "eval_nq_emb_top1_equal": 0.21875,
      "eval_nq_emb_top1_equal_sem": 0.03668319712192295,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.5102248191833496,
      "eval_nq_n_ngrams_match_1": 20.94,
      "eval_nq_n_ngrams_match_2": 6.91,
      "eval_nq_n_ngrams_match_3": 2.92,
      "eval_nq_num_pred_words": 48.784,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 12.307696755829227,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.3991825878678902,
      "eval_nq_runtime": 15.1079,
      "eval_nq_samples_per_second": 33.095,
      "eval_nq_steps_per_second": 0.066,
      "eval_nq_token_set_f1": 0.4207198722805135,
      "eval_nq_token_set_f1_sem": 0.004964994288404282,
      "eval_nq_token_set_precision": 0.37042648453248295,
      "eval_nq_token_set_recall": 0.4984827871399623,
      "eval_nq_true_num_tokens": 64.0,
      "step": 33750
    },
    {
      "epoch": 6.48,
      "learning_rate": 0.001,
      "loss": 2.8675,
      "step": 33756
    },
    {
      "epoch": 6.48,
      "learning_rate": 0.001,
      "loss": 2.8792,
      "step": 33768
    },
    {
      "epoch": 6.49,
      "learning_rate": 0.001,
      "loss": 2.8704,
      "step": 33780
    },
    {
      "epoch": 6.49,
      "learning_rate": 0.001,
      "loss": 2.8732,
      "step": 33792
    },
    {
      "epoch": 6.49,
      "learning_rate": 0.001,
      "loss": 2.8837,
      "step": 33804
    },
    {
      "epoch": 6.49,
      "learning_rate": 0.001,
      "loss": 2.8697,
      "step": 33816
    },
    {
      "epoch": 6.5,
      "learning_rate": 0.001,
      "loss": 2.8661,
      "step": 33828
    },
    {
      "epoch": 6.5,
      "learning_rate": 0.001,
      "loss": 2.8688,
      "step": 33840
    },
    {
      "epoch": 6.5,
      "learning_rate": 0.001,
      "loss": 2.8496,
      "step": 33852
    },
    {
      "epoch": 6.5,
      "learning_rate": 0.001,
      "loss": 2.8764,
      "step": 33864
    },
    {
      "epoch": 6.5,
      "learning_rate": 0.001,
      "loss": 2.8662,
      "step": 33876
    },
    {
      "epoch": 6.51,
      "learning_rate": 0.001,
      "loss": 2.8684,
      "step": 33888
    },
    {
      "epoch": 6.51,
      "learning_rate": 0.001,
      "loss": 2.856,
      "step": 33900
    },
    {
      "epoch": 6.51,
      "learning_rate": 0.001,
      "loss": 2.8704,
      "step": 33912
    },
    {
      "epoch": 6.51,
      "learning_rate": 0.001,
      "loss": 2.8734,
      "step": 33924
    },
    {
      "epoch": 6.52,
      "learning_rate": 0.001,
      "loss": 2.8691,
      "step": 33936
    },
    {
      "epoch": 6.52,
      "learning_rate": 0.001,
      "loss": 2.8683,
      "step": 33948
    },
    {
      "epoch": 6.52,
      "learning_rate": 0.001,
      "loss": 2.8743,
      "step": 33960
    },
    {
      "epoch": 6.52,
      "learning_rate": 0.001,
      "loss": 2.8647,
      "step": 33972
    },
    {
      "epoch": 6.53,
      "learning_rate": 0.001,
      "loss": 2.8595,
      "step": 33984
    },
    {
      "epoch": 6.53,
      "learning_rate": 0.001,
      "loss": 2.8642,
      "step": 33996
    },
    {
      "epoch": 6.53,
      "learning_rate": 0.001,
      "loss": 2.8719,
      "step": 34008
    },
    {
      "epoch": 6.53,
      "learning_rate": 0.001,
      "loss": 2.8643,
      "step": 34020
    },
    {
      "epoch": 6.53,
      "learning_rate": 0.001,
      "loss": 2.866,
      "step": 34032
    },
    {
      "epoch": 6.54,
      "learning_rate": 0.001,
      "loss": 2.8662,
      "step": 34044
    },
    {
      "epoch": 6.54,
      "learning_rate": 0.001,
      "loss": 2.8591,
      "step": 34056
    },
    {
      "epoch": 6.54,
      "learning_rate": 0.001,
      "loss": 2.8704,
      "step": 34068
    },
    {
      "epoch": 6.54,
      "learning_rate": 0.001,
      "loss": 2.8533,
      "step": 34080
    },
    {
      "epoch": 6.55,
      "learning_rate": 0.001,
      "loss": 2.8709,
      "step": 34092
    },
    {
      "epoch": 6.55,
      "learning_rate": 0.001,
      "loss": 2.8714,
      "step": 34104
    },
    {
      "epoch": 6.55,
      "learning_rate": 0.001,
      "loss": 2.871,
      "step": 34116
    },
    {
      "epoch": 6.55,
      "learning_rate": 0.001,
      "loss": 2.8706,
      "step": 34128
    },
    {
      "epoch": 6.56,
      "learning_rate": 0.001,
      "loss": 2.8688,
      "step": 34140
    },
    {
      "epoch": 6.56,
      "learning_rate": 0.001,
      "loss": 2.8647,
      "step": 34152
    },
    {
      "epoch": 6.56,
      "learning_rate": 0.001,
      "loss": 2.8655,
      "step": 34164
    },
    {
      "epoch": 6.56,
      "learning_rate": 0.001,
      "loss": 2.8585,
      "step": 34176
    },
    {
      "epoch": 6.56,
      "learning_rate": 0.001,
      "loss": 2.8598,
      "step": 34188
    },
    {
      "epoch": 6.57,
      "learning_rate": 0.001,
      "loss": 2.8537,
      "step": 34200
    },
    {
      "epoch": 6.57,
      "learning_rate": 0.001,
      "loss": 2.8604,
      "step": 34212
    },
    {
      "epoch": 6.57,
      "learning_rate": 0.001,
      "loss": 2.8705,
      "step": 34224
    },
    {
      "epoch": 6.57,
      "learning_rate": 0.001,
      "loss": 2.8551,
      "step": 34236
    },
    {
      "epoch": 6.58,
      "learning_rate": 0.001,
      "loss": 2.8538,
      "step": 34248
    },
    {
      "epoch": 6.58,
      "learning_rate": 0.001,
      "loss": 2.8747,
      "step": 34260
    },
    {
      "epoch": 6.58,
      "learning_rate": 0.001,
      "loss": 2.8553,
      "step": 34272
    },
    {
      "epoch": 6.58,
      "learning_rate": 0.001,
      "loss": 2.8694,
      "step": 34284
    },
    {
      "epoch": 6.59,
      "learning_rate": 0.001,
      "loss": 2.8557,
      "step": 34296
    },
    {
      "epoch": 6.59,
      "learning_rate": 0.001,
      "loss": 2.8744,
      "step": 34308
    },
    {
      "epoch": 6.59,
      "learning_rate": 0.001,
      "loss": 2.8597,
      "step": 34320
    },
    {
      "epoch": 6.59,
      "learning_rate": 0.001,
      "loss": 2.8696,
      "step": 34332
    },
    {
      "epoch": 6.59,
      "learning_rate": 0.001,
      "loss": 2.866,
      "step": 34344
    },
    {
      "epoch": 6.6,
      "learning_rate": 0.001,
      "loss": 2.8548,
      "step": 34356
    },
    {
      "epoch": 6.6,
      "learning_rate": 0.001,
      "loss": 2.8526,
      "step": 34368
    },
    {
      "epoch": 6.6,
      "eval_ag_news_accuracy": 0.28865625,
      "eval_ag_news_bleu_score": 3.9808220742861793,
      "eval_ag_news_bleu_score_sem": 0.1251884902370004,
      "eval_ag_news_emb_cos_sim": 0.7487285137176514,
      "eval_ag_news_emb_cos_sim_sem": 0.008285175455255417,
      "eval_ag_news_emb_top1_equal": 0.140625,
      "eval_ag_news_emb_top1_equal_sem": 0.030847557647994725,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.9099693298339844,
      "eval_ag_news_n_ngrams_match_1": 12.098,
      "eval_ag_news_n_ngrams_match_2": 2.398,
      "eval_ag_news_n_ngrams_match_3": 0.638,
      "eval_ag_news_num_pred_words": 45.91,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 49.89742158773554,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.2956530095620596,
      "eval_ag_news_runtime": 12.8691,
      "eval_ag_news_samples_per_second": 38.853,
      "eval_ag_news_steps_per_second": 0.078,
      "eval_ag_news_token_set_f1": 0.3104337388231065,
      "eval_ag_news_token_set_f1_sem": 0.004452844806865152,
      "eval_ag_news_token_set_precision": 0.2843858954523405,
      "eval_ag_news_token_set_recall": 0.36018926161695275,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 34375
    },
    {
      "epoch": 6.6,
      "eval_anthropic_toxic_prompts_accuracy": 0.09878125,
      "eval_anthropic_toxic_prompts_bleu_score": 2.570596415059724,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10578402473129396,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6138656735420227,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009576375110978532,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02147948148198014,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.5956509113311768,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.202,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.418,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.482,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.518,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 36.43941109571111,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.1817760515076232,
      "eval_anthropic_toxic_prompts_runtime": 10.4761,
      "eval_anthropic_toxic_prompts_samples_per_second": 47.728,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.095,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.31408359049780926,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006057114708069728,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.3648893419076783,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.30846587214679694,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 34375
    },
    {
      "epoch": 6.6,
      "eval_arxiv_accuracy": 0.31528125,
      "eval_arxiv_bleu_score": 3.4961232713119115,
      "eval_arxiv_bleu_score_sem": 0.10449275208560148,
      "eval_arxiv_emb_cos_sim": 0.6694149374961853,
      "eval_arxiv_emb_cos_sim_sem": 0.008328695369065456,
      "eval_arxiv_emb_top1_equal": 0.2265625,
      "eval_arxiv_emb_top1_equal_sem": 0.03714537682851538,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.768296480178833,
      "eval_arxiv_n_ngrams_match_1": 12.516,
      "eval_arxiv_n_ngrams_match_2": 2.214,
      "eval_arxiv_n_ngrams_match_3": 0.458,
      "eval_arxiv_num_pred_words": 39.366,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 43.306228943868845,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.29682351028940945,
      "eval_arxiv_runtime": 10.9782,
      "eval_arxiv_samples_per_second": 45.545,
      "eval_arxiv_steps_per_second": 0.091,
      "eval_arxiv_token_set_f1": 0.29969631609345126,
      "eval_arxiv_token_set_f1_sem": 0.0040482923164340354,
      "eval_arxiv_token_set_precision": 0.2407923349090727,
      "eval_arxiv_token_set_recall": 0.4186512239012958,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 34375
    },
    {
      "epoch": 6.6,
      "eval_python_code_alpaca_accuracy": 0.1371875,
      "eval_python_code_alpaca_bleu_score": 3.538106681301775,
      "eval_python_code_alpaca_bleu_score_sem": 0.12999780892390916,
      "eval_python_code_alpaca_emb_cos_sim": 0.6586180925369263,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009541149565246536,
      "eval_python_code_alpaca_emb_top1_equal": 0.0625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02147948148198014,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.2873189449310303,
      "eval_python_code_alpaca_n_ngrams_match_1": 7.834,
      "eval_python_code_alpaca_n_ngrams_match_2": 1.87,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.552,
      "eval_python_code_alpaca_num_pred_words": 40.97,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 26.770992848076013,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.2664782288377152,
      "eval_python_code_alpaca_runtime": 11.5815,
      "eval_python_code_alpaca_samples_per_second": 43.172,
      "eval_python_code_alpaca_steps_per_second": 0.086,
      "eval_python_code_alpaca_token_set_f1": 0.39915023487651846,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005585884508409135,
      "eval_python_code_alpaca_token_set_precision": 0.4136092999715175,
      "eval_python_code_alpaca_token_set_recall": 0.4118775048746247,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 34375
    },
    {
      "epoch": 6.6,
      "eval_wikibio_accuracy": 0.29109375,
      "eval_wikibio_bleu_score": 5.100301677590534,
      "eval_wikibio_bleu_score_sem": 0.18325907342749737,
      "eval_wikibio_emb_cos_sim": 0.7025485038757324,
      "eval_wikibio_emb_cos_sim_sem": 0.009842035916765808,
      "eval_wikibio_emb_top1_equal": 0.1171875,
      "eval_wikibio_emb_top1_equal_sem": 0.02854125312152025,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.213316440582275,
      "eval_wikibio_n_ngrams_match_1": 9.51,
      "eval_wikibio_n_ngrams_match_2": 2.996,
      "eval_wikibio_n_ngrams_match_3": 0.996,
      "eval_wikibio_num_pred_words": 37.558,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 67.5802946018747,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.32257824975748883,
      "eval_wikibio_runtime": 10.9509,
      "eval_wikibio_samples_per_second": 45.659,
      "eval_wikibio_steps_per_second": 0.091,
      "eval_wikibio_token_set_f1": 0.30043513728860616,
      "eval_wikibio_token_set_f1_sem": 0.005366059131199708,
      "eval_wikibio_token_set_precision": 0.30797517960411785,
      "eval_wikibio_token_set_recall": 0.3090812801699436,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 34375
    },
    {
      "epoch": 6.6,
      "eval_nq_accuracy": 0.484875,
      "eval_nq_bleu_score": 9.238379832253832,
      "eval_nq_bleu_score_sem": 0.40565419091997434,
      "eval_nq_emb_cos_sim": 0.7774143218994141,
      "eval_nq_emb_cos_sim_sem": 0.009179519726855444,
      "eval_nq_emb_top1_equal": 0.203125,
      "eval_nq_emb_top1_equal_sem": 0.03570055125142555,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.499204158782959,
      "eval_nq_n_ngrams_match_1": 20.678,
      "eval_nq_n_ngrams_match_2": 6.872,
      "eval_nq_n_ngrams_match_3": 2.89,
      "eval_nq_num_pred_words": 48.342,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 12.172802486831891,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.39509830356612174,
      "eval_nq_runtime": 11.4948,
      "eval_nq_samples_per_second": 43.498,
      "eval_nq_steps_per_second": 0.087,
      "eval_nq_token_set_f1": 0.4153722602516122,
      "eval_nq_token_set_f1_sem": 0.004972620376333147,
      "eval_nq_token_set_precision": 0.3643574149174973,
      "eval_nq_token_set_recall": 0.49514487767926674,
      "eval_nq_true_num_tokens": 64.0,
      "step": 34375
    },
    {
      "epoch": 6.6,
      "learning_rate": 0.001,
      "loss": 2.8702,
      "step": 34380
    },
    {
      "epoch": 6.6,
      "learning_rate": 0.001,
      "loss": 2.8599,
      "step": 34392
    },
    {
      "epoch": 6.61,
      "learning_rate": 0.001,
      "loss": 2.866,
      "step": 34404
    },
    {
      "epoch": 6.61,
      "learning_rate": 0.001,
      "loss": 2.8627,
      "step": 34416
    },
    {
      "epoch": 6.61,
      "learning_rate": 0.001,
      "loss": 2.8713,
      "step": 34428
    },
    {
      "epoch": 6.61,
      "learning_rate": 0.001,
      "loss": 2.8665,
      "step": 34440
    },
    {
      "epoch": 6.62,
      "learning_rate": 0.001,
      "loss": 2.8668,
      "step": 34452
    },
    {
      "epoch": 6.62,
      "learning_rate": 0.001,
      "loss": 2.8657,
      "step": 34464
    },
    {
      "epoch": 6.62,
      "learning_rate": 0.001,
      "loss": 2.8609,
      "step": 34476
    },
    {
      "epoch": 6.62,
      "learning_rate": 0.001,
      "loss": 2.8525,
      "step": 34488
    },
    {
      "epoch": 6.62,
      "learning_rate": 0.001,
      "loss": 2.8596,
      "step": 34500
    },
    {
      "epoch": 6.63,
      "learning_rate": 0.001,
      "loss": 2.874,
      "step": 34512
    },
    {
      "epoch": 6.63,
      "learning_rate": 0.001,
      "loss": 2.8531,
      "step": 34524
    },
    {
      "epoch": 6.63,
      "learning_rate": 0.001,
      "loss": 2.8666,
      "step": 34536
    },
    {
      "epoch": 6.63,
      "learning_rate": 0.001,
      "loss": 2.8631,
      "step": 34548
    },
    {
      "epoch": 6.64,
      "learning_rate": 0.001,
      "loss": 2.8582,
      "step": 34560
    },
    {
      "epoch": 6.64,
      "learning_rate": 0.001,
      "loss": 2.845,
      "step": 34572
    },
    {
      "epoch": 6.64,
      "learning_rate": 0.001,
      "loss": 2.8664,
      "step": 34584
    },
    {
      "epoch": 6.64,
      "learning_rate": 0.001,
      "loss": 2.8569,
      "step": 34596
    },
    {
      "epoch": 6.65,
      "learning_rate": 0.001,
      "loss": 2.8645,
      "step": 34608
    },
    {
      "epoch": 6.65,
      "learning_rate": 0.001,
      "loss": 2.8643,
      "step": 34620
    },
    {
      "epoch": 6.65,
      "learning_rate": 0.001,
      "loss": 2.862,
      "step": 34632
    },
    {
      "epoch": 6.65,
      "learning_rate": 0.001,
      "loss": 2.8655,
      "step": 34644
    },
    {
      "epoch": 6.65,
      "learning_rate": 0.001,
      "loss": 2.8637,
      "step": 34656
    },
    {
      "epoch": 6.66,
      "learning_rate": 0.001,
      "loss": 2.8467,
      "step": 34668
    },
    {
      "epoch": 6.66,
      "learning_rate": 0.001,
      "loss": 2.8719,
      "step": 34680
    },
    {
      "epoch": 6.66,
      "learning_rate": 0.001,
      "loss": 2.8639,
      "step": 34692
    },
    {
      "epoch": 6.66,
      "learning_rate": 0.001,
      "loss": 2.8483,
      "step": 34704
    },
    {
      "epoch": 6.67,
      "learning_rate": 0.001,
      "loss": 2.8562,
      "step": 34716
    },
    {
      "epoch": 6.67,
      "learning_rate": 0.001,
      "loss": 2.865,
      "step": 34728
    },
    {
      "epoch": 6.67,
      "learning_rate": 0.001,
      "loss": 2.8623,
      "step": 34740
    },
    {
      "epoch": 6.67,
      "learning_rate": 0.001,
      "loss": 2.84,
      "step": 34752
    },
    {
      "epoch": 6.68,
      "learning_rate": 0.001,
      "loss": 2.8599,
      "step": 34764
    },
    {
      "epoch": 6.68,
      "learning_rate": 0.001,
      "loss": 2.8637,
      "step": 34776
    },
    {
      "epoch": 6.68,
      "learning_rate": 0.001,
      "loss": 2.8508,
      "step": 34788
    },
    {
      "epoch": 6.68,
      "learning_rate": 0.001,
      "loss": 2.8584,
      "step": 34800
    },
    {
      "epoch": 6.68,
      "learning_rate": 0.001,
      "loss": 2.8557,
      "step": 34812
    },
    {
      "epoch": 6.69,
      "learning_rate": 0.001,
      "loss": 2.868,
      "step": 34824
    },
    {
      "epoch": 6.69,
      "learning_rate": 0.001,
      "loss": 2.8637,
      "step": 34836
    },
    {
      "epoch": 6.69,
      "learning_rate": 0.001,
      "loss": 2.8637,
      "step": 34848
    },
    {
      "epoch": 6.69,
      "learning_rate": 0.001,
      "loss": 2.8508,
      "step": 34860
    },
    {
      "epoch": 6.7,
      "learning_rate": 0.001,
      "loss": 2.8701,
      "step": 34872
    },
    {
      "epoch": 6.7,
      "learning_rate": 0.001,
      "loss": 2.8561,
      "step": 34884
    },
    {
      "epoch": 6.7,
      "learning_rate": 0.001,
      "loss": 2.8635,
      "step": 34896
    },
    {
      "epoch": 6.7,
      "learning_rate": 0.001,
      "loss": 2.865,
      "step": 34908
    },
    {
      "epoch": 6.71,
      "learning_rate": 0.001,
      "loss": 2.8588,
      "step": 34920
    },
    {
      "epoch": 6.71,
      "learning_rate": 0.001,
      "loss": 2.8595,
      "step": 34932
    },
    {
      "epoch": 6.71,
      "learning_rate": 0.001,
      "loss": 2.8503,
      "step": 34944
    },
    {
      "epoch": 6.71,
      "learning_rate": 0.001,
      "loss": 2.8504,
      "step": 34956
    },
    {
      "epoch": 6.71,
      "learning_rate": 0.001,
      "loss": 2.8673,
      "step": 34968
    },
    {
      "epoch": 6.72,
      "learning_rate": 0.001,
      "loss": 2.8522,
      "step": 34980
    },
    {
      "epoch": 6.72,
      "learning_rate": 0.001,
      "loss": 2.861,
      "step": 34992
    },
    {
      "epoch": 6.72,
      "eval_ag_news_accuracy": 0.29171875,
      "eval_ag_news_bleu_score": 3.9790343658795435,
      "eval_ag_news_bleu_score_sem": 0.12816532315134122,
      "eval_ag_news_emb_cos_sim": 0.7471901774406433,
      "eval_ag_news_emb_cos_sim_sem": 0.007859010889260052,
      "eval_ag_news_emb_top1_equal": 0.2109375,
      "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.9064903259277344,
      "eval_ag_news_n_ngrams_match_1": 12.22,
      "eval_ag_news_n_ngrams_match_2": 2.448,
      "eval_ag_news_n_ngrams_match_3": 0.65,
      "eval_ag_news_num_pred_words": 45.856,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 49.72412987917137,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.30035677940403466,
      "eval_ag_news_runtime": 11.6855,
      "eval_ag_news_samples_per_second": 42.788,
      "eval_ag_news_steps_per_second": 0.086,
      "eval_ag_news_token_set_f1": 0.3132406257938226,
      "eval_ag_news_token_set_f1_sem": 0.004412036118865149,
      "eval_ag_news_token_set_precision": 0.28730491043904277,
      "eval_ag_news_token_set_recall": 0.3611314712369899,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 35000
    },
    {
      "epoch": 6.72,
      "eval_anthropic_toxic_prompts_accuracy": 0.09875,
      "eval_anthropic_toxic_prompts_bleu_score": 2.497886972594568,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.101341392859124,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6152929663658142,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.01059685699763757,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.5900990962982178,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.208,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.382,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.47,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.38,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 36.237666767186695,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.17834744464066038,
      "eval_anthropic_toxic_prompts_runtime": 11.1501,
      "eval_anthropic_toxic_prompts_samples_per_second": 44.843,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.09,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.31645419333667907,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005970010312402791,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.3619720257387812,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.31606308323792415,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 35000
    },
    {
      "epoch": 6.72,
      "eval_arxiv_accuracy": 0.31521875,
      "eval_arxiv_bleu_score": 3.486571756532496,
      "eval_arxiv_bleu_score_sem": 0.11443993388116293,
      "eval_arxiv_emb_cos_sim": 0.6798828840255737,
      "eval_arxiv_emb_cos_sim_sem": 0.008565536950094392,
      "eval_arxiv_emb_top1_equal": 0.1484375,
      "eval_arxiv_emb_top1_equal_sem": 0.031548465007086954,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.7692196369171143,
      "eval_arxiv_n_ngrams_match_1": 12.686,
      "eval_arxiv_n_ngrams_match_2": 2.23,
      "eval_arxiv_n_ngrams_match_3": 0.458,
      "eval_arxiv_num_pred_words": 39.274,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 43.346225839789405,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.29977012288826155,
      "eval_arxiv_runtime": 11.1014,
      "eval_arxiv_samples_per_second": 45.039,
      "eval_arxiv_steps_per_second": 0.09,
      "eval_arxiv_token_set_f1": 0.2976885077159939,
      "eval_arxiv_token_set_f1_sem": 0.00418356139217434,
      "eval_arxiv_token_set_precision": 0.24099664930764575,
      "eval_arxiv_token_set_recall": 0.418809256534706,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 35000
    },
    {
      "epoch": 6.72,
      "eval_python_code_alpaca_accuracy": 0.13825,
      "eval_python_code_alpaca_bleu_score": 3.212306300204904,
      "eval_python_code_alpaca_bleu_score_sem": 0.09695396669486495,
      "eval_python_code_alpaca_emb_cos_sim": 0.64945387840271,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010687332451955791,
      "eval_python_code_alpaca_emb_top1_equal": 0.0859375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02487009666300537,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.2819371223449707,
      "eval_python_code_alpaca_n_ngrams_match_1": 7.844,
      "eval_python_code_alpaca_n_ngrams_match_2": 1.864,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.484,
      "eval_python_code_alpaca_num_pred_words": 42.6,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 26.62730311725348,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.2625065191240496,
      "eval_python_code_alpaca_runtime": 11.0188,
      "eval_python_code_alpaca_samples_per_second": 45.377,
      "eval_python_code_alpaca_steps_per_second": 0.091,
      "eval_python_code_alpaca_token_set_f1": 0.40376819021253124,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005833305827812379,
      "eval_python_code_alpaca_token_set_precision": 0.41775042920541255,
      "eval_python_code_alpaca_token_set_recall": 0.41977667646916167,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 35000
    },
    {
      "epoch": 6.72,
      "eval_wikibio_accuracy": 0.29409375,
      "eval_wikibio_bleu_score": 4.97081038203576,
      "eval_wikibio_bleu_score_sem": 0.18702653628909408,
      "eval_wikibio_emb_cos_sim": 0.6821502447128296,
      "eval_wikibio_emb_cos_sim_sem": 0.011951348707478394,
      "eval_wikibio_emb_top1_equal": 0.1484375,
      "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.175293922424316,
      "eval_wikibio_n_ngrams_match_1": 9.322,
      "eval_wikibio_n_ngrams_match_2": 2.962,
      "eval_wikibio_n_ngrams_match_3": 0.976,
      "eval_wikibio_num_pred_words": 36.47,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 65.05895913805116,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.31270342671269835,
      "eval_wikibio_runtime": 10.2647,
      "eval_wikibio_samples_per_second": 48.711,
      "eval_wikibio_steps_per_second": 0.097,
      "eval_wikibio_token_set_f1": 0.29586921961166124,
      "eval_wikibio_token_set_f1_sem": 0.005922116862736919,
      "eval_wikibio_token_set_precision": 0.3007645948794808,
      "eval_wikibio_token_set_recall": 0.3076653750410545,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 35000
    },
    {
      "epoch": 6.72,
      "eval_nq_accuracy": 0.486125,
      "eval_nq_bleu_score": 9.535454820459188,
      "eval_nq_bleu_score_sem": 0.41691831562754833,
      "eval_nq_emb_cos_sim": 0.7831763029098511,
      "eval_nq_emb_cos_sim_sem": 0.008866690012139399,
      "eval_nq_emb_top1_equal": 0.2109375,
      "eval_nq_emb_top1_equal_sem": 0.03620184850179216,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.4910407066345215,
      "eval_nq_n_ngrams_match_1": 21.108,
      "eval_nq_n_ngrams_match_2": 7.026,
      "eval_nq_n_ngrams_match_3": 3.03,
      "eval_nq_num_pred_words": 48.928,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 12.073834904395445,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.40223704983995545,
      "eval_nq_runtime": 10.7933,
      "eval_nq_samples_per_second": 46.325,
      "eval_nq_steps_per_second": 0.093,
      "eval_nq_token_set_f1": 0.4235553063558491,
      "eval_nq_token_set_f1_sem": 0.004986664205900059,
      "eval_nq_token_set_precision": 0.3738815575146614,
      "eval_nq_token_set_recall": 0.4991581075893894,
      "eval_nq_true_num_tokens": 64.0,
      "step": 35000
    },
    {
      "epoch": 6.72,
      "learning_rate": 0.001,
      "loss": 2.8634,
      "step": 35004
    },
    {
      "epoch": 6.72,
      "learning_rate": 0.001,
      "loss": 2.8536,
      "step": 35016
    },
    {
      "epoch": 6.73,
      "learning_rate": 0.001,
      "loss": 2.8584,
      "step": 35028
    },
    {
      "epoch": 6.73,
      "learning_rate": 0.001,
      "loss": 2.8567,
      "step": 35040
    },
    {
      "epoch": 6.73,
      "learning_rate": 0.001,
      "loss": 2.8584,
      "step": 35052
    },
    {
      "epoch": 6.73,
      "learning_rate": 0.001,
      "loss": 2.8626,
      "step": 35064
    },
    {
      "epoch": 6.74,
      "learning_rate": 0.001,
      "loss": 2.8454,
      "step": 35076
    },
    {
      "epoch": 6.74,
      "learning_rate": 0.001,
      "loss": 2.8607,
      "step": 35088
    },
    {
      "epoch": 6.74,
      "learning_rate": 0.001,
      "loss": 2.8514,
      "step": 35100
    },
    {
      "epoch": 6.74,
      "learning_rate": 0.001,
      "loss": 2.8495,
      "step": 35112
    },
    {
      "epoch": 6.74,
      "learning_rate": 0.001,
      "loss": 2.8601,
      "step": 35124
    },
    {
      "epoch": 6.75,
      "learning_rate": 0.001,
      "loss": 2.8479,
      "step": 35136
    },
    {
      "epoch": 6.75,
      "learning_rate": 0.001,
      "loss": 2.8581,
      "step": 35148
    },
    {
      "epoch": 6.75,
      "learning_rate": 0.001,
      "loss": 2.8582,
      "step": 35160
    },
    {
      "epoch": 6.75,
      "learning_rate": 0.001,
      "loss": 2.85,
      "step": 35172
    },
    {
      "epoch": 6.76,
      "learning_rate": 0.001,
      "loss": 2.8455,
      "step": 35184
    },
    {
      "epoch": 6.76,
      "learning_rate": 0.001,
      "loss": 2.8609,
      "step": 35196
    },
    {
      "epoch": 6.76,
      "learning_rate": 0.001,
      "loss": 2.8598,
      "step": 35208
    },
    {
      "epoch": 6.76,
      "learning_rate": 0.001,
      "loss": 2.8527,
      "step": 35220
    },
    {
      "epoch": 6.76,
      "learning_rate": 0.001,
      "loss": 2.847,
      "step": 35232
    },
    {
      "epoch": 6.77,
      "learning_rate": 0.001,
      "loss": 2.8607,
      "step": 35244
    },
    {
      "epoch": 6.77,
      "learning_rate": 0.001,
      "loss": 2.8562,
      "step": 35256
    },
    {
      "epoch": 6.77,
      "learning_rate": 0.001,
      "loss": 2.8587,
      "step": 35268
    },
    {
      "epoch": 6.77,
      "learning_rate": 0.001,
      "loss": 2.8676,
      "step": 35280
    },
    {
      "epoch": 6.78,
      "learning_rate": 0.001,
      "loss": 2.8592,
      "step": 35292
    },
    {
      "epoch": 6.78,
      "learning_rate": 0.001,
      "loss": 2.8564,
      "step": 35304
    },
    {
      "epoch": 6.78,
      "learning_rate": 0.001,
      "loss": 2.846,
      "step": 35316
    },
    {
      "epoch": 6.78,
      "learning_rate": 0.001,
      "loss": 2.8488,
      "step": 35328
    },
    {
      "epoch": 6.79,
      "learning_rate": 0.001,
      "loss": 2.8617,
      "step": 35340
    },
    {
      "epoch": 6.79,
      "learning_rate": 0.001,
      "loss": 2.8433,
      "step": 35352
    },
    {
      "epoch": 6.79,
      "learning_rate": 0.001,
      "loss": 2.8579,
      "step": 35364
    },
    {
      "epoch": 6.79,
      "learning_rate": 0.001,
      "loss": 2.8581,
      "step": 35376
    },
    {
      "epoch": 6.79,
      "learning_rate": 0.001,
      "loss": 2.8591,
      "step": 35388
    },
    {
      "epoch": 6.8,
      "learning_rate": 0.001,
      "loss": 2.8593,
      "step": 35400
    },
    {
      "epoch": 6.8,
      "learning_rate": 0.001,
      "loss": 2.8516,
      "step": 35412
    },
    {
      "epoch": 6.8,
      "learning_rate": 0.001,
      "loss": 2.8501,
      "step": 35424
    },
    {
      "epoch": 6.8,
      "learning_rate": 0.001,
      "loss": 2.8415,
      "step": 35436
    },
    {
      "epoch": 6.81,
      "learning_rate": 0.001,
      "loss": 2.8656,
      "step": 35448
    },
    {
      "epoch": 6.81,
      "learning_rate": 0.001,
      "loss": 2.8542,
      "step": 35460
    },
    {
      "epoch": 6.81,
      "learning_rate": 0.001,
      "loss": 2.8538,
      "step": 35472
    },
    {
      "epoch": 6.81,
      "learning_rate": 0.001,
      "loss": 2.8559,
      "step": 35484
    },
    {
      "epoch": 6.82,
      "learning_rate": 0.001,
      "loss": 2.8585,
      "step": 35496
    },
    {
      "epoch": 6.82,
      "learning_rate": 0.001,
      "loss": 2.8572,
      "step": 35508
    },
    {
      "epoch": 6.82,
      "learning_rate": 0.001,
      "loss": 2.8563,
      "step": 35520
    },
    {
      "epoch": 6.82,
      "learning_rate": 0.001,
      "loss": 2.8456,
      "step": 35532
    },
    {
      "epoch": 6.82,
      "learning_rate": 0.001,
      "loss": 2.8541,
      "step": 35544
    },
    {
      "epoch": 6.83,
      "learning_rate": 0.001,
      "loss": 2.8548,
      "step": 35556
    },
    {
      "epoch": 6.83,
      "learning_rate": 0.001,
      "loss": 2.8595,
      "step": 35568
    },
    {
      "epoch": 6.83,
      "learning_rate": 0.001,
      "loss": 2.8608,
      "step": 35580
    },
    {
      "epoch": 6.83,
      "learning_rate": 0.001,
      "loss": 2.86,
      "step": 35592
    },
    {
      "epoch": 6.84,
      "learning_rate": 0.001,
      "loss": 2.8533,
      "step": 35604
    },
    {
      "epoch": 6.84,
      "learning_rate": 0.001,
      "loss": 2.8521,
      "step": 35616
    },
    {
      "epoch": 6.84,
      "eval_ag_news_accuracy": 0.29221875,
      "eval_ag_news_bleu_score": 4.033302907273738,
      "eval_ag_news_bleu_score_sem": 0.1416210678430828,
      "eval_ag_news_emb_cos_sim": 0.7565293312072754,
      "eval_ag_news_emb_cos_sim_sem": 0.007986794865589019,
      "eval_ag_news_emb_top1_equal": 0.21875,
      "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.9059700965881348,
      "eval_ag_news_n_ngrams_match_1": 12.378,
      "eval_ag_news_n_ngrams_match_2": 2.482,
      "eval_ag_news_n_ngrams_match_3": 0.668,
      "eval_ag_news_num_pred_words": 45.898,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 49.6982686553891,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.30169908200188245,
      "eval_ag_news_runtime": 11.7446,
      "eval_ag_news_samples_per_second": 42.573,
      "eval_ag_news_steps_per_second": 0.085,
      "eval_ag_news_token_set_f1": 0.3151272655574294,
      "eval_ag_news_token_set_f1_sem": 0.0043363109390232865,
      "eval_ag_news_token_set_precision": 0.2887459213716017,
      "eval_ag_news_token_set_recall": 0.3646203924574114,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 35625
    },
    {
      "epoch": 6.84,
      "eval_anthropic_toxic_prompts_accuracy": 0.0999375,
      "eval_anthropic_toxic_prompts_bleu_score": 2.4457907647733035,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.09071905044115455,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6070419549942017,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010261396400904696,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.5999157428741455,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.152,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.382,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.45,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.064,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 36.595150911539015,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.17732715009504396,
      "eval_anthropic_toxic_prompts_runtime": 10.355,
      "eval_anthropic_toxic_prompts_samples_per_second": 48.286,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.097,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3167116575512957,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006645970120400522,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.3589571931969173,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.31623793770486097,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 35625
    },
    {
      "epoch": 6.84,
      "eval_arxiv_accuracy": 0.31759375,
      "eval_arxiv_bleu_score": 3.596547915167506,
      "eval_arxiv_bleu_score_sem": 0.10501735834026857,
      "eval_arxiv_emb_cos_sim": 0.6719647645950317,
      "eval_arxiv_emb_cos_sim_sem": 0.008454620274248387,
      "eval_arxiv_emb_top1_equal": 0.2265625,
      "eval_arxiv_emb_top1_equal_sem": 0.03714537682851538,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.7631771564483643,
      "eval_arxiv_n_ngrams_match_1": 12.754,
      "eval_arxiv_n_ngrams_match_2": 2.27,
      "eval_arxiv_n_ngrams_match_3": 0.46,
      "eval_arxiv_num_pred_words": 39.976,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 43.08509684470342,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.30090602886694595,
      "eval_arxiv_runtime": 10.1914,
      "eval_arxiv_samples_per_second": 49.061,
      "eval_arxiv_steps_per_second": 0.098,
      "eval_arxiv_token_set_f1": 0.3017335648735391,
      "eval_arxiv_token_set_f1_sem": 0.004063672816348415,
      "eval_arxiv_token_set_precision": 0.24524005137144733,
      "eval_arxiv_token_set_recall": 0.41690086063031406,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 35625
    },
    {
      "epoch": 6.84,
      "eval_python_code_alpaca_accuracy": 0.1419375,
      "eval_python_code_alpaca_bleu_score": 3.3131736778955037,
      "eval_python_code_alpaca_bleu_score_sem": 0.10066290300174355,
      "eval_python_code_alpaca_emb_cos_sim": 0.6628965139389038,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008923437450936777,
      "eval_python_code_alpaca_emb_top1_equal": 0.0625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02147948148198014,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.243400812149048,
      "eval_python_code_alpaca_n_ngrams_match_1": 7.802,
      "eval_python_code_alpaca_n_ngrams_match_2": 1.872,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.474,
      "eval_python_code_alpaca_num_pred_words": 41.076,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 25.62070496143208,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.2667085180924852,
      "eval_python_code_alpaca_runtime": 9.733,
      "eval_python_code_alpaca_samples_per_second": 51.372,
      "eval_python_code_alpaca_steps_per_second": 0.103,
      "eval_python_code_alpaca_token_set_f1": 0.40651029567760116,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005650780074059194,
      "eval_python_code_alpaca_token_set_precision": 0.418461687800907,
      "eval_python_code_alpaca_token_set_recall": 0.426684185014457,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 35625
    },
    {
      "epoch": 6.84,
      "eval_wikibio_accuracy": 0.2973125,
      "eval_wikibio_bleu_score": 5.0371287381319,
      "eval_wikibio_bleu_score_sem": 0.19139549386612043,
      "eval_wikibio_emb_cos_sim": 0.6902219653129578,
      "eval_wikibio_emb_cos_sim_sem": 0.010707008444765298,
      "eval_wikibio_emb_top1_equal": 0.1875,
      "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.148153781890869,
      "eval_wikibio_n_ngrams_match_1": 9.06,
      "eval_wikibio_n_ngrams_match_2": 2.838,
      "eval_wikibio_n_ngrams_match_3": 0.96,
      "eval_wikibio_num_pred_words": 35.634,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 63.31699533960944,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3129187764131648,
      "eval_wikibio_runtime": 10.2798,
      "eval_wikibio_samples_per_second": 48.639,
      "eval_wikibio_steps_per_second": 0.097,
      "eval_wikibio_token_set_f1": 0.29125349625234354,
      "eval_wikibio_token_set_f1_sem": 0.005700134087778099,
      "eval_wikibio_token_set_precision": 0.29512971044700426,
      "eval_wikibio_token_set_recall": 0.30567532670449404,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 35625
    },
    {
      "epoch": 6.84,
      "eval_nq_accuracy": 0.48709375,
      "eval_nq_bleu_score": 9.50862347290992,
      "eval_nq_bleu_score_sem": 0.3929691996343114,
      "eval_nq_emb_cos_sim": 0.7866525053977966,
      "eval_nq_emb_cos_sim_sem": 0.008427224164171798,
      "eval_nq_emb_top1_equal": 0.1484375,
      "eval_nq_emb_top1_equal_sem": 0.031548465007086954,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.4843738079071045,
      "eval_nq_n_ngrams_match_1": 21.074,
      "eval_nq_n_ngrams_match_2": 7.102,
      "eval_nq_n_ngrams_match_3": 3.042,
      "eval_nq_num_pred_words": 49.09,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 11.993607600649542,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4013508693416742,
      "eval_nq_runtime": 10.8869,
      "eval_nq_samples_per_second": 45.927,
      "eval_nq_steps_per_second": 0.092,
      "eval_nq_token_set_f1": 0.42014996183504355,
      "eval_nq_token_set_f1_sem": 0.0048032256108574,
      "eval_nq_token_set_precision": 0.37189658457401875,
      "eval_nq_token_set_recall": 0.4938735890737705,
      "eval_nq_true_num_tokens": 64.0,
      "step": 35625
    },
    {
      "epoch": 6.84,
      "learning_rate": 0.001,
      "loss": 2.8578,
      "step": 35628
    },
    {
      "epoch": 6.84,
      "learning_rate": 0.001,
      "loss": 2.8646,
      "step": 35640
    },
    {
      "epoch": 6.85,
      "learning_rate": 0.001,
      "loss": 2.8592,
      "step": 35652
    },
    {
      "epoch": 6.85,
      "learning_rate": 0.001,
      "loss": 2.8525,
      "step": 35664
    },
    {
      "epoch": 6.85,
      "learning_rate": 0.001,
      "loss": 2.8558,
      "step": 35676
    },
    {
      "epoch": 6.85,
      "learning_rate": 0.001,
      "loss": 2.8457,
      "step": 35688
    },
    {
      "epoch": 6.85,
      "learning_rate": 0.001,
      "loss": 2.8419,
      "step": 35700
    },
    {
      "epoch": 6.86,
      "learning_rate": 0.001,
      "loss": 2.8466,
      "step": 35712
    },
    {
      "epoch": 6.86,
      "learning_rate": 0.001,
      "loss": 2.856,
      "step": 35724
    },
    {
      "epoch": 6.86,
      "learning_rate": 0.001,
      "loss": 2.854,
      "step": 35736
    },
    {
      "epoch": 6.86,
      "learning_rate": 0.001,
      "loss": 2.8529,
      "step": 35748
    },
    {
      "epoch": 6.87,
      "learning_rate": 0.001,
      "loss": 2.8482,
      "step": 35760
    },
    {
      "epoch": 6.87,
      "learning_rate": 0.001,
      "loss": 2.858,
      "step": 35772
    },
    {
      "epoch": 6.87,
      "learning_rate": 0.001,
      "loss": 2.8507,
      "step": 35784
    },
    {
      "epoch": 6.87,
      "learning_rate": 0.001,
      "loss": 2.8616,
      "step": 35796
    },
    {
      "epoch": 6.88,
      "learning_rate": 0.001,
      "loss": 2.8568,
      "step": 35808
    },
    {
      "epoch": 6.88,
      "learning_rate": 0.001,
      "loss": 2.8616,
      "step": 35820
    },
    {
      "epoch": 6.88,
      "learning_rate": 0.001,
      "loss": 2.8503,
      "step": 35832
    },
    {
      "epoch": 6.88,
      "learning_rate": 0.001,
      "loss": 2.8434,
      "step": 35844
    },
    {
      "epoch": 6.88,
      "learning_rate": 0.001,
      "loss": 2.8444,
      "step": 35856
    },
    {
      "epoch": 6.89,
      "learning_rate": 0.001,
      "loss": 2.8429,
      "step": 35868
    },
    {
      "epoch": 6.89,
      "learning_rate": 0.001,
      "loss": 2.8538,
      "step": 35880
    },
    {
      "epoch": 6.89,
      "learning_rate": 0.001,
      "loss": 2.8436,
      "step": 35892
    },
    {
      "epoch": 6.89,
      "learning_rate": 0.001,
      "loss": 2.8491,
      "step": 35904
    },
    {
      "epoch": 6.9,
      "learning_rate": 0.001,
      "loss": 2.8533,
      "step": 35916
    },
    {
      "epoch": 6.9,
      "learning_rate": 0.001,
      "loss": 2.8437,
      "step": 35928
    },
    {
      "epoch": 6.9,
      "learning_rate": 0.001,
      "loss": 2.8423,
      "step": 35940
    },
    {
      "epoch": 6.9,
      "learning_rate": 0.001,
      "loss": 2.8602,
      "step": 35952
    },
    {
      "epoch": 6.91,
      "learning_rate": 0.001,
      "loss": 2.8524,
      "step": 35964
    },
    {
      "epoch": 6.91,
      "learning_rate": 0.001,
      "loss": 2.852,
      "step": 35976
    },
    {
      "epoch": 6.91,
      "learning_rate": 0.001,
      "loss": 2.8552,
      "step": 35988
    },
    {
      "epoch": 6.91,
      "learning_rate": 0.001,
      "loss": 2.8545,
      "step": 36000
    },
    {
      "epoch": 6.91,
      "learning_rate": 0.001,
      "loss": 2.8462,
      "step": 36012
    },
    {
      "epoch": 6.92,
      "learning_rate": 0.001,
      "loss": 2.8455,
      "step": 36024
    },
    {
      "epoch": 6.92,
      "learning_rate": 0.001,
      "loss": 2.8533,
      "step": 36036
    },
    {
      "epoch": 6.92,
      "learning_rate": 0.001,
      "loss": 2.8509,
      "step": 36048
    },
    {
      "epoch": 6.92,
      "learning_rate": 0.001,
      "loss": 2.8451,
      "step": 36060
    },
    {
      "epoch": 6.93,
      "learning_rate": 0.001,
      "loss": 2.8382,
      "step": 36072
    },
    {
      "epoch": 6.93,
      "learning_rate": 0.001,
      "loss": 2.8433,
      "step": 36084
    },
    {
      "epoch": 6.93,
      "learning_rate": 0.001,
      "loss": 2.8458,
      "step": 36096
    },
    {
      "epoch": 6.93,
      "learning_rate": 0.001,
      "loss": 2.8464,
      "step": 36108
    },
    {
      "epoch": 6.94,
      "learning_rate": 0.001,
      "loss": 2.85,
      "step": 36120
    },
    {
      "epoch": 6.94,
      "learning_rate": 0.001,
      "loss": 2.8432,
      "step": 36132
    },
    {
      "epoch": 6.94,
      "learning_rate": 0.001,
      "loss": 2.8462,
      "step": 36144
    },
    {
      "epoch": 6.94,
      "learning_rate": 0.001,
      "loss": 2.85,
      "step": 36156
    },
    {
      "epoch": 6.94,
      "learning_rate": 0.001,
      "loss": 2.8524,
      "step": 36168
    },
    {
      "epoch": 6.95,
      "learning_rate": 0.001,
      "loss": 2.843,
      "step": 36180
    },
    {
      "epoch": 6.95,
      "learning_rate": 0.001,
      "loss": 2.8532,
      "step": 36192
    },
    {
      "epoch": 6.95,
      "learning_rate": 0.001,
      "loss": 2.8448,
      "step": 36204
    },
    {
      "epoch": 6.95,
      "learning_rate": 0.001,
      "loss": 2.8499,
      "step": 36216
    },
    {
      "epoch": 6.96,
      "learning_rate": 0.001,
      "loss": 2.8455,
      "step": 36228
    },
    {
      "epoch": 6.96,
      "learning_rate": 0.001,
      "loss": 2.8371,
      "step": 36240
    },
    {
      "epoch": 6.96,
      "eval_ag_news_accuracy": 0.29059375,
      "eval_ag_news_bleu_score": 4.001013091157865,
      "eval_ag_news_bleu_score_sem": 0.14201708960718484,
      "eval_ag_news_emb_cos_sim": 0.7538927793502808,
      "eval_ag_news_emb_cos_sim_sem": 0.008352503728662555,
      "eval_ag_news_emb_top1_equal": 0.21875,
      "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.900416612625122,
      "eval_ag_news_n_ngrams_match_1": 12.128,
      "eval_ag_news_n_ngrams_match_2": 2.406,
      "eval_ag_news_n_ngrams_match_3": 0.624,
      "eval_ag_news_num_pred_words": 46.148,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 49.42303507742963,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.2973890165050359,
      "eval_ag_news_runtime": 10.7502,
      "eval_ag_news_samples_per_second": 46.511,
      "eval_ag_news_steps_per_second": 0.093,
      "eval_ag_news_token_set_f1": 0.3108292272235678,
      "eval_ag_news_token_set_f1_sem": 0.004354173725518134,
      "eval_ag_news_token_set_precision": 0.28546916876106476,
      "eval_ag_news_token_set_recall": 0.35694876421234406,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 36250
    },
    {
      "epoch": 6.96,
      "eval_anthropic_toxic_prompts_accuracy": 0.099625,
      "eval_anthropic_toxic_prompts_bleu_score": 2.4159594631973844,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10111036844234905,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6089364886283875,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010003519591201774,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.583393096923828,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.14,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.338,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.414,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.616,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 35.99546998987554,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.1792649419521139,
      "eval_anthropic_toxic_prompts_runtime": 10.3329,
      "eval_anthropic_toxic_prompts_samples_per_second": 48.389,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.097,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3109371902762712,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006244863566339072,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.3576372670219784,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.31091232640093097,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 36250
    },
    {
      "epoch": 6.96,
      "eval_arxiv_accuracy": 0.3146875,
      "eval_arxiv_bleu_score": 3.6703718121870126,
      "eval_arxiv_bleu_score_sem": 0.10700140531860355,
      "eval_arxiv_emb_cos_sim": 0.6836249828338623,
      "eval_arxiv_emb_cos_sim_sem": 0.008382395722424399,
      "eval_arxiv_emb_top1_equal": 0.2265625,
      "eval_arxiv_emb_top1_equal_sem": 0.03714537682851538,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.7564890384674072,
      "eval_arxiv_n_ngrams_match_1": 12.984,
      "eval_arxiv_n_ngrams_match_2": 2.346,
      "eval_arxiv_n_ngrams_match_3": 0.5,
      "eval_arxiv_num_pred_words": 40.41,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 42.7979001071644,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.30671241646693637,
      "eval_arxiv_runtime": 10.5603,
      "eval_arxiv_samples_per_second": 47.347,
      "eval_arxiv_steps_per_second": 0.095,
      "eval_arxiv_token_set_f1": 0.30646650884920795,
      "eval_arxiv_token_set_f1_sem": 0.003758961329871487,
      "eval_arxiv_token_set_precision": 0.24882269285300268,
      "eval_arxiv_token_set_recall": 0.4172174684794033,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 36250
    },
    {
      "epoch": 6.96,
      "eval_python_code_alpaca_accuracy": 0.14065625,
      "eval_python_code_alpaca_bleu_score": 3.2848233900059314,
      "eval_python_code_alpaca_bleu_score_sem": 0.10176746688337283,
      "eval_python_code_alpaca_emb_cos_sim": 0.6695069074630737,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.00981511593436849,
      "eval_python_code_alpaca_emb_top1_equal": 0.1015625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.026804565886848545,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.2280962467193604,
      "eval_python_code_alpaca_n_ngrams_match_1": 7.768,
      "eval_python_code_alpaca_n_ngrams_match_2": 1.9,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.502,
      "eval_python_code_alpaca_num_pred_words": 42.712,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 25.231576522248368,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.2609131432789731,
      "eval_python_code_alpaca_runtime": 9.9834,
      "eval_python_code_alpaca_samples_per_second": 50.083,
      "eval_python_code_alpaca_steps_per_second": 0.1,
      "eval_python_code_alpaca_token_set_f1": 0.40384864133353643,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005792776192029661,
      "eval_python_code_alpaca_token_set_precision": 0.4170913553167784,
      "eval_python_code_alpaca_token_set_recall": 0.4216073861025638,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 36250
    },
    {
      "epoch": 6.96,
      "eval_wikibio_accuracy": 0.29371875,
      "eval_wikibio_bleu_score": 5.271636939003589,
      "eval_wikibio_bleu_score_sem": 0.19323567576436282,
      "eval_wikibio_emb_cos_sim": 0.7105895280838013,
      "eval_wikibio_emb_cos_sim_sem": 0.008956441067498839,
      "eval_wikibio_emb_top1_equal": 0.109375,
      "eval_wikibio_emb_top1_equal_sem": 0.027695207821224692,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.201203346252441,
      "eval_wikibio_n_ngrams_match_1": 9.732,
      "eval_wikibio_n_ngrams_match_2": 3.082,
      "eval_wikibio_n_ngrams_match_3": 1.07,
      "eval_wikibio_num_pred_words": 37.694,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 66.76662608915578,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.32668907550985005,
      "eval_wikibio_runtime": 10.9455,
      "eval_wikibio_samples_per_second": 45.681,
      "eval_wikibio_steps_per_second": 0.091,
      "eval_wikibio_token_set_f1": 0.3046040829945229,
      "eval_wikibio_token_set_f1_sem": 0.0052864212939238405,
      "eval_wikibio_token_set_precision": 0.3145774256119936,
      "eval_wikibio_token_set_recall": 0.3093503197085187,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 36250
    },
    {
      "epoch": 6.96,
      "eval_nq_accuracy": 0.4876875,
      "eval_nq_bleu_score": 9.48210526660597,
      "eval_nq_bleu_score_sem": 0.40051845469570535,
      "eval_nq_emb_cos_sim": 0.7906562089920044,
      "eval_nq_emb_cos_sim_sem": 0.00784313667389513,
      "eval_nq_emb_top1_equal": 0.234375,
      "eval_nq_emb_top1_equal_sem": 0.03758909358128201,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.4820315837860107,
      "eval_nq_n_ngrams_match_1": 21.25,
      "eval_nq_n_ngrams_match_2": 7.096,
      "eval_nq_n_ngrams_match_3": 3.04,
      "eval_nq_num_pred_words": 48.962,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 11.965548756506747,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4038071437912738,
      "eval_nq_runtime": 11.5124,
      "eval_nq_samples_per_second": 43.432,
      "eval_nq_steps_per_second": 0.087,
      "eval_nq_token_set_f1": 0.42580213402058825,
      "eval_nq_token_set_f1_sem": 0.00475173811494621,
      "eval_nq_token_set_precision": 0.37531346390125303,
      "eval_nq_token_set_recall": 0.5037030093758051,
      "eval_nq_true_num_tokens": 64.0,
      "step": 36250
    },
    {
      "epoch": 6.96,
      "learning_rate": 0.001,
      "loss": 2.8489,
      "step": 36252
    },
    {
      "epoch": 6.96,
      "learning_rate": 0.001,
      "loss": 2.8478,
      "step": 36264
    },
    {
      "epoch": 6.97,
      "learning_rate": 0.001,
      "loss": 2.8597,
      "step": 36276
    },
    {
      "epoch": 6.97,
      "learning_rate": 0.001,
      "loss": 2.8406,
      "step": 36288
    },
    {
      "epoch": 6.97,
      "learning_rate": 0.001,
      "loss": 2.8589,
      "step": 36300
    },
    {
      "epoch": 6.97,
      "learning_rate": 0.001,
      "loss": 2.8369,
      "step": 36312
    },
    {
      "epoch": 6.97,
      "learning_rate": 0.001,
      "loss": 2.8513,
      "step": 36324
    },
    {
      "epoch": 6.98,
      "learning_rate": 0.001,
      "loss": 2.8321,
      "step": 36336
    },
    {
      "epoch": 6.98,
      "learning_rate": 0.001,
      "loss": 2.8498,
      "step": 36348
    },
    {
      "epoch": 6.98,
      "learning_rate": 0.001,
      "loss": 2.8382,
      "step": 36360
    },
    {
      "epoch": 6.98,
      "learning_rate": 0.001,
      "loss": 2.8383,
      "step": 36372
    },
    {
      "epoch": 6.99,
      "learning_rate": 0.001,
      "loss": 2.8456,
      "step": 36384
    },
    {
      "epoch": 6.99,
      "learning_rate": 0.001,
      "loss": 2.8407,
      "step": 36396
    },
    {
      "epoch": 6.99,
      "learning_rate": 0.001,
      "loss": 2.8485,
      "step": 36408
    },
    {
      "epoch": 6.99,
      "learning_rate": 0.001,
      "loss": 2.8449,
      "step": 36420
    },
    {
      "epoch": 7.0,
      "learning_rate": 0.001,
      "loss": 2.853,
      "step": 36432
    },
    {
      "epoch": 7.0,
      "learning_rate": 0.001,
      "loss": 2.8466,
      "step": 36444
    },
    {
      "epoch": 7.0,
      "learning_rate": 0.001,
      "loss": 2.8484,
      "step": 36456
    },
    {
      "epoch": 7.0,
      "learning_rate": 0.001,
      "loss": 2.843,
      "step": 36468
    },
    {
      "epoch": 7.0,
      "learning_rate": 0.001,
      "loss": 2.8334,
      "step": 36480
    },
    {
      "epoch": 7.01,
      "learning_rate": 0.001,
      "loss": 2.835,
      "step": 36492
    },
    {
      "epoch": 7.01,
      "learning_rate": 0.001,
      "loss": 2.8219,
      "step": 36504
    },
    {
      "epoch": 7.01,
      "learning_rate": 0.001,
      "loss": 2.8247,
      "step": 36516
    },
    {
      "epoch": 7.01,
      "learning_rate": 0.001,
      "loss": 2.8225,
      "step": 36528
    },
    {
      "epoch": 7.02,
      "learning_rate": 0.001,
      "loss": 2.817,
      "step": 36540
    },
    {
      "epoch": 7.02,
      "learning_rate": 0.001,
      "loss": 2.8337,
      "step": 36552
    },
    {
      "epoch": 7.02,
      "learning_rate": 0.001,
      "loss": 2.8255,
      "step": 36564
    },
    {
      "epoch": 7.02,
      "learning_rate": 0.001,
      "loss": 2.8304,
      "step": 36576
    },
    {
      "epoch": 7.03,
      "learning_rate": 0.001,
      "loss": 2.816,
      "step": 36588
    },
    {
      "epoch": 7.03,
      "learning_rate": 0.001,
      "loss": 2.8233,
      "step": 36600
    },
    {
      "epoch": 7.03,
      "learning_rate": 0.001,
      "loss": 2.8291,
      "step": 36612
    },
    {
      "epoch": 7.03,
      "learning_rate": 0.001,
      "loss": 2.83,
      "step": 36624
    },
    {
      "epoch": 7.03,
      "learning_rate": 0.001,
      "loss": 2.8223,
      "step": 36636
    },
    {
      "epoch": 7.04,
      "learning_rate": 0.001,
      "loss": 2.8189,
      "step": 36648
    },
    {
      "epoch": 7.04,
      "learning_rate": 0.001,
      "loss": 2.8093,
      "step": 36660
    },
    {
      "epoch": 7.04,
      "learning_rate": 0.001,
      "loss": 2.8338,
      "step": 36672
    },
    {
      "epoch": 7.04,
      "learning_rate": 0.001,
      "loss": 2.8249,
      "step": 36684
    },
    {
      "epoch": 7.05,
      "learning_rate": 0.001,
      "loss": 2.8358,
      "step": 36696
    },
    {
      "epoch": 7.05,
      "learning_rate": 0.001,
      "loss": 2.8367,
      "step": 36708
    },
    {
      "epoch": 7.05,
      "learning_rate": 0.001,
      "loss": 2.8302,
      "step": 36720
    },
    {
      "epoch": 7.05,
      "learning_rate": 0.001,
      "loss": 2.8186,
      "step": 36732
    },
    {
      "epoch": 7.06,
      "learning_rate": 0.001,
      "loss": 2.832,
      "step": 36744
    },
    {
      "epoch": 7.06,
      "learning_rate": 0.001,
      "loss": 2.8113,
      "step": 36756
    },
    {
      "epoch": 7.06,
      "learning_rate": 0.001,
      "loss": 2.8182,
      "step": 36768
    },
    {
      "epoch": 7.06,
      "learning_rate": 0.001,
      "loss": 2.8249,
      "step": 36780
    },
    {
      "epoch": 7.06,
      "learning_rate": 0.001,
      "loss": 2.8294,
      "step": 36792
    },
    {
      "epoch": 7.07,
      "learning_rate": 0.001,
      "loss": 2.8356,
      "step": 36804
    },
    {
      "epoch": 7.07,
      "learning_rate": 0.001,
      "loss": 2.8292,
      "step": 36816
    },
    {
      "epoch": 7.07,
      "learning_rate": 0.001,
      "loss": 2.8312,
      "step": 36828
    },
    {
      "epoch": 7.07,
      "learning_rate": 0.001,
      "loss": 2.8321,
      "step": 36840
    },
    {
      "epoch": 7.08,
      "learning_rate": 0.001,
      "loss": 2.8241,
      "step": 36852
    },
    {
      "epoch": 7.08,
      "learning_rate": 0.001,
      "loss": 2.8207,
      "step": 36864
    },
    {
      "epoch": 7.08,
      "eval_ag_news_accuracy": 0.29071875,
      "eval_ag_news_bleu_score": 4.141965842411604,
      "eval_ag_news_bleu_score_sem": 0.14642807861497975,
      "eval_ag_news_emb_cos_sim": 0.7600589990615845,
      "eval_ag_news_emb_cos_sim_sem": 0.008231787910537766,
      "eval_ag_news_emb_top1_equal": 0.2109375,
      "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.883575677871704,
      "eval_ag_news_n_ngrams_match_1": 12.31,
      "eval_ag_news_n_ngrams_match_2": 2.478,
      "eval_ag_news_n_ngrams_match_3": 0.67,
      "eval_ag_news_num_pred_words": 45.432,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 48.59767439815544,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3025313278235161,
      "eval_ag_news_runtime": 10.9462,
      "eval_ag_news_samples_per_second": 45.678,
      "eval_ag_news_steps_per_second": 0.091,
      "eval_ag_news_token_set_f1": 0.311197292571738,
      "eval_ag_news_token_set_f1_sem": 0.004346622013970811,
      "eval_ag_news_token_set_precision": 0.2888890686972827,
      "eval_ag_news_token_set_recall": 0.3517108184800994,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 36875
    },
    {
      "epoch": 7.08,
      "eval_anthropic_toxic_prompts_accuracy": 0.1015,
      "eval_anthropic_toxic_prompts_bleu_score": 2.5673783394611256,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.09869631468894487,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6269517540931702,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00992886501014333,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.581580877304077,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.342,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.474,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.482,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.636,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 35.930297364329405,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.1844880856392122,
      "eval_anthropic_toxic_prompts_runtime": 10.6424,
      "eval_anthropic_toxic_prompts_samples_per_second": 46.982,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.094,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.31599435498013273,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006232520767646726,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.37148702586973925,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.30336107624244457,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 36875
    },
    {
      "epoch": 7.08,
      "eval_arxiv_accuracy": 0.31634375,
      "eval_arxiv_bleu_score": 3.61060866537974,
      "eval_arxiv_bleu_score_sem": 0.10466893275717792,
      "eval_arxiv_emb_cos_sim": 0.6754040718078613,
      "eval_arxiv_emb_cos_sim_sem": 0.00744851836546566,
      "eval_arxiv_emb_top1_equal": 0.1875,
      "eval_arxiv_emb_top1_equal_sem": 0.034634623208270626,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.7616000175476074,
      "eval_arxiv_n_ngrams_match_1": 12.778,
      "eval_arxiv_n_ngrams_match_2": 2.378,
      "eval_arxiv_n_ngrams_match_3": 0.464,
      "eval_arxiv_num_pred_words": 39.126,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 43.017199218494405,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.30578230002592977,
      "eval_arxiv_runtime": 10.2476,
      "eval_arxiv_samples_per_second": 48.792,
      "eval_arxiv_steps_per_second": 0.098,
      "eval_arxiv_token_set_f1": 0.30199824803188574,
      "eval_arxiv_token_set_f1_sem": 0.0037763345145071857,
      "eval_arxiv_token_set_precision": 0.24375933952164122,
      "eval_arxiv_token_set_recall": 0.41768886025886237,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 36875
    },
    {
      "epoch": 7.08,
      "eval_python_code_alpaca_accuracy": 0.1393125,
      "eval_python_code_alpaca_bleu_score": 3.4882156948923555,
      "eval_python_code_alpaca_bleu_score_sem": 0.11563514206491479,
      "eval_python_code_alpaca_emb_cos_sim": 0.6675550937652588,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010578084527893014,
      "eval_python_code_alpaca_emb_top1_equal": 0.0546875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.020175758285348722,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.244743585586548,
      "eval_python_code_alpaca_n_ngrams_match_1": 7.986,
      "eval_python_code_alpaca_n_ngrams_match_2": 1.942,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.514,
      "eval_python_code_alpaca_num_pred_words": 41.336,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 25.65513087143046,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.27403475195271904,
      "eval_python_code_alpaca_runtime": 10.9427,
      "eval_python_code_alpaca_samples_per_second": 45.692,
      "eval_python_code_alpaca_steps_per_second": 0.091,
      "eval_python_code_alpaca_token_set_f1": 0.40519768568179015,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0057966975821193995,
      "eval_python_code_alpaca_token_set_precision": 0.4252213515985516,
      "eval_python_code_alpaca_token_set_recall": 0.41795260782813726,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 36875
    },
    {
      "epoch": 7.08,
      "eval_wikibio_accuracy": 0.29296875,
      "eval_wikibio_bleu_score": 5.199053149502148,
      "eval_wikibio_bleu_score_sem": 0.18898659375276783,
      "eval_wikibio_emb_cos_sim": 0.7024262547492981,
      "eval_wikibio_emb_cos_sim_sem": 0.010916673211038804,
      "eval_wikibio_emb_top1_equal": 0.1875,
      "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.163374423980713,
      "eval_wikibio_n_ngrams_match_1": 9.754,
      "eval_wikibio_n_ngrams_match_2": 3.068,
      "eval_wikibio_n_ngrams_match_3": 1.046,
      "eval_wikibio_num_pred_words": 37.276,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 64.28809227573059,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3276041095901913,
      "eval_wikibio_runtime": 10.0714,
      "eval_wikibio_samples_per_second": 49.645,
      "eval_wikibio_steps_per_second": 0.099,
      "eval_wikibio_token_set_f1": 0.30448842046652186,
      "eval_wikibio_token_set_f1_sem": 0.005315955675238802,
      "eval_wikibio_token_set_precision": 0.31493274930481996,
      "eval_wikibio_token_set_recall": 0.30684122258675334,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 36875
    },
    {
      "epoch": 7.08,
      "eval_nq_accuracy": 0.48971875,
      "eval_nq_bleu_score": 9.495678587851165,
      "eval_nq_bleu_score_sem": 0.4259901616668921,
      "eval_nq_emb_cos_sim": 0.7842116951942444,
      "eval_nq_emb_cos_sim_sem": 0.008655918232547972,
      "eval_nq_emb_top1_equal": 0.203125,
      "eval_nq_emb_top1_equal_sem": 0.03570055125142555,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.469813823699951,
      "eval_nq_n_ngrams_match_1": 20.948,
      "eval_nq_n_ngrams_match_2": 7.142,
      "eval_nq_n_ngrams_match_3": 3.06,
      "eval_nq_num_pred_words": 47.976,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 11.820245997113831,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4023191493701264,
      "eval_nq_runtime": 10.9667,
      "eval_nq_samples_per_second": 45.593,
      "eval_nq_steps_per_second": 0.091,
      "eval_nq_token_set_f1": 0.4214398649034764,
      "eval_nq_token_set_f1_sem": 0.0049624740970446975,
      "eval_nq_token_set_precision": 0.3724223657562727,
      "eval_nq_token_set_recall": 0.5000541047100108,
      "eval_nq_true_num_tokens": 64.0,
      "step": 36875
    },
    {
      "epoch": 7.08,
      "learning_rate": 0.001,
      "loss": 2.8229,
      "step": 36876
    },
    {
      "epoch": 7.08,
      "learning_rate": 0.001,
      "loss": 2.8334,
      "step": 36888
    },
    {
      "epoch": 7.09,
      "learning_rate": 0.001,
      "loss": 2.8293,
      "step": 36900
    },
    {
      "epoch": 7.09,
      "learning_rate": 0.001,
      "loss": 2.8382,
      "step": 36912
    },
    {
      "epoch": 7.09,
      "learning_rate": 0.001,
      "loss": 2.8266,
      "step": 36924
    },
    {
      "epoch": 7.09,
      "learning_rate": 0.001,
      "loss": 2.8239,
      "step": 36936
    },
    {
      "epoch": 7.09,
      "learning_rate": 0.001,
      "loss": 2.8294,
      "step": 36948
    },
    {
      "epoch": 7.1,
      "learning_rate": 0.001,
      "loss": 2.8258,
      "step": 36960
    },
    {
      "epoch": 7.1,
      "learning_rate": 0.001,
      "loss": 2.8331,
      "step": 36972
    },
    {
      "epoch": 7.1,
      "learning_rate": 0.001,
      "loss": 2.8309,
      "step": 36984
    },
    {
      "epoch": 7.1,
      "learning_rate": 0.001,
      "loss": 2.8184,
      "step": 36996
    },
    {
      "epoch": 7.11,
      "learning_rate": 0.001,
      "loss": 2.8206,
      "step": 37008
    },
    {
      "epoch": 7.11,
      "learning_rate": 0.001,
      "loss": 2.8378,
      "step": 37020
    },
    {
      "epoch": 7.11,
      "learning_rate": 0.001,
      "loss": 2.8309,
      "step": 37032
    },
    {
      "epoch": 7.11,
      "learning_rate": 0.001,
      "loss": 2.8212,
      "step": 37044
    },
    {
      "epoch": 7.12,
      "learning_rate": 0.001,
      "loss": 2.8287,
      "step": 37056
    },
    {
      "epoch": 7.12,
      "learning_rate": 0.001,
      "loss": 2.8362,
      "step": 37068
    },
    {
      "epoch": 7.12,
      "learning_rate": 0.001,
      "loss": 2.8264,
      "step": 37080
    },
    {
      "epoch": 7.12,
      "learning_rate": 0.001,
      "loss": 2.8358,
      "step": 37092
    },
    {
      "epoch": 7.12,
      "learning_rate": 0.001,
      "loss": 2.8278,
      "step": 37104
    },
    {
      "epoch": 7.13,
      "learning_rate": 0.001,
      "loss": 2.8257,
      "step": 37116
    },
    {
      "epoch": 7.13,
      "learning_rate": 0.001,
      "loss": 2.8351,
      "step": 37128
    },
    {
      "epoch": 7.13,
      "learning_rate": 0.001,
      "loss": 2.8303,
      "step": 37140
    },
    {
      "epoch": 7.13,
      "learning_rate": 0.001,
      "loss": 2.8269,
      "step": 37152
    },
    {
      "epoch": 7.14,
      "learning_rate": 0.001,
      "loss": 2.8345,
      "step": 37164
    },
    {
      "epoch": 7.14,
      "learning_rate": 0.001,
      "loss": 2.8335,
      "step": 37176
    },
    {
      "epoch": 7.14,
      "learning_rate": 0.001,
      "loss": 2.834,
      "step": 37188
    },
    {
      "epoch": 7.14,
      "learning_rate": 0.001,
      "loss": 2.824,
      "step": 37200
    },
    {
      "epoch": 7.15,
      "learning_rate": 0.001,
      "loss": 2.8216,
      "step": 37212
    },
    {
      "epoch": 7.15,
      "learning_rate": 0.001,
      "loss": 2.8312,
      "step": 37224
    },
    {
      "epoch": 7.15,
      "learning_rate": 0.001,
      "loss": 2.8302,
      "step": 37236
    },
    {
      "epoch": 7.15,
      "learning_rate": 0.001,
      "loss": 2.83,
      "step": 37248
    },
    {
      "epoch": 7.15,
      "learning_rate": 0.001,
      "loss": 2.8333,
      "step": 37260
    },
    {
      "epoch": 7.16,
      "learning_rate": 0.001,
      "loss": 2.8228,
      "step": 37272
    },
    {
      "epoch": 7.16,
      "learning_rate": 0.001,
      "loss": 2.8243,
      "step": 37284
    },
    {
      "epoch": 7.16,
      "learning_rate": 0.001,
      "loss": 2.8237,
      "step": 37296
    },
    {
      "epoch": 7.16,
      "learning_rate": 0.001,
      "loss": 2.8381,
      "step": 37308
    },
    {
      "epoch": 7.17,
      "learning_rate": 0.001,
      "loss": 2.8253,
      "step": 37320
    },
    {
      "epoch": 7.17,
      "learning_rate": 0.001,
      "loss": 2.8203,
      "step": 37332
    },
    {
      "epoch": 7.17,
      "learning_rate": 0.001,
      "loss": 2.8171,
      "step": 37344
    },
    {
      "epoch": 7.17,
      "learning_rate": 0.001,
      "loss": 2.8216,
      "step": 37356
    },
    {
      "epoch": 7.18,
      "learning_rate": 0.001,
      "loss": 2.8243,
      "step": 37368
    },
    {
      "epoch": 7.18,
      "learning_rate": 0.001,
      "loss": 2.8259,
      "step": 37380
    },
    {
      "epoch": 7.18,
      "learning_rate": 0.001,
      "loss": 2.8358,
      "step": 37392
    },
    {
      "epoch": 7.18,
      "learning_rate": 0.001,
      "loss": 2.8213,
      "step": 37404
    },
    {
      "epoch": 7.18,
      "learning_rate": 0.001,
      "loss": 2.8266,
      "step": 37416
    },
    {
      "epoch": 7.19,
      "learning_rate": 0.001,
      "loss": 2.8401,
      "step": 37428
    },
    {
      "epoch": 7.19,
      "learning_rate": 0.001,
      "loss": 2.8282,
      "step": 37440
    },
    {
      "epoch": 7.19,
      "learning_rate": 0.001,
      "loss": 2.8371,
      "step": 37452
    },
    {
      "epoch": 7.19,
      "learning_rate": 0.001,
      "loss": 2.836,
      "step": 37464
    },
    {
      "epoch": 7.2,
      "learning_rate": 0.001,
      "loss": 2.823,
      "step": 37476
    },
    {
      "epoch": 7.2,
      "learning_rate": 0.001,
      "loss": 2.8384,
      "step": 37488
    },
    {
      "epoch": 7.2,
      "learning_rate": 0.001,
      "loss": 2.8208,
      "step": 37500
    },
    {
      "epoch": 7.2,
      "eval_ag_news_accuracy": 0.291875,
      "eval_ag_news_bleu_score": 4.075560197578808,
      "eval_ag_news_bleu_score_sem": 0.14055279167100063,
      "eval_ag_news_emb_cos_sim": 0.7409658432006836,
      "eval_ag_news_emb_cos_sim_sem": 0.009640743226822287,
      "eval_ag_news_emb_top1_equal": 0.203125,
      "eval_ag_news_emb_top1_equal_sem": 0.03570055125142555,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.881091833114624,
      "eval_ag_news_n_ngrams_match_1": 12.298,
      "eval_ag_news_n_ngrams_match_2": 2.484,
      "eval_ag_news_n_ngrams_match_3": 0.682,
      "eval_ag_news_num_pred_words": 45.786,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 48.47711510665973,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.30378170198314647,
      "eval_ag_news_runtime": 12.221,
      "eval_ag_news_samples_per_second": 40.913,
      "eval_ag_news_steps_per_second": 0.082,
      "eval_ag_news_token_set_f1": 0.3154912392593799,
      "eval_ag_news_token_set_f1_sem": 0.004561078227661068,
      "eval_ag_news_token_set_precision": 0.2903076069345732,
      "eval_ag_news_token_set_recall": 0.3611364481283025,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 37500
    },
    {
      "epoch": 7.2,
      "eval_anthropic_toxic_prompts_accuracy": 0.10059375,
      "eval_anthropic_toxic_prompts_bleu_score": 2.58843635674327,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10779527682021862,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6182723045349121,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009823784355727068,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.567587375640869,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.392,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.464,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.51,
      "eval_anthropic_toxic_prompts_num_pred_words": 48.234,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 35.431008237475915,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.1856435062955619,
      "eval_anthropic_toxic_prompts_runtime": 10.3726,
      "eval_anthropic_toxic_prompts_samples_per_second": 48.204,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.096,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3244347198169251,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006269069725487458,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.37710394785701207,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3156356428491323,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 37500
    },
    {
      "epoch": 7.2,
      "eval_arxiv_accuracy": 0.31871875,
      "eval_arxiv_bleu_score": 3.564613700077253,
      "eval_arxiv_bleu_score_sem": 0.09531050104188801,
      "eval_arxiv_emb_cos_sim": 0.6837230324745178,
      "eval_arxiv_emb_cos_sim_sem": 0.00811848796211737,
      "eval_arxiv_emb_top1_equal": 0.25,
      "eval_arxiv_emb_top1_equal_sem": 0.03842366440207048,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.7428550720214844,
      "eval_arxiv_n_ngrams_match_1": 12.93,
      "eval_arxiv_n_ngrams_match_2": 2.36,
      "eval_arxiv_n_ngrams_match_3": 0.45,
      "eval_arxiv_num_pred_words": 39.578,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 42.21835470182647,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.30794959133345395,
      "eval_arxiv_runtime": 10.6342,
      "eval_arxiv_samples_per_second": 47.018,
      "eval_arxiv_steps_per_second": 0.094,
      "eval_arxiv_token_set_f1": 0.3062157910336005,
      "eval_arxiv_token_set_f1_sem": 0.003950539903797717,
      "eval_arxiv_token_set_precision": 0.24876342332733564,
      "eval_arxiv_token_set_recall": 0.4193676679084893,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 37500
    },
    {
      "epoch": 7.2,
      "eval_python_code_alpaca_accuracy": 0.14075,
      "eval_python_code_alpaca_bleu_score": 3.419494453444551,
      "eval_python_code_alpaca_bleu_score_sem": 0.10507328139052009,
      "eval_python_code_alpaca_emb_cos_sim": 0.6705363988876343,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010630884650909543,
      "eval_python_code_alpaca_emb_top1_equal": 0.09375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.025864720141013958,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.2194786071777344,
      "eval_python_code_alpaca_n_ngrams_match_1": 8.27,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.044,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.564,
      "eval_python_code_alpaca_num_pred_words": 43.53,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 25.015074100482465,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.2729404149874883,
      "eval_python_code_alpaca_runtime": 10.1377,
      "eval_python_code_alpaca_samples_per_second": 49.321,
      "eval_python_code_alpaca_steps_per_second": 0.099,
      "eval_python_code_alpaca_token_set_f1": 0.41605554277387596,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005467879657901665,
      "eval_python_code_alpaca_token_set_precision": 0.4406255824141954,
      "eval_python_code_alpaca_token_set_recall": 0.41881482072072057,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 37500
    },
    {
      "epoch": 7.2,
      "eval_wikibio_accuracy": 0.29240625,
      "eval_wikibio_bleu_score": 5.189507654660742,
      "eval_wikibio_bleu_score_sem": 0.1837294166051759,
      "eval_wikibio_emb_cos_sim": 0.7011621594429016,
      "eval_wikibio_emb_cos_sim_sem": 0.010774044324410892,
      "eval_wikibio_emb_top1_equal": 0.1484375,
      "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.191478729248047,
      "eval_wikibio_n_ngrams_match_1": 9.692,
      "eval_wikibio_n_ngrams_match_2": 3.042,
      "eval_wikibio_n_ngrams_match_3": 1.028,
      "eval_wikibio_num_pred_words": 37.5,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 66.1204930120782,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.32669898414904897,
      "eval_wikibio_runtime": 10.4597,
      "eval_wikibio_samples_per_second": 47.802,
      "eval_wikibio_steps_per_second": 0.096,
      "eval_wikibio_token_set_f1": 0.3080096988415167,
      "eval_wikibio_token_set_f1_sem": 0.005330793027963762,
      "eval_wikibio_token_set_precision": 0.3151688113428011,
      "eval_wikibio_token_set_recall": 0.3171309092433026,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 37500
    },
    {
      "epoch": 7.2,
      "eval_nq_accuracy": 0.49121875,
      "eval_nq_bleu_score": 9.849262161468808,
      "eval_nq_bleu_score_sem": 0.40282802508063553,
      "eval_nq_emb_cos_sim": 0.7896238565444946,
      "eval_nq_emb_cos_sim_sem": 0.008385892593761787,
      "eval_nq_emb_top1_equal": 0.25,
      "eval_nq_emb_top1_equal_sem": 0.03842366440207048,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.4660420417785645,
      "eval_nq_n_ngrams_match_1": 21.44,
      "eval_nq_n_ngrams_match_2": 7.312,
      "eval_nq_n_ngrams_match_3": 3.166,
      "eval_nq_num_pred_words": 48.942,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 11.775746580757938,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.40923723675163604,
      "eval_nq_runtime": 10.8654,
      "eval_nq_samples_per_second": 46.018,
      "eval_nq_steps_per_second": 0.092,
      "eval_nq_token_set_f1": 0.4318693510646818,
      "eval_nq_token_set_f1_sem": 0.004863742989369321,
      "eval_nq_token_set_precision": 0.3807189451573697,
      "eval_nq_token_set_recall": 0.5099779210025704,
      "eval_nq_true_num_tokens": 64.0,
      "step": 37500
    },
    {
      "epoch": 7.2,
      "learning_rate": 0.001,
      "loss": 2.8273,
      "step": 37512
    },
    {
      "epoch": 7.21,
      "learning_rate": 0.001,
      "loss": 2.8239,
      "step": 37524
    },
    {
      "epoch": 7.21,
      "learning_rate": 0.001,
      "loss": 2.8333,
      "step": 37536
    },
    {
      "epoch": 7.21,
      "learning_rate": 0.001,
      "loss": 2.8263,
      "step": 37548
    },
    {
      "epoch": 7.21,
      "learning_rate": 0.001,
      "loss": 2.829,
      "step": 37560
    },
    {
      "epoch": 7.21,
      "learning_rate": 0.001,
      "loss": 2.8368,
      "step": 37572
    },
    {
      "epoch": 7.22,
      "learning_rate": 0.001,
      "loss": 2.8205,
      "step": 37584
    },
    {
      "epoch": 7.22,
      "learning_rate": 0.001,
      "loss": 2.8289,
      "step": 37596
    },
    {
      "epoch": 7.22,
      "learning_rate": 0.001,
      "loss": 2.8197,
      "step": 37608
    },
    {
      "epoch": 7.22,
      "learning_rate": 0.001,
      "loss": 2.8303,
      "step": 37620
    },
    {
      "epoch": 7.23,
      "learning_rate": 0.001,
      "loss": 2.8327,
      "step": 37632
    },
    {
      "epoch": 7.23,
      "learning_rate": 0.001,
      "loss": 2.8223,
      "step": 37644
    },
    {
      "epoch": 7.23,
      "learning_rate": 0.001,
      "loss": 2.8316,
      "step": 37656
    },
    {
      "epoch": 7.23,
      "learning_rate": 0.001,
      "loss": 2.8262,
      "step": 37668
    },
    {
      "epoch": 7.24,
      "learning_rate": 0.001,
      "loss": 2.8349,
      "step": 37680
    },
    {
      "epoch": 7.24,
      "learning_rate": 0.001,
      "loss": 2.8337,
      "step": 37692
    },
    {
      "epoch": 7.24,
      "learning_rate": 0.001,
      "loss": 2.8323,
      "step": 37704
    },
    {
      "epoch": 7.24,
      "learning_rate": 0.001,
      "loss": 2.823,
      "step": 37716
    },
    {
      "epoch": 7.24,
      "learning_rate": 0.001,
      "loss": 2.8305,
      "step": 37728
    },
    {
      "epoch": 7.25,
      "learning_rate": 0.001,
      "loss": 2.8321,
      "step": 37740
    },
    {
      "epoch": 7.25,
      "learning_rate": 0.001,
      "loss": 2.8383,
      "step": 37752
    },
    {
      "epoch": 7.25,
      "learning_rate": 0.001,
      "loss": 2.8262,
      "step": 37764
    },
    {
      "epoch": 7.25,
      "learning_rate": 0.001,
      "loss": 2.8321,
      "step": 37776
    },
    {
      "epoch": 7.26,
      "learning_rate": 0.001,
      "loss": 2.828,
      "step": 37788
    },
    {
      "epoch": 7.26,
      "learning_rate": 0.001,
      "loss": 2.8239,
      "step": 37800
    },
    {
      "epoch": 7.26,
      "learning_rate": 0.001,
      "loss": 2.8257,
      "step": 37812
    },
    {
      "epoch": 7.26,
      "learning_rate": 0.001,
      "loss": 2.8249,
      "step": 37824
    },
    {
      "epoch": 7.26,
      "learning_rate": 0.001,
      "loss": 2.8192,
      "step": 37836
    },
    {
      "epoch": 7.27,
      "learning_rate": 0.001,
      "loss": 2.8276,
      "step": 37848
    },
    {
      "epoch": 7.27,
      "learning_rate": 0.001,
      "loss": 2.8194,
      "step": 37860
    },
    {
      "epoch": 7.27,
      "learning_rate": 0.001,
      "loss": 2.8247,
      "step": 37872
    },
    {
      "epoch": 7.27,
      "learning_rate": 0.001,
      "loss": 2.8227,
      "step": 37884
    },
    {
      "epoch": 7.28,
      "learning_rate": 0.001,
      "loss": 2.8239,
      "step": 37896
    },
    {
      "epoch": 7.28,
      "learning_rate": 0.001,
      "loss": 2.8291,
      "step": 37908
    },
    {
      "epoch": 7.28,
      "learning_rate": 0.001,
      "loss": 2.8142,
      "step": 37920
    },
    {
      "epoch": 7.28,
      "learning_rate": 0.001,
      "loss": 2.8255,
      "step": 37932
    },
    {
      "epoch": 7.29,
      "learning_rate": 0.001,
      "loss": 2.8271,
      "step": 37944
    },
    {
      "epoch": 7.29,
      "learning_rate": 0.001,
      "loss": 2.8185,
      "step": 37956
    },
    {
      "epoch": 7.29,
      "learning_rate": 0.001,
      "loss": 2.833,
      "step": 37968
    },
    {
      "epoch": 7.29,
      "learning_rate": 0.001,
      "loss": 2.8274,
      "step": 37980
    },
    {
      "epoch": 7.29,
      "learning_rate": 0.001,
      "loss": 2.8212,
      "step": 37992
    },
    {
      "epoch": 7.3,
      "learning_rate": 0.001,
      "loss": 2.8138,
      "step": 38004
    },
    {
      "epoch": 7.3,
      "learning_rate": 0.001,
      "loss": 2.8294,
      "step": 38016
    },
    {
      "epoch": 7.3,
      "learning_rate": 0.001,
      "loss": 2.8235,
      "step": 38028
    },
    {
      "epoch": 7.3,
      "learning_rate": 0.001,
      "loss": 2.8288,
      "step": 38040
    },
    {
      "epoch": 7.31,
      "learning_rate": 0.001,
      "loss": 2.8333,
      "step": 38052
    },
    {
      "epoch": 7.31,
      "learning_rate": 0.001,
      "loss": 2.8315,
      "step": 38064
    },
    {
      "epoch": 7.31,
      "learning_rate": 0.001,
      "loss": 2.8288,
      "step": 38076
    },
    {
      "epoch": 7.31,
      "learning_rate": 0.001,
      "loss": 2.8248,
      "step": 38088
    },
    {
      "epoch": 7.32,
      "learning_rate": 0.001,
      "loss": 2.8288,
      "step": 38100
    },
    {
      "epoch": 7.32,
      "learning_rate": 0.001,
      "loss": 2.8197,
      "step": 38112
    },
    {
      "epoch": 7.32,
      "learning_rate": 0.001,
      "loss": 2.8248,
      "step": 38124
    },
    {
      "epoch": 7.32,
      "eval_ag_news_accuracy": 0.291375,
      "eval_ag_news_bleu_score": 4.111927591650485,
      "eval_ag_news_bleu_score_sem": 0.13942959236006863,
      "eval_ag_news_emb_cos_sim": 0.7579980492591858,
      "eval_ag_news_emb_cos_sim_sem": 0.0078110662149008785,
      "eval_ag_news_emb_top1_equal": 0.21875,
      "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.8864200115203857,
      "eval_ag_news_n_ngrams_match_1": 12.37,
      "eval_ag_news_n_ngrams_match_2": 2.552,
      "eval_ag_news_n_ngrams_match_3": 0.724,
      "eval_ag_news_num_pred_words": 46.098,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 48.736099168485765,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.29970997289982537,
      "eval_ag_news_runtime": 10.6196,
      "eval_ag_news_samples_per_second": 47.083,
      "eval_ag_news_steps_per_second": 0.094,
      "eval_ag_news_token_set_f1": 0.3134195474069055,
      "eval_ag_news_token_set_f1_sem": 0.004421768378840445,
      "eval_ag_news_token_set_precision": 0.28808458414132176,
      "eval_ag_news_token_set_recall": 0.3613300297957331,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 38125
    },
    {
      "epoch": 7.32,
      "eval_anthropic_toxic_prompts_accuracy": 0.10078125,
      "eval_anthropic_toxic_prompts_bleu_score": 2.5285294528171263,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10437604083084251,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6197388768196106,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009800552222574452,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.56309175491333,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.286,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.432,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.488,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.19,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 35.27208136757842,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.1833968358569168,
      "eval_anthropic_toxic_prompts_runtime": 10.2566,
      "eval_anthropic_toxic_prompts_samples_per_second": 48.749,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.097,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3225245430978992,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0063695929910607945,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.3682159403156869,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.31861268834352285,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 38125
    },
    {
      "epoch": 7.32,
      "eval_arxiv_accuracy": 0.31896875,
      "eval_arxiv_bleu_score": 3.6042015729327757,
      "eval_arxiv_bleu_score_sem": 0.10093453348680773,
      "eval_arxiv_emb_cos_sim": 0.684664249420166,
      "eval_arxiv_emb_cos_sim_sem": 0.008271490900379536,
      "eval_arxiv_emb_top1_equal": 0.203125,
      "eval_arxiv_emb_top1_equal_sem": 0.03570055125142555,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.73223614692688,
      "eval_arxiv_n_ngrams_match_1": 13.178,
      "eval_arxiv_n_ngrams_match_2": 2.372,
      "eval_arxiv_n_ngrams_match_3": 0.458,
      "eval_arxiv_num_pred_words": 40.458,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 41.77241305650989,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3081740223146453,
      "eval_arxiv_runtime": 10.3119,
      "eval_arxiv_samples_per_second": 48.488,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.30799579401648186,
      "eval_arxiv_token_set_f1_sem": 0.00398755371803012,
      "eval_arxiv_token_set_precision": 0.25273382676480266,
      "eval_arxiv_token_set_recall": 0.4171221620812675,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 38125
    },
    {
      "epoch": 7.32,
      "eval_python_code_alpaca_accuracy": 0.1390625,
      "eval_python_code_alpaca_bleu_score": 3.1865279393231867,
      "eval_python_code_alpaca_bleu_score_sem": 0.09784036125422817,
      "eval_python_code_alpaca_emb_cos_sim": 0.6805405020713806,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009657885798909584,
      "eval_python_code_alpaca_emb_top1_equal": 0.046875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.01875615101164758,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.25486159324646,
      "eval_python_code_alpaca_n_ngrams_match_1": 8.106,
      "eval_python_code_alpaca_n_ngrams_match_2": 1.866,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.428,
      "eval_python_code_alpaca_num_pred_words": 44.13,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 25.916027332550556,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.271618347647289,
      "eval_python_code_alpaca_runtime": 10.7784,
      "eval_python_code_alpaca_samples_per_second": 46.389,
      "eval_python_code_alpaca_steps_per_second": 0.093,
      "eval_python_code_alpaca_token_set_f1": 0.4130636735073163,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005273878767899933,
      "eval_python_code_alpaca_token_set_precision": 0.43130803287374025,
      "eval_python_code_alpaca_token_set_recall": 0.4222864183185088,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 38125
    },
    {
      "epoch": 7.32,
      "eval_wikibio_accuracy": 0.29465625,
      "eval_wikibio_bleu_score": 5.271262261283782,
      "eval_wikibio_bleu_score_sem": 0.19646156731593353,
      "eval_wikibio_emb_cos_sim": 0.6982460021972656,
      "eval_wikibio_emb_cos_sim_sem": 0.010816347247970178,
      "eval_wikibio_emb_top1_equal": 0.1015625,
      "eval_wikibio_emb_top1_equal_sem": 0.026804565886848545,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.171474456787109,
      "eval_wikibio_n_ngrams_match_1": 9.568,
      "eval_wikibio_n_ngrams_match_2": 3.022,
      "eval_wikibio_n_ngrams_match_3": 1.098,
      "eval_wikibio_num_pred_words": 37.092,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 64.8109426260154,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3197709786753735,
      "eval_wikibio_runtime": 10.0987,
      "eval_wikibio_samples_per_second": 49.511,
      "eval_wikibio_steps_per_second": 0.099,
      "eval_wikibio_token_set_f1": 0.29769667041718517,
      "eval_wikibio_token_set_f1_sem": 0.00563261630225679,
      "eval_wikibio_token_set_precision": 0.30834025800554304,
      "eval_wikibio_token_set_recall": 0.3052756926770025,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 38125
    },
    {
      "epoch": 7.32,
      "eval_nq_accuracy": 0.49228125,
      "eval_nq_bleu_score": 9.778886937357722,
      "eval_nq_bleu_score_sem": 0.4131434524416253,
      "eval_nq_emb_cos_sim": 0.7817389965057373,
      "eval_nq_emb_cos_sim_sem": 0.008876008416927353,
      "eval_nq_emb_top1_equal": 0.2578125,
      "eval_nq_emb_top1_equal_sem": 0.038815656435002115,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.4626686573028564,
      "eval_nq_n_ngrams_match_1": 21.194,
      "eval_nq_n_ngrams_match_2": 7.186,
      "eval_nq_n_ngrams_match_3": 3.146,
      "eval_nq_num_pred_words": 48.8,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 11.736089387140503,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.403009996745112,
      "eval_nq_runtime": 14.774,
      "eval_nq_samples_per_second": 33.843,
      "eval_nq_steps_per_second": 0.068,
      "eval_nq_token_set_f1": 0.4231008441320113,
      "eval_nq_token_set_f1_sem": 0.0050237176397528475,
      "eval_nq_token_set_precision": 0.3735959931421409,
      "eval_nq_token_set_recall": 0.5010985353875919,
      "eval_nq_true_num_tokens": 64.0,
      "step": 38125
    },
    {
      "epoch": 7.32,
      "learning_rate": 0.001,
      "loss": 2.8287,
      "step": 38136
    },
    {
      "epoch": 7.32,
      "learning_rate": 0.001,
      "loss": 2.832,
      "step": 38148
    },
    {
      "epoch": 7.33,
      "learning_rate": 0.001,
      "loss": 2.8315,
      "step": 38160
    },
    {
      "epoch": 7.33,
      "learning_rate": 0.001,
      "loss": 2.825,
      "step": 38172
    },
    {
      "epoch": 7.33,
      "learning_rate": 0.001,
      "loss": 2.8255,
      "step": 38184
    },
    {
      "epoch": 7.33,
      "learning_rate": 0.001,
      "loss": 2.8236,
      "step": 38196
    },
    {
      "epoch": 7.34,
      "learning_rate": 0.001,
      "loss": 2.8253,
      "step": 38208
    },
    {
      "epoch": 7.34,
      "learning_rate": 0.001,
      "loss": 2.8243,
      "step": 38220
    },
    {
      "epoch": 7.34,
      "learning_rate": 0.001,
      "loss": 2.8307,
      "step": 38232
    },
    {
      "epoch": 7.34,
      "learning_rate": 0.001,
      "loss": 2.8263,
      "step": 38244
    },
    {
      "epoch": 7.35,
      "learning_rate": 0.001,
      "loss": 2.832,
      "step": 38256
    },
    {
      "epoch": 7.35,
      "learning_rate": 0.001,
      "loss": 2.829,
      "step": 38268
    },
    {
      "epoch": 7.35,
      "learning_rate": 0.001,
      "loss": 2.807,
      "step": 38280
    },
    {
      "epoch": 7.35,
      "learning_rate": 0.001,
      "loss": 2.8258,
      "step": 38292
    },
    {
      "epoch": 7.35,
      "learning_rate": 0.001,
      "loss": 2.8217,
      "step": 38304
    },
    {
      "epoch": 7.36,
      "learning_rate": 0.001,
      "loss": 2.8187,
      "step": 38316
    },
    {
      "epoch": 7.36,
      "learning_rate": 0.001,
      "loss": 2.8341,
      "step": 38328
    },
    {
      "epoch": 7.36,
      "learning_rate": 0.001,
      "loss": 2.8234,
      "step": 38340
    },
    {
      "epoch": 7.36,
      "learning_rate": 0.001,
      "loss": 2.8202,
      "step": 38352
    },
    {
      "epoch": 7.37,
      "learning_rate": 0.001,
      "loss": 2.8275,
      "step": 38364
    },
    {
      "epoch": 7.37,
      "learning_rate": 0.001,
      "loss": 2.8347,
      "step": 38376
    },
    {
      "epoch": 7.37,
      "learning_rate": 0.001,
      "loss": 2.817,
      "step": 38388
    },
    {
      "epoch": 7.37,
      "learning_rate": 0.001,
      "loss": 2.8151,
      "step": 38400
    },
    {
      "epoch": 7.38,
      "learning_rate": 0.001,
      "loss": 2.826,
      "step": 38412
    },
    {
      "epoch": 7.38,
      "learning_rate": 0.001,
      "loss": 2.8238,
      "step": 38424
    },
    {
      "epoch": 7.38,
      "learning_rate": 0.001,
      "loss": 2.8125,
      "step": 38436
    },
    {
      "epoch": 7.38,
      "learning_rate": 0.001,
      "loss": 2.8216,
      "step": 38448
    },
    {
      "epoch": 7.38,
      "learning_rate": 0.001,
      "loss": 2.815,
      "step": 38460
    },
    {
      "epoch": 7.39,
      "learning_rate": 0.001,
      "loss": 2.825,
      "step": 38472
    },
    {
      "epoch": 7.39,
      "learning_rate": 0.001,
      "loss": 2.8262,
      "step": 38484
    },
    {
      "epoch": 7.39,
      "learning_rate": 0.001,
      "loss": 2.8249,
      "step": 38496
    },
    {
      "epoch": 7.39,
      "learning_rate": 0.001,
      "loss": 2.8236,
      "step": 38508
    },
    {
      "epoch": 7.4,
      "learning_rate": 0.001,
      "loss": 2.8132,
      "step": 38520
    },
    {
      "epoch": 7.4,
      "learning_rate": 0.001,
      "loss": 2.8217,
      "step": 38532
    },
    {
      "epoch": 7.4,
      "learning_rate": 0.001,
      "loss": 2.8201,
      "step": 38544
    },
    {
      "epoch": 7.4,
      "learning_rate": 0.001,
      "loss": 2.8233,
      "step": 38556
    },
    {
      "epoch": 7.41,
      "learning_rate": 0.001,
      "loss": 2.8216,
      "step": 38568
    },
    {
      "epoch": 7.41,
      "learning_rate": 0.001,
      "loss": 2.8185,
      "step": 38580
    },
    {
      "epoch": 7.41,
      "learning_rate": 0.001,
      "loss": 2.8185,
      "step": 38592
    },
    {
      "epoch": 7.41,
      "learning_rate": 0.001,
      "loss": 2.8179,
      "step": 38604
    },
    {
      "epoch": 7.41,
      "learning_rate": 0.001,
      "loss": 2.8314,
      "step": 38616
    },
    {
      "epoch": 7.42,
      "learning_rate": 0.001,
      "loss": 2.8191,
      "step": 38628
    },
    {
      "epoch": 7.42,
      "learning_rate": 0.001,
      "loss": 2.8225,
      "step": 38640
    },
    {
      "epoch": 7.42,
      "learning_rate": 0.001,
      "loss": 2.8214,
      "step": 38652
    },
    {
      "epoch": 7.42,
      "learning_rate": 0.001,
      "loss": 2.8172,
      "step": 38664
    },
    {
      "epoch": 7.43,
      "learning_rate": 0.001,
      "loss": 2.8142,
      "step": 38676
    },
    {
      "epoch": 7.43,
      "learning_rate": 0.001,
      "loss": 2.8199,
      "step": 38688
    },
    {
      "epoch": 7.43,
      "learning_rate": 0.001,
      "loss": 2.8232,
      "step": 38700
    },
    {
      "epoch": 7.43,
      "learning_rate": 0.001,
      "loss": 2.8322,
      "step": 38712
    },
    {
      "epoch": 7.44,
      "learning_rate": 0.001,
      "loss": 2.8291,
      "step": 38724
    },
    {
      "epoch": 7.44,
      "learning_rate": 0.001,
      "loss": 2.8145,
      "step": 38736
    },
    {
      "epoch": 7.44,
      "learning_rate": 0.001,
      "loss": 2.8183,
      "step": 38748
    },
    {
      "epoch": 7.44,
      "eval_ag_news_accuracy": 0.29221875,
      "eval_ag_news_bleu_score": 4.092322841649292,
      "eval_ag_news_bleu_score_sem": 0.13873710704880135,
      "eval_ag_news_emb_cos_sim": 0.7574640512466431,
      "eval_ag_news_emb_cos_sim_sem": 0.007962351010541716,
      "eval_ag_news_emb_top1_equal": 0.140625,
      "eval_ag_news_emb_top1_equal_sem": 0.030847557647994725,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.8607640266418457,
      "eval_ag_news_n_ngrams_match_1": 12.348,
      "eval_ag_news_n_ngrams_match_2": 2.478,
      "eval_ag_news_n_ngrams_match_3": 0.676,
      "eval_ag_news_num_pred_words": 45.734,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 47.50163001902676,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3018411152657613,
      "eval_ag_news_runtime": 11.495,
      "eval_ag_news_samples_per_second": 43.497,
      "eval_ag_news_steps_per_second": 0.087,
      "eval_ag_news_token_set_f1": 0.3163764829339234,
      "eval_ag_news_token_set_f1_sem": 0.004349552993178556,
      "eval_ag_news_token_set_precision": 0.29036863620803255,
      "eval_ag_news_token_set_recall": 0.36311352940842073,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 38750
    },
    {
      "epoch": 7.44,
      "eval_anthropic_toxic_prompts_accuracy": 0.10159375,
      "eval_anthropic_toxic_prompts_bleu_score": 2.503779413713762,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.09726843972739116,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6251869201660156,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009606774184011522,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.5444931983947754,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.338,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.418,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.452,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.476,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 34.62213434193815,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.18346216916206595,
      "eval_anthropic_toxic_prompts_runtime": 9.9524,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.239,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.1,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3175617659110513,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005847210547486618,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.37391020862000574,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3082319678316208,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 38750
    },
    {
      "epoch": 7.44,
      "eval_arxiv_accuracy": 0.31990625,
      "eval_arxiv_bleu_score": 3.6723109283666857,
      "eval_arxiv_bleu_score_sem": 0.10926994837007345,
      "eval_arxiv_emb_cos_sim": 0.683717668056488,
      "eval_arxiv_emb_cos_sim_sem": 0.008375941986246168,
      "eval_arxiv_emb_top1_equal": 0.2109375,
      "eval_arxiv_emb_top1_equal_sem": 0.03620184850179216,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.735908269882202,
      "eval_arxiv_n_ngrams_match_1": 13.108,
      "eval_arxiv_n_ngrams_match_2": 2.36,
      "eval_arxiv_n_ngrams_match_3": 0.466,
      "eval_arxiv_num_pred_words": 39.274,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 41.926088478229644,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.31073900886607725,
      "eval_arxiv_runtime": 10.6028,
      "eval_arxiv_samples_per_second": 47.158,
      "eval_arxiv_steps_per_second": 0.094,
      "eval_arxiv_token_set_f1": 0.31002259454764525,
      "eval_arxiv_token_set_f1_sem": 0.0040055636315141354,
      "eval_arxiv_token_set_precision": 0.253605436479157,
      "eval_arxiv_token_set_recall": 0.41870470996861786,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 38750
    },
    {
      "epoch": 7.44,
      "eval_python_code_alpaca_accuracy": 0.1410625,
      "eval_python_code_alpaca_bleu_score": 3.6544305731855826,
      "eval_python_code_alpaca_bleu_score_sem": 0.1204447519376356,
      "eval_python_code_alpaca_emb_cos_sim": 0.6821444034576416,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.00996385084567442,
      "eval_python_code_alpaca_emb_top1_equal": 0.0859375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02487009666300537,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.227888822555542,
      "eval_python_code_alpaca_n_ngrams_match_1": 8.356,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.092,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.558,
      "eval_python_code_alpaca_num_pred_words": 42.0,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 25.226343426340456,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.2830427325951065,
      "eval_python_code_alpaca_runtime": 10.2567,
      "eval_python_code_alpaca_samples_per_second": 48.749,
      "eval_python_code_alpaca_steps_per_second": 0.097,
      "eval_python_code_alpaca_token_set_f1": 0.41815996751662404,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005375304110234739,
      "eval_python_code_alpaca_token_set_precision": 0.4452742185040244,
      "eval_python_code_alpaca_token_set_recall": 0.4220512123802343,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 38750
    },
    {
      "epoch": 7.44,
      "eval_wikibio_accuracy": 0.29215625,
      "eval_wikibio_bleu_score": 5.254225044681138,
      "eval_wikibio_bleu_score_sem": 0.179976243470723,
      "eval_wikibio_emb_cos_sim": 0.6872029304504395,
      "eval_wikibio_emb_cos_sim_sem": 0.010441273881993127,
      "eval_wikibio_emb_top1_equal": 0.09375,
      "eval_wikibio_emb_top1_equal_sem": 0.025864720141013958,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.18137264251709,
      "eval_wikibio_n_ngrams_match_1": 9.648,
      "eval_wikibio_n_ngrams_match_2": 3.068,
      "eval_wikibio_n_ngrams_match_3": 1.054,
      "eval_wikibio_num_pred_words": 36.922,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 65.45563877093632,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.325080923640121,
      "eval_wikibio_runtime": 10.5243,
      "eval_wikibio_samples_per_second": 47.509,
      "eval_wikibio_steps_per_second": 0.095,
      "eval_wikibio_token_set_f1": 0.3024434052551784,
      "eval_wikibio_token_set_f1_sem": 0.005322398063439081,
      "eval_wikibio_token_set_precision": 0.30908104158307004,
      "eval_wikibio_token_set_recall": 0.31218924436671847,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 38750
    },
    {
      "epoch": 7.44,
      "eval_nq_accuracy": 0.49053125,
      "eval_nq_bleu_score": 9.805626824020768,
      "eval_nq_bleu_score_sem": 0.4191953099360163,
      "eval_nq_emb_cos_sim": 0.7905921936035156,
      "eval_nq_emb_cos_sim_sem": 0.008200836977207087,
      "eval_nq_emb_top1_equal": 0.2265625,
      "eval_nq_emb_top1_equal_sem": 0.03714537682851538,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.4598374366760254,
      "eval_nq_n_ngrams_match_1": 21.226,
      "eval_nq_n_ngrams_match_2": 7.172,
      "eval_nq_n_ngrams_match_3": 3.116,
      "eval_nq_num_pred_words": 48.53,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 11.702908921562596,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.40809499842489294,
      "eval_nq_runtime": 10.5915,
      "eval_nq_samples_per_second": 47.208,
      "eval_nq_steps_per_second": 0.094,
      "eval_nq_token_set_f1": 0.42651127835422487,
      "eval_nq_token_set_f1_sem": 0.004904623713934699,
      "eval_nq_token_set_precision": 0.3773730221591363,
      "eval_nq_token_set_recall": 0.5004069504882075,
      "eval_nq_true_num_tokens": 64.0,
      "step": 38750
    },
    {
      "epoch": 7.44,
      "learning_rate": 0.001,
      "loss": 2.8275,
      "step": 38760
    },
    {
      "epoch": 7.44,
      "learning_rate": 0.001,
      "loss": 2.8203,
      "step": 38772
    },
    {
      "epoch": 7.45,
      "learning_rate": 0.001,
      "loss": 2.8069,
      "step": 38784
    },
    {
      "epoch": 7.45,
      "learning_rate": 0.001,
      "loss": 2.8236,
      "step": 38796
    },
    {
      "epoch": 7.45,
      "learning_rate": 0.001,
      "loss": 2.8185,
      "step": 38808
    },
    {
      "epoch": 7.45,
      "learning_rate": 0.001,
      "loss": 2.8194,
      "step": 38820
    },
    {
      "epoch": 7.46,
      "learning_rate": 0.001,
      "loss": 2.8139,
      "step": 38832
    },
    {
      "epoch": 7.46,
      "learning_rate": 0.001,
      "loss": 2.8256,
      "step": 38844
    },
    {
      "epoch": 7.46,
      "learning_rate": 0.001,
      "loss": 2.813,
      "step": 38856
    },
    {
      "epoch": 7.46,
      "learning_rate": 0.001,
      "loss": 2.8113,
      "step": 38868
    },
    {
      "epoch": 7.47,
      "learning_rate": 0.001,
      "loss": 2.8201,
      "step": 38880
    },
    {
      "epoch": 7.47,
      "learning_rate": 0.001,
      "loss": 2.8146,
      "step": 38892
    },
    {
      "epoch": 7.47,
      "learning_rate": 0.001,
      "loss": 2.8161,
      "step": 38904
    },
    {
      "epoch": 7.47,
      "learning_rate": 0.001,
      "loss": 2.8254,
      "step": 38916
    },
    {
      "epoch": 7.47,
      "learning_rate": 0.001,
      "loss": 2.8223,
      "step": 38928
    },
    {
      "epoch": 7.48,
      "learning_rate": 0.001,
      "loss": 2.8317,
      "step": 38940
    },
    {
      "epoch": 7.48,
      "learning_rate": 0.001,
      "loss": 2.8283,
      "step": 38952
    },
    {
      "epoch": 7.48,
      "learning_rate": 0.001,
      "loss": 2.8192,
      "step": 38964
    },
    {
      "epoch": 7.48,
      "learning_rate": 0.001,
      "loss": 2.8155,
      "step": 38976
    },
    {
      "epoch": 7.49,
      "learning_rate": 0.001,
      "loss": 2.8107,
      "step": 38988
    },
    {
      "epoch": 7.49,
      "learning_rate": 0.001,
      "loss": 2.8153,
      "step": 39000
    },
    {
      "epoch": 7.49,
      "learning_rate": 0.001,
      "loss": 2.8124,
      "step": 39012
    },
    {
      "epoch": 7.49,
      "learning_rate": 0.001,
      "loss": 2.8276,
      "step": 39024
    },
    {
      "epoch": 7.5,
      "learning_rate": 0.001,
      "loss": 2.8281,
      "step": 39036
    },
    {
      "epoch": 7.5,
      "learning_rate": 0.001,
      "loss": 2.8193,
      "step": 39048
    },
    {
      "epoch": 7.5,
      "learning_rate": 0.001,
      "loss": 2.8259,
      "step": 39060
    },
    {
      "epoch": 7.5,
      "learning_rate": 0.001,
      "loss": 2.8145,
      "step": 39072
    },
    {
      "epoch": 7.5,
      "learning_rate": 0.001,
      "loss": 2.8225,
      "step": 39084
    },
    {
      "epoch": 7.51,
      "learning_rate": 0.001,
      "loss": 2.8141,
      "step": 39096
    },
    {
      "epoch": 7.51,
      "learning_rate": 0.001,
      "loss": 2.8303,
      "step": 39108
    },
    {
      "epoch": 7.51,
      "learning_rate": 0.001,
      "loss": 2.8158,
      "step": 39120
    },
    {
      "epoch": 7.51,
      "learning_rate": 0.001,
      "loss": 2.8204,
      "step": 39132
    },
    {
      "epoch": 7.52,
      "learning_rate": 0.001,
      "loss": 2.8237,
      "step": 39144
    },
    {
      "epoch": 7.52,
      "learning_rate": 0.001,
      "loss": 2.8171,
      "step": 39156
    },
    {
      "epoch": 7.52,
      "learning_rate": 0.001,
      "loss": 2.8067,
      "step": 39168
    },
    {
      "epoch": 7.52,
      "learning_rate": 0.001,
      "loss": 2.815,
      "step": 39180
    },
    {
      "epoch": 7.53,
      "learning_rate": 0.001,
      "loss": 2.8203,
      "step": 39192
    },
    {
      "epoch": 7.53,
      "learning_rate": 0.001,
      "loss": 2.813,
      "step": 39204
    },
    {
      "epoch": 7.53,
      "learning_rate": 0.001,
      "loss": 2.8213,
      "step": 39216
    },
    {
      "epoch": 7.53,
      "learning_rate": 0.001,
      "loss": 2.8127,
      "step": 39228
    },
    {
      "epoch": 7.53,
      "learning_rate": 0.001,
      "loss": 2.8122,
      "step": 39240
    },
    {
      "epoch": 7.54,
      "learning_rate": 0.001,
      "loss": 2.8169,
      "step": 39252
    },
    {
      "epoch": 7.54,
      "learning_rate": 0.001,
      "loss": 2.8127,
      "step": 39264
    },
    {
      "epoch": 7.54,
      "learning_rate": 0.001,
      "loss": 2.8191,
      "step": 39276
    },
    {
      "epoch": 7.54,
      "learning_rate": 0.001,
      "loss": 2.8175,
      "step": 39288
    },
    {
      "epoch": 7.55,
      "learning_rate": 0.001,
      "loss": 2.8263,
      "step": 39300
    },
    {
      "epoch": 7.55,
      "learning_rate": 0.001,
      "loss": 2.8166,
      "step": 39312
    },
    {
      "epoch": 7.55,
      "learning_rate": 0.001,
      "loss": 2.8157,
      "step": 39324
    },
    {
      "epoch": 7.55,
      "learning_rate": 0.001,
      "loss": 2.8152,
      "step": 39336
    },
    {
      "epoch": 7.56,
      "learning_rate": 0.001,
      "loss": 2.8113,
      "step": 39348
    },
    {
      "epoch": 7.56,
      "learning_rate": 0.001,
      "loss": 2.8274,
      "step": 39360
    },
    {
      "epoch": 7.56,
      "learning_rate": 0.001,
      "loss": 2.8222,
      "step": 39372
    },
    {
      "epoch": 7.56,
      "eval_ag_news_accuracy": 0.2934375,
      "eval_ag_news_bleu_score": 4.232245651322968,
      "eval_ag_news_bleu_score_sem": 0.13472187155546678,
      "eval_ag_news_emb_cos_sim": 0.7605491280555725,
      "eval_ag_news_emb_cos_sim_sem": 0.008212281822211318,
      "eval_ag_news_emb_top1_equal": 0.1796875,
      "eval_ag_news_emb_top1_equal_sem": 0.034068008879424266,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.8588790893554688,
      "eval_ag_news_n_ngrams_match_1": 12.464,
      "eval_ag_news_n_ngrams_match_2": 2.56,
      "eval_ag_news_n_ngrams_match_3": 0.708,
      "eval_ag_news_num_pred_words": 45.63,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 47.41217675881853,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.30793974285537384,
      "eval_ag_news_runtime": 11.0359,
      "eval_ag_news_samples_per_second": 45.307,
      "eval_ag_news_steps_per_second": 0.091,
      "eval_ag_news_token_set_f1": 0.31833152832966594,
      "eval_ag_news_token_set_f1_sem": 0.004331108932723995,
      "eval_ag_news_token_set_precision": 0.29271179587824764,
      "eval_ag_news_token_set_recall": 0.3666719128884171,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 39375
    },
    {
      "epoch": 7.56,
      "eval_anthropic_toxic_prompts_accuracy": 0.10053125,
      "eval_anthropic_toxic_prompts_bleu_score": 2.501564855564238,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.09554543720920272,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6118472814559937,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.01040809575113645,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.561389684677124,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.24,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.414,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.462,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.39,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 35.21209687110814,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.1838394200757527,
      "eval_anthropic_toxic_prompts_runtime": 10.0901,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.553,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.099,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3112454745607655,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006341604969589165,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.3638612525653445,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3025721379702232,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 39375
    },
    {
      "epoch": 7.56,
      "eval_arxiv_accuracy": 0.32,
      "eval_arxiv_bleu_score": 3.6777920054093856,
      "eval_arxiv_bleu_score_sem": 0.11388419298267789,
      "eval_arxiv_emb_cos_sim": 0.6902914643287659,
      "eval_arxiv_emb_cos_sim_sem": 0.008033634502263798,
      "eval_arxiv_emb_top1_equal": 0.2265625,
      "eval_arxiv_emb_top1_equal_sem": 0.03714537682851538,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.7115771770477295,
      "eval_arxiv_n_ngrams_match_1": 13.036,
      "eval_arxiv_n_ngrams_match_2": 2.39,
      "eval_arxiv_n_ngrams_match_3": 0.506,
      "eval_arxiv_num_pred_words": 39.628,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 40.918291051345854,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3072787419538987,
      "eval_arxiv_runtime": 10.6262,
      "eval_arxiv_samples_per_second": 47.054,
      "eval_arxiv_steps_per_second": 0.094,
      "eval_arxiv_token_set_f1": 0.31042993335090185,
      "eval_arxiv_token_set_f1_sem": 0.004080640908553339,
      "eval_arxiv_token_set_precision": 0.25353127484434584,
      "eval_arxiv_token_set_recall": 0.4239575036633763,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 39375
    },
    {
      "epoch": 7.56,
      "eval_python_code_alpaca_accuracy": 0.141625,
      "eval_python_code_alpaca_bleu_score": 3.5105260376012732,
      "eval_python_code_alpaca_bleu_score_sem": 0.10893562223743992,
      "eval_python_code_alpaca_emb_cos_sim": 0.6751098036766052,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009235796306329617,
      "eval_python_code_alpaca_emb_top1_equal": 0.0859375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02487009666300537,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.2059597969055176,
      "eval_python_code_alpaca_n_ngrams_match_1": 8.188,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.02,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.576,
      "eval_python_code_alpaca_num_pred_words": 43.474,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 24.67917564915801,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.27569497009935495,
      "eval_python_code_alpaca_runtime": 10.1835,
      "eval_python_code_alpaca_samples_per_second": 49.099,
      "eval_python_code_alpaca_steps_per_second": 0.098,
      "eval_python_code_alpaca_token_set_f1": 0.4221533717590738,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005083677924871681,
      "eval_python_code_alpaca_token_set_precision": 0.44041471237088187,
      "eval_python_code_alpaca_token_set_recall": 0.43207969115662226,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 39375
    },
    {
      "epoch": 7.56,
      "eval_wikibio_accuracy": 0.29253125,
      "eval_wikibio_bleu_score": 5.460954327502383,
      "eval_wikibio_bleu_score_sem": 0.18422578950178453,
      "eval_wikibio_emb_cos_sim": 0.7087117433547974,
      "eval_wikibio_emb_cos_sim_sem": 0.010381389137159719,
      "eval_wikibio_emb_top1_equal": 0.1484375,
      "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.178219318389893,
      "eval_wikibio_n_ngrams_match_1": 9.854,
      "eval_wikibio_n_ngrams_match_2": 3.244,
      "eval_wikibio_n_ngrams_match_3": 1.11,
      "eval_wikibio_num_pred_words": 37.582,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 65.24956101168432,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3318237601867632,
      "eval_wikibio_runtime": 11.0453,
      "eval_wikibio_samples_per_second": 45.268,
      "eval_wikibio_steps_per_second": 0.091,
      "eval_wikibio_token_set_f1": 0.3089731518117434,
      "eval_wikibio_token_set_f1_sem": 0.0049584876825302625,
      "eval_wikibio_token_set_precision": 0.31891424363255483,
      "eval_wikibio_token_set_recall": 0.31357305002437286,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 39375
    },
    {
      "epoch": 7.56,
      "eval_nq_accuracy": 0.49253125,
      "eval_nq_bleu_score": 9.833208098369386,
      "eval_nq_bleu_score_sem": 0.42513112227800615,
      "eval_nq_emb_cos_sim": 0.7943933010101318,
      "eval_nq_emb_cos_sim_sem": 0.007816172569015777,
      "eval_nq_emb_top1_equal": 0.1953125,
      "eval_nq_emb_top1_equal_sem": 0.035178457165496856,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.453270196914673,
      "eval_nq_n_ngrams_match_1": 21.134,
      "eval_nq_n_ngrams_match_2": 7.224,
      "eval_nq_n_ngrams_match_3": 3.188,
      "eval_nq_num_pred_words": 48.602,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 11.626304926489219,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4037706010589045,
      "eval_nq_runtime": 11.1033,
      "eval_nq_samples_per_second": 45.032,
      "eval_nq_steps_per_second": 0.09,
      "eval_nq_token_set_f1": 0.4258722839505337,
      "eval_nq_token_set_f1_sem": 0.0050227033138624125,
      "eval_nq_token_set_precision": 0.37511438114790796,
      "eval_nq_token_set_recall": 0.5048118445670626,
      "eval_nq_true_num_tokens": 64.0,
      "step": 39375
    },
    {
      "epoch": 7.56,
      "learning_rate": 0.001,
      "loss": 2.831,
      "step": 39384
    },
    {
      "epoch": 7.56,
      "learning_rate": 0.001,
      "loss": 2.8308,
      "step": 39396
    },
    {
      "epoch": 7.57,
      "learning_rate": 0.001,
      "loss": 2.8288,
      "step": 39408
    },
    {
      "epoch": 7.57,
      "learning_rate": 0.001,
      "loss": 2.8161,
      "step": 39420
    },
    {
      "epoch": 7.57,
      "learning_rate": 0.001,
      "loss": 2.8104,
      "step": 39432
    },
    {
      "epoch": 7.57,
      "learning_rate": 0.001,
      "loss": 2.8054,
      "step": 39444
    },
    {
      "epoch": 7.58,
      "learning_rate": 0.001,
      "loss": 2.8279,
      "step": 39456
    },
    {
      "epoch": 7.58,
      "learning_rate": 0.001,
      "loss": 2.82,
      "step": 39468
    },
    {
      "epoch": 7.58,
      "learning_rate": 0.001,
      "loss": 2.8221,
      "step": 39480
    },
    {
      "epoch": 7.58,
      "learning_rate": 0.001,
      "loss": 2.8232,
      "step": 39492
    },
    {
      "epoch": 7.59,
      "learning_rate": 0.001,
      "loss": 2.8194,
      "step": 39504
    },
    {
      "epoch": 7.59,
      "learning_rate": 0.001,
      "loss": 2.838,
      "step": 39516
    },
    {
      "epoch": 7.59,
      "learning_rate": 0.001,
      "loss": 2.8254,
      "step": 39528
    },
    {
      "epoch": 7.59,
      "learning_rate": 0.001,
      "loss": 2.8267,
      "step": 39540
    },
    {
      "epoch": 7.59,
      "learning_rate": 0.001,
      "loss": 2.8236,
      "step": 39552
    },
    {
      "epoch": 7.6,
      "learning_rate": 0.001,
      "loss": 2.8241,
      "step": 39564
    },
    {
      "epoch": 7.6,
      "learning_rate": 0.001,
      "loss": 2.8128,
      "step": 39576
    },
    {
      "epoch": 7.6,
      "learning_rate": 0.001,
      "loss": 2.8207,
      "step": 39588
    },
    {
      "epoch": 7.6,
      "learning_rate": 0.001,
      "loss": 2.8099,
      "step": 39600
    },
    {
      "epoch": 7.61,
      "learning_rate": 0.001,
      "loss": 2.8089,
      "step": 39612
    },
    {
      "epoch": 7.61,
      "learning_rate": 0.001,
      "loss": 2.8153,
      "step": 39624
    },
    {
      "epoch": 7.61,
      "learning_rate": 0.001,
      "loss": 2.8131,
      "step": 39636
    },
    {
      "epoch": 7.61,
      "learning_rate": 0.001,
      "loss": 2.8234,
      "step": 39648
    },
    {
      "epoch": 7.62,
      "learning_rate": 0.001,
      "loss": 2.8111,
      "step": 39660
    },
    {
      "epoch": 7.62,
      "learning_rate": 0.001,
      "loss": 2.8163,
      "step": 39672
    },
    {
      "epoch": 7.62,
      "learning_rate": 0.001,
      "loss": 2.8207,
      "step": 39684
    },
    {
      "epoch": 7.62,
      "learning_rate": 0.001,
      "loss": 2.8124,
      "step": 39696
    },
    {
      "epoch": 7.62,
      "learning_rate": 0.001,
      "loss": 2.8207,
      "step": 39708
    },
    {
      "epoch": 7.63,
      "learning_rate": 0.001,
      "loss": 2.817,
      "step": 39720
    },
    {
      "epoch": 7.63,
      "learning_rate": 0.001,
      "loss": 2.814,
      "step": 39732
    },
    {
      "epoch": 7.63,
      "learning_rate": 0.001,
      "loss": 2.815,
      "step": 39744
    },
    {
      "epoch": 7.63,
      "learning_rate": 0.001,
      "loss": 2.8181,
      "step": 39756
    },
    {
      "epoch": 7.64,
      "learning_rate": 0.001,
      "loss": 2.8105,
      "step": 39768
    },
    {
      "epoch": 7.64,
      "learning_rate": 0.001,
      "loss": 2.8187,
      "step": 39780
    },
    {
      "epoch": 7.64,
      "learning_rate": 0.001,
      "loss": 2.8116,
      "step": 39792
    },
    {
      "epoch": 7.64,
      "learning_rate": 0.001,
      "loss": 2.8081,
      "step": 39804
    },
    {
      "epoch": 7.65,
      "learning_rate": 0.001,
      "loss": 2.8137,
      "step": 39816
    },
    {
      "epoch": 7.65,
      "learning_rate": 0.001,
      "loss": 2.8222,
      "step": 39828
    },
    {
      "epoch": 7.65,
      "learning_rate": 0.001,
      "loss": 2.8052,
      "step": 39840
    },
    {
      "epoch": 7.65,
      "learning_rate": 0.001,
      "loss": 2.8164,
      "step": 39852
    },
    {
      "epoch": 7.65,
      "learning_rate": 0.001,
      "loss": 2.8154,
      "step": 39864
    },
    {
      "epoch": 7.66,
      "learning_rate": 0.001,
      "loss": 2.8179,
      "step": 39876
    },
    {
      "epoch": 7.66,
      "learning_rate": 0.001,
      "loss": 2.8073,
      "step": 39888
    },
    {
      "epoch": 7.66,
      "learning_rate": 0.001,
      "loss": 2.8167,
      "step": 39900
    },
    {
      "epoch": 7.66,
      "learning_rate": 0.001,
      "loss": 2.8165,
      "step": 39912
    },
    {
      "epoch": 7.67,
      "learning_rate": 0.001,
      "loss": 2.8188,
      "step": 39924
    },
    {
      "epoch": 7.67,
      "learning_rate": 0.001,
      "loss": 2.8268,
      "step": 39936
    },
    {
      "epoch": 7.67,
      "learning_rate": 0.001,
      "loss": 2.8147,
      "step": 39948
    },
    {
      "epoch": 7.67,
      "learning_rate": 0.001,
      "loss": 2.8206,
      "step": 39960
    },
    {
      "epoch": 7.68,
      "learning_rate": 0.001,
      "loss": 2.8176,
      "step": 39972
    },
    {
      "epoch": 7.68,
      "learning_rate": 0.001,
      "loss": 2.8258,
      "step": 39984
    },
    {
      "epoch": 7.68,
      "learning_rate": 0.001,
      "loss": 2.8287,
      "step": 39996
    },
    {
      "epoch": 7.68,
      "eval_ag_news_accuracy": 0.2924375,
      "eval_ag_news_bleu_score": 4.140216952249,
      "eval_ag_news_bleu_score_sem": 0.1383443335620197,
      "eval_ag_news_emb_cos_sim": 0.7485324144363403,
      "eval_ag_news_emb_cos_sim_sem": 0.009648313854486055,
      "eval_ag_news_emb_top1_equal": 0.1875,
      "eval_ag_news_emb_top1_equal_sem": 0.034634623208270626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.8584718704223633,
      "eval_ag_news_n_ngrams_match_1": 12.436,
      "eval_ag_news_n_ngrams_match_2": 2.546,
      "eval_ag_news_n_ngrams_match_3": 0.684,
      "eval_ag_news_num_pred_words": 46.256,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 47.39287355336471,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.30593732841335797,
      "eval_ag_news_runtime": 10.8668,
      "eval_ag_news_samples_per_second": 46.012,
      "eval_ag_news_steps_per_second": 0.092,
      "eval_ag_news_token_set_f1": 0.31779052000531055,
      "eval_ag_news_token_set_f1_sem": 0.004517178711662994,
      "eval_ag_news_token_set_precision": 0.292657434325159,
      "eval_ag_news_token_set_recall": 0.3648653117395539,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 40000
    },
    {
      "epoch": 7.68,
      "eval_anthropic_toxic_prompts_accuracy": 0.1010625,
      "eval_anthropic_toxic_prompts_bleu_score": 2.4924053251186677,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.0958977419730683,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6073935031890869,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010615059826382719,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02147948148198014,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.5465736389160156,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.186,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.418,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.484,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.86,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 34.69423861146998,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.1808282606779184,
      "eval_anthropic_toxic_prompts_runtime": 10.2638,
      "eval_anthropic_toxic_prompts_samples_per_second": 48.715,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.097,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3196874132490534,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006190038015658034,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.3633774501719923,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3225252960679559,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 40000
    },
    {
      "epoch": 7.68,
      "eval_arxiv_accuracy": 0.3200625,
      "eval_arxiv_bleu_score": 3.621264815878394,
      "eval_arxiv_bleu_score_sem": 0.1081188396662897,
      "eval_arxiv_emb_cos_sim": 0.6864925622940063,
      "eval_arxiv_emb_cos_sim_sem": 0.007601289496061015,
      "eval_arxiv_emb_top1_equal": 0.171875,
      "eval_arxiv_emb_top1_equal_sem": 0.03347745514062371,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.707294225692749,
      "eval_arxiv_n_ngrams_match_1": 12.866,
      "eval_arxiv_n_ngrams_match_2": 2.33,
      "eval_arxiv_n_ngrams_match_3": 0.48,
      "eval_arxiv_num_pred_words": 38.898,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 40.74341476188715,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3067538649257645,
      "eval_arxiv_runtime": 10.7068,
      "eval_arxiv_samples_per_second": 46.7,
      "eval_arxiv_steps_per_second": 0.093,
      "eval_arxiv_token_set_f1": 0.30269986008952854,
      "eval_arxiv_token_set_f1_sem": 0.004115864401618083,
      "eval_arxiv_token_set_precision": 0.24789558209074794,
      "eval_arxiv_token_set_recall": 0.4130067723409454,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 40000
    },
    {
      "epoch": 7.68,
      "eval_python_code_alpaca_accuracy": 0.14084375,
      "eval_python_code_alpaca_bleu_score": 3.431169638393987,
      "eval_python_code_alpaca_bleu_score_sem": 0.11242873401818722,
      "eval_python_code_alpaca_emb_cos_sim": 0.672085165977478,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009780764277198588,
      "eval_python_code_alpaca_emb_top1_equal": 0.015625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.011004959004867984,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.2109286785125732,
      "eval_python_code_alpaca_n_ngrams_match_1": 8.304,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.006,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.552,
      "eval_python_code_alpaca_num_pred_words": 43.478,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 24.80210871811856,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.27806485172692175,
      "eval_python_code_alpaca_runtime": 11.0289,
      "eval_python_code_alpaca_samples_per_second": 45.335,
      "eval_python_code_alpaca_steps_per_second": 0.091,
      "eval_python_code_alpaca_token_set_f1": 0.4180941118980831,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005268215952449477,
      "eval_python_code_alpaca_token_set_precision": 0.44559717967585744,
      "eval_python_code_alpaca_token_set_recall": 0.4197696143230586,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 40000
    },
    {
      "epoch": 7.68,
      "eval_wikibio_accuracy": 0.29596875,
      "eval_wikibio_bleu_score": 5.171943944801153,
      "eval_wikibio_bleu_score_sem": 0.17957982544618623,
      "eval_wikibio_emb_cos_sim": 0.7065252661705017,
      "eval_wikibio_emb_cos_sim_sem": 0.010924187203874888,
      "eval_wikibio_emb_top1_equal": 0.140625,
      "eval_wikibio_emb_top1_equal_sem": 0.030847557647994725,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.171440601348877,
      "eval_wikibio_n_ngrams_match_1": 9.714,
      "eval_wikibio_n_ngrams_match_2": 3.114,
      "eval_wikibio_n_ngrams_match_3": 1.032,
      "eval_wikibio_num_pred_words": 37.366,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 64.80874846029297,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3284593122552165,
      "eval_wikibio_runtime": 10.2532,
      "eval_wikibio_samples_per_second": 48.765,
      "eval_wikibio_steps_per_second": 0.098,
      "eval_wikibio_token_set_f1": 0.30357482221895987,
      "eval_wikibio_token_set_f1_sem": 0.0053931187158150744,
      "eval_wikibio_token_set_precision": 0.3138596259243643,
      "eval_wikibio_token_set_recall": 0.30868501911837587,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 40000
    },
    {
      "epoch": 7.68,
      "eval_nq_accuracy": 0.49109375,
      "eval_nq_bleu_score": 9.820259624837865,
      "eval_nq_bleu_score_sem": 0.4250234602086885,
      "eval_nq_emb_cos_sim": 0.7942675352096558,
      "eval_nq_emb_cos_sim_sem": 0.007867145231022655,
      "eval_nq_emb_top1_equal": 0.2421875,
      "eval_nq_emb_top1_equal_sem": 0.038014990119662626,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.4483070373535156,
      "eval_nq_n_ngrams_match_1": 21.216,
      "eval_nq_n_ngrams_match_2": 7.218,
      "eval_nq_n_ngrams_match_3": 3.176,
      "eval_nq_num_pred_words": 48.662,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 11.568744678536394,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.404875463760787,
      "eval_nq_runtime": 11.6787,
      "eval_nq_samples_per_second": 42.813,
      "eval_nq_steps_per_second": 0.086,
      "eval_nq_token_set_f1": 0.42698738512552853,
      "eval_nq_token_set_f1_sem": 0.00503677341066409,
      "eval_nq_token_set_precision": 0.3771447610985095,
      "eval_nq_token_set_recall": 0.5050118819720776,
      "eval_nq_true_num_tokens": 64.0,
      "step": 40000
    },
    {
      "epoch": 7.68,
      "learning_rate": 0.001,
      "loss": 2.8281,
      "step": 40008
    },
    {
      "epoch": 7.68,
      "learning_rate": 0.001,
      "loss": 2.8157,
      "step": 40020
    },
    {
      "epoch": 7.69,
      "learning_rate": 0.001,
      "loss": 2.8209,
      "step": 40032
    },
    {
      "epoch": 7.69,
      "learning_rate": 0.001,
      "loss": 2.8125,
      "step": 40044
    },
    {
      "epoch": 7.69,
      "learning_rate": 0.001,
      "loss": 2.8116,
      "step": 40056
    },
    {
      "epoch": 7.69,
      "learning_rate": 0.001,
      "loss": 2.8113,
      "step": 40068
    },
    {
      "epoch": 7.7,
      "learning_rate": 0.001,
      "loss": 2.818,
      "step": 40080
    },
    {
      "epoch": 7.7,
      "learning_rate": 0.001,
      "loss": 2.8147,
      "step": 40092
    },
    {
      "epoch": 7.7,
      "learning_rate": 0.001,
      "loss": 2.8093,
      "step": 40104
    },
    {
      "epoch": 7.7,
      "learning_rate": 0.001,
      "loss": 2.8197,
      "step": 40116
    },
    {
      "epoch": 7.71,
      "learning_rate": 0.001,
      "loss": 2.8151,
      "step": 40128
    },
    {
      "epoch": 7.71,
      "learning_rate": 0.001,
      "loss": 2.8225,
      "step": 40140
    },
    {
      "epoch": 7.71,
      "learning_rate": 0.001,
      "loss": 2.8144,
      "step": 40152
    },
    {
      "epoch": 7.71,
      "learning_rate": 0.001,
      "loss": 2.8209,
      "step": 40164
    },
    {
      "epoch": 7.71,
      "learning_rate": 0.001,
      "loss": 2.8147,
      "step": 40176
    },
    {
      "epoch": 7.72,
      "learning_rate": 0.001,
      "loss": 2.8059,
      "step": 40188
    },
    {
      "epoch": 7.72,
      "learning_rate": 0.001,
      "loss": 2.8107,
      "step": 40200
    },
    {
      "epoch": 7.72,
      "learning_rate": 0.001,
      "loss": 2.8189,
      "step": 40212
    },
    {
      "epoch": 7.72,
      "learning_rate": 0.001,
      "loss": 2.8034,
      "step": 40224
    },
    {
      "epoch": 7.73,
      "learning_rate": 0.001,
      "loss": 2.81,
      "step": 40236
    },
    {
      "epoch": 7.73,
      "learning_rate": 0.001,
      "loss": 2.8135,
      "step": 40248
    },
    {
      "epoch": 7.73,
      "learning_rate": 0.001,
      "loss": 2.8087,
      "step": 40260
    },
    {
      "epoch": 7.73,
      "learning_rate": 0.001,
      "loss": 2.8111,
      "step": 40272
    },
    {
      "epoch": 7.74,
      "learning_rate": 0.001,
      "loss": 2.8172,
      "step": 40284
    },
    {
      "epoch": 7.74,
      "learning_rate": 0.001,
      "loss": 2.814,
      "step": 40296
    },
    {
      "epoch": 7.74,
      "learning_rate": 0.001,
      "loss": 2.8064,
      "step": 40308
    },
    {
      "epoch": 7.74,
      "learning_rate": 0.001,
      "loss": 2.8208,
      "step": 40320
    },
    {
      "epoch": 7.74,
      "learning_rate": 0.001,
      "loss": 2.821,
      "step": 40332
    },
    {
      "epoch": 7.75,
      "learning_rate": 0.001,
      "loss": 2.8185,
      "step": 40344
    },
    {
      "epoch": 7.75,
      "learning_rate": 0.001,
      "loss": 2.817,
      "step": 40356
    },
    {
      "epoch": 7.75,
      "learning_rate": 0.001,
      "loss": 2.8162,
      "step": 40368
    },
    {
      "epoch": 7.75,
      "learning_rate": 0.001,
      "loss": 2.819,
      "step": 40380
    },
    {
      "epoch": 7.76,
      "learning_rate": 0.001,
      "loss": 2.8049,
      "step": 40392
    },
    {
      "epoch": 7.76,
      "learning_rate": 0.001,
      "loss": 2.8036,
      "step": 40404
    },
    {
      "epoch": 7.76,
      "learning_rate": 0.001,
      "loss": 2.8131,
      "step": 40416
    },
    {
      "epoch": 7.76,
      "learning_rate": 0.001,
      "loss": 2.8061,
      "step": 40428
    },
    {
      "epoch": 7.76,
      "learning_rate": 0.001,
      "loss": 2.8096,
      "step": 40440
    },
    {
      "epoch": 7.77,
      "learning_rate": 0.001,
      "loss": 2.8056,
      "step": 40452
    },
    {
      "epoch": 7.77,
      "learning_rate": 0.001,
      "loss": 2.812,
      "step": 40464
    },
    {
      "epoch": 7.77,
      "learning_rate": 0.001,
      "loss": 2.8169,
      "step": 40476
    },
    {
      "epoch": 7.77,
      "learning_rate": 0.001,
      "loss": 2.8156,
      "step": 40488
    },
    {
      "epoch": 7.78,
      "learning_rate": 0.001,
      "loss": 2.8143,
      "step": 40500
    },
    {
      "epoch": 7.78,
      "learning_rate": 0.001,
      "loss": 2.7972,
      "step": 40512
    },
    {
      "epoch": 7.78,
      "learning_rate": 0.001,
      "loss": 2.8215,
      "step": 40524
    },
    {
      "epoch": 7.78,
      "learning_rate": 0.001,
      "loss": 2.8117,
      "step": 40536
    },
    {
      "epoch": 7.79,
      "learning_rate": 0.001,
      "loss": 2.8097,
      "step": 40548
    },
    {
      "epoch": 7.79,
      "learning_rate": 0.001,
      "loss": 2.8149,
      "step": 40560
    },
    {
      "epoch": 7.79,
      "learning_rate": 0.001,
      "loss": 2.8134,
      "step": 40572
    },
    {
      "epoch": 7.79,
      "learning_rate": 0.001,
      "loss": 2.8077,
      "step": 40584
    },
    {
      "epoch": 7.79,
      "learning_rate": 0.001,
      "loss": 2.8043,
      "step": 40596
    },
    {
      "epoch": 7.8,
      "learning_rate": 0.001,
      "loss": 2.8064,
      "step": 40608
    },
    {
      "epoch": 7.8,
      "learning_rate": 0.001,
      "loss": 2.8065,
      "step": 40620
    },
    {
      "epoch": 7.8,
      "eval_ag_news_accuracy": 0.29590625,
      "eval_ag_news_bleu_score": 4.187945534360631,
      "eval_ag_news_bleu_score_sem": 0.1398010890099884,
      "eval_ag_news_emb_cos_sim": 0.7519018054008484,
      "eval_ag_news_emb_cos_sim_sem": 0.008621114155301085,
      "eval_ag_news_emb_top1_equal": 0.140625,
      "eval_ag_news_emb_top1_equal_sem": 0.030847557647994725,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.8446338176727295,
      "eval_ag_news_n_ngrams_match_1": 12.398,
      "eval_ag_news_n_ngrams_match_2": 2.512,
      "eval_ag_news_n_ngrams_match_3": 0.666,
      "eval_ag_news_num_pred_words": 45.26,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 46.74156528159807,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3055589003690906,
      "eval_ag_news_runtime": 10.5921,
      "eval_ag_news_samples_per_second": 47.205,
      "eval_ag_news_steps_per_second": 0.094,
      "eval_ag_news_token_set_f1": 0.3209573739907631,
      "eval_ag_news_token_set_f1_sem": 0.004328376611851938,
      "eval_ag_news_token_set_precision": 0.29215007611101823,
      "eval_ag_news_token_set_recall": 0.37500439803465396,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 40625
    },
    {
      "epoch": 7.8,
      "eval_anthropic_toxic_prompts_accuracy": 0.102375,
      "eval_anthropic_toxic_prompts_bleu_score": 2.4227166833591163,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.09118447815273842,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6076961755752563,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010887983719548537,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.5038912296295166,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.226,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.388,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.442,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.72,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 33.244562823647634,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.1809839020715016,
      "eval_anthropic_toxic_prompts_runtime": 10.1648,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.19,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.098,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3221772628366725,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006381706208999953,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.3665683970802171,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3203213777646176,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 40625
    },
    {
      "epoch": 7.8,
      "eval_arxiv_accuracy": 0.3235,
      "eval_arxiv_bleu_score": 3.616187518736761,
      "eval_arxiv_bleu_score_sem": 0.09964012874421592,
      "eval_arxiv_emb_cos_sim": 0.6798217296600342,
      "eval_arxiv_emb_cos_sim_sem": 0.008391075997584119,
      "eval_arxiv_emb_top1_equal": 0.234375,
      "eval_arxiv_emb_top1_equal_sem": 0.03758909358128201,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.6890108585357666,
      "eval_arxiv_n_ngrams_match_1": 12.916,
      "eval_arxiv_n_ngrams_match_2": 2.334,
      "eval_arxiv_n_ngrams_match_3": 0.468,
      "eval_arxiv_num_pred_words": 38.716,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 40.00525652223078,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.308544018687944,
      "eval_arxiv_runtime": 10.3625,
      "eval_arxiv_samples_per_second": 48.251,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.3081009332619882,
      "eval_arxiv_token_set_f1_sem": 0.004253189007534848,
      "eval_arxiv_token_set_precision": 0.24966233377607916,
      "eval_arxiv_token_set_recall": 0.42339492294018294,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 40625
    },
    {
      "epoch": 7.8,
      "eval_python_code_alpaca_accuracy": 0.14378125,
      "eval_python_code_alpaca_bleu_score": 3.4505165418448582,
      "eval_python_code_alpaca_bleu_score_sem": 0.10040792060601474,
      "eval_python_code_alpaca_emb_cos_sim": 0.6895774006843567,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009768596350324591,
      "eval_python_code_alpaca_emb_top1_equal": 0.09375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.025864720141013958,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.199202060699463,
      "eval_python_code_alpaca_n_ngrams_match_1": 8.442,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.018,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.528,
      "eval_python_code_alpaca_num_pred_words": 43.73,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 24.512962535064982,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.28123435590066515,
      "eval_python_code_alpaca_runtime": 13.0735,
      "eval_python_code_alpaca_samples_per_second": 38.245,
      "eval_python_code_alpaca_steps_per_second": 0.076,
      "eval_python_code_alpaca_token_set_f1": 0.4257524777173994,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005134369614680399,
      "eval_python_code_alpaca_token_set_precision": 0.4522889066162074,
      "eval_python_code_alpaca_token_set_recall": 0.42402667478777445,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 40625
    },
    {
      "epoch": 7.8,
      "eval_wikibio_accuracy": 0.2949375,
      "eval_wikibio_bleu_score": 5.144805388924942,
      "eval_wikibio_bleu_score_sem": 0.19115575748875538,
      "eval_wikibio_emb_cos_sim": 0.6942859292030334,
      "eval_wikibio_emb_cos_sim_sem": 0.010799363385109303,
      "eval_wikibio_emb_top1_equal": 0.125,
      "eval_wikibio_emb_top1_equal_sem": 0.02934655822437397,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.108237266540527,
      "eval_wikibio_n_ngrams_match_1": 9.454,
      "eval_wikibio_n_ngrams_match_2": 3.01,
      "eval_wikibio_n_ngrams_match_3": 1.024,
      "eval_wikibio_num_pred_words": 36.324,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 60.839379383417054,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.32059685715014324,
      "eval_wikibio_runtime": 10.2524,
      "eval_wikibio_samples_per_second": 48.769,
      "eval_wikibio_steps_per_second": 0.098,
      "eval_wikibio_token_set_f1": 0.2979910034566148,
      "eval_wikibio_token_set_f1_sem": 0.005759204884132401,
      "eval_wikibio_token_set_precision": 0.30381430918912883,
      "eval_wikibio_token_set_recall": 0.3086597426916664,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 40625
    },
    {
      "epoch": 7.8,
      "eval_nq_accuracy": 0.4915,
      "eval_nq_bleu_score": 10.038028004828684,
      "eval_nq_bleu_score_sem": 0.43433816005689,
      "eval_nq_emb_cos_sim": 0.7980464696884155,
      "eval_nq_emb_cos_sim_sem": 0.007813135361744961,
      "eval_nq_emb_top1_equal": 0.21875,
      "eval_nq_emb_top1_equal_sem": 0.03668319712192295,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.442344903945923,
      "eval_nq_n_ngrams_match_1": 21.238,
      "eval_nq_n_ngrams_match_2": 7.314,
      "eval_nq_n_ngrams_match_3": 3.238,
      "eval_nq_num_pred_words": 48.518,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 11.499975488658384,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.40468805185802637,
      "eval_nq_runtime": 10.5702,
      "eval_nq_samples_per_second": 47.303,
      "eval_nq_steps_per_second": 0.095,
      "eval_nq_token_set_f1": 0.4289723217509848,
      "eval_nq_token_set_f1_sem": 0.005111068929936343,
      "eval_nq_token_set_precision": 0.37697849387241883,
      "eval_nq_token_set_recall": 0.5090523696248492,
      "eval_nq_true_num_tokens": 64.0,
      "step": 40625
    },
    {
      "epoch": 7.8,
      "learning_rate": 0.001,
      "loss": 2.8153,
      "step": 40632
    },
    {
      "epoch": 7.8,
      "learning_rate": 0.001,
      "loss": 2.8023,
      "step": 40644
    },
    {
      "epoch": 7.81,
      "learning_rate": 0.001,
      "loss": 2.8064,
      "step": 40656
    },
    {
      "epoch": 7.81,
      "learning_rate": 0.001,
      "loss": 2.8067,
      "step": 40668
    },
    {
      "epoch": 7.81,
      "learning_rate": 0.001,
      "loss": 2.8056,
      "step": 40680
    },
    {
      "epoch": 7.81,
      "learning_rate": 0.001,
      "loss": 2.8091,
      "step": 40692
    },
    {
      "epoch": 7.82,
      "learning_rate": 0.001,
      "loss": 2.8008,
      "step": 40704
    },
    {
      "epoch": 7.82,
      "learning_rate": 0.001,
      "loss": 2.8087,
      "step": 40716
    },
    {
      "epoch": 7.82,
      "learning_rate": 0.001,
      "loss": 2.8108,
      "step": 40728
    },
    {
      "epoch": 7.82,
      "learning_rate": 0.001,
      "loss": 2.8141,
      "step": 40740
    },
    {
      "epoch": 7.82,
      "learning_rate": 0.001,
      "loss": 2.8075,
      "step": 40752
    },
    {
      "epoch": 7.83,
      "learning_rate": 0.001,
      "loss": 2.7981,
      "step": 40764
    },
    {
      "epoch": 7.83,
      "learning_rate": 0.001,
      "loss": 2.8175,
      "step": 40776
    },
    {
      "epoch": 7.83,
      "learning_rate": 0.001,
      "loss": 2.8087,
      "step": 40788
    },
    {
      "epoch": 7.83,
      "learning_rate": 0.001,
      "loss": 2.8076,
      "step": 40800
    },
    {
      "epoch": 7.84,
      "learning_rate": 0.001,
      "loss": 2.8103,
      "step": 40812
    },
    {
      "epoch": 7.84,
      "learning_rate": 0.001,
      "loss": 2.7975,
      "step": 40824
    },
    {
      "epoch": 7.84,
      "learning_rate": 0.001,
      "loss": 2.8146,
      "step": 40836
    },
    {
      "epoch": 7.84,
      "learning_rate": 0.001,
      "loss": 2.8108,
      "step": 40848
    },
    {
      "epoch": 7.85,
      "learning_rate": 0.001,
      "loss": 2.8077,
      "step": 40860
    },
    {
      "epoch": 7.85,
      "learning_rate": 0.001,
      "loss": 2.814,
      "step": 40872
    },
    {
      "epoch": 7.85,
      "learning_rate": 0.001,
      "loss": 2.8142,
      "step": 40884
    },
    {
      "epoch": 7.85,
      "learning_rate": 0.001,
      "loss": 2.8021,
      "step": 40896
    },
    {
      "epoch": 7.85,
      "learning_rate": 0.001,
      "loss": 2.7992,
      "step": 40908
    },
    {
      "epoch": 7.86,
      "learning_rate": 0.001,
      "loss": 2.8041,
      "step": 40920
    },
    {
      "epoch": 7.86,
      "learning_rate": 0.001,
      "loss": 2.8072,
      "step": 40932
    },
    {
      "epoch": 7.86,
      "learning_rate": 0.001,
      "loss": 2.8022,
      "step": 40944
    },
    {
      "epoch": 7.86,
      "learning_rate": 0.001,
      "loss": 2.8137,
      "step": 40956
    },
    {
      "epoch": 7.87,
      "learning_rate": 0.001,
      "loss": 2.8154,
      "step": 40968
    },
    {
      "epoch": 7.87,
      "learning_rate": 0.001,
      "loss": 2.8197,
      "step": 40980
    },
    {
      "epoch": 7.87,
      "learning_rate": 0.001,
      "loss": 2.826,
      "step": 40992
    },
    {
      "epoch": 7.87,
      "learning_rate": 0.001,
      "loss": 2.8034,
      "step": 41004
    },
    {
      "epoch": 7.88,
      "learning_rate": 0.001,
      "loss": 2.8072,
      "step": 41016
    },
    {
      "epoch": 7.88,
      "learning_rate": 0.001,
      "loss": 2.8039,
      "step": 41028
    },
    {
      "epoch": 7.88,
      "learning_rate": 0.001,
      "loss": 2.8114,
      "step": 41040
    },
    {
      "epoch": 7.88,
      "learning_rate": 0.001,
      "loss": 2.8191,
      "step": 41052
    },
    {
      "epoch": 7.88,
      "learning_rate": 0.001,
      "loss": 2.8163,
      "step": 41064
    },
    {
      "epoch": 7.89,
      "learning_rate": 0.001,
      "loss": 2.8037,
      "step": 41076
    },
    {
      "epoch": 7.89,
      "learning_rate": 0.001,
      "loss": 2.8145,
      "step": 41088
    },
    {
      "epoch": 7.89,
      "learning_rate": 0.001,
      "loss": 2.8048,
      "step": 41100
    },
    {
      "epoch": 7.89,
      "learning_rate": 0.001,
      "loss": 2.8228,
      "step": 41112
    },
    {
      "epoch": 7.9,
      "learning_rate": 0.001,
      "loss": 2.8036,
      "step": 41124
    },
    {
      "epoch": 7.9,
      "learning_rate": 0.001,
      "loss": 2.8107,
      "step": 41136
    },
    {
      "epoch": 7.9,
      "learning_rate": 0.001,
      "loss": 2.8136,
      "step": 41148
    },
    {
      "epoch": 7.9,
      "learning_rate": 0.001,
      "loss": 2.8043,
      "step": 41160
    },
    {
      "epoch": 7.91,
      "learning_rate": 0.001,
      "loss": 2.8011,
      "step": 41172
    },
    {
      "epoch": 7.91,
      "learning_rate": 0.001,
      "loss": 2.8096,
      "step": 41184
    },
    {
      "epoch": 7.91,
      "learning_rate": 0.001,
      "loss": 2.8014,
      "step": 41196
    },
    {
      "epoch": 7.91,
      "learning_rate": 0.001,
      "loss": 2.8209,
      "step": 41208
    },
    {
      "epoch": 7.91,
      "learning_rate": 0.001,
      "loss": 2.8052,
      "step": 41220
    },
    {
      "epoch": 7.92,
      "learning_rate": 0.001,
      "loss": 2.8069,
      "step": 41232
    },
    {
      "epoch": 7.92,
      "learning_rate": 0.001,
      "loss": 2.8139,
      "step": 41244
    },
    {
      "epoch": 7.92,
      "eval_ag_news_accuracy": 0.295375,
      "eval_ag_news_bleu_score": 4.177243248663384,
      "eval_ag_news_bleu_score_sem": 0.14355102147081386,
      "eval_ag_news_emb_cos_sim": 0.7614268064498901,
      "eval_ag_news_emb_cos_sim_sem": 0.008624405560751985,
      "eval_ag_news_emb_top1_equal": 0.109375,
      "eval_ag_news_emb_top1_equal_sem": 0.027695207821224692,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.8472166061401367,
      "eval_ag_news_n_ngrams_match_1": 12.584,
      "eval_ag_news_n_ngrams_match_2": 2.516,
      "eval_ag_news_n_ngrams_match_3": 0.656,
      "eval_ag_news_num_pred_words": 45.628,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 46.862444893392656,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3127816446024006,
      "eval_ag_news_runtime": 11.8325,
      "eval_ag_news_samples_per_second": 42.256,
      "eval_ag_news_steps_per_second": 0.085,
      "eval_ag_news_token_set_f1": 0.3203641725089709,
      "eval_ag_news_token_set_f1_sem": 0.004295153381906319,
      "eval_ag_news_token_set_precision": 0.29743015779836435,
      "eval_ag_news_token_set_recall": 0.363793417571401,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 41250
    },
    {
      "epoch": 7.92,
      "eval_anthropic_toxic_prompts_accuracy": 0.1014375,
      "eval_anthropic_toxic_prompts_bleu_score": 2.5142923835005013,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.09967327239409667,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6296899318695068,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009661943750167774,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.528766393661499,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.29,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.456,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.482,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.448,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 34.081898026000744,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.1844820827318796,
      "eval_anthropic_toxic_prompts_runtime": 10.0815,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.596,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.099,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.31619125754705546,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006245202508929617,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.36581050128141396,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3068939653457516,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 41250
    },
    {
      "epoch": 7.92,
      "eval_arxiv_accuracy": 0.319375,
      "eval_arxiv_bleu_score": 3.761050652950992,
      "eval_arxiv_bleu_score_sem": 0.10826957781035133,
      "eval_arxiv_emb_cos_sim": 0.6977087259292603,
      "eval_arxiv_emb_cos_sim_sem": 0.007638775166544408,
      "eval_arxiv_emb_top1_equal": 0.1875,
      "eval_arxiv_emb_top1_equal_sem": 0.034634623208270626,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.7040607929229736,
      "eval_arxiv_n_ngrams_match_1": 13.176,
      "eval_arxiv_n_ngrams_match_2": 2.45,
      "eval_arxiv_n_ngrams_match_3": 0.498,
      "eval_arxiv_num_pred_words": 39.362,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 40.6118864280509,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.31268868526372107,
      "eval_arxiv_runtime": 10.5471,
      "eval_arxiv_samples_per_second": 47.406,
      "eval_arxiv_steps_per_second": 0.095,
      "eval_arxiv_token_set_f1": 0.3121361393205705,
      "eval_arxiv_token_set_f1_sem": 0.004010391958789305,
      "eval_arxiv_token_set_precision": 0.2563655983833098,
      "eval_arxiv_token_set_recall": 0.421282631478863,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 41250
    },
    {
      "epoch": 7.92,
      "eval_python_code_alpaca_accuracy": 0.14034375,
      "eval_python_code_alpaca_bleu_score": 3.41548439262854,
      "eval_python_code_alpaca_bleu_score_sem": 0.11493549322295714,
      "eval_python_code_alpaca_emb_cos_sim": 0.6701538562774658,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009477135087803568,
      "eval_python_code_alpaca_emb_top1_equal": 0.0625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02147948148198014,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.21610426902771,
      "eval_python_code_alpaca_n_ngrams_match_1": 8.056,
      "eval_python_code_alpaca_n_ngrams_match_2": 1.93,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.48,
      "eval_python_code_alpaca_num_pred_words": 42.104,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 24.930807034364012,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.27747833785570963,
      "eval_python_code_alpaca_runtime": 9.8767,
      "eval_python_code_alpaca_samples_per_second": 50.624,
      "eval_python_code_alpaca_steps_per_second": 0.101,
      "eval_python_code_alpaca_token_set_f1": 0.41409101157084693,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005415821955688519,
      "eval_python_code_alpaca_token_set_precision": 0.4317412697423453,
      "eval_python_code_alpaca_token_set_recall": 0.4210692909568659,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 41250
    },
    {
      "epoch": 7.92,
      "eval_wikibio_accuracy": 0.29575,
      "eval_wikibio_bleu_score": 5.240046314179688,
      "eval_wikibio_bleu_score_sem": 0.19925473711020505,
      "eval_wikibio_emb_cos_sim": 0.6854864358901978,
      "eval_wikibio_emb_cos_sim_sem": 0.012112673672687936,
      "eval_wikibio_emb_top1_equal": 0.15625,
      "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.137763977050781,
      "eval_wikibio_n_ngrams_match_1": 9.428,
      "eval_wikibio_n_ngrams_match_2": 3.016,
      "eval_wikibio_n_ngrams_match_3": 1.034,
      "eval_wikibio_num_pred_words": 36.42,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 62.66254978294218,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.32025842546471583,
      "eval_wikibio_runtime": 10.5785,
      "eval_wikibio_samples_per_second": 47.265,
      "eval_wikibio_steps_per_second": 0.095,
      "eval_wikibio_token_set_f1": 0.29862073475428635,
      "eval_wikibio_token_set_f1_sem": 0.005543650424542434,
      "eval_wikibio_token_set_precision": 0.3045874857049161,
      "eval_wikibio_token_set_recall": 0.31166296994765863,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 41250
    },
    {
      "epoch": 7.92,
      "eval_nq_accuracy": 0.49271875,
      "eval_nq_bleu_score": 9.950861057961662,
      "eval_nq_bleu_score_sem": 0.4292405034709257,
      "eval_nq_emb_cos_sim": 0.7908686399459839,
      "eval_nq_emb_cos_sim_sem": 0.008533793787720609,
      "eval_nq_emb_top1_equal": 0.2421875,
      "eval_nq_emb_top1_equal_sem": 0.038014990119662626,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.4374239444732666,
      "eval_nq_n_ngrams_match_1": 21.256,
      "eval_nq_n_ngrams_match_2": 7.31,
      "eval_nq_n_ngrams_match_3": 3.186,
      "eval_nq_num_pred_words": 48.534,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 11.443523588018904,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4075687612438058,
      "eval_nq_runtime": 12.0139,
      "eval_nq_samples_per_second": 41.618,
      "eval_nq_steps_per_second": 0.083,
      "eval_nq_token_set_f1": 0.4253036128874263,
      "eval_nq_token_set_f1_sem": 0.004909166781233974,
      "eval_nq_token_set_precision": 0.3760358083916659,
      "eval_nq_token_set_recall": 0.4997178555960679,
      "eval_nq_true_num_tokens": 64.0,
      "step": 41250
    },
    {
      "epoch": 7.92,
      "learning_rate": 0.001,
      "loss": 2.8079,
      "step": 41256
    },
    {
      "epoch": 7.92,
      "learning_rate": 0.001,
      "loss": 2.8034,
      "step": 41268
    },
    {
      "epoch": 7.93,
      "learning_rate": 0.001,
      "loss": 2.8039,
      "step": 41280
    },
    {
      "epoch": 7.93,
      "learning_rate": 0.001,
      "loss": 2.805,
      "step": 41292
    },
    {
      "epoch": 7.93,
      "learning_rate": 0.001,
      "loss": 2.803,
      "step": 41304
    },
    {
      "epoch": 7.93,
      "learning_rate": 0.001,
      "loss": 2.8036,
      "step": 41316
    },
    {
      "epoch": 7.94,
      "learning_rate": 0.001,
      "loss": 2.8032,
      "step": 41328
    },
    {
      "epoch": 7.94,
      "learning_rate": 0.001,
      "loss": 2.802,
      "step": 41340
    },
    {
      "epoch": 7.94,
      "learning_rate": 0.001,
      "loss": 2.8106,
      "step": 41352
    },
    {
      "epoch": 7.94,
      "learning_rate": 0.001,
      "loss": 2.8119,
      "step": 41364
    },
    {
      "epoch": 7.94,
      "learning_rate": 0.001,
      "loss": 2.8121,
      "step": 41376
    },
    {
      "epoch": 7.95,
      "learning_rate": 0.001,
      "loss": 2.8116,
      "step": 41388
    },
    {
      "epoch": 7.95,
      "learning_rate": 0.001,
      "loss": 2.7999,
      "step": 41400
    },
    {
      "epoch": 7.95,
      "learning_rate": 0.001,
      "loss": 2.7944,
      "step": 41412
    },
    {
      "epoch": 7.95,
      "learning_rate": 0.001,
      "loss": 2.808,
      "step": 41424
    },
    {
      "epoch": 7.96,
      "learning_rate": 0.001,
      "loss": 2.8095,
      "step": 41436
    },
    {
      "epoch": 7.96,
      "learning_rate": 0.001,
      "loss": 2.8134,
      "step": 41448
    },
    {
      "epoch": 7.96,
      "learning_rate": 0.001,
      "loss": 2.8068,
      "step": 41460
    },
    {
      "epoch": 7.96,
      "learning_rate": 0.001,
      "loss": 2.8013,
      "step": 41472
    },
    {
      "epoch": 7.97,
      "learning_rate": 0.001,
      "loss": 2.8034,
      "step": 41484
    },
    {
      "epoch": 7.97,
      "learning_rate": 0.001,
      "loss": 2.7999,
      "step": 41496
    },
    {
      "epoch": 7.97,
      "learning_rate": 0.001,
      "loss": 2.8067,
      "step": 41508
    },
    {
      "epoch": 7.97,
      "learning_rate": 0.001,
      "loss": 2.8093,
      "step": 41520
    },
    {
      "epoch": 7.97,
      "learning_rate": 0.001,
      "loss": 2.8152,
      "step": 41532
    },
    {
      "epoch": 7.98,
      "learning_rate": 0.001,
      "loss": 2.8127,
      "step": 41544
    },
    {
      "epoch": 7.98,
      "learning_rate": 0.001,
      "loss": 2.8125,
      "step": 41556
    },
    {
      "epoch": 7.98,
      "learning_rate": 0.001,
      "loss": 2.8059,
      "step": 41568
    },
    {
      "epoch": 7.98,
      "learning_rate": 0.001,
      "loss": 2.8018,
      "step": 41580
    },
    {
      "epoch": 7.99,
      "learning_rate": 0.001,
      "loss": 2.7959,
      "step": 41592
    },
    {
      "epoch": 7.99,
      "learning_rate": 0.001,
      "loss": 2.8037,
      "step": 41604
    },
    {
      "epoch": 7.99,
      "learning_rate": 0.001,
      "loss": 2.8089,
      "step": 41616
    },
    {
      "epoch": 7.99,
      "learning_rate": 0.001,
      "loss": 2.8035,
      "step": 41628
    },
    {
      "epoch": 8.0,
      "learning_rate": 0.001,
      "loss": 2.8035,
      "step": 41640
    },
    {
      "epoch": 8.0,
      "learning_rate": 0.001,
      "loss": 2.8021,
      "step": 41652
    },
    {
      "epoch": 8.0,
      "learning_rate": 0.001,
      "loss": 2.7977,
      "step": 41664
    },
    {
      "epoch": 8.0,
      "learning_rate": 0.001,
      "loss": 2.7911,
      "step": 41676
    },
    {
      "epoch": 8.0,
      "learning_rate": 0.001,
      "loss": 2.7865,
      "step": 41688
    },
    {
      "epoch": 8.01,
      "learning_rate": 0.001,
      "loss": 2.789,
      "step": 41700
    },
    {
      "epoch": 8.01,
      "learning_rate": 0.001,
      "loss": 2.7829,
      "step": 41712
    },
    {
      "epoch": 8.01,
      "learning_rate": 0.001,
      "loss": 2.7769,
      "step": 41724
    },
    {
      "epoch": 8.01,
      "learning_rate": 0.001,
      "loss": 2.7864,
      "step": 41736
    },
    {
      "epoch": 8.02,
      "learning_rate": 0.001,
      "loss": 2.7816,
      "step": 41748
    },
    {
      "epoch": 8.02,
      "learning_rate": 0.001,
      "loss": 2.7839,
      "step": 41760
    },
    {
      "epoch": 8.02,
      "learning_rate": 0.001,
      "loss": 2.7808,
      "step": 41772
    },
    {
      "epoch": 8.02,
      "learning_rate": 0.001,
      "loss": 2.7745,
      "step": 41784
    },
    {
      "epoch": 8.03,
      "learning_rate": 0.001,
      "loss": 2.7806,
      "step": 41796
    },
    {
      "epoch": 8.03,
      "learning_rate": 0.001,
      "loss": 2.7783,
      "step": 41808
    },
    {
      "epoch": 8.03,
      "learning_rate": 0.001,
      "loss": 2.7971,
      "step": 41820
    },
    {
      "epoch": 8.03,
      "learning_rate": 0.001,
      "loss": 2.7891,
      "step": 41832
    },
    {
      "epoch": 8.03,
      "learning_rate": 0.001,
      "loss": 2.7824,
      "step": 41844
    },
    {
      "epoch": 8.04,
      "learning_rate": 0.001,
      "loss": 2.7817,
      "step": 41856
    },
    {
      "epoch": 8.04,
      "learning_rate": 0.001,
      "loss": 2.7798,
      "step": 41868
    },
    {
      "epoch": 8.04,
      "eval_ag_news_accuracy": 0.298875,
      "eval_ag_news_bleu_score": 4.216803539825259,
      "eval_ag_news_bleu_score_sem": 0.1391509284087185,
      "eval_ag_news_emb_cos_sim": 0.7642207145690918,
      "eval_ag_news_emb_cos_sim_sem": 0.008319944629643382,
      "eval_ag_news_emb_top1_equal": 0.1796875,
      "eval_ag_news_emb_top1_equal_sem": 0.034068008879424266,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.8362133502960205,
      "eval_ag_news_n_ngrams_match_1": 12.586,
      "eval_ag_news_n_ngrams_match_2": 2.526,
      "eval_ag_news_n_ngrams_match_3": 0.702,
      "eval_ag_news_num_pred_words": 46.136,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 46.34963190293231,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3104282891347031,
      "eval_ag_news_runtime": 11.7509,
      "eval_ag_news_samples_per_second": 42.55,
      "eval_ag_news_steps_per_second": 0.085,
      "eval_ag_news_token_set_f1": 0.31949873768250103,
      "eval_ag_news_token_set_f1_sem": 0.0044022580996051636,
      "eval_ag_news_token_set_precision": 0.29650399861574117,
      "eval_ag_news_token_set_recall": 0.359295249581459,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 41875
    },
    {
      "epoch": 8.04,
      "eval_anthropic_toxic_prompts_accuracy": 0.103,
      "eval_anthropic_toxic_prompts_bleu_score": 2.692852577293663,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11015006852059467,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6287031173706055,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010656534300950963,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.488476037979126,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.47,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.546,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.524,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.588,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 32.73602121980649,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.1890378390716641,
      "eval_anthropic_toxic_prompts_runtime": 10.095,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.53,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.099,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.324569476821007,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0060201748353898195,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.3810019545048906,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3139210790656579,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 41875
    },
    {
      "epoch": 8.04,
      "eval_arxiv_accuracy": 0.32178125,
      "eval_arxiv_bleu_score": 3.9026349411821912,
      "eval_arxiv_bleu_score_sem": 0.10981482156059526,
      "eval_arxiv_emb_cos_sim": 0.6997609734535217,
      "eval_arxiv_emb_cos_sim_sem": 0.00893799589437475,
      "eval_arxiv_emb_top1_equal": 0.1875,
      "eval_arxiv_emb_top1_equal_sem": 0.034634623208270626,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.685302495956421,
      "eval_arxiv_n_ngrams_match_1": 13.678,
      "eval_arxiv_n_ngrams_match_2": 2.644,
      "eval_arxiv_n_ngrams_match_3": 0.574,
      "eval_arxiv_num_pred_words": 40.272,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 39.85717726145954,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.31819758297255984,
      "eval_arxiv_runtime": 10.2214,
      "eval_arxiv_samples_per_second": 48.917,
      "eval_arxiv_steps_per_second": 0.098,
      "eval_arxiv_token_set_f1": 0.3179237768339998,
      "eval_arxiv_token_set_f1_sem": 0.0042574798222937155,
      "eval_arxiv_token_set_precision": 0.2635666782952128,
      "eval_arxiv_token_set_recall": 0.4224262657217067,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 41875
    },
    {
      "epoch": 8.04,
      "eval_python_code_alpaca_accuracy": 0.14471875,
      "eval_python_code_alpaca_bleu_score": 3.6295983478892175,
      "eval_python_code_alpaca_bleu_score_sem": 0.11931880643535772,
      "eval_python_code_alpaca_emb_cos_sim": 0.7095414400100708,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009777772509498822,
      "eval_python_code_alpaca_emb_top1_equal": 0.09375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.025864720141013958,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.1657066345214844,
      "eval_python_code_alpaca_n_ngrams_match_1": 8.67,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.13,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.604,
      "eval_python_code_alpaca_num_pred_words": 43.742,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 23.7054892328067,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.2863440812496213,
      "eval_python_code_alpaca_runtime": 10.1339,
      "eval_python_code_alpaca_samples_per_second": 49.339,
      "eval_python_code_alpaca_steps_per_second": 0.099,
      "eval_python_code_alpaca_token_set_f1": 0.4300042660492909,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005728997846493136,
      "eval_python_code_alpaca_token_set_precision": 0.46406024475119134,
      "eval_python_code_alpaca_token_set_recall": 0.4226458078759465,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 41875
    },
    {
      "epoch": 8.04,
      "eval_wikibio_accuracy": 0.29603125,
      "eval_wikibio_bleu_score": 5.460197619821154,
      "eval_wikibio_bleu_score_sem": 0.21460976470417648,
      "eval_wikibio_emb_cos_sim": 0.6786633729934692,
      "eval_wikibio_emb_cos_sim_sem": 0.01287330838447617,
      "eval_wikibio_emb_top1_equal": 0.1640625,
      "eval_wikibio_emb_top1_equal_sem": 0.03286167651298939,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.167712688446045,
      "eval_wikibio_n_ngrams_match_1": 9.56,
      "eval_wikibio_n_ngrams_match_2": 3.084,
      "eval_wikibio_n_ngrams_match_3": 1.102,
      "eval_wikibio_num_pred_words": 36.522,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 64.56759686604164,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3259823976842989,
      "eval_wikibio_runtime": 10.2692,
      "eval_wikibio_samples_per_second": 48.689,
      "eval_wikibio_steps_per_second": 0.097,
      "eval_wikibio_token_set_f1": 0.3011836760564445,
      "eval_wikibio_token_set_f1_sem": 0.005640154974613203,
      "eval_wikibio_token_set_precision": 0.30988461485318836,
      "eval_wikibio_token_set_recall": 0.30740702169879425,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 41875
    },
    {
      "epoch": 8.04,
      "eval_nq_accuracy": 0.49340625,
      "eval_nq_bleu_score": 9.898326171736961,
      "eval_nq_bleu_score_sem": 0.41346381222116096,
      "eval_nq_emb_cos_sim": 0.797620952129364,
      "eval_nq_emb_cos_sim_sem": 0.007820779746393624,
      "eval_nq_emb_top1_equal": 0.2890625,
      "eval_nq_emb_top1_equal_sem": 0.04022626667363519,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.432384967803955,
      "eval_nq_n_ngrams_match_1": 21.38,
      "eval_nq_n_ngrams_match_2": 7.334,
      "eval_nq_n_ngrams_match_3": 3.188,
      "eval_nq_num_pred_words": 48.608,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 11.386004978814977,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4102706025229157,
      "eval_nq_runtime": 10.9539,
      "eval_nq_samples_per_second": 45.646,
      "eval_nq_steps_per_second": 0.091,
      "eval_nq_token_set_f1": 0.43081662352813604,
      "eval_nq_token_set_f1_sem": 0.004871615340993273,
      "eval_nq_token_set_precision": 0.3823319508357155,
      "eval_nq_token_set_recall": 0.5045832181826024,
      "eval_nq_true_num_tokens": 64.0,
      "step": 41875
    },
    {
      "epoch": 8.04,
      "learning_rate": 0.001,
      "loss": 2.7836,
      "step": 41880
    },
    {
      "epoch": 8.04,
      "learning_rate": 0.001,
      "loss": 2.7917,
      "step": 41892
    },
    {
      "epoch": 8.05,
      "learning_rate": 0.001,
      "loss": 2.7972,
      "step": 41904
    },
    {
      "epoch": 8.05,
      "learning_rate": 0.001,
      "loss": 2.786,
      "step": 41916
    },
    {
      "epoch": 8.05,
      "learning_rate": 0.001,
      "loss": 2.781,
      "step": 41928
    },
    {
      "epoch": 8.05,
      "learning_rate": 0.001,
      "loss": 2.7773,
      "step": 41940
    },
    {
      "epoch": 8.06,
      "learning_rate": 0.001,
      "loss": 2.7867,
      "step": 41952
    },
    {
      "epoch": 8.06,
      "learning_rate": 0.001,
      "loss": 2.7793,
      "step": 41964
    },
    {
      "epoch": 8.06,
      "learning_rate": 0.001,
      "loss": 2.7882,
      "step": 41976
    },
    {
      "epoch": 8.06,
      "learning_rate": 0.001,
      "loss": 2.7921,
      "step": 41988
    },
    {
      "epoch": 8.06,
      "learning_rate": 0.001,
      "loss": 2.7739,
      "step": 42000
    },
    {
      "epoch": 8.07,
      "learning_rate": 0.001,
      "loss": 2.776,
      "step": 42012
    },
    {
      "epoch": 8.07,
      "learning_rate": 0.001,
      "loss": 2.7826,
      "step": 42024
    },
    {
      "epoch": 8.07,
      "learning_rate": 0.001,
      "loss": 2.7925,
      "step": 42036
    },
    {
      "epoch": 8.07,
      "learning_rate": 0.001,
      "loss": 2.7859,
      "step": 42048
    },
    {
      "epoch": 8.08,
      "learning_rate": 0.001,
      "loss": 2.7863,
      "step": 42060
    },
    {
      "epoch": 8.08,
      "learning_rate": 0.001,
      "loss": 2.7818,
      "step": 42072
    },
    {
      "epoch": 8.08,
      "learning_rate": 0.001,
      "loss": 2.7965,
      "step": 42084
    },
    {
      "epoch": 8.08,
      "learning_rate": 0.001,
      "loss": 2.7978,
      "step": 42096
    },
    {
      "epoch": 8.09,
      "learning_rate": 0.001,
      "loss": 2.7906,
      "step": 42108
    },
    {
      "epoch": 8.09,
      "learning_rate": 0.001,
      "loss": 2.7857,
      "step": 42120
    },
    {
      "epoch": 8.09,
      "learning_rate": 0.001,
      "loss": 2.7746,
      "step": 42132
    },
    {
      "epoch": 8.09,
      "learning_rate": 0.001,
      "loss": 2.7751,
      "step": 42144
    },
    {
      "epoch": 8.09,
      "learning_rate": 0.001,
      "loss": 2.7789,
      "step": 42156
    },
    {
      "epoch": 8.1,
      "learning_rate": 0.001,
      "loss": 2.7903,
      "step": 42168
    },
    {
      "epoch": 8.1,
      "learning_rate": 0.001,
      "loss": 2.7946,
      "step": 42180
    },
    {
      "epoch": 8.1,
      "learning_rate": 0.001,
      "loss": 2.7783,
      "step": 42192
    },
    {
      "epoch": 8.1,
      "learning_rate": 0.001,
      "loss": 2.7893,
      "step": 42204
    },
    {
      "epoch": 8.11,
      "learning_rate": 0.001,
      "loss": 2.7776,
      "step": 42216
    },
    {
      "epoch": 8.11,
      "learning_rate": 0.001,
      "loss": 2.784,
      "step": 42228
    },
    {
      "epoch": 8.11,
      "learning_rate": 0.001,
      "loss": 2.7696,
      "step": 42240
    },
    {
      "epoch": 8.11,
      "learning_rate": 0.001,
      "loss": 2.7725,
      "step": 42252
    },
    {
      "epoch": 8.12,
      "learning_rate": 0.001,
      "loss": 2.8001,
      "step": 42264
    },
    {
      "epoch": 8.12,
      "learning_rate": 0.001,
      "loss": 2.788,
      "step": 42276
    },
    {
      "epoch": 8.12,
      "learning_rate": 0.001,
      "loss": 2.7794,
      "step": 42288
    },
    {
      "epoch": 8.12,
      "learning_rate": 0.001,
      "loss": 2.789,
      "step": 42300
    },
    {
      "epoch": 8.12,
      "learning_rate": 0.001,
      "loss": 2.78,
      "step": 42312
    },
    {
      "epoch": 8.13,
      "learning_rate": 0.001,
      "loss": 2.7789,
      "step": 42324
    },
    {
      "epoch": 8.13,
      "learning_rate": 0.001,
      "loss": 2.7854,
      "step": 42336
    },
    {
      "epoch": 8.13,
      "learning_rate": 0.001,
      "loss": 2.7901,
      "step": 42348
    },
    {
      "epoch": 8.13,
      "learning_rate": 0.001,
      "loss": 2.7913,
      "step": 42360
    },
    {
      "epoch": 8.14,
      "learning_rate": 0.001,
      "loss": 2.7893,
      "step": 42372
    },
    {
      "epoch": 8.14,
      "learning_rate": 0.001,
      "loss": 2.7824,
      "step": 42384
    },
    {
      "epoch": 8.14,
      "learning_rate": 0.001,
      "loss": 2.7815,
      "step": 42396
    },
    {
      "epoch": 8.14,
      "learning_rate": 0.001,
      "loss": 2.7916,
      "step": 42408
    },
    {
      "epoch": 8.15,
      "learning_rate": 0.001,
      "loss": 2.7818,
      "step": 42420
    },
    {
      "epoch": 8.15,
      "learning_rate": 0.001,
      "loss": 2.7879,
      "step": 42432
    },
    {
      "epoch": 8.15,
      "learning_rate": 0.001,
      "loss": 2.793,
      "step": 42444
    },
    {
      "epoch": 8.15,
      "learning_rate": 0.001,
      "loss": 2.7963,
      "step": 42456
    },
    {
      "epoch": 8.15,
      "learning_rate": 0.001,
      "loss": 2.7811,
      "step": 42468
    },
    {
      "epoch": 8.16,
      "learning_rate": 0.001,
      "loss": 2.7965,
      "step": 42480
    },
    {
      "epoch": 8.16,
      "learning_rate": 0.001,
      "loss": 2.7928,
      "step": 42492
    },
    {
      "epoch": 8.16,
      "eval_ag_news_accuracy": 0.297375,
      "eval_ag_news_bleu_score": 4.077741056324943,
      "eval_ag_news_bleu_score_sem": 0.1350246057891333,
      "eval_ag_news_emb_cos_sim": 0.7508853673934937,
      "eval_ag_news_emb_cos_sim_sem": 0.00949579099517021,
      "eval_ag_news_emb_top1_equal": 0.1875,
      "eval_ag_news_emb_top1_equal_sem": 0.034634623208270626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.833709955215454,
      "eval_ag_news_n_ngrams_match_1": 12.524,
      "eval_ag_news_n_ngrams_match_2": 2.474,
      "eval_ag_news_n_ngrams_match_3": 0.656,
      "eval_ag_news_num_pred_words": 45.466,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 46.23374557759033,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3063146823454444,
      "eval_ag_news_runtime": 10.6701,
      "eval_ag_news_samples_per_second": 46.86,
      "eval_ag_news_steps_per_second": 0.094,
      "eval_ag_news_token_set_f1": 0.3184797730117816,
      "eval_ag_news_token_set_f1_sem": 0.004624105725727724,
      "eval_ag_news_token_set_precision": 0.2937868065160725,
      "eval_ag_news_token_set_recall": 0.36516289536224156,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 42500
    },
    {
      "epoch": 8.16,
      "eval_anthropic_toxic_prompts_accuracy": 0.10253125,
      "eval_anthropic_toxic_prompts_bleu_score": 2.6640877193255896,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10581202263670453,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6246901750564575,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010404943298576738,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.5179667472839355,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.426,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.554,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.53,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.976,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 33.71580596999187,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.18708885265436348,
      "eval_anthropic_toxic_prompts_runtime": 10.203,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.005,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.098,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3264954087635187,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006192588117650118,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.3819508454617556,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3202988365945977,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 42500
    },
    {
      "epoch": 8.16,
      "eval_arxiv_accuracy": 0.3184375,
      "eval_arxiv_bleu_score": 3.580929209552589,
      "eval_arxiv_bleu_score_sem": 0.10186227418187782,
      "eval_arxiv_emb_cos_sim": 0.6993412971496582,
      "eval_arxiv_emb_cos_sim_sem": 0.009551301687381598,
      "eval_arxiv_emb_top1_equal": 0.234375,
      "eval_arxiv_emb_top1_equal_sem": 0.03758909358128201,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.711540460586548,
      "eval_arxiv_n_ngrams_match_1": 13.08,
      "eval_arxiv_n_ngrams_match_2": 2.39,
      "eval_arxiv_n_ngrams_match_3": 0.456,
      "eval_arxiv_num_pred_words": 39.25,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 40.916788704081455,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3121365437937691,
      "eval_arxiv_runtime": 10.9533,
      "eval_arxiv_samples_per_second": 45.648,
      "eval_arxiv_steps_per_second": 0.091,
      "eval_arxiv_token_set_f1": 0.30865046215223163,
      "eval_arxiv_token_set_f1_sem": 0.004286575269854986,
      "eval_arxiv_token_set_precision": 0.2526619170063179,
      "eval_arxiv_token_set_recall": 0.4225832185125916,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 42500
    },
    {
      "epoch": 8.16,
      "eval_python_code_alpaca_accuracy": 0.14125,
      "eval_python_code_alpaca_bleu_score": 3.5710510886767723,
      "eval_python_code_alpaca_bleu_score_sem": 0.11153592195455794,
      "eval_python_code_alpaca_emb_cos_sim": 0.7040446996688843,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008902066364563721,
      "eval_python_code_alpaca_emb_top1_equal": 0.1171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.198880434036255,
      "eval_python_code_alpaca_n_ngrams_match_1": 8.64,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.144,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.568,
      "eval_python_code_alpaca_num_pred_words": 43.582,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 24.505079780439473,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.28752798008359337,
      "eval_python_code_alpaca_runtime": 10.9883,
      "eval_python_code_alpaca_samples_per_second": 45.503,
      "eval_python_code_alpaca_steps_per_second": 0.091,
      "eval_python_code_alpaca_token_set_f1": 0.43116652732053623,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005447268684502152,
      "eval_python_code_alpaca_token_set_precision": 0.46050557861543545,
      "eval_python_code_alpaca_token_set_recall": 0.429471640728056,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 42500
    },
    {
      "epoch": 8.16,
      "eval_wikibio_accuracy": 0.2949375,
      "eval_wikibio_bleu_score": 5.27167073038927,
      "eval_wikibio_bleu_score_sem": 0.1909683163189286,
      "eval_wikibio_emb_cos_sim": 0.6939498782157898,
      "eval_wikibio_emb_cos_sim_sem": 0.011721736628058092,
      "eval_wikibio_emb_top1_equal": 0.1484375,
      "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.1994404792785645,
      "eval_wikibio_n_ngrams_match_1": 9.294,
      "eval_wikibio_n_ngrams_match_2": 2.986,
      "eval_wikibio_n_ngrams_match_3": 0.996,
      "eval_wikibio_num_pred_words": 35.432,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 66.64902909345066,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3216035112591869,
      "eval_wikibio_runtime": 10.5796,
      "eval_wikibio_samples_per_second": 47.261,
      "eval_wikibio_steps_per_second": 0.095,
      "eval_wikibio_token_set_f1": 0.29555198082348944,
      "eval_wikibio_token_set_f1_sem": 0.005628781659182481,
      "eval_wikibio_token_set_precision": 0.30057251601266827,
      "eval_wikibio_token_set_recall": 0.3074309043198922,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 42500
    },
    {
      "epoch": 8.16,
      "eval_nq_accuracy": 0.4941875,
      "eval_nq_bleu_score": 9.86346669547191,
      "eval_nq_bleu_score_sem": 0.41970100155541007,
      "eval_nq_emb_cos_sim": 0.7970328330993652,
      "eval_nq_emb_cos_sim_sem": 0.008574894867552828,
      "eval_nq_emb_top1_equal": 0.21875,
      "eval_nq_emb_top1_equal_sem": 0.03668319712192295,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.434400796890259,
      "eval_nq_n_ngrams_match_1": 21.284,
      "eval_nq_n_ngrams_match_2": 7.254,
      "eval_nq_n_ngrams_match_3": 3.158,
      "eval_nq_num_pred_words": 48.788,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 11.408980368277076,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.40653414259813897,
      "eval_nq_runtime": 10.8612,
      "eval_nq_samples_per_second": 46.035,
      "eval_nq_steps_per_second": 0.092,
      "eval_nq_token_set_f1": 0.424985100738458,
      "eval_nq_token_set_f1_sem": 0.0050258083679207526,
      "eval_nq_token_set_precision": 0.3769450608963914,
      "eval_nq_token_set_recall": 0.49888037346179454,
      "eval_nq_true_num_tokens": 64.0,
      "step": 42500
    },
    {
      "epoch": 8.16,
      "learning_rate": 0.001,
      "loss": 2.7843,
      "step": 42504
    },
    {
      "epoch": 8.16,
      "learning_rate": 0.001,
      "loss": 2.7901,
      "step": 42516
    },
    {
      "epoch": 8.17,
      "learning_rate": 0.001,
      "loss": 2.7809,
      "step": 42528
    },
    {
      "epoch": 8.17,
      "learning_rate": 0.001,
      "loss": 2.7845,
      "step": 42540
    },
    {
      "epoch": 8.17,
      "learning_rate": 0.001,
      "loss": 2.7916,
      "step": 42552
    },
    {
      "epoch": 8.17,
      "learning_rate": 0.001,
      "loss": 2.7911,
      "step": 42564
    },
    {
      "epoch": 8.18,
      "learning_rate": 0.001,
      "loss": 2.7982,
      "step": 42576
    },
    {
      "epoch": 8.18,
      "learning_rate": 0.001,
      "loss": 2.7913,
      "step": 42588
    },
    {
      "epoch": 8.18,
      "learning_rate": 0.001,
      "loss": 2.7861,
      "step": 42600
    },
    {
      "epoch": 8.18,
      "learning_rate": 0.001,
      "loss": 2.7847,
      "step": 42612
    },
    {
      "epoch": 8.18,
      "learning_rate": 0.001,
      "loss": 2.7814,
      "step": 42624
    },
    {
      "epoch": 8.19,
      "learning_rate": 0.001,
      "loss": 2.7878,
      "step": 42636
    },
    {
      "epoch": 8.19,
      "learning_rate": 0.001,
      "loss": 2.7898,
      "step": 42648
    },
    {
      "epoch": 8.19,
      "learning_rate": 0.001,
      "loss": 2.7916,
      "step": 42660
    },
    {
      "epoch": 8.19,
      "learning_rate": 0.001,
      "loss": 2.7857,
      "step": 42672
    },
    {
      "epoch": 8.2,
      "learning_rate": 0.001,
      "loss": 2.7969,
      "step": 42684
    },
    {
      "epoch": 8.2,
      "learning_rate": 0.001,
      "loss": 2.7868,
      "step": 42696
    },
    {
      "epoch": 8.2,
      "learning_rate": 0.001,
      "loss": 2.7846,
      "step": 42708
    },
    {
      "epoch": 8.2,
      "learning_rate": 0.001,
      "loss": 2.7865,
      "step": 42720
    },
    {
      "epoch": 8.21,
      "learning_rate": 0.001,
      "loss": 2.7857,
      "step": 42732
    },
    {
      "epoch": 8.21,
      "learning_rate": 0.001,
      "loss": 2.7814,
      "step": 42744
    },
    {
      "epoch": 8.21,
      "learning_rate": 0.001,
      "loss": 2.7769,
      "step": 42756
    },
    {
      "epoch": 8.21,
      "learning_rate": 0.001,
      "loss": 2.7811,
      "step": 42768
    },
    {
      "epoch": 8.21,
      "learning_rate": 0.001,
      "loss": 2.7775,
      "step": 42780
    },
    {
      "epoch": 8.22,
      "learning_rate": 0.001,
      "loss": 2.7836,
      "step": 42792
    },
    {
      "epoch": 8.22,
      "learning_rate": 0.001,
      "loss": 2.7802,
      "step": 42804
    },
    {
      "epoch": 8.22,
      "learning_rate": 0.001,
      "loss": 2.7857,
      "step": 42816
    },
    {
      "epoch": 8.22,
      "learning_rate": 0.001,
      "loss": 2.7701,
      "step": 42828
    },
    {
      "epoch": 8.23,
      "learning_rate": 0.001,
      "loss": 2.7836,
      "step": 42840
    },
    {
      "epoch": 8.23,
      "learning_rate": 0.001,
      "loss": 2.7806,
      "step": 42852
    },
    {
      "epoch": 8.23,
      "learning_rate": 0.001,
      "loss": 2.7904,
      "step": 42864
    },
    {
      "epoch": 8.23,
      "learning_rate": 0.001,
      "loss": 2.7826,
      "step": 42876
    },
    {
      "epoch": 8.24,
      "learning_rate": 0.001,
      "loss": 2.7906,
      "step": 42888
    },
    {
      "epoch": 8.24,
      "learning_rate": 0.001,
      "loss": 2.7784,
      "step": 42900
    },
    {
      "epoch": 8.24,
      "learning_rate": 0.001,
      "loss": 2.787,
      "step": 42912
    },
    {
      "epoch": 8.24,
      "learning_rate": 0.001,
      "loss": 2.77,
      "step": 42924
    },
    {
      "epoch": 8.24,
      "learning_rate": 0.001,
      "loss": 2.7838,
      "step": 42936
    },
    {
      "epoch": 8.25,
      "learning_rate": 0.001,
      "loss": 2.7824,
      "step": 42948
    },
    {
      "epoch": 8.25,
      "learning_rate": 0.001,
      "loss": 2.7813,
      "step": 42960
    },
    {
      "epoch": 8.25,
      "learning_rate": 0.001,
      "loss": 2.7774,
      "step": 42972
    },
    {
      "epoch": 8.25,
      "learning_rate": 0.001,
      "loss": 2.7897,
      "step": 42984
    },
    {
      "epoch": 8.26,
      "learning_rate": 0.001,
      "loss": 2.7716,
      "step": 42996
    },
    {
      "epoch": 8.26,
      "learning_rate": 0.001,
      "loss": 2.7899,
      "step": 43008
    },
    {
      "epoch": 8.26,
      "learning_rate": 0.001,
      "loss": 2.7809,
      "step": 43020
    },
    {
      "epoch": 8.26,
      "learning_rate": 0.001,
      "loss": 2.7779,
      "step": 43032
    },
    {
      "epoch": 8.26,
      "learning_rate": 0.001,
      "loss": 2.7801,
      "step": 43044
    },
    {
      "epoch": 8.27,
      "learning_rate": 0.001,
      "loss": 2.7778,
      "step": 43056
    },
    {
      "epoch": 8.27,
      "learning_rate": 0.001,
      "loss": 2.8035,
      "step": 43068
    },
    {
      "epoch": 8.27,
      "learning_rate": 0.001,
      "loss": 2.7826,
      "step": 43080
    },
    {
      "epoch": 8.27,
      "learning_rate": 0.001,
      "loss": 2.784,
      "step": 43092
    },
    {
      "epoch": 8.28,
      "learning_rate": 0.001,
      "loss": 2.7824,
      "step": 43104
    },
    {
      "epoch": 8.28,
      "learning_rate": 0.001,
      "loss": 2.7747,
      "step": 43116
    },
    {
      "epoch": 8.28,
      "eval_ag_news_accuracy": 0.29796875,
      "eval_ag_news_bleu_score": 4.289186033478596,
      "eval_ag_news_bleu_score_sem": 0.1472273291851427,
      "eval_ag_news_emb_cos_sim": 0.7602236270904541,
      "eval_ag_news_emb_cos_sim_sem": 0.008700125008279836,
      "eval_ag_news_emb_top1_equal": 0.21875,
      "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.8286235332489014,
      "eval_ag_news_n_ngrams_match_1": 12.692,
      "eval_ag_news_n_ngrams_match_2": 2.608,
      "eval_ag_news_n_ngrams_match_3": 0.724,
      "eval_ag_news_num_pred_words": 46.834,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 45.99917829829024,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.30951333819984755,
      "eval_ag_news_runtime": 11.2096,
      "eval_ag_news_samples_per_second": 44.605,
      "eval_ag_news_steps_per_second": 0.089,
      "eval_ag_news_token_set_f1": 0.32048263955705764,
      "eval_ag_news_token_set_f1_sem": 0.004396185704757408,
      "eval_ag_news_token_set_precision": 0.2992848077245601,
      "eval_ag_news_token_set_recall": 0.35936025524546333,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 43125
    },
    {
      "epoch": 8.28,
      "eval_anthropic_toxic_prompts_accuracy": 0.10271875,
      "eval_anthropic_toxic_prompts_bleu_score": 2.475494185401394,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.09886072913482825,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6274254322052002,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009971455059108096,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.4859540462493896,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.4,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.426,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.452,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.574,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 32.653565265347964,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.18590882169211093,
      "eval_anthropic_toxic_prompts_runtime": 11.2239,
      "eval_anthropic_toxic_prompts_samples_per_second": 44.548,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.089,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3254527599800401,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006217887336521121,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.37812043057603234,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.31746609671449844,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 43125
    },
    {
      "epoch": 8.28,
      "eval_arxiv_accuracy": 0.320625,
      "eval_arxiv_bleu_score": 3.7092596609580166,
      "eval_arxiv_bleu_score_sem": 0.10988733229751972,
      "eval_arxiv_emb_cos_sim": 0.70307457447052,
      "eval_arxiv_emb_cos_sim_sem": 0.008716305446840973,
      "eval_arxiv_emb_top1_equal": 0.1875,
      "eval_arxiv_emb_top1_equal_sem": 0.034634623208270626,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.6934268474578857,
      "eval_arxiv_n_ngrams_match_1": 13.586,
      "eval_arxiv_n_ngrams_match_2": 2.464,
      "eval_arxiv_n_ngrams_match_3": 0.466,
      "eval_arxiv_num_pred_words": 40.584,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 40.182309937092974,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3200854283346377,
      "eval_arxiv_runtime": 10.5425,
      "eval_arxiv_samples_per_second": 47.427,
      "eval_arxiv_steps_per_second": 0.095,
      "eval_arxiv_token_set_f1": 0.3161266797735679,
      "eval_arxiv_token_set_f1_sem": 0.00410605037504695,
      "eval_arxiv_token_set_precision": 0.26409995385144985,
      "eval_arxiv_token_set_recall": 0.42019213761650775,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 43125
    },
    {
      "epoch": 8.28,
      "eval_python_code_alpaca_accuracy": 0.14328125,
      "eval_python_code_alpaca_bleu_score": 3.6580480867216787,
      "eval_python_code_alpaca_bleu_score_sem": 0.12125716275977651,
      "eval_python_code_alpaca_emb_cos_sim": 0.7039538025856018,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009341827240470899,
      "eval_python_code_alpaca_emb_top1_equal": 0.109375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.166282892227173,
      "eval_python_code_alpaca_n_ngrams_match_1": 8.592,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.182,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.65,
      "eval_python_code_alpaca_num_pred_words": 43.54,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 23.719153640376163,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.2852001742178809,
      "eval_python_code_alpaca_runtime": 10.3977,
      "eval_python_code_alpaca_samples_per_second": 48.087,
      "eval_python_code_alpaca_steps_per_second": 0.096,
      "eval_python_code_alpaca_token_set_f1": 0.43869896485484505,
      "eval_python_code_alpaca_token_set_f1_sem": 0.00560460810782458,
      "eval_python_code_alpaca_token_set_precision": 0.46536812284666607,
      "eval_python_code_alpaca_token_set_recall": 0.4419565581122088,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 43125
    },
    {
      "epoch": 8.28,
      "eval_wikibio_accuracy": 0.29796875,
      "eval_wikibio_bleu_score": 5.451557623292682,
      "eval_wikibio_bleu_score_sem": 0.1839750438125555,
      "eval_wikibio_emb_cos_sim": 0.7180080413818359,
      "eval_wikibio_emb_cos_sim_sem": 0.009667340522410693,
      "eval_wikibio_emb_top1_equal": 0.1875,
      "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.098819732666016,
      "eval_wikibio_n_ngrams_match_1": 9.702,
      "eval_wikibio_n_ngrams_match_2": 3.142,
      "eval_wikibio_n_ngrams_match_3": 1.104,
      "eval_wikibio_num_pred_words": 36.826,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 60.269111938402695,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3337485597739648,
      "eval_wikibio_runtime": 9.9718,
      "eval_wikibio_samples_per_second": 50.141,
      "eval_wikibio_steps_per_second": 0.1,
      "eval_wikibio_token_set_f1": 0.3083721893814285,
      "eval_wikibio_token_set_f1_sem": 0.005254341766396219,
      "eval_wikibio_token_set_precision": 0.313680133888289,
      "eval_wikibio_token_set_recall": 0.3167676217626774,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 43125
    },
    {
      "epoch": 8.28,
      "eval_nq_accuracy": 0.49509375,
      "eval_nq_bleu_score": 9.962303375764531,
      "eval_nq_bleu_score_sem": 0.4212394240939915,
      "eval_nq_emb_cos_sim": 0.8032087087631226,
      "eval_nq_emb_cos_sim_sem": 0.007502148913106757,
      "eval_nq_emb_top1_equal": 0.234375,
      "eval_nq_emb_top1_equal_sem": 0.03758909358128201,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.424966812133789,
      "eval_nq_n_ngrams_match_1": 21.508,
      "eval_nq_n_ngrams_match_2": 7.358,
      "eval_nq_n_ngrams_match_3": 3.202,
      "eval_nq_num_pred_words": 49.312,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 11.301854328625993,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4108295881109665,
      "eval_nq_runtime": 11.256,
      "eval_nq_samples_per_second": 44.421,
      "eval_nq_steps_per_second": 0.089,
      "eval_nq_token_set_f1": 0.43293763220999165,
      "eval_nq_token_set_f1_sem": 0.004937672230892428,
      "eval_nq_token_set_precision": 0.38471865967487573,
      "eval_nq_token_set_recall": 0.506869447852444,
      "eval_nq_true_num_tokens": 64.0,
      "step": 43125
    },
    {
      "epoch": 8.28,
      "learning_rate": 0.001,
      "loss": 2.7911,
      "step": 43128
    },
    {
      "epoch": 8.28,
      "learning_rate": 0.001,
      "loss": 2.7842,
      "step": 43140
    },
    {
      "epoch": 8.29,
      "learning_rate": 0.001,
      "loss": 2.7887,
      "step": 43152
    },
    {
      "epoch": 8.29,
      "learning_rate": 0.001,
      "loss": 2.7756,
      "step": 43164
    },
    {
      "epoch": 8.29,
      "learning_rate": 0.001,
      "loss": 2.7861,
      "step": 43176
    },
    {
      "epoch": 8.29,
      "learning_rate": 0.001,
      "loss": 2.7899,
      "step": 43188
    },
    {
      "epoch": 8.29,
      "learning_rate": 0.001,
      "loss": 2.7839,
      "step": 43200
    },
    {
      "epoch": 8.3,
      "learning_rate": 0.001,
      "loss": 2.7764,
      "step": 43212
    },
    {
      "epoch": 8.3,
      "learning_rate": 0.001,
      "loss": 2.7805,
      "step": 43224
    },
    {
      "epoch": 8.3,
      "learning_rate": 0.001,
      "loss": 2.7846,
      "step": 43236
    },
    {
      "epoch": 8.3,
      "learning_rate": 0.001,
      "loss": 2.7854,
      "step": 43248
    },
    {
      "epoch": 8.31,
      "learning_rate": 0.001,
      "loss": 2.7817,
      "step": 43260
    },
    {
      "epoch": 8.31,
      "learning_rate": 0.001,
      "loss": 2.7878,
      "step": 43272
    },
    {
      "epoch": 8.31,
      "learning_rate": 0.001,
      "loss": 2.783,
      "step": 43284
    },
    {
      "epoch": 8.31,
      "learning_rate": 0.001,
      "loss": 2.7924,
      "step": 43296
    },
    {
      "epoch": 8.32,
      "learning_rate": 0.001,
      "loss": 2.7858,
      "step": 43308
    },
    {
      "epoch": 8.32,
      "learning_rate": 0.001,
      "loss": 2.7888,
      "step": 43320
    },
    {
      "epoch": 8.32,
      "learning_rate": 0.001,
      "loss": 2.7817,
      "step": 43332
    },
    {
      "epoch": 8.32,
      "learning_rate": 0.001,
      "loss": 2.7796,
      "step": 43344
    },
    {
      "epoch": 8.32,
      "learning_rate": 0.001,
      "loss": 2.7882,
      "step": 43356
    },
    {
      "epoch": 8.33,
      "learning_rate": 0.001,
      "loss": 2.789,
      "step": 43368
    },
    {
      "epoch": 8.33,
      "learning_rate": 0.001,
      "loss": 2.7892,
      "step": 43380
    },
    {
      "epoch": 8.33,
      "learning_rate": 0.001,
      "loss": 2.7771,
      "step": 43392
    },
    {
      "epoch": 8.33,
      "learning_rate": 0.001,
      "loss": 2.7921,
      "step": 43404
    },
    {
      "epoch": 8.34,
      "learning_rate": 0.001,
      "loss": 2.7865,
      "step": 43416
    },
    {
      "epoch": 8.34,
      "learning_rate": 0.001,
      "loss": 2.7811,
      "step": 43428
    },
    {
      "epoch": 8.34,
      "learning_rate": 0.001,
      "loss": 2.7865,
      "step": 43440
    },
    {
      "epoch": 8.34,
      "learning_rate": 0.001,
      "loss": 2.7826,
      "step": 43452
    },
    {
      "epoch": 8.35,
      "learning_rate": 0.001,
      "loss": 2.7732,
      "step": 43464
    },
    {
      "epoch": 8.35,
      "learning_rate": 0.001,
      "loss": 2.7793,
      "step": 43476
    },
    {
      "epoch": 8.35,
      "learning_rate": 0.001,
      "loss": 2.7883,
      "step": 43488
    },
    {
      "epoch": 8.35,
      "learning_rate": 0.001,
      "loss": 2.7863,
      "step": 43500
    },
    {
      "epoch": 8.35,
      "learning_rate": 0.001,
      "loss": 2.7837,
      "step": 43512
    },
    {
      "epoch": 8.36,
      "learning_rate": 0.001,
      "loss": 2.7856,
      "step": 43524
    },
    {
      "epoch": 8.36,
      "learning_rate": 0.001,
      "loss": 2.7796,
      "step": 43536
    },
    {
      "epoch": 8.36,
      "learning_rate": 0.001,
      "loss": 2.7888,
      "step": 43548
    },
    {
      "epoch": 8.36,
      "learning_rate": 0.001,
      "loss": 2.782,
      "step": 43560
    },
    {
      "epoch": 8.37,
      "learning_rate": 0.001,
      "loss": 2.7827,
      "step": 43572
    },
    {
      "epoch": 8.37,
      "learning_rate": 0.001,
      "loss": 2.785,
      "step": 43584
    },
    {
      "epoch": 8.37,
      "learning_rate": 0.001,
      "loss": 2.7844,
      "step": 43596
    },
    {
      "epoch": 8.37,
      "learning_rate": 0.001,
      "loss": 2.7806,
      "step": 43608
    },
    {
      "epoch": 8.38,
      "learning_rate": 0.001,
      "loss": 2.781,
      "step": 43620
    },
    {
      "epoch": 8.38,
      "learning_rate": 0.001,
      "loss": 2.7777,
      "step": 43632
    },
    {
      "epoch": 8.38,
      "learning_rate": 0.001,
      "loss": 2.7923,
      "step": 43644
    },
    {
      "epoch": 8.38,
      "learning_rate": 0.001,
      "loss": 2.7845,
      "step": 43656
    },
    {
      "epoch": 8.38,
      "learning_rate": 0.001,
      "loss": 2.7809,
      "step": 43668
    },
    {
      "epoch": 8.39,
      "learning_rate": 0.001,
      "loss": 2.7774,
      "step": 43680
    },
    {
      "epoch": 8.39,
      "learning_rate": 0.001,
      "loss": 2.7963,
      "step": 43692
    },
    {
      "epoch": 8.39,
      "learning_rate": 0.001,
      "loss": 2.7849,
      "step": 43704
    },
    {
      "epoch": 8.39,
      "learning_rate": 0.001,
      "loss": 2.7833,
      "step": 43716
    },
    {
      "epoch": 8.4,
      "learning_rate": 0.001,
      "loss": 2.7779,
      "step": 43728
    },
    {
      "epoch": 8.4,
      "learning_rate": 0.001,
      "loss": 2.7859,
      "step": 43740
    },
    {
      "epoch": 8.4,
      "eval_ag_news_accuracy": 0.296375,
      "eval_ag_news_bleu_score": 4.127681050947515,
      "eval_ag_news_bleu_score_sem": 0.1394653004526821,
      "eval_ag_news_emb_cos_sim": 0.7563239336013794,
      "eval_ag_news_emb_cos_sim_sem": 0.008866142103108756,
      "eval_ag_news_emb_top1_equal": 0.171875,
      "eval_ag_news_emb_top1_equal_sem": 0.03347745514062371,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.838296413421631,
      "eval_ag_news_n_ngrams_match_1": 12.674,
      "eval_ag_news_n_ngrams_match_2": 2.552,
      "eval_ag_news_n_ngrams_match_3": 0.654,
      "eval_ag_news_num_pred_words": 47.252,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 46.446281740943554,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3095242437772054,
      "eval_ag_news_runtime": 12.4816,
      "eval_ag_news_samples_per_second": 40.059,
      "eval_ag_news_steps_per_second": 0.08,
      "eval_ag_news_token_set_f1": 0.31812557604777497,
      "eval_ag_news_token_set_f1_sem": 0.004140194139602665,
      "eval_ag_news_token_set_precision": 0.30030903788951346,
      "eval_ag_news_token_set_recall": 0.35390381441230057,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 43750
    },
    {
      "epoch": 8.4,
      "eval_anthropic_toxic_prompts_accuracy": 0.10359375,
      "eval_anthropic_toxic_prompts_bleu_score": 2.6464367895337446,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10675059715711638,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6354289054870605,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00967367769621101,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.4955379962921143,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.542,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.562,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.52,
      "eval_anthropic_toxic_prompts_num_pred_words": 48.818,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 32.968019855168535,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.186614958033128,
      "eval_anthropic_toxic_prompts_runtime": 12.1145,
      "eval_anthropic_toxic_prompts_samples_per_second": 41.273,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.083,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3232138201309163,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005916100528997787,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.38813685524287767,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3055174839589522,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 43750
    },
    {
      "epoch": 8.4,
      "eval_arxiv_accuracy": 0.32078125,
      "eval_arxiv_bleu_score": 3.769695008452207,
      "eval_arxiv_bleu_score_sem": 0.11125574381752283,
      "eval_arxiv_emb_cos_sim": 0.7036663293838501,
      "eval_arxiv_emb_cos_sim_sem": 0.008145491579667203,
      "eval_arxiv_emb_top1_equal": 0.171875,
      "eval_arxiv_emb_top1_equal_sem": 0.03347745514062371,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.698707342147827,
      "eval_arxiv_n_ngrams_match_1": 13.406,
      "eval_arxiv_n_ngrams_match_2": 2.462,
      "eval_arxiv_n_ngrams_match_3": 0.52,
      "eval_arxiv_num_pred_words": 40.34,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 40.395053612932166,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.31429062003146413,
      "eval_arxiv_runtime": 13.2845,
      "eval_arxiv_samples_per_second": 37.638,
      "eval_arxiv_steps_per_second": 0.075,
      "eval_arxiv_token_set_f1": 0.31195369477363527,
      "eval_arxiv_token_set_f1_sem": 0.004173339921038991,
      "eval_arxiv_token_set_precision": 0.26016474718564564,
      "eval_arxiv_token_set_recall": 0.4105074224751023,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 43750
    },
    {
      "epoch": 8.4,
      "eval_python_code_alpaca_accuracy": 0.1455625,
      "eval_python_code_alpaca_bleu_score": 3.6777738058875826,
      "eval_python_code_alpaca_bleu_score_sem": 0.11551648618370353,
      "eval_python_code_alpaca_emb_cos_sim": 0.7076461315155029,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008949351177325897,
      "eval_python_code_alpaca_emb_top1_equal": 0.1015625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.026804565886848545,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.1563408374786377,
      "eval_python_code_alpaca_n_ngrams_match_1": 8.81,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.23,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.628,
      "eval_python_code_alpaca_num_pred_words": 44.006,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 23.484504894441393,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.29306394825778914,
      "eval_python_code_alpaca_runtime": 11.5314,
      "eval_python_code_alpaca_samples_per_second": 43.36,
      "eval_python_code_alpaca_steps_per_second": 0.087,
      "eval_python_code_alpaca_token_set_f1": 0.43354780343817956,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005401078005187498,
      "eval_python_code_alpaca_token_set_precision": 0.4809611521464254,
      "eval_python_code_alpaca_token_set_recall": 0.4182455721870356,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 43750
    },
    {
      "epoch": 8.4,
      "eval_wikibio_accuracy": 0.29396875,
      "eval_wikibio_bleu_score": 5.452889389272654,
      "eval_wikibio_bleu_score_sem": 0.18067991348148654,
      "eval_wikibio_emb_cos_sim": 0.7179189920425415,
      "eval_wikibio_emb_cos_sim_sem": 0.010318616674658805,
      "eval_wikibio_emb_top1_equal": 0.1640625,
      "eval_wikibio_emb_top1_equal_sem": 0.03286167651298939,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.226851463317871,
      "eval_wikibio_n_ngrams_match_1": 10.146,
      "eval_wikibio_n_ngrams_match_2": 3.27,
      "eval_wikibio_n_ngrams_match_3": 1.132,
      "eval_wikibio_num_pred_words": 38.906,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 68.501213697156,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3433434774041647,
      "eval_wikibio_runtime": 11.5794,
      "eval_wikibio_samples_per_second": 43.18,
      "eval_wikibio_steps_per_second": 0.086,
      "eval_wikibio_token_set_f1": 0.3132033298013379,
      "eval_wikibio_token_set_f1_sem": 0.0050542039627546895,
      "eval_wikibio_token_set_precision": 0.3283312891350125,
      "eval_wikibio_token_set_recall": 0.3106371168149282,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 43750
    },
    {
      "epoch": 8.4,
      "eval_nq_accuracy": 0.496625,
      "eval_nq_bleu_score": 10.234519340711536,
      "eval_nq_bleu_score_sem": 0.41931663053445006,
      "eval_nq_emb_cos_sim": 0.7993790507316589,
      "eval_nq_emb_cos_sim_sem": 0.008028297657571105,
      "eval_nq_emb_top1_equal": 0.265625,
      "eval_nq_emb_top1_equal_sem": 0.03919146934646163,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.4206948280334473,
      "eval_nq_n_ngrams_match_1": 21.614,
      "eval_nq_n_ngrams_match_2": 7.53,
      "eval_nq_n_ngrams_match_3": 3.316,
      "eval_nq_num_pred_words": 49.404,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 11.253675968494578,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4116959281410013,
      "eval_nq_runtime": 12.5266,
      "eval_nq_samples_per_second": 39.915,
      "eval_nq_steps_per_second": 0.08,
      "eval_nq_token_set_f1": 0.43264911018815577,
      "eval_nq_token_set_f1_sem": 0.004965873225720561,
      "eval_nq_token_set_precision": 0.38849705980467175,
      "eval_nq_token_set_recall": 0.49799832891518625,
      "eval_nq_true_num_tokens": 64.0,
      "step": 43750
    },
    {
      "epoch": 8.4,
      "learning_rate": 0.001,
      "loss": 2.7913,
      "step": 43752
    },
    {
      "epoch": 8.4,
      "learning_rate": 0.001,
      "loss": 2.7918,
      "step": 43764
    },
    {
      "epoch": 8.41,
      "learning_rate": 0.001,
      "loss": 2.784,
      "step": 43776
    },
    {
      "epoch": 8.41,
      "learning_rate": 0.001,
      "loss": 2.7799,
      "step": 43788
    },
    {
      "epoch": 8.41,
      "learning_rate": 0.001,
      "loss": 2.7774,
      "step": 43800
    },
    {
      "epoch": 8.41,
      "learning_rate": 0.001,
      "loss": 2.7851,
      "step": 43812
    },
    {
      "epoch": 8.41,
      "learning_rate": 0.001,
      "loss": 2.7882,
      "step": 43824
    },
    {
      "epoch": 8.42,
      "learning_rate": 0.001,
      "loss": 2.7819,
      "step": 43836
    },
    {
      "epoch": 8.42,
      "learning_rate": 0.001,
      "loss": 2.7885,
      "step": 43848
    },
    {
      "epoch": 8.42,
      "learning_rate": 0.001,
      "loss": 2.7754,
      "step": 43860
    },
    {
      "epoch": 8.42,
      "learning_rate": 0.001,
      "loss": 2.7903,
      "step": 43872
    },
    {
      "epoch": 8.43,
      "learning_rate": 0.001,
      "loss": 2.7701,
      "step": 43884
    },
    {
      "epoch": 8.43,
      "learning_rate": 0.001,
      "loss": 2.7852,
      "step": 43896
    },
    {
      "epoch": 8.43,
      "learning_rate": 0.001,
      "loss": 2.7846,
      "step": 43908
    },
    {
      "epoch": 8.43,
      "learning_rate": 0.001,
      "loss": 2.7845,
      "step": 43920
    },
    {
      "epoch": 8.44,
      "learning_rate": 0.001,
      "loss": 2.7871,
      "step": 43932
    },
    {
      "epoch": 8.44,
      "learning_rate": 0.001,
      "loss": 2.7864,
      "step": 43944
    },
    {
      "epoch": 8.44,
      "learning_rate": 0.001,
      "loss": 2.781,
      "step": 43956
    },
    {
      "epoch": 8.44,
      "learning_rate": 0.001,
      "loss": 2.7906,
      "step": 43968
    },
    {
      "epoch": 8.44,
      "learning_rate": 0.001,
      "loss": 2.7856,
      "step": 43980
    },
    {
      "epoch": 8.45,
      "learning_rate": 0.001,
      "loss": 2.7854,
      "step": 43992
    },
    {
      "epoch": 8.45,
      "learning_rate": 0.001,
      "loss": 2.781,
      "step": 44004
    },
    {
      "epoch": 8.45,
      "learning_rate": 0.001,
      "loss": 2.7868,
      "step": 44016
    },
    {
      "epoch": 8.45,
      "learning_rate": 0.001,
      "loss": 2.7853,
      "step": 44028
    },
    {
      "epoch": 8.46,
      "learning_rate": 0.001,
      "loss": 2.7719,
      "step": 44040
    },
    {
      "epoch": 8.46,
      "learning_rate": 0.001,
      "loss": 2.7774,
      "step": 44052
    },
    {
      "epoch": 8.46,
      "learning_rate": 0.001,
      "loss": 2.7786,
      "step": 44064
    },
    {
      "epoch": 8.46,
      "learning_rate": 0.001,
      "loss": 2.7846,
      "step": 44076
    },
    {
      "epoch": 8.47,
      "learning_rate": 0.001,
      "loss": 2.7842,
      "step": 44088
    },
    {
      "epoch": 8.47,
      "learning_rate": 0.001,
      "loss": 2.7916,
      "step": 44100
    },
    {
      "epoch": 8.47,
      "learning_rate": 0.001,
      "loss": 2.7861,
      "step": 44112
    },
    {
      "epoch": 8.47,
      "learning_rate": 0.001,
      "loss": 2.7797,
      "step": 44124
    },
    {
      "epoch": 8.47,
      "learning_rate": 0.001,
      "loss": 2.7787,
      "step": 44136
    },
    {
      "epoch": 8.48,
      "learning_rate": 0.001,
      "loss": 2.7876,
      "step": 44148
    },
    {
      "epoch": 8.48,
      "learning_rate": 0.001,
      "loss": 2.7783,
      "step": 44160
    },
    {
      "epoch": 8.48,
      "learning_rate": 0.001,
      "loss": 2.7773,
      "step": 44172
    },
    {
      "epoch": 8.48,
      "learning_rate": 0.001,
      "loss": 2.7775,
      "step": 44184
    },
    {
      "epoch": 8.49,
      "learning_rate": 0.001,
      "loss": 2.7783,
      "step": 44196
    },
    {
      "epoch": 8.49,
      "learning_rate": 0.001,
      "loss": 2.7855,
      "step": 44208
    },
    {
      "epoch": 8.49,
      "learning_rate": 0.001,
      "loss": 2.781,
      "step": 44220
    },
    {
      "epoch": 8.49,
      "learning_rate": 0.001,
      "loss": 2.7758,
      "step": 44232
    },
    {
      "epoch": 8.5,
      "learning_rate": 0.001,
      "loss": 2.7756,
      "step": 44244
    },
    {
      "epoch": 8.5,
      "learning_rate": 0.001,
      "loss": 2.7865,
      "step": 44256
    },
    {
      "epoch": 8.5,
      "learning_rate": 0.001,
      "loss": 2.7716,
      "step": 44268
    },
    {
      "epoch": 8.5,
      "learning_rate": 0.001,
      "loss": 2.7787,
      "step": 44280
    },
    {
      "epoch": 8.5,
      "learning_rate": 0.001,
      "loss": 2.7825,
      "step": 44292
    },
    {
      "epoch": 8.51,
      "learning_rate": 0.001,
      "loss": 2.7846,
      "step": 44304
    },
    {
      "epoch": 8.51,
      "learning_rate": 0.001,
      "loss": 2.7797,
      "step": 44316
    },
    {
      "epoch": 8.51,
      "learning_rate": 0.001,
      "loss": 2.7888,
      "step": 44328
    },
    {
      "epoch": 8.51,
      "learning_rate": 0.001,
      "loss": 2.7689,
      "step": 44340
    },
    {
      "epoch": 8.52,
      "learning_rate": 0.001,
      "loss": 2.7922,
      "step": 44352
    },
    {
      "epoch": 8.52,
      "learning_rate": 0.001,
      "loss": 2.77,
      "step": 44364
    },
    {
      "epoch": 8.52,
      "eval_ag_news_accuracy": 0.2985625,
      "eval_ag_news_bleu_score": 4.291809719559645,
      "eval_ag_news_bleu_score_sem": 0.14202437651325187,
      "eval_ag_news_emb_cos_sim": 0.7695584297180176,
      "eval_ag_news_emb_cos_sim_sem": 0.00787921240058693,
      "eval_ag_news_emb_top1_equal": 0.1484375,
      "eval_ag_news_emb_top1_equal_sem": 0.031548465007086954,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.81347393989563,
      "eval_ag_news_n_ngrams_match_1": 12.722,
      "eval_ag_news_n_ngrams_match_2": 2.614,
      "eval_ag_news_n_ngrams_match_3": 0.71,
      "eval_ag_news_num_pred_words": 46.164,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 45.307561536541535,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.31273392151992785,
      "eval_ag_news_runtime": 10.801,
      "eval_ag_news_samples_per_second": 46.292,
      "eval_ag_news_steps_per_second": 0.093,
      "eval_ag_news_token_set_f1": 0.3229267044627019,
      "eval_ag_news_token_set_f1_sem": 0.004233260819430299,
      "eval_ag_news_token_set_precision": 0.2997766880442096,
      "eval_ag_news_token_set_recall": 0.36729325100895005,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 44375
    },
    {
      "epoch": 8.52,
      "eval_anthropic_toxic_prompts_accuracy": 0.10415625,
      "eval_anthropic_toxic_prompts_bleu_score": 2.659425178421464,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10760492846382186,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6349513530731201,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010008689824133946,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.4967308044433594,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.478,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.554,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.528,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.546,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 33.00736784061575,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.18674929383812144,
      "eval_anthropic_toxic_prompts_runtime": 10.3269,
      "eval_anthropic_toxic_prompts_samples_per_second": 48.417,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.097,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3274998293274113,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006351060424454388,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.3814174997872746,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32270478698508404,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 44375
    },
    {
      "epoch": 8.52,
      "eval_arxiv_accuracy": 0.32190625,
      "eval_arxiv_bleu_score": 3.8319262827744947,
      "eval_arxiv_bleu_score_sem": 0.1095366128052342,
      "eval_arxiv_emb_cos_sim": 0.7129121422767639,
      "eval_arxiv_emb_cos_sim_sem": 0.007783770862134011,
      "eval_arxiv_emb_top1_equal": 0.203125,
      "eval_arxiv_emb_top1_equal_sem": 0.03570055125142555,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.6637754440307617,
      "eval_arxiv_n_ngrams_match_1": 13.664,
      "eval_arxiv_n_ngrams_match_2": 2.506,
      "eval_arxiv_n_ngrams_match_3": 0.538,
      "eval_arxiv_num_pred_words": 40.39,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 39.0083390095431,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.32292598975976605,
      "eval_arxiv_runtime": 10.6021,
      "eval_arxiv_samples_per_second": 47.16,
      "eval_arxiv_steps_per_second": 0.094,
      "eval_arxiv_token_set_f1": 0.3208577635863141,
      "eval_arxiv_token_set_f1_sem": 0.0040449914744743855,
      "eval_arxiv_token_set_precision": 0.26562525757372335,
      "eval_arxiv_token_set_recall": 0.42531107879627794,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 44375
    },
    {
      "epoch": 8.52,
      "eval_python_code_alpaca_accuracy": 0.1444375,
      "eval_python_code_alpaca_bleu_score": 3.561220880741698,
      "eval_python_code_alpaca_bleu_score_sem": 0.1016785175749165,
      "eval_python_code_alpaca_emb_cos_sim": 0.7064552903175354,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008590842181358136,
      "eval_python_code_alpaca_emb_top1_equal": 0.09375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.025864720141013958,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.161862850189209,
      "eval_python_code_alpaca_n_ngrams_match_1": 8.456,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.076,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.542,
      "eval_python_code_alpaca_num_pred_words": 42.658,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 23.614545341030077,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.2855763855082162,
      "eval_python_code_alpaca_runtime": 11.0184,
      "eval_python_code_alpaca_samples_per_second": 45.379,
      "eval_python_code_alpaca_steps_per_second": 0.091,
      "eval_python_code_alpaca_token_set_f1": 0.4337527075136103,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005524804980645295,
      "eval_python_code_alpaca_token_set_precision": 0.456940664183077,
      "eval_python_code_alpaca_token_set_recall": 0.441517397443801,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 44375
    },
    {
      "epoch": 8.52,
      "eval_wikibio_accuracy": 0.2968125,
      "eval_wikibio_bleu_score": 5.070998927409954,
      "eval_wikibio_bleu_score_sem": 0.18104906205014448,
      "eval_wikibio_emb_cos_sim": 0.7035855054855347,
      "eval_wikibio_emb_cos_sim_sem": 0.011877870945455714,
      "eval_wikibio_emb_top1_equal": 0.109375,
      "eval_wikibio_emb_top1_equal_sem": 0.027695207821224692,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.074985027313232,
      "eval_wikibio_n_ngrams_match_1": 9.364,
      "eval_wikibio_n_ngrams_match_2": 2.972,
      "eval_wikibio_n_ngrams_match_3": 1.03,
      "eval_wikibio_num_pred_words": 36.584,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 58.84959944513198,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.31476794312823275,
      "eval_wikibio_runtime": 10.1544,
      "eval_wikibio_samples_per_second": 49.24,
      "eval_wikibio_steps_per_second": 0.098,
      "eval_wikibio_token_set_f1": 0.2935711021463623,
      "eval_wikibio_token_set_f1_sem": 0.005645602466548096,
      "eval_wikibio_token_set_precision": 0.3020569331321563,
      "eval_wikibio_token_set_recall": 0.3023993335384537,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 44375
    },
    {
      "epoch": 8.52,
      "eval_nq_accuracy": 0.49346875,
      "eval_nq_bleu_score": 10.24725951532769,
      "eval_nq_bleu_score_sem": 0.42530713635991835,
      "eval_nq_emb_cos_sim": 0.7986217141151428,
      "eval_nq_emb_cos_sim_sem": 0.007920463628566968,
      "eval_nq_emb_top1_equal": 0.234375,
      "eval_nq_emb_top1_equal_sem": 0.03758909358128201,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.4164085388183594,
      "eval_nq_n_ngrams_match_1": 21.634,
      "eval_nq_n_ngrams_match_2": 7.536,
      "eval_nq_n_ngrams_match_3": 3.352,
      "eval_nq_num_pred_words": 49.11,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 11.20554268883261,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.41185383705597056,
      "eval_nq_runtime": 10.7717,
      "eval_nq_samples_per_second": 46.418,
      "eval_nq_steps_per_second": 0.093,
      "eval_nq_token_set_f1": 0.43351899242381453,
      "eval_nq_token_set_f1_sem": 0.00510057992256785,
      "eval_nq_token_set_precision": 0.38465580979919245,
      "eval_nq_token_set_recall": 0.5083314649684932,
      "eval_nq_true_num_tokens": 64.0,
      "step": 44375
    },
    {
      "epoch": 8.52,
      "learning_rate": 0.001,
      "loss": 2.7874,
      "step": 44376
    },
    {
      "epoch": 8.52,
      "learning_rate": 0.001,
      "loss": 2.7874,
      "step": 44388
    },
    {
      "epoch": 8.53,
      "learning_rate": 0.001,
      "loss": 2.7789,
      "step": 44400
    },
    {
      "epoch": 8.53,
      "learning_rate": 0.001,
      "loss": 2.7753,
      "step": 44412
    },
    {
      "epoch": 8.53,
      "learning_rate": 0.001,
      "loss": 2.7847,
      "step": 44424
    },
    {
      "epoch": 8.53,
      "learning_rate": 0.001,
      "loss": 2.7791,
      "step": 44436
    },
    {
      "epoch": 8.53,
      "learning_rate": 0.001,
      "loss": 2.7838,
      "step": 44448
    },
    {
      "epoch": 8.54,
      "learning_rate": 0.001,
      "loss": 2.7905,
      "step": 44460
    },
    {
      "epoch": 8.54,
      "learning_rate": 0.001,
      "loss": 2.7815,
      "step": 44472
    },
    {
      "epoch": 8.54,
      "learning_rate": 0.001,
      "loss": 2.786,
      "step": 44484
    },
    {
      "epoch": 8.54,
      "learning_rate": 0.001,
      "loss": 2.7862,
      "step": 44496
    },
    {
      "epoch": 8.55,
      "learning_rate": 0.001,
      "loss": 2.7911,
      "step": 44508
    },
    {
      "epoch": 8.55,
      "learning_rate": 0.001,
      "loss": 2.7829,
      "step": 44520
    },
    {
      "epoch": 8.55,
      "learning_rate": 0.001,
      "loss": 2.7816,
      "step": 44532
    },
    {
      "epoch": 8.55,
      "learning_rate": 0.001,
      "loss": 2.7829,
      "step": 44544
    },
    {
      "epoch": 8.56,
      "learning_rate": 0.001,
      "loss": 2.7761,
      "step": 44556
    },
    {
      "epoch": 8.56,
      "learning_rate": 0.001,
      "loss": 2.7737,
      "step": 44568
    },
    {
      "epoch": 8.56,
      "learning_rate": 0.001,
      "loss": 2.7638,
      "step": 44580
    },
    {
      "epoch": 8.56,
      "learning_rate": 0.001,
      "loss": 2.776,
      "step": 44592
    },
    {
      "epoch": 8.56,
      "learning_rate": 0.001,
      "loss": 2.7793,
      "step": 44604
    },
    {
      "epoch": 8.57,
      "learning_rate": 0.001,
      "loss": 2.7748,
      "step": 44616
    },
    {
      "epoch": 8.57,
      "learning_rate": 0.001,
      "loss": 2.7686,
      "step": 44628
    },
    {
      "epoch": 8.57,
      "learning_rate": 0.001,
      "loss": 2.7773,
      "step": 44640
    },
    {
      "epoch": 8.57,
      "learning_rate": 0.001,
      "loss": 2.7829,
      "step": 44652
    },
    {
      "epoch": 8.58,
      "learning_rate": 0.001,
      "loss": 2.7753,
      "step": 44664
    },
    {
      "epoch": 8.58,
      "learning_rate": 0.001,
      "loss": 2.7822,
      "step": 44676
    },
    {
      "epoch": 8.58,
      "learning_rate": 0.001,
      "loss": 2.773,
      "step": 44688
    },
    {
      "epoch": 8.58,
      "learning_rate": 0.001,
      "loss": 2.7747,
      "step": 44700
    },
    {
      "epoch": 8.59,
      "learning_rate": 0.001,
      "loss": 2.7878,
      "step": 44712
    },
    {
      "epoch": 8.59,
      "learning_rate": 0.001,
      "loss": 2.7755,
      "step": 44724
    },
    {
      "epoch": 8.59,
      "learning_rate": 0.001,
      "loss": 2.7809,
      "step": 44736
    },
    {
      "epoch": 8.59,
      "learning_rate": 0.001,
      "loss": 2.772,
      "step": 44748
    },
    {
      "epoch": 8.59,
      "learning_rate": 0.001,
      "loss": 2.7662,
      "step": 44760
    },
    {
      "epoch": 8.6,
      "learning_rate": 0.001,
      "loss": 2.779,
      "step": 44772
    },
    {
      "epoch": 8.6,
      "learning_rate": 0.001,
      "loss": 2.7859,
      "step": 44784
    },
    {
      "epoch": 8.6,
      "learning_rate": 0.001,
      "loss": 2.7768,
      "step": 44796
    },
    {
      "epoch": 8.6,
      "learning_rate": 0.001,
      "loss": 2.7768,
      "step": 44808
    },
    {
      "epoch": 8.61,
      "learning_rate": 0.001,
      "loss": 2.7768,
      "step": 44820
    },
    {
      "epoch": 8.61,
      "learning_rate": 0.001,
      "loss": 2.7815,
      "step": 44832
    },
    {
      "epoch": 8.61,
      "learning_rate": 0.001,
      "loss": 2.7897,
      "step": 44844
    },
    {
      "epoch": 8.61,
      "learning_rate": 0.001,
      "loss": 2.7753,
      "step": 44856
    },
    {
      "epoch": 8.62,
      "learning_rate": 0.001,
      "loss": 2.772,
      "step": 44868
    },
    {
      "epoch": 8.62,
      "learning_rate": 0.001,
      "loss": 2.7694,
      "step": 44880
    },
    {
      "epoch": 8.62,
      "learning_rate": 0.001,
      "loss": 2.7731,
      "step": 44892
    },
    {
      "epoch": 8.62,
      "learning_rate": 0.001,
      "loss": 2.7706,
      "step": 44904
    },
    {
      "epoch": 8.62,
      "learning_rate": 0.001,
      "loss": 2.7756,
      "step": 44916
    },
    {
      "epoch": 8.63,
      "learning_rate": 0.001,
      "loss": 2.7916,
      "step": 44928
    },
    {
      "epoch": 8.63,
      "learning_rate": 0.001,
      "loss": 2.7768,
      "step": 44940
    },
    {
      "epoch": 8.63,
      "learning_rate": 0.001,
      "loss": 2.7844,
      "step": 44952
    },
    {
      "epoch": 8.63,
      "learning_rate": 0.001,
      "loss": 2.7745,
      "step": 44964
    },
    {
      "epoch": 8.64,
      "learning_rate": 0.001,
      "loss": 2.7755,
      "step": 44976
    },
    {
      "epoch": 8.64,
      "learning_rate": 0.001,
      "loss": 2.7729,
      "step": 44988
    },
    {
      "epoch": 8.64,
      "learning_rate": 0.001,
      "loss": 2.7853,
      "step": 45000
    },
    {
      "epoch": 8.64,
      "eval_ag_news_accuracy": 0.29778125,
      "eval_ag_news_bleu_score": 4.252840525792193,
      "eval_ag_news_bleu_score_sem": 0.14399064432957762,
      "eval_ag_news_emb_cos_sim": 0.761573076248169,
      "eval_ag_news_emb_cos_sim_sem": 0.009271340587603476,
      "eval_ag_news_emb_top1_equal": 0.1640625,
      "eval_ag_news_emb_top1_equal_sem": 0.03286167651298939,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.8181345462799072,
      "eval_ag_news_n_ngrams_match_1": 12.546,
      "eval_ag_news_n_ngrams_match_2": 2.49,
      "eval_ag_news_n_ngrams_match_3": 0.654,
      "eval_ag_news_num_pred_words": 45.39,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 45.519215080909994,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3099763610330088,
      "eval_ag_news_runtime": 10.9598,
      "eval_ag_news_samples_per_second": 45.621,
      "eval_ag_news_steps_per_second": 0.091,
      "eval_ag_news_token_set_f1": 0.31806577721666496,
      "eval_ag_news_token_set_f1_sem": 0.004402298022498817,
      "eval_ag_news_token_set_precision": 0.2957539883176623,
      "eval_ag_news_token_set_recall": 0.3604130285861673,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 45000
    },
    {
      "epoch": 8.64,
      "eval_anthropic_toxic_prompts_accuracy": 0.1025,
      "eval_anthropic_toxic_prompts_bleu_score": 2.5315821913123773,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10042717556961148,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6284630298614502,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.01012618865233724,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02147948148198014,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.520474672317505,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.31,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.494,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.466,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.468,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 33.80046880345548,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.18441072889578983,
      "eval_anthropic_toxic_prompts_runtime": 10.6014,
      "eval_anthropic_toxic_prompts_samples_per_second": 47.163,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.094,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3225545158462773,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006462191919774411,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.38040121224395923,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.31025760395916663,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 45000
    },
    {
      "epoch": 8.64,
      "eval_arxiv_accuracy": 0.321375,
      "eval_arxiv_bleu_score": 3.6814723018023328,
      "eval_arxiv_bleu_score_sem": 0.1039049498476154,
      "eval_arxiv_emb_cos_sim": 0.7027502059936523,
      "eval_arxiv_emb_cos_sim_sem": 0.007261334991605668,
      "eval_arxiv_emb_top1_equal": 0.2421875,
      "eval_arxiv_emb_top1_equal_sem": 0.038014990119662626,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.6967861652374268,
      "eval_arxiv_n_ngrams_match_1": 13.286,
      "eval_arxiv_n_ngrams_match_2": 2.462,
      "eval_arxiv_n_ngrams_match_3": 0.474,
      "eval_arxiv_num_pred_words": 39.318,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 40.31752206839012,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.31524613343789953,
      "eval_arxiv_runtime": 10.9257,
      "eval_arxiv_samples_per_second": 45.764,
      "eval_arxiv_steps_per_second": 0.092,
      "eval_arxiv_token_set_f1": 0.3151740392038086,
      "eval_arxiv_token_set_f1_sem": 0.004050397018570638,
      "eval_arxiv_token_set_precision": 0.2599937548131332,
      "eval_arxiv_token_set_recall": 0.42169620401308466,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 45000
    },
    {
      "epoch": 8.64,
      "eval_python_code_alpaca_accuracy": 0.14265625,
      "eval_python_code_alpaca_bleu_score": 3.8009433394205474,
      "eval_python_code_alpaca_bleu_score_sem": 0.12128623274109651,
      "eval_python_code_alpaca_emb_cos_sim": 0.6998119950294495,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009761663193744548,
      "eval_python_code_alpaca_emb_top1_equal": 0.109375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.148423671722412,
      "eval_python_code_alpaca_n_ngrams_match_1": 8.656,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.17,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.6,
      "eval_python_code_alpaca_num_pred_words": 42.156,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 23.299308260081176,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.2961311542876535,
      "eval_python_code_alpaca_runtime": 10.8895,
      "eval_python_code_alpaca_samples_per_second": 45.916,
      "eval_python_code_alpaca_steps_per_second": 0.092,
      "eval_python_code_alpaca_token_set_f1": 0.4358760336980557,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005442843303239688,
      "eval_python_code_alpaca_token_set_precision": 0.46578876224594257,
      "eval_python_code_alpaca_token_set_recall": 0.4349771077123037,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 45000
    },
    {
      "epoch": 8.64,
      "eval_wikibio_accuracy": 0.294125,
      "eval_wikibio_bleu_score": 5.662584307024523,
      "eval_wikibio_bleu_score_sem": 0.20226142434278857,
      "eval_wikibio_emb_cos_sim": 0.6998697519302368,
      "eval_wikibio_emb_cos_sim_sem": 0.010777700563519212,
      "eval_wikibio_emb_top1_equal": 0.1484375,
      "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.150162220001221,
      "eval_wikibio_n_ngrams_match_1": 9.824,
      "eval_wikibio_n_ngrams_match_2": 3.28,
      "eval_wikibio_n_ngrams_match_3": 1.194,
      "eval_wikibio_num_pred_words": 37.482,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 63.44429139641754,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.33701230416048844,
      "eval_wikibio_runtime": 12.8761,
      "eval_wikibio_samples_per_second": 38.832,
      "eval_wikibio_steps_per_second": 0.078,
      "eval_wikibio_token_set_f1": 0.30976859066711854,
      "eval_wikibio_token_set_f1_sem": 0.005179778718883501,
      "eval_wikibio_token_set_precision": 0.31861442959538344,
      "eval_wikibio_token_set_recall": 0.3155990255051859,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 45000
    },
    {
      "epoch": 8.64,
      "eval_nq_accuracy": 0.4961875,
      "eval_nq_bleu_score": 10.13577607583043,
      "eval_nq_bleu_score_sem": 0.4370365677875096,
      "eval_nq_emb_cos_sim": 0.798082172870636,
      "eval_nq_emb_cos_sim_sem": 0.007582819956790132,
      "eval_nq_emb_top1_equal": 0.234375,
      "eval_nq_emb_top1_equal_sem": 0.03758909358128201,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.411942958831787,
      "eval_nq_n_ngrams_match_1": 21.49,
      "eval_nq_n_ngrams_match_2": 7.38,
      "eval_nq_n_ngrams_match_3": 3.282,
      "eval_nq_num_pred_words": 48.64,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 11.155615002669718,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.41128913931593736,
      "eval_nq_runtime": 11.0077,
      "eval_nq_samples_per_second": 45.423,
      "eval_nq_steps_per_second": 0.091,
      "eval_nq_token_set_f1": 0.4320660897067614,
      "eval_nq_token_set_f1_sem": 0.005146433859068285,
      "eval_nq_token_set_precision": 0.3832323142103081,
      "eval_nq_token_set_recall": 0.5062301708201041,
      "eval_nq_true_num_tokens": 64.0,
      "step": 45000
    },
    {
      "epoch": 8.64,
      "learning_rate": 0.001,
      "loss": 2.7753,
      "step": 45012
    },
    {
      "epoch": 8.65,
      "learning_rate": 0.001,
      "loss": 2.7798,
      "step": 45024
    },
    {
      "epoch": 8.65,
      "learning_rate": 0.001,
      "loss": 2.7824,
      "step": 45036
    },
    {
      "epoch": 8.65,
      "learning_rate": 0.001,
      "loss": 2.7792,
      "step": 45048
    },
    {
      "epoch": 8.65,
      "learning_rate": 0.001,
      "loss": 2.7785,
      "step": 45060
    },
    {
      "epoch": 8.65,
      "learning_rate": 0.001,
      "loss": 2.7735,
      "step": 45072
    },
    {
      "epoch": 8.66,
      "learning_rate": 0.001,
      "loss": 2.7846,
      "step": 45084
    },
    {
      "epoch": 8.66,
      "learning_rate": 0.001,
      "loss": 2.7724,
      "step": 45096
    },
    {
      "epoch": 8.66,
      "learning_rate": 0.001,
      "loss": 2.7724,
      "step": 45108
    },
    {
      "epoch": 8.66,
      "learning_rate": 0.001,
      "loss": 2.7705,
      "step": 45120
    },
    {
      "epoch": 8.67,
      "learning_rate": 0.001,
      "loss": 2.7803,
      "step": 45132
    },
    {
      "epoch": 8.67,
      "learning_rate": 0.001,
      "loss": 2.7714,
      "step": 45144
    },
    {
      "epoch": 8.67,
      "learning_rate": 0.001,
      "loss": 2.782,
      "step": 45156
    },
    {
      "epoch": 8.67,
      "learning_rate": 0.001,
      "loss": 2.7797,
      "step": 45168
    },
    {
      "epoch": 8.68,
      "learning_rate": 0.001,
      "loss": 2.7732,
      "step": 45180
    },
    {
      "epoch": 8.68,
      "learning_rate": 0.001,
      "loss": 2.7729,
      "step": 45192
    },
    {
      "epoch": 8.68,
      "learning_rate": 0.001,
      "loss": 2.7803,
      "step": 45204
    },
    {
      "epoch": 8.68,
      "learning_rate": 0.001,
      "loss": 2.7669,
      "step": 45216
    },
    {
      "epoch": 8.68,
      "learning_rate": 0.001,
      "loss": 2.7791,
      "step": 45228
    },
    {
      "epoch": 8.69,
      "learning_rate": 0.001,
      "loss": 2.7834,
      "step": 45240
    },
    {
      "epoch": 8.69,
      "learning_rate": 0.001,
      "loss": 2.7811,
      "step": 45252
    },
    {
      "epoch": 8.69,
      "learning_rate": 0.001,
      "loss": 2.7766,
      "step": 45264
    },
    {
      "epoch": 8.69,
      "learning_rate": 0.001,
      "loss": 2.7703,
      "step": 45276
    },
    {
      "epoch": 8.7,
      "learning_rate": 0.001,
      "loss": 2.7712,
      "step": 45288
    },
    {
      "epoch": 8.7,
      "learning_rate": 0.001,
      "loss": 2.7797,
      "step": 45300
    },
    {
      "epoch": 8.7,
      "learning_rate": 0.001,
      "loss": 2.7919,
      "step": 45312
    },
    {
      "epoch": 8.7,
      "learning_rate": 0.001,
      "loss": 2.7725,
      "step": 45324
    },
    {
      "epoch": 8.71,
      "learning_rate": 0.001,
      "loss": 2.7795,
      "step": 45336
    },
    {
      "epoch": 8.71,
      "learning_rate": 0.001,
      "loss": 2.7747,
      "step": 45348
    },
    {
      "epoch": 8.71,
      "learning_rate": 0.001,
      "loss": 2.7695,
      "step": 45360
    },
    {
      "epoch": 8.71,
      "learning_rate": 0.001,
      "loss": 2.7867,
      "step": 45372
    },
    {
      "epoch": 8.71,
      "learning_rate": 0.001,
      "loss": 2.7756,
      "step": 45384
    },
    {
      "epoch": 8.72,
      "learning_rate": 0.001,
      "loss": 2.7844,
      "step": 45396
    },
    {
      "epoch": 8.72,
      "learning_rate": 0.001,
      "loss": 2.7747,
      "step": 45408
    },
    {
      "epoch": 8.72,
      "learning_rate": 0.001,
      "loss": 2.7765,
      "step": 45420
    },
    {
      "epoch": 8.72,
      "learning_rate": 0.001,
      "loss": 2.7729,
      "step": 45432
    },
    {
      "epoch": 8.73,
      "learning_rate": 0.001,
      "loss": 2.78,
      "step": 45444
    },
    {
      "epoch": 8.73,
      "learning_rate": 0.001,
      "loss": 2.779,
      "step": 45456
    },
    {
      "epoch": 8.73,
      "learning_rate": 0.001,
      "loss": 2.7784,
      "step": 45468
    },
    {
      "epoch": 8.73,
      "learning_rate": 0.001,
      "loss": 2.7881,
      "step": 45480
    },
    {
      "epoch": 8.74,
      "learning_rate": 0.001,
      "loss": 2.776,
      "step": 45492
    },
    {
      "epoch": 8.74,
      "learning_rate": 0.001,
      "loss": 2.775,
      "step": 45504
    },
    {
      "epoch": 8.74,
      "learning_rate": 0.001,
      "loss": 2.7809,
      "step": 45516
    },
    {
      "epoch": 8.74,
      "learning_rate": 0.001,
      "loss": 2.7689,
      "step": 45528
    },
    {
      "epoch": 8.74,
      "learning_rate": 0.001,
      "loss": 2.7737,
      "step": 45540
    },
    {
      "epoch": 8.75,
      "learning_rate": 0.001,
      "loss": 2.7702,
      "step": 45552
    },
    {
      "epoch": 8.75,
      "learning_rate": 0.001,
      "loss": 2.7819,
      "step": 45564
    },
    {
      "epoch": 8.75,
      "learning_rate": 0.001,
      "loss": 2.7718,
      "step": 45576
    },
    {
      "epoch": 8.75,
      "learning_rate": 0.001,
      "loss": 2.7724,
      "step": 45588
    },
    {
      "epoch": 8.76,
      "learning_rate": 0.001,
      "loss": 2.7775,
      "step": 45600
    },
    {
      "epoch": 8.76,
      "learning_rate": 0.001,
      "loss": 2.7866,
      "step": 45612
    },
    {
      "epoch": 8.76,
      "learning_rate": 0.001,
      "loss": 2.7763,
      "step": 45624
    },
    {
      "epoch": 8.76,
      "eval_ag_news_accuracy": 0.2981875,
      "eval_ag_news_bleu_score": 4.3578400637933274,
      "eval_ag_news_bleu_score_sem": 0.14883707926161324,
      "eval_ag_news_emb_cos_sim": 0.763740062713623,
      "eval_ag_news_emb_cos_sim_sem": 0.008077497517762496,
      "eval_ag_news_emb_top1_equal": 0.2109375,
      "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.824906826019287,
      "eval_ag_news_n_ngrams_match_1": 12.81,
      "eval_ag_news_n_ngrams_match_2": 2.642,
      "eval_ag_news_n_ngrams_match_3": 0.728,
      "eval_ag_news_num_pred_words": 46.29,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 45.828530140816156,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3138468381859568,
      "eval_ag_news_runtime": 10.7475,
      "eval_ag_news_samples_per_second": 46.522,
      "eval_ag_news_steps_per_second": 0.093,
      "eval_ag_news_token_set_f1": 0.3250934128970296,
      "eval_ag_news_token_set_f1_sem": 0.004294252512354449,
      "eval_ag_news_token_set_precision": 0.30407289679426225,
      "eval_ag_news_token_set_recall": 0.3649210660592528,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 45625
    },
    {
      "epoch": 8.76,
      "eval_anthropic_toxic_prompts_accuracy": 0.103875,
      "eval_anthropic_toxic_prompts_bleu_score": 2.6399002406813015,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10639424604383535,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6314331293106079,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008894787472116989,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.4990384578704834,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.474,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.53,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.538,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.836,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 33.08362536025581,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.18654160059443092,
      "eval_anthropic_toxic_prompts_runtime": 11.0286,
      "eval_anthropic_toxic_prompts_samples_per_second": 45.337,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.091,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3312069635362678,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006540513237655385,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.38125365875602346,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32555335433824906,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 45625
    },
    {
      "epoch": 8.76,
      "eval_arxiv_accuracy": 0.32359375,
      "eval_arxiv_bleu_score": 3.701269023552277,
      "eval_arxiv_bleu_score_sem": 0.10371974574933927,
      "eval_arxiv_emb_cos_sim": 0.7032526135444641,
      "eval_arxiv_emb_cos_sim_sem": 0.007770770535001924,
      "eval_arxiv_emb_top1_equal": 0.1640625,
      "eval_arxiv_emb_top1_equal_sem": 0.03286167651298939,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.6717491149902344,
      "eval_arxiv_n_ngrams_match_1": 13.438,
      "eval_arxiv_n_ngrams_match_2": 2.416,
      "eval_arxiv_n_ngrams_match_3": 0.476,
      "eval_arxiv_num_pred_words": 40.228,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 39.32062203596728,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3178484965802355,
      "eval_arxiv_runtime": 10.3312,
      "eval_arxiv_samples_per_second": 48.397,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.314820727511316,
      "eval_arxiv_token_set_f1_sem": 0.004035596099446558,
      "eval_arxiv_token_set_precision": 0.2603014215915019,
      "eval_arxiv_token_set_recall": 0.41557391776458413,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 45625
    },
    {
      "epoch": 8.76,
      "eval_python_code_alpaca_accuracy": 0.1435,
      "eval_python_code_alpaca_bleu_score": 3.7175162554215166,
      "eval_python_code_alpaca_bleu_score_sem": 0.11682191147286755,
      "eval_python_code_alpaca_emb_cos_sim": 0.709485650062561,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.00816893181288434,
      "eval_python_code_alpaca_emb_top1_equal": 0.1015625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.026804565886848545,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.166703701019287,
      "eval_python_code_alpaca_n_ngrams_match_1": 8.694,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.252,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.686,
      "eval_python_code_alpaca_num_pred_words": 44.552,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 23.729136969158485,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.2842620228140523,
      "eval_python_code_alpaca_runtime": 10.8712,
      "eval_python_code_alpaca_samples_per_second": 45.993,
      "eval_python_code_alpaca_steps_per_second": 0.092,
      "eval_python_code_alpaca_token_set_f1": 0.43770505860819836,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005227562385291587,
      "eval_python_code_alpaca_token_set_precision": 0.46721530110310105,
      "eval_python_code_alpaca_token_set_recall": 0.43838408230338305,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 45625
    },
    {
      "epoch": 8.76,
      "eval_wikibio_accuracy": 0.2973125,
      "eval_wikibio_bleu_score": 5.505173689369643,
      "eval_wikibio_bleu_score_sem": 0.18336472313143126,
      "eval_wikibio_emb_cos_sim": 0.7184184193611145,
      "eval_wikibio_emb_cos_sim_sem": 0.01003264701478985,
      "eval_wikibio_emb_top1_equal": 0.1484375,
      "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.102202892303467,
      "eval_wikibio_n_ngrams_match_1": 10.128,
      "eval_wikibio_n_ngrams_match_2": 3.286,
      "eval_wikibio_n_ngrams_match_3": 1.16,
      "eval_wikibio_num_pred_words": 38.672,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 60.4733572677629,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.341123537531227,
      "eval_wikibio_runtime": 10.3262,
      "eval_wikibio_samples_per_second": 48.421,
      "eval_wikibio_steps_per_second": 0.097,
      "eval_wikibio_token_set_f1": 0.3140838242134466,
      "eval_wikibio_token_set_f1_sem": 0.0048917191914225186,
      "eval_wikibio_token_set_precision": 0.3275089197620919,
      "eval_wikibio_token_set_recall": 0.3139662622455534,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 45625
    },
    {
      "epoch": 8.76,
      "eval_nq_accuracy": 0.4973125,
      "eval_nq_bleu_score": 9.879743998732101,
      "eval_nq_bleu_score_sem": 0.4298679560786954,
      "eval_nq_emb_cos_sim": 0.7971436977386475,
      "eval_nq_emb_cos_sim_sem": 0.008095892641503977,
      "eval_nq_emb_top1_equal": 0.2734375,
      "eval_nq_emb_top1_equal_sem": 0.03955156411760461,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.4060308933258057,
      "eval_nq_n_ngrams_match_1": 21.548,
      "eval_nq_n_ngrams_match_2": 7.334,
      "eval_nq_n_ngrams_match_3": 3.186,
      "eval_nq_num_pred_words": 48.726,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 11.089856850799437,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4125605925813294,
      "eval_nq_runtime": 10.3239,
      "eval_nq_samples_per_second": 48.431,
      "eval_nq_steps_per_second": 0.097,
      "eval_nq_token_set_f1": 0.43067224105822605,
      "eval_nq_token_set_f1_sem": 0.005032645768006537,
      "eval_nq_token_set_precision": 0.38256301616897675,
      "eval_nq_token_set_recall": 0.5040065194179415,
      "eval_nq_true_num_tokens": 64.0,
      "step": 45625
    },
    {
      "epoch": 8.76,
      "learning_rate": 0.001,
      "loss": 2.7735,
      "step": 45636
    },
    {
      "epoch": 8.76,
      "learning_rate": 0.001,
      "loss": 2.7899,
      "step": 45648
    },
    {
      "epoch": 8.77,
      "learning_rate": 0.001,
      "loss": 2.763,
      "step": 45660
    },
    {
      "epoch": 8.77,
      "learning_rate": 0.001,
      "loss": 2.7674,
      "step": 45672
    },
    {
      "epoch": 8.77,
      "learning_rate": 0.001,
      "loss": 2.7747,
      "step": 45684
    },
    {
      "epoch": 8.77,
      "learning_rate": 0.001,
      "loss": 2.7745,
      "step": 45696
    },
    {
      "epoch": 8.78,
      "learning_rate": 0.001,
      "loss": 2.7774,
      "step": 45708
    },
    {
      "epoch": 8.78,
      "learning_rate": 0.001,
      "loss": 2.7728,
      "step": 45720
    },
    {
      "epoch": 8.78,
      "learning_rate": 0.001,
      "loss": 2.7789,
      "step": 45732
    },
    {
      "epoch": 8.78,
      "learning_rate": 0.001,
      "loss": 2.7671,
      "step": 45744
    },
    {
      "epoch": 8.79,
      "learning_rate": 0.001,
      "loss": 2.7653,
      "step": 45756
    },
    {
      "epoch": 8.79,
      "learning_rate": 0.001,
      "loss": 2.7749,
      "step": 45768
    },
    {
      "epoch": 8.79,
      "learning_rate": 0.001,
      "loss": 2.769,
      "step": 45780
    },
    {
      "epoch": 8.79,
      "learning_rate": 0.001,
      "loss": 2.7781,
      "step": 45792
    },
    {
      "epoch": 8.79,
      "learning_rate": 0.001,
      "loss": 2.7702,
      "step": 45804
    },
    {
      "epoch": 8.8,
      "learning_rate": 0.001,
      "loss": 2.7756,
      "step": 45816
    },
    {
      "epoch": 8.8,
      "learning_rate": 0.001,
      "loss": 2.7747,
      "step": 45828
    },
    {
      "epoch": 8.8,
      "learning_rate": 0.001,
      "loss": 2.7691,
      "step": 45840
    },
    {
      "epoch": 8.8,
      "learning_rate": 0.001,
      "loss": 2.7792,
      "step": 45852
    },
    {
      "epoch": 8.81,
      "learning_rate": 0.001,
      "loss": 2.7765,
      "step": 45864
    },
    {
      "epoch": 8.81,
      "learning_rate": 0.001,
      "loss": 2.7764,
      "step": 45876
    },
    {
      "epoch": 8.81,
      "learning_rate": 0.001,
      "loss": 2.7789,
      "step": 45888
    },
    {
      "epoch": 8.81,
      "learning_rate": 0.001,
      "loss": 2.7811,
      "step": 45900
    },
    {
      "epoch": 8.82,
      "learning_rate": 0.001,
      "loss": 2.7754,
      "step": 45912
    },
    {
      "epoch": 8.82,
      "learning_rate": 0.001,
      "loss": 2.7782,
      "step": 45924
    },
    {
      "epoch": 8.82,
      "learning_rate": 0.001,
      "loss": 2.7747,
      "step": 45936
    },
    {
      "epoch": 8.82,
      "learning_rate": 0.001,
      "loss": 2.777,
      "step": 45948
    },
    {
      "epoch": 8.82,
      "learning_rate": 0.001,
      "loss": 2.7843,
      "step": 45960
    },
    {
      "epoch": 8.83,
      "learning_rate": 0.001,
      "loss": 2.7663,
      "step": 45972
    },
    {
      "epoch": 8.83,
      "learning_rate": 0.001,
      "loss": 2.7744,
      "step": 45984
    },
    {
      "epoch": 8.83,
      "learning_rate": 0.001,
      "loss": 2.7813,
      "step": 45996
    },
    {
      "epoch": 8.83,
      "learning_rate": 0.001,
      "loss": 2.7748,
      "step": 46008
    },
    {
      "epoch": 8.84,
      "learning_rate": 0.001,
      "loss": 2.7655,
      "step": 46020
    },
    {
      "epoch": 8.84,
      "learning_rate": 0.001,
      "loss": 2.781,
      "step": 46032
    },
    {
      "epoch": 8.84,
      "learning_rate": 0.001,
      "loss": 2.7822,
      "step": 46044
    },
    {
      "epoch": 8.84,
      "learning_rate": 0.001,
      "loss": 2.7846,
      "step": 46056
    },
    {
      "epoch": 8.85,
      "learning_rate": 0.001,
      "loss": 2.7745,
      "step": 46068
    },
    {
      "epoch": 8.85,
      "learning_rate": 0.001,
      "loss": 2.7844,
      "step": 46080
    },
    {
      "epoch": 8.85,
      "learning_rate": 0.001,
      "loss": 2.7663,
      "step": 46092
    },
    {
      "epoch": 8.85,
      "learning_rate": 0.001,
      "loss": 2.7775,
      "step": 46104
    },
    {
      "epoch": 8.85,
      "learning_rate": 0.001,
      "loss": 2.7611,
      "step": 46116
    },
    {
      "epoch": 8.86,
      "learning_rate": 0.001,
      "loss": 2.7722,
      "step": 46128
    },
    {
      "epoch": 8.86,
      "learning_rate": 0.001,
      "loss": 2.7753,
      "step": 46140
    },
    {
      "epoch": 8.86,
      "learning_rate": 0.001,
      "loss": 2.7818,
      "step": 46152
    },
    {
      "epoch": 8.86,
      "learning_rate": 0.001,
      "loss": 2.7633,
      "step": 46164
    },
    {
      "epoch": 8.87,
      "learning_rate": 0.001,
      "loss": 2.7788,
      "step": 46176
    },
    {
      "epoch": 8.87,
      "learning_rate": 0.001,
      "loss": 2.789,
      "step": 46188
    },
    {
      "epoch": 8.87,
      "learning_rate": 0.001,
      "loss": 2.7722,
      "step": 46200
    },
    {
      "epoch": 8.87,
      "learning_rate": 0.001,
      "loss": 2.7755,
      "step": 46212
    },
    {
      "epoch": 8.88,
      "learning_rate": 0.001,
      "loss": 2.7746,
      "step": 46224
    },
    {
      "epoch": 8.88,
      "learning_rate": 0.001,
      "loss": 2.7727,
      "step": 46236
    },
    {
      "epoch": 8.88,
      "learning_rate": 0.001,
      "loss": 2.766,
      "step": 46248
    },
    {
      "epoch": 8.88,
      "eval_ag_news_accuracy": 0.29840625,
      "eval_ag_news_bleu_score": 4.31373880770984,
      "eval_ag_news_bleu_score_sem": 0.14891728801673904,
      "eval_ag_news_emb_cos_sim": 0.7668585777282715,
      "eval_ag_news_emb_cos_sim_sem": 0.008202250872265726,
      "eval_ag_news_emb_top1_equal": 0.125,
      "eval_ag_news_emb_top1_equal_sem": 0.02934655822437397,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.8158342838287354,
      "eval_ag_news_n_ngrams_match_1": 12.81,
      "eval_ag_news_n_ngrams_match_2": 2.57,
      "eval_ag_news_n_ngrams_match_3": 0.706,
      "eval_ag_news_num_pred_words": 46.0,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 45.41462927317122,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3122905712875661,
      "eval_ag_news_runtime": 13.3588,
      "eval_ag_news_samples_per_second": 37.428,
      "eval_ag_news_steps_per_second": 0.075,
      "eval_ag_news_token_set_f1": 0.32392258441957117,
      "eval_ag_news_token_set_f1_sem": 0.004329541468647039,
      "eval_ag_news_token_set_precision": 0.30184441470228995,
      "eval_ag_news_token_set_recall": 0.36679101991713964,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 46250
    },
    {
      "epoch": 8.88,
      "eval_anthropic_toxic_prompts_accuracy": 0.1038125,
      "eval_anthropic_toxic_prompts_bleu_score": 2.6833036236164887,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10954931079641665,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6263077259063721,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010921873737018344,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02147948148198014,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.5129191875457764,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.434,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.558,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.508,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.23,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 33.54605220678183,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.19023205204409382,
      "eval_anthropic_toxic_prompts_runtime": 11.8708,
      "eval_anthropic_toxic_prompts_samples_per_second": 42.12,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.084,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3229738713485022,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006354645238200712,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.38190129987442994,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3117707201913881,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 46250
    },
    {
      "epoch": 8.88,
      "eval_arxiv_accuracy": 0.32334375,
      "eval_arxiv_bleu_score": 3.8999461751598536,
      "eval_arxiv_bleu_score_sem": 0.11473406671058033,
      "eval_arxiv_emb_cos_sim": 0.7049464583396912,
      "eval_arxiv_emb_cos_sim_sem": 0.008832316623000732,
      "eval_arxiv_emb_top1_equal": 0.203125,
      "eval_arxiv_emb_top1_equal_sem": 0.03570055125142555,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.6608574390411377,
      "eval_arxiv_n_ngrams_match_1": 13.764,
      "eval_arxiv_n_ngrams_match_2": 2.548,
      "eval_arxiv_n_ngrams_match_3": 0.548,
      "eval_arxiv_num_pred_words": 40.022,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 38.89467839344808,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3231329056106329,
      "eval_arxiv_runtime": 12.1071,
      "eval_arxiv_samples_per_second": 41.298,
      "eval_arxiv_steps_per_second": 0.083,
      "eval_arxiv_token_set_f1": 0.31954391318957837,
      "eval_arxiv_token_set_f1_sem": 0.004254608967652773,
      "eval_arxiv_token_set_precision": 0.266894297936754,
      "eval_arxiv_token_set_recall": 0.4180085983595963,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 46250
    },
    {
      "epoch": 8.88,
      "eval_python_code_alpaca_accuracy": 0.14528125,
      "eval_python_code_alpaca_bleu_score": 3.6883015490260704,
      "eval_python_code_alpaca_bleu_score_sem": 0.11907441131673664,
      "eval_python_code_alpaca_emb_cos_sim": 0.7158737182617188,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008345321642258496,
      "eval_python_code_alpaca_emb_top1_equal": 0.1171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.148151159286499,
      "eval_python_code_alpaca_n_ngrams_match_1": 8.682,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.24,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.664,
      "eval_python_code_alpaca_num_pred_words": 45.278,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 23.292959773892136,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.2826779664379918,
      "eval_python_code_alpaca_runtime": 10.3254,
      "eval_python_code_alpaca_samples_per_second": 48.424,
      "eval_python_code_alpaca_steps_per_second": 0.097,
      "eval_python_code_alpaca_token_set_f1": 0.4362769880595835,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005545972938048647,
      "eval_python_code_alpaca_token_set_precision": 0.4676311072069239,
      "eval_python_code_alpaca_token_set_recall": 0.4350531625377758,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 46250
    },
    {
      "epoch": 8.88,
      "eval_wikibio_accuracy": 0.30046875,
      "eval_wikibio_bleu_score": 5.282017018894081,
      "eval_wikibio_bleu_score_sem": 0.19720415492134732,
      "eval_wikibio_emb_cos_sim": 0.6897943615913391,
      "eval_wikibio_emb_cos_sim_sem": 0.012623049617641591,
      "eval_wikibio_emb_top1_equal": 0.140625,
      "eval_wikibio_emb_top1_equal_sem": 0.030847557647994725,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.131740093231201,
      "eval_wikibio_n_ngrams_match_1": 9.526,
      "eval_wikibio_n_ngrams_match_2": 3.098,
      "eval_wikibio_n_ngrams_match_3": 1.082,
      "eval_wikibio_num_pred_words": 36.246,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 62.28621250724138,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3235842453627053,
      "eval_wikibio_runtime": 12.5273,
      "eval_wikibio_samples_per_second": 39.913,
      "eval_wikibio_steps_per_second": 0.08,
      "eval_wikibio_token_set_f1": 0.30051406466350117,
      "eval_wikibio_token_set_f1_sem": 0.005713289104704219,
      "eval_wikibio_token_set_precision": 0.3073511396358744,
      "eval_wikibio_token_set_recall": 0.31177012278177396,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 46250
    },
    {
      "epoch": 8.88,
      "eval_nq_accuracy": 0.49709375,
      "eval_nq_bleu_score": 10.638222654418671,
      "eval_nq_bleu_score_sem": 0.45024079081057866,
      "eval_nq_emb_cos_sim": 0.8051466941833496,
      "eval_nq_emb_cos_sim_sem": 0.007682704036493903,
      "eval_nq_emb_top1_equal": 0.21875,
      "eval_nq_emb_top1_equal_sem": 0.03668319712192295,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.403503179550171,
      "eval_nq_n_ngrams_match_1": 21.642,
      "eval_nq_n_ngrams_match_2": 7.668,
      "eval_nq_n_ngrams_match_3": 3.518,
      "eval_nq_num_pred_words": 48.744,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 11.061860265451713,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4145017684330846,
      "eval_nq_runtime": 11.0429,
      "eval_nq_samples_per_second": 45.278,
      "eval_nq_steps_per_second": 0.091,
      "eval_nq_token_set_f1": 0.4342943505567684,
      "eval_nq_token_set_f1_sem": 0.005005554288096789,
      "eval_nq_token_set_precision": 0.3870788850096803,
      "eval_nq_token_set_recall": 0.5057862840241787,
      "eval_nq_true_num_tokens": 64.0,
      "step": 46250
    },
    {
      "epoch": 8.88,
      "learning_rate": 0.001,
      "loss": 2.774,
      "step": 46260
    },
    {
      "epoch": 8.88,
      "learning_rate": 0.001,
      "loss": 2.7667,
      "step": 46272
    },
    {
      "epoch": 8.89,
      "learning_rate": 0.001,
      "loss": 2.7829,
      "step": 46284
    },
    {
      "epoch": 8.89,
      "learning_rate": 0.001,
      "loss": 2.7785,
      "step": 46296
    },
    {
      "epoch": 8.89,
      "learning_rate": 0.001,
      "loss": 2.7654,
      "step": 46308
    },
    {
      "epoch": 8.89,
      "learning_rate": 0.001,
      "loss": 2.7785,
      "step": 46320
    },
    {
      "epoch": 8.9,
      "learning_rate": 0.001,
      "loss": 2.7762,
      "step": 46332
    },
    {
      "epoch": 8.9,
      "learning_rate": 0.001,
      "loss": 2.7741,
      "step": 46344
    },
    {
      "epoch": 8.9,
      "learning_rate": 0.001,
      "loss": 2.7732,
      "step": 46356
    },
    {
      "epoch": 8.9,
      "learning_rate": 0.001,
      "loss": 2.785,
      "step": 46368
    },
    {
      "epoch": 8.91,
      "learning_rate": 0.001,
      "loss": 2.7871,
      "step": 46380
    },
    {
      "epoch": 8.91,
      "learning_rate": 0.001,
      "loss": 2.7701,
      "step": 46392
    },
    {
      "epoch": 8.91,
      "learning_rate": 0.001,
      "loss": 2.7816,
      "step": 46404
    },
    {
      "epoch": 8.91,
      "learning_rate": 0.001,
      "loss": 2.7728,
      "step": 46416
    },
    {
      "epoch": 8.91,
      "learning_rate": 0.001,
      "loss": 2.7675,
      "step": 46428
    },
    {
      "epoch": 8.92,
      "learning_rate": 0.001,
      "loss": 2.7648,
      "step": 46440
    },
    {
      "epoch": 8.92,
      "learning_rate": 0.001,
      "loss": 2.7681,
      "step": 46452
    },
    {
      "epoch": 8.92,
      "learning_rate": 0.001,
      "loss": 2.773,
      "step": 46464
    },
    {
      "epoch": 8.92,
      "learning_rate": 0.001,
      "loss": 2.7703,
      "step": 46476
    },
    {
      "epoch": 8.93,
      "learning_rate": 0.001,
      "loss": 2.7712,
      "step": 46488
    },
    {
      "epoch": 8.93,
      "learning_rate": 0.001,
      "loss": 2.7682,
      "step": 46500
    },
    {
      "epoch": 8.93,
      "learning_rate": 0.001,
      "loss": 2.7594,
      "step": 46512
    },
    {
      "epoch": 8.93,
      "learning_rate": 0.001,
      "loss": 2.7809,
      "step": 46524
    },
    {
      "epoch": 8.94,
      "learning_rate": 0.001,
      "loss": 2.7777,
      "step": 46536
    },
    {
      "epoch": 8.94,
      "learning_rate": 0.001,
      "loss": 2.7752,
      "step": 46548
    },
    {
      "epoch": 8.94,
      "learning_rate": 0.001,
      "loss": 2.7684,
      "step": 46560
    },
    {
      "epoch": 8.94,
      "learning_rate": 0.001,
      "loss": 2.7754,
      "step": 46572
    },
    {
      "epoch": 8.94,
      "learning_rate": 0.001,
      "loss": 2.763,
      "step": 46584
    },
    {
      "epoch": 8.95,
      "learning_rate": 0.001,
      "loss": 2.7762,
      "step": 46596
    },
    {
      "epoch": 8.95,
      "learning_rate": 0.001,
      "loss": 2.773,
      "step": 46608
    },
    {
      "epoch": 8.95,
      "learning_rate": 0.001,
      "loss": 2.7737,
      "step": 46620
    },
    {
      "epoch": 8.95,
      "learning_rate": 0.001,
      "loss": 2.7605,
      "step": 46632
    },
    {
      "epoch": 8.96,
      "learning_rate": 0.001,
      "loss": 2.7769,
      "step": 46644
    },
    {
      "epoch": 8.96,
      "learning_rate": 0.001,
      "loss": 2.7697,
      "step": 46656
    },
    {
      "epoch": 8.96,
      "learning_rate": 0.001,
      "loss": 2.7778,
      "step": 46668
    },
    {
      "epoch": 8.96,
      "learning_rate": 0.001,
      "loss": 2.7639,
      "step": 46680
    },
    {
      "epoch": 8.97,
      "learning_rate": 0.001,
      "loss": 2.7688,
      "step": 46692
    },
    {
      "epoch": 8.97,
      "learning_rate": 0.001,
      "loss": 2.7701,
      "step": 46704
    },
    {
      "epoch": 8.97,
      "learning_rate": 0.001,
      "loss": 2.7646,
      "step": 46716
    },
    {
      "epoch": 8.97,
      "learning_rate": 0.001,
      "loss": 2.7634,
      "step": 46728
    },
    {
      "epoch": 8.97,
      "learning_rate": 0.001,
      "loss": 2.778,
      "step": 46740
    },
    {
      "epoch": 8.98,
      "learning_rate": 0.001,
      "loss": 2.7694,
      "step": 46752
    },
    {
      "epoch": 8.98,
      "learning_rate": 0.001,
      "loss": 2.7716,
      "step": 46764
    },
    {
      "epoch": 8.98,
      "learning_rate": 0.001,
      "loss": 2.773,
      "step": 46776
    },
    {
      "epoch": 8.98,
      "learning_rate": 0.001,
      "loss": 2.7676,
      "step": 46788
    },
    {
      "epoch": 8.99,
      "learning_rate": 0.001,
      "loss": 2.7777,
      "step": 46800
    },
    {
      "epoch": 8.99,
      "learning_rate": 0.001,
      "loss": 2.7714,
      "step": 46812
    },
    {
      "epoch": 8.99,
      "learning_rate": 0.001,
      "loss": 2.7716,
      "step": 46824
    },
    {
      "epoch": 8.99,
      "learning_rate": 0.001,
      "loss": 2.7684,
      "step": 46836
    },
    {
      "epoch": 9.0,
      "learning_rate": 0.001,
      "loss": 2.7678,
      "step": 46848
    },
    {
      "epoch": 9.0,
      "learning_rate": 0.001,
      "loss": 2.7729,
      "step": 46860
    },
    {
      "epoch": 9.0,
      "learning_rate": 0.001,
      "loss": 2.7707,
      "step": 46872
    },
    {
      "epoch": 9.0,
      "eval_ag_news_accuracy": 0.29903125,
      "eval_ag_news_bleu_score": 4.288653808307046,
      "eval_ag_news_bleu_score_sem": 0.14459158741794412,
      "eval_ag_news_emb_cos_sim": 0.7732715606689453,
      "eval_ag_news_emb_cos_sim_sem": 0.00808783139818176,
      "eval_ag_news_emb_top1_equal": 0.1796875,
      "eval_ag_news_emb_top1_equal_sem": 0.034068008879424266,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.8124728202819824,
      "eval_ag_news_n_ngrams_match_1": 12.812,
      "eval_ag_news_n_ngrams_match_2": 2.654,
      "eval_ag_news_n_ngrams_match_3": 0.746,
      "eval_ag_news_num_pred_words": 46.692,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 45.262225945002136,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3134112998323353,
      "eval_ag_news_runtime": 10.5205,
      "eval_ag_news_samples_per_second": 47.526,
      "eval_ag_news_steps_per_second": 0.095,
      "eval_ag_news_token_set_f1": 0.32487076987979246,
      "eval_ag_news_token_set_f1_sem": 0.004333593802880955,
      "eval_ag_news_token_set_precision": 0.30193949526342484,
      "eval_ag_news_token_set_recall": 0.3704356691823834,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 46875
    },
    {
      "epoch": 9.0,
      "eval_anthropic_toxic_prompts_accuracy": 0.1038125,
      "eval_anthropic_toxic_prompts_bleu_score": 2.6723761364822427,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10595742970731784,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6198536157608032,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010522718056928875,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.4786829948425293,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.214,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.51,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.556,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.322,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 32.417000592861584,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.1824072993406963,
      "eval_anthropic_toxic_prompts_runtime": 9.7625,
      "eval_anthropic_toxic_prompts_samples_per_second": 51.216,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.102,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3239000278615702,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006445313758389343,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.36797836907206205,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32676964757090793,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 46875
    },
    {
      "epoch": 9.0,
      "eval_arxiv_accuracy": 0.32184375,
      "eval_arxiv_bleu_score": 3.641969790826636,
      "eval_arxiv_bleu_score_sem": 0.10476167297881656,
      "eval_arxiv_emb_cos_sim": 0.6878165006637573,
      "eval_arxiv_emb_cos_sim_sem": 0.00910235477413257,
      "eval_arxiv_emb_top1_equal": 0.2421875,
      "eval_arxiv_emb_top1_equal_sem": 0.038014990119662626,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.6736721992492676,
      "eval_arxiv_n_ngrams_match_1": 13.046,
      "eval_arxiv_n_ngrams_match_2": 2.392,
      "eval_arxiv_n_ngrams_match_3": 0.482,
      "eval_arxiv_num_pred_words": 38.78,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 39.39631166069638,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3104139493609104,
      "eval_arxiv_runtime": 10.9606,
      "eval_arxiv_samples_per_second": 45.618,
      "eval_arxiv_steps_per_second": 0.091,
      "eval_arxiv_token_set_f1": 0.31033714250777356,
      "eval_arxiv_token_set_f1_sem": 0.00431074284094741,
      "eval_arxiv_token_set_precision": 0.2536412884278891,
      "eval_arxiv_token_set_recall": 0.42613121192327474,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 46875
    },
    {
      "epoch": 9.0,
      "eval_python_code_alpaca_accuracy": 0.1441875,
      "eval_python_code_alpaca_bleu_score": 3.592983674218678,
      "eval_python_code_alpaca_bleu_score_sem": 0.10707368222914622,
      "eval_python_code_alpaca_emb_cos_sim": 0.7041624784469604,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.01002766512558696,
      "eval_python_code_alpaca_emb_top1_equal": 0.1171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.1552317142486572,
      "eval_python_code_alpaca_n_ngrams_match_1": 8.356,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.122,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.586,
      "eval_python_code_alpaca_num_pred_words": 42.308,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 23.458472123962313,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.28172292893895556,
      "eval_python_code_alpaca_runtime": 10.184,
      "eval_python_code_alpaca_samples_per_second": 49.097,
      "eval_python_code_alpaca_steps_per_second": 0.098,
      "eval_python_code_alpaca_token_set_f1": 0.4299302781801553,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005656002653046869,
      "eval_python_code_alpaca_token_set_precision": 0.44608991108380747,
      "eval_python_code_alpaca_token_set_recall": 0.4457493382957042,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 46875
    },
    {
      "epoch": 9.0,
      "eval_wikibio_accuracy": 0.2995625,
      "eval_wikibio_bleu_score": 5.255827214641384,
      "eval_wikibio_bleu_score_sem": 0.18697395818757404,
      "eval_wikibio_emb_cos_sim": 0.6974311470985413,
      "eval_wikibio_emb_cos_sim_sem": 0.011648033643813682,
      "eval_wikibio_emb_top1_equal": 0.171875,
      "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.123207092285156,
      "eval_wikibio_n_ngrams_match_1": 9.766,
      "eval_wikibio_n_ngrams_match_2": 3.122,
      "eval_wikibio_n_ngrams_match_3": 1.054,
      "eval_wikibio_num_pred_words": 37.344,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 61.756985356057086,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.32980918775204293,
      "eval_wikibio_runtime": 10.2956,
      "eval_wikibio_samples_per_second": 48.564,
      "eval_wikibio_steps_per_second": 0.097,
      "eval_wikibio_token_set_f1": 0.3070015958413056,
      "eval_wikibio_token_set_f1_sem": 0.0055645537462260955,
      "eval_wikibio_token_set_precision": 0.31397095048371115,
      "eval_wikibio_token_set_recall": 0.3187160784834163,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 46875
    },
    {
      "epoch": 9.0,
      "eval_nq_accuracy": 0.497375,
      "eval_nq_bleu_score": 10.043926883687972,
      "eval_nq_bleu_score_sem": 0.4276172846833347,
      "eval_nq_emb_cos_sim": 0.8026154041290283,
      "eval_nq_emb_cos_sim_sem": 0.00783238659334682,
      "eval_nq_emb_top1_equal": 0.3046875,
      "eval_nq_emb_top1_equal_sem": 0.04084279867618665,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.406917095184326,
      "eval_nq_n_ngrams_match_1": 21.462,
      "eval_nq_n_ngrams_match_2": 7.45,
      "eval_nq_n_ngrams_match_3": 3.236,
      "eval_nq_num_pred_words": 48.618,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 11.099689058568261,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.41010460940472193,
      "eval_nq_runtime": 10.3816,
      "eval_nq_samples_per_second": 48.162,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.43221719653866686,
      "eval_nq_token_set_f1_sem": 0.005051430922410576,
      "eval_nq_token_set_precision": 0.38060409292633585,
      "eval_nq_token_set_recall": 0.513445150778036,
      "eval_nq_true_num_tokens": 64.0,
      "step": 46875
    },
    {
      "epoch": 9.0,
      "learning_rate": 0.001,
      "loss": 2.7587,
      "step": 46884
    },
    {
      "epoch": 9.0,
      "learning_rate": 0.001,
      "loss": 2.7557,
      "step": 46896
    },
    {
      "epoch": 9.01,
      "learning_rate": 0.001,
      "loss": 2.7481,
      "step": 46908
    },
    {
      "epoch": 9.01,
      "learning_rate": 0.001,
      "loss": 2.7496,
      "step": 46920
    },
    {
      "epoch": 9.01,
      "learning_rate": 0.001,
      "loss": 2.7439,
      "step": 46932
    },
    {
      "epoch": 9.01,
      "learning_rate": 0.001,
      "loss": 2.7462,
      "step": 46944
    },
    {
      "epoch": 9.02,
      "learning_rate": 0.001,
      "loss": 2.7593,
      "step": 46956
    },
    {
      "epoch": 9.02,
      "learning_rate": 0.001,
      "loss": 2.7543,
      "step": 46968
    },
    {
      "epoch": 9.02,
      "learning_rate": 0.001,
      "loss": 2.7362,
      "step": 46980
    },
    {
      "epoch": 9.02,
      "learning_rate": 0.001,
      "loss": 2.7432,
      "step": 46992
    },
    {
      "epoch": 9.03,
      "learning_rate": 0.001,
      "loss": 2.7518,
      "step": 47004
    },
    {
      "epoch": 9.03,
      "learning_rate": 0.001,
      "loss": 2.7525,
      "step": 47016
    },
    {
      "epoch": 9.03,
      "learning_rate": 0.001,
      "loss": 2.7502,
      "step": 47028
    },
    {
      "epoch": 9.03,
      "learning_rate": 0.001,
      "loss": 2.7586,
      "step": 47040
    },
    {
      "epoch": 9.03,
      "learning_rate": 0.001,
      "loss": 2.7568,
      "step": 47052
    },
    {
      "epoch": 9.04,
      "learning_rate": 0.001,
      "loss": 2.763,
      "step": 47064
    },
    {
      "epoch": 9.04,
      "learning_rate": 0.001,
      "loss": 2.7491,
      "step": 47076
    },
    {
      "epoch": 9.04,
      "learning_rate": 0.001,
      "loss": 2.7499,
      "step": 47088
    },
    {
      "epoch": 9.04,
      "learning_rate": 0.001,
      "loss": 2.7512,
      "step": 47100
    },
    {
      "epoch": 9.05,
      "learning_rate": 0.001,
      "loss": 2.758,
      "step": 47112
    },
    {
      "epoch": 9.05,
      "learning_rate": 0.001,
      "loss": 2.7539,
      "step": 47124
    },
    {
      "epoch": 9.05,
      "learning_rate": 0.001,
      "loss": 2.7612,
      "step": 47136
    },
    {
      "epoch": 9.05,
      "learning_rate": 0.001,
      "loss": 2.7435,
      "step": 47148
    },
    {
      "epoch": 9.06,
      "learning_rate": 0.001,
      "loss": 2.7508,
      "step": 47160
    },
    {
      "epoch": 9.06,
      "learning_rate": 0.001,
      "loss": 2.752,
      "step": 47172
    },
    {
      "epoch": 9.06,
      "learning_rate": 0.001,
      "loss": 2.758,
      "step": 47184
    },
    {
      "epoch": 9.06,
      "learning_rate": 0.001,
      "loss": 2.7456,
      "step": 47196
    },
    {
      "epoch": 9.06,
      "learning_rate": 0.001,
      "loss": 2.7555,
      "step": 47208
    },
    {
      "epoch": 9.07,
      "learning_rate": 0.001,
      "loss": 2.7513,
      "step": 47220
    },
    {
      "epoch": 9.07,
      "learning_rate": 0.001,
      "loss": 2.7564,
      "step": 47232
    },
    {
      "epoch": 9.07,
      "learning_rate": 0.001,
      "loss": 2.7463,
      "step": 47244
    },
    {
      "epoch": 9.07,
      "learning_rate": 0.001,
      "loss": 2.7569,
      "step": 47256
    },
    {
      "epoch": 9.08,
      "learning_rate": 0.001,
      "loss": 2.7583,
      "step": 47268
    },
    {
      "epoch": 9.08,
      "learning_rate": 0.001,
      "loss": 2.7581,
      "step": 47280
    },
    {
      "epoch": 9.08,
      "learning_rate": 0.001,
      "loss": 2.7566,
      "step": 47292
    },
    {
      "epoch": 9.08,
      "learning_rate": 0.001,
      "loss": 2.7626,
      "step": 47304
    },
    {
      "epoch": 9.09,
      "learning_rate": 0.001,
      "loss": 2.7437,
      "step": 47316
    },
    {
      "epoch": 9.09,
      "learning_rate": 0.001,
      "loss": 2.7656,
      "step": 47328
    },
    {
      "epoch": 9.09,
      "learning_rate": 0.001,
      "loss": 2.7414,
      "step": 47340
    },
    {
      "epoch": 9.09,
      "learning_rate": 0.001,
      "loss": 2.7427,
      "step": 47352
    },
    {
      "epoch": 9.09,
      "learning_rate": 0.001,
      "loss": 2.7606,
      "step": 47364
    },
    {
      "epoch": 9.1,
      "learning_rate": 0.001,
      "loss": 2.7566,
      "step": 47376
    },
    {
      "epoch": 9.1,
      "learning_rate": 0.001,
      "loss": 2.7488,
      "step": 47388
    },
    {
      "epoch": 9.1,
      "learning_rate": 0.001,
      "loss": 2.7566,
      "step": 47400
    },
    {
      "epoch": 9.1,
      "learning_rate": 0.001,
      "loss": 2.7555,
      "step": 47412
    },
    {
      "epoch": 9.11,
      "learning_rate": 0.001,
      "loss": 2.7526,
      "step": 47424
    },
    {
      "epoch": 9.11,
      "learning_rate": 0.001,
      "loss": 2.7527,
      "step": 47436
    },
    {
      "epoch": 9.11,
      "learning_rate": 0.001,
      "loss": 2.7515,
      "step": 47448
    },
    {
      "epoch": 9.11,
      "learning_rate": 0.001,
      "loss": 2.7468,
      "step": 47460
    },
    {
      "epoch": 9.12,
      "learning_rate": 0.001,
      "loss": 2.7412,
      "step": 47472
    },
    {
      "epoch": 9.12,
      "learning_rate": 0.001,
      "loss": 2.7546,
      "step": 47484
    },
    {
      "epoch": 9.12,
      "learning_rate": 0.001,
      "loss": 2.7521,
      "step": 47496
    },
    {
      "epoch": 9.12,
      "eval_ag_news_accuracy": 0.29959375,
      "eval_ag_news_bleu_score": 4.22949652749428,
      "eval_ag_news_bleu_score_sem": 0.13746238198174335,
      "eval_ag_news_emb_cos_sim": 0.7705955505371094,
      "eval_ag_news_emb_cos_sim_sem": 0.008421604145340673,
      "eval_ag_news_emb_top1_equal": 0.171875,
      "eval_ag_news_emb_top1_equal_sem": 0.03347745514062371,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.8094165325164795,
      "eval_ag_news_n_ngrams_match_1": 12.772,
      "eval_ag_news_n_ngrams_match_2": 2.596,
      "eval_ag_news_n_ngrams_match_3": 0.712,
      "eval_ag_news_num_pred_words": 46.474,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 45.124102737258156,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.31430772925392514,
      "eval_ag_news_runtime": 12.0799,
      "eval_ag_news_samples_per_second": 41.391,
      "eval_ag_news_steps_per_second": 0.083,
      "eval_ag_news_token_set_f1": 0.3221694479981205,
      "eval_ag_news_token_set_f1_sem": 0.004173123746570576,
      "eval_ag_news_token_set_precision": 0.2997731322387018,
      "eval_ag_news_token_set_recall": 0.36404314770694457,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 47500
    },
    {
      "epoch": 9.12,
      "eval_anthropic_toxic_prompts_accuracy": 0.105,
      "eval_anthropic_toxic_prompts_bleu_score": 2.6824998451745734,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.09735606462677479,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6408157348632812,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009020674181718457,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.456223726272583,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.654,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.638,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.548,
      "eval_anthropic_toxic_prompts_num_pred_words": 48.08,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 31.697053475790963,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.193398076506703,
      "eval_anthropic_toxic_prompts_runtime": 10.4325,
      "eval_anthropic_toxic_prompts_samples_per_second": 47.927,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.096,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.33546083030101165,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006248965843310193,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.3985590046768791,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32071277025793865,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 47500
    },
    {
      "epoch": 9.12,
      "eval_arxiv_accuracy": 0.32346875,
      "eval_arxiv_bleu_score": 3.83866917627666,
      "eval_arxiv_bleu_score_sem": 0.11142097098331777,
      "eval_arxiv_emb_cos_sim": 0.7052998542785645,
      "eval_arxiv_emb_cos_sim_sem": 0.008977505930675683,
      "eval_arxiv_emb_top1_equal": 0.1875,
      "eval_arxiv_emb_top1_equal_sem": 0.034634623208270626,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.654217004776001,
      "eval_arxiv_n_ngrams_match_1": 13.54,
      "eval_arxiv_n_ngrams_match_2": 2.512,
      "eval_arxiv_n_ngrams_match_3": 0.534,
      "eval_arxiv_num_pred_words": 40.57,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 38.637256480882606,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.31631203107164363,
      "eval_arxiv_runtime": 10.9376,
      "eval_arxiv_samples_per_second": 45.714,
      "eval_arxiv_steps_per_second": 0.091,
      "eval_arxiv_token_set_f1": 0.3159684513390401,
      "eval_arxiv_token_set_f1_sem": 0.004085429987867343,
      "eval_arxiv_token_set_precision": 0.2614633675206258,
      "eval_arxiv_token_set_recall": 0.4207364699353341,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 47500
    },
    {
      "epoch": 9.12,
      "eval_python_code_alpaca_accuracy": 0.14646875,
      "eval_python_code_alpaca_bleu_score": 3.8212681127184696,
      "eval_python_code_alpaca_bleu_score_sem": 0.12041746269843684,
      "eval_python_code_alpaca_emb_cos_sim": 0.7284071445465088,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008505652205260984,
      "eval_python_code_alpaca_emb_top1_equal": 0.109375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.1190266609191895,
      "eval_python_code_alpaca_n_ngrams_match_1": 8.992,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.388,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.696,
      "eval_python_code_alpaca_num_pred_words": 44.718,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 22.62434776081528,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.2945015354590723,
      "eval_python_code_alpaca_runtime": 10.6084,
      "eval_python_code_alpaca_samples_per_second": 47.133,
      "eval_python_code_alpaca_steps_per_second": 0.094,
      "eval_python_code_alpaca_token_set_f1": 0.44566224977647795,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005389132628955182,
      "eval_python_code_alpaca_token_set_precision": 0.48580211359744807,
      "eval_python_code_alpaca_token_set_recall": 0.4351788171634512,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 47500
    },
    {
      "epoch": 9.12,
      "eval_wikibio_accuracy": 0.2994375,
      "eval_wikibio_bleu_score": 5.3457438873014596,
      "eval_wikibio_bleu_score_sem": 0.18799731893879146,
      "eval_wikibio_emb_cos_sim": 0.7072981595993042,
      "eval_wikibio_emb_cos_sim_sem": 0.01047188764053572,
      "eval_wikibio_emb_top1_equal": 0.125,
      "eval_wikibio_emb_top1_equal_sem": 0.02934655822437397,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.10506534576416,
      "eval_wikibio_n_ngrams_match_1": 9.658,
      "eval_wikibio_n_ngrams_match_2": 3.094,
      "eval_wikibio_n_ngrams_match_3": 1.072,
      "eval_wikibio_num_pred_words": 36.484,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 60.646707423566404,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.32754757004868085,
      "eval_wikibio_runtime": 10.4914,
      "eval_wikibio_samples_per_second": 47.658,
      "eval_wikibio_steps_per_second": 0.095,
      "eval_wikibio_token_set_f1": 0.3056928148803203,
      "eval_wikibio_token_set_f1_sem": 0.005388830498402596,
      "eval_wikibio_token_set_precision": 0.3142341671727403,
      "eval_wikibio_token_set_recall": 0.31170697687950055,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 47500
    },
    {
      "epoch": 9.12,
      "eval_nq_accuracy": 0.5001875,
      "eval_nq_bleu_score": 10.247925520087861,
      "eval_nq_bleu_score_sem": 0.43562072258183493,
      "eval_nq_emb_cos_sim": 0.8029464483261108,
      "eval_nq_emb_cos_sim_sem": 0.00777242348171696,
      "eval_nq_emb_top1_equal": 0.2578125,
      "eval_nq_emb_top1_equal_sem": 0.038815656435002115,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.391540050506592,
      "eval_nq_n_ngrams_match_1": 21.606,
      "eval_nq_n_ngrams_match_2": 7.576,
      "eval_nq_n_ngrams_match_3": 3.34,
      "eval_nq_num_pred_words": 49.124,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 10.930314223634,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4132165959253411,
      "eval_nq_runtime": 10.4544,
      "eval_nq_samples_per_second": 47.827,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.43017255422591094,
      "eval_nq_token_set_f1_sem": 0.005125741734852646,
      "eval_nq_token_set_precision": 0.38525483841614144,
      "eval_nq_token_set_recall": 0.49745545675268665,
      "eval_nq_true_num_tokens": 64.0,
      "step": 47500
    },
    {
      "epoch": 9.12,
      "learning_rate": 0.001,
      "loss": 2.7467,
      "step": 47508
    },
    {
      "epoch": 9.12,
      "learning_rate": 0.001,
      "loss": 2.756,
      "step": 47520
    },
    {
      "epoch": 9.13,
      "learning_rate": 0.001,
      "loss": 2.749,
      "step": 47532
    },
    {
      "epoch": 9.13,
      "learning_rate": 0.001,
      "loss": 2.7496,
      "step": 47544
    },
    {
      "epoch": 9.13,
      "learning_rate": 0.001,
      "loss": 2.7415,
      "step": 47556
    },
    {
      "epoch": 9.13,
      "learning_rate": 0.001,
      "loss": 2.7536,
      "step": 47568
    },
    {
      "epoch": 9.14,
      "learning_rate": 0.001,
      "loss": 2.7458,
      "step": 47580
    },
    {
      "epoch": 9.14,
      "learning_rate": 0.001,
      "loss": 2.754,
      "step": 47592
    },
    {
      "epoch": 9.14,
      "learning_rate": 0.001,
      "loss": 2.7511,
      "step": 47604
    },
    {
      "epoch": 9.14,
      "learning_rate": 0.001,
      "loss": 2.7557,
      "step": 47616
    },
    {
      "epoch": 9.15,
      "learning_rate": 0.001,
      "loss": 2.7573,
      "step": 47628
    },
    {
      "epoch": 9.15,
      "learning_rate": 0.001,
      "loss": 2.7593,
      "step": 47640
    },
    {
      "epoch": 9.15,
      "learning_rate": 0.001,
      "loss": 2.7505,
      "step": 47652
    },
    {
      "epoch": 9.15,
      "learning_rate": 0.001,
      "loss": 2.7482,
      "step": 47664
    },
    {
      "epoch": 9.15,
      "learning_rate": 0.001,
      "loss": 2.7605,
      "step": 47676
    },
    {
      "epoch": 9.16,
      "learning_rate": 0.001,
      "loss": 2.759,
      "step": 47688
    },
    {
      "epoch": 9.16,
      "learning_rate": 0.001,
      "loss": 2.7598,
      "step": 47700
    },
    {
      "epoch": 9.16,
      "learning_rate": 0.001,
      "loss": 2.7586,
      "step": 47712
    },
    {
      "epoch": 9.16,
      "learning_rate": 0.001,
      "loss": 2.7666,
      "step": 47724
    },
    {
      "epoch": 9.17,
      "learning_rate": 0.001,
      "loss": 2.7557,
      "step": 47736
    },
    {
      "epoch": 9.17,
      "learning_rate": 0.001,
      "loss": 2.7563,
      "step": 47748
    },
    {
      "epoch": 9.17,
      "learning_rate": 0.001,
      "loss": 2.7472,
      "step": 47760
    },
    {
      "epoch": 9.17,
      "learning_rate": 0.001,
      "loss": 2.7543,
      "step": 47772
    },
    {
      "epoch": 9.18,
      "learning_rate": 0.001,
      "loss": 2.756,
      "step": 47784
    },
    {
      "epoch": 9.18,
      "learning_rate": 0.001,
      "loss": 2.7686,
      "step": 47796
    },
    {
      "epoch": 9.18,
      "learning_rate": 0.001,
      "loss": 2.7496,
      "step": 47808
    },
    {
      "epoch": 9.18,
      "learning_rate": 0.001,
      "loss": 2.7612,
      "step": 47820
    },
    {
      "epoch": 9.18,
      "learning_rate": 0.001,
      "loss": 2.7648,
      "step": 47832
    },
    {
      "epoch": 9.19,
      "learning_rate": 0.001,
      "loss": 2.7478,
      "step": 47844
    },
    {
      "epoch": 9.19,
      "learning_rate": 0.001,
      "loss": 2.7375,
      "step": 47856
    },
    {
      "epoch": 9.19,
      "learning_rate": 0.001,
      "loss": 2.7452,
      "step": 47868
    },
    {
      "epoch": 9.19,
      "learning_rate": 0.001,
      "loss": 2.7489,
      "step": 47880
    },
    {
      "epoch": 9.2,
      "learning_rate": 0.001,
      "loss": 2.7521,
      "step": 47892
    },
    {
      "epoch": 9.2,
      "learning_rate": 0.001,
      "loss": 2.7564,
      "step": 47904
    },
    {
      "epoch": 9.2,
      "learning_rate": 0.001,
      "loss": 2.7504,
      "step": 47916
    },
    {
      "epoch": 9.2,
      "learning_rate": 0.001,
      "loss": 2.755,
      "step": 47928
    },
    {
      "epoch": 9.21,
      "learning_rate": 0.001,
      "loss": 2.7465,
      "step": 47940
    },
    {
      "epoch": 9.21,
      "learning_rate": 0.001,
      "loss": 2.7471,
      "step": 47952
    },
    {
      "epoch": 9.21,
      "learning_rate": 0.001,
      "loss": 2.749,
      "step": 47964
    },
    {
      "epoch": 9.21,
      "learning_rate": 0.001,
      "loss": 2.7607,
      "step": 47976
    },
    {
      "epoch": 9.21,
      "learning_rate": 0.001,
      "loss": 2.7441,
      "step": 47988
    },
    {
      "epoch": 9.22,
      "learning_rate": 0.001,
      "loss": 2.7466,
      "step": 48000
    },
    {
      "epoch": 9.22,
      "learning_rate": 0.001,
      "loss": 2.7465,
      "step": 48012
    },
    {
      "epoch": 9.22,
      "learning_rate": 0.001,
      "loss": 2.7501,
      "step": 48024
    },
    {
      "epoch": 9.22,
      "learning_rate": 0.001,
      "loss": 2.7498,
      "step": 48036
    },
    {
      "epoch": 9.23,
      "learning_rate": 0.001,
      "loss": 2.7591,
      "step": 48048
    },
    {
      "epoch": 9.23,
      "learning_rate": 0.001,
      "loss": 2.755,
      "step": 48060
    },
    {
      "epoch": 9.23,
      "learning_rate": 0.001,
      "loss": 2.7467,
      "step": 48072
    },
    {
      "epoch": 9.23,
      "learning_rate": 0.001,
      "loss": 2.7501,
      "step": 48084
    },
    {
      "epoch": 9.24,
      "learning_rate": 0.001,
      "loss": 2.7473,
      "step": 48096
    },
    {
      "epoch": 9.24,
      "learning_rate": 0.001,
      "loss": 2.7489,
      "step": 48108
    },
    {
      "epoch": 9.24,
      "learning_rate": 0.001,
      "loss": 2.7483,
      "step": 48120
    },
    {
      "epoch": 9.24,
      "eval_ag_news_accuracy": 0.30184375,
      "eval_ag_news_bleu_score": 4.351701686434694,
      "eval_ag_news_bleu_score_sem": 0.1420752862709685,
      "eval_ag_news_emb_cos_sim": 0.7652785778045654,
      "eval_ag_news_emb_cos_sim_sem": 0.009476753131988938,
      "eval_ag_news_emb_top1_equal": 0.2109375,
      "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.7872631549835205,
      "eval_ag_news_n_ngrams_match_1": 12.978,
      "eval_ag_news_n_ngrams_match_2": 2.668,
      "eval_ag_news_n_ngrams_match_3": 0.734,
      "eval_ag_news_num_pred_words": 46.406,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 44.1354429636502,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3174233587706966,
      "eval_ag_news_runtime": 10.3954,
      "eval_ag_news_samples_per_second": 48.098,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.327430124189282,
      "eval_ag_news_token_set_f1_sem": 0.004446779222228204,
      "eval_ag_news_token_set_precision": 0.30576685643142293,
      "eval_ag_news_token_set_recall": 0.3660499994773382,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 48125
    },
    {
      "epoch": 9.24,
      "eval_anthropic_toxic_prompts_accuracy": 0.1050625,
      "eval_anthropic_toxic_prompts_bleu_score": 2.699989161794298,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11689087010030202,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.640740156173706,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010851065714431027,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.4292941093444824,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.57,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.578,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.54,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.834,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 30.854854906908304,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.1909183661228704,
      "eval_anthropic_toxic_prompts_runtime": 10.1024,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.493,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.099,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3300152090117992,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0063356743567588554,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.3875014729189655,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.31639474196486,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 48125
    },
    {
      "epoch": 9.24,
      "eval_arxiv_accuracy": 0.32525,
      "eval_arxiv_bleu_score": 3.854995038599177,
      "eval_arxiv_bleu_score_sem": 0.10864260153314576,
      "eval_arxiv_emb_cos_sim": 0.722553551197052,
      "eval_arxiv_emb_cos_sim_sem": 0.00705173607978296,
      "eval_arxiv_emb_top1_equal": 0.2578125,
      "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.6563189029693604,
      "eval_arxiv_n_ngrams_match_1": 13.468,
      "eval_arxiv_n_ngrams_match_2": 2.532,
      "eval_arxiv_n_ngrams_match_3": 0.562,
      "eval_arxiv_num_pred_words": 40.548,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 38.71855346954226,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.31788153585586487,
      "eval_arxiv_runtime": 10.528,
      "eval_arxiv_samples_per_second": 47.493,
      "eval_arxiv_steps_per_second": 0.095,
      "eval_arxiv_token_set_f1": 0.31475247216705377,
      "eval_arxiv_token_set_f1_sem": 0.004326166434345451,
      "eval_arxiv_token_set_precision": 0.26123899148043805,
      "eval_arxiv_token_set_recall": 0.4153508682192829,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 48125
    },
    {
      "epoch": 9.24,
      "eval_python_code_alpaca_accuracy": 0.14628125,
      "eval_python_code_alpaca_bleu_score": 3.669469749310094,
      "eval_python_code_alpaca_bleu_score_sem": 0.11159693307673213,
      "eval_python_code_alpaca_emb_cos_sim": 0.711249589920044,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008566130957240592,
      "eval_python_code_alpaca_emb_top1_equal": 0.0703125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.022687306110270106,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.11362624168396,
      "eval_python_code_alpaca_n_ngrams_match_1": 8.55,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.212,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.676,
      "eval_python_code_alpaca_num_pred_words": 43.384,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 22.502496119103448,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.28401764689301384,
      "eval_python_code_alpaca_runtime": 11.2649,
      "eval_python_code_alpaca_samples_per_second": 44.386,
      "eval_python_code_alpaca_steps_per_second": 0.089,
      "eval_python_code_alpaca_token_set_f1": 0.43633529654027564,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005892912011820268,
      "eval_python_code_alpaca_token_set_precision": 0.46512658426664066,
      "eval_python_code_alpaca_token_set_recall": 0.43621480267366586,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 48125
    },
    {
      "epoch": 9.24,
      "eval_wikibio_accuracy": 0.297625,
      "eval_wikibio_bleu_score": 5.310316157940524,
      "eval_wikibio_bleu_score_sem": 0.1770462429434233,
      "eval_wikibio_emb_cos_sim": 0.7053753137588501,
      "eval_wikibio_emb_cos_sim_sem": 0.010619001215262996,
      "eval_wikibio_emb_top1_equal": 0.1484375,
      "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.077270030975342,
      "eval_wikibio_n_ngrams_match_1": 9.948,
      "eval_wikibio_n_ngrams_match_2": 3.258,
      "eval_wikibio_n_ngrams_match_3": 1.124,
      "eval_wikibio_num_pred_words": 38.33,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 58.98422474645511,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3346791863394518,
      "eval_wikibio_runtime": 11.2154,
      "eval_wikibio_samples_per_second": 44.582,
      "eval_wikibio_steps_per_second": 0.089,
      "eval_wikibio_token_set_f1": 0.3108615880404229,
      "eval_wikibio_token_set_f1_sem": 0.005273087001684077,
      "eval_wikibio_token_set_precision": 0.32113380387226925,
      "eval_wikibio_token_set_recall": 0.316084648908643,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 48125
    },
    {
      "epoch": 9.24,
      "eval_nq_accuracy": 0.5020625,
      "eval_nq_bleu_score": 10.132491349184725,
      "eval_nq_bleu_score_sem": 0.4566772896043115,
      "eval_nq_emb_cos_sim": 0.8031895160675049,
      "eval_nq_emb_cos_sim_sem": 0.007887558135136596,
      "eval_nq_emb_top1_equal": 0.2890625,
      "eval_nq_emb_top1_equal_sem": 0.04022626667363519,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.38545298576355,
      "eval_nq_n_ngrams_match_1": 21.718,
      "eval_nq_n_ngrams_match_2": 7.534,
      "eval_nq_n_ngrams_match_3": 3.314,
      "eval_nq_num_pred_words": 48.938,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 10.863982780000132,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4145385300040625,
      "eval_nq_runtime": 10.8793,
      "eval_nq_samples_per_second": 45.959,
      "eval_nq_steps_per_second": 0.092,
      "eval_nq_token_set_f1": 0.4305506371636064,
      "eval_nq_token_set_f1_sem": 0.005198653767924212,
      "eval_nq_token_set_precision": 0.384813827332616,
      "eval_nq_token_set_recall": 0.4992643605675793,
      "eval_nq_true_num_tokens": 64.0,
      "step": 48125
    },
    {
      "epoch": 9.24,
      "learning_rate": 0.001,
      "loss": 2.7471,
      "step": 48132
    },
    {
      "epoch": 9.24,
      "learning_rate": 0.001,
      "loss": 2.7484,
      "step": 48144
    },
    {
      "epoch": 9.25,
      "learning_rate": 0.001,
      "loss": 2.7563,
      "step": 48156
    },
    {
      "epoch": 9.25,
      "learning_rate": 0.001,
      "loss": 2.7557,
      "step": 48168
    },
    {
      "epoch": 9.25,
      "learning_rate": 0.001,
      "loss": 2.752,
      "step": 48180
    },
    {
      "epoch": 9.25,
      "learning_rate": 0.001,
      "loss": 2.7522,
      "step": 48192
    },
    {
      "epoch": 9.26,
      "learning_rate": 0.001,
      "loss": 2.7558,
      "step": 48204
    },
    {
      "epoch": 9.26,
      "learning_rate": 0.001,
      "loss": 2.7547,
      "step": 48216
    },
    {
      "epoch": 9.26,
      "learning_rate": 0.001,
      "loss": 2.7492,
      "step": 48228
    },
    {
      "epoch": 9.26,
      "learning_rate": 0.001,
      "loss": 2.7484,
      "step": 48240
    },
    {
      "epoch": 9.26,
      "learning_rate": 0.001,
      "loss": 2.7612,
      "step": 48252
    },
    {
      "epoch": 9.27,
      "learning_rate": 0.001,
      "loss": 2.7669,
      "step": 48264
    },
    {
      "epoch": 9.27,
      "learning_rate": 0.001,
      "loss": 2.7553,
      "step": 48276
    },
    {
      "epoch": 9.27,
      "learning_rate": 0.001,
      "loss": 2.7476,
      "step": 48288
    },
    {
      "epoch": 9.27,
      "learning_rate": 0.001,
      "loss": 2.7514,
      "step": 48300
    },
    {
      "epoch": 9.28,
      "learning_rate": 0.001,
      "loss": 2.7576,
      "step": 48312
    },
    {
      "epoch": 9.28,
      "learning_rate": 0.001,
      "loss": 2.7538,
      "step": 48324
    },
    {
      "epoch": 9.28,
      "learning_rate": 0.001,
      "loss": 2.7554,
      "step": 48336
    },
    {
      "epoch": 9.28,
      "learning_rate": 0.001,
      "loss": 2.7491,
      "step": 48348
    },
    {
      "epoch": 9.29,
      "learning_rate": 0.001,
      "loss": 2.7403,
      "step": 48360
    },
    {
      "epoch": 9.29,
      "learning_rate": 0.001,
      "loss": 2.7501,
      "step": 48372
    },
    {
      "epoch": 9.29,
      "learning_rate": 0.001,
      "loss": 2.7492,
      "step": 48384
    },
    {
      "epoch": 9.29,
      "learning_rate": 0.001,
      "loss": 2.7498,
      "step": 48396
    },
    {
      "epoch": 9.29,
      "learning_rate": 0.001,
      "loss": 2.7402,
      "step": 48408
    },
    {
      "epoch": 9.3,
      "learning_rate": 0.001,
      "loss": 2.7572,
      "step": 48420
    },
    {
      "epoch": 9.3,
      "learning_rate": 0.001,
      "loss": 2.744,
      "step": 48432
    },
    {
      "epoch": 9.3,
      "learning_rate": 0.001,
      "loss": 2.7585,
      "step": 48444
    },
    {
      "epoch": 9.3,
      "learning_rate": 0.001,
      "loss": 2.7455,
      "step": 48456
    },
    {
      "epoch": 9.31,
      "learning_rate": 0.001,
      "loss": 2.7497,
      "step": 48468
    },
    {
      "epoch": 9.31,
      "learning_rate": 0.001,
      "loss": 2.7569,
      "step": 48480
    },
    {
      "epoch": 9.31,
      "learning_rate": 0.001,
      "loss": 2.753,
      "step": 48492
    },
    {
      "epoch": 9.31,
      "learning_rate": 0.001,
      "loss": 2.765,
      "step": 48504
    },
    {
      "epoch": 9.32,
      "learning_rate": 0.001,
      "loss": 2.7542,
      "step": 48516
    },
    {
      "epoch": 9.32,
      "learning_rate": 0.001,
      "loss": 2.7562,
      "step": 48528
    },
    {
      "epoch": 9.32,
      "learning_rate": 0.001,
      "loss": 2.752,
      "step": 48540
    },
    {
      "epoch": 9.32,
      "learning_rate": 0.001,
      "loss": 2.7527,
      "step": 48552
    },
    {
      "epoch": 9.32,
      "learning_rate": 0.001,
      "loss": 2.7635,
      "step": 48564
    },
    {
      "epoch": 9.33,
      "learning_rate": 0.001,
      "loss": 2.7518,
      "step": 48576
    },
    {
      "epoch": 9.33,
      "learning_rate": 0.001,
      "loss": 2.7575,
      "step": 48588
    },
    {
      "epoch": 9.33,
      "learning_rate": 0.001,
      "loss": 2.7459,
      "step": 48600
    },
    {
      "epoch": 9.33,
      "learning_rate": 0.001,
      "loss": 2.7645,
      "step": 48612
    },
    {
      "epoch": 9.34,
      "learning_rate": 0.001,
      "loss": 2.7505,
      "step": 48624
    },
    {
      "epoch": 9.34,
      "learning_rate": 0.001,
      "loss": 2.7481,
      "step": 48636
    },
    {
      "epoch": 9.34,
      "learning_rate": 0.001,
      "loss": 2.7618,
      "step": 48648
    },
    {
      "epoch": 9.34,
      "learning_rate": 0.001,
      "loss": 2.7423,
      "step": 48660
    },
    {
      "epoch": 9.35,
      "learning_rate": 0.001,
      "loss": 2.7547,
      "step": 48672
    },
    {
      "epoch": 9.35,
      "learning_rate": 0.001,
      "loss": 2.7459,
      "step": 48684
    },
    {
      "epoch": 9.35,
      "learning_rate": 0.001,
      "loss": 2.7513,
      "step": 48696
    },
    {
      "epoch": 9.35,
      "learning_rate": 0.001,
      "loss": 2.7506,
      "step": 48708
    },
    {
      "epoch": 9.35,
      "learning_rate": 0.001,
      "loss": 2.7547,
      "step": 48720
    },
    {
      "epoch": 9.36,
      "learning_rate": 0.001,
      "loss": 2.7411,
      "step": 48732
    },
    {
      "epoch": 9.36,
      "learning_rate": 0.001,
      "loss": 2.752,
      "step": 48744
    },
    {
      "epoch": 9.36,
      "eval_ag_news_accuracy": 0.30134375,
      "eval_ag_news_bleu_score": 4.396749226674127,
      "eval_ag_news_bleu_score_sem": 0.14041484471418364,
      "eval_ag_news_emb_cos_sim": 0.7777345180511475,
      "eval_ag_news_emb_cos_sim_sem": 0.007292612563012307,
      "eval_ag_news_emb_top1_equal": 0.2265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03714537682851538,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.79011869430542,
      "eval_ag_news_n_ngrams_match_1": 13.072,
      "eval_ag_news_n_ngrams_match_2": 2.704,
      "eval_ag_news_n_ngrams_match_3": 0.744,
      "eval_ag_news_num_pred_words": 46.302,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 44.261653570436415,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3226398517360535,
      "eval_ag_news_runtime": 10.412,
      "eval_ag_news_samples_per_second": 48.022,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.32864575946343094,
      "eval_ag_news_token_set_f1_sem": 0.004257841407606742,
      "eval_ag_news_token_set_precision": 0.30865923718689464,
      "eval_ag_news_token_set_recall": 0.36671814008622605,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 48750
    },
    {
      "epoch": 9.36,
      "eval_anthropic_toxic_prompts_accuracy": 0.10453125,
      "eval_anthropic_toxic_prompts_bleu_score": 2.8131528566844612,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11057926695678694,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6412379741668701,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00939366391287225,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.140625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030847557647994725,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.473548650741577,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.714,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.646,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.59,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.042,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 32.25098710697477,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.1974110960979811,
      "eval_anthropic_toxic_prompts_runtime": 10.7744,
      "eval_anthropic_toxic_prompts_samples_per_second": 46.406,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.093,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3313660028572389,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006179051291841012,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.3974568862134835,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3114973119014473,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 48750
    },
    {
      "epoch": 9.36,
      "eval_arxiv_accuracy": 0.32503125,
      "eval_arxiv_bleu_score": 3.8107220012480023,
      "eval_arxiv_bleu_score_sem": 0.10939840891064047,
      "eval_arxiv_emb_cos_sim": 0.7042320370674133,
      "eval_arxiv_emb_cos_sim_sem": 0.008122784965031958,
      "eval_arxiv_emb_top1_equal": 0.203125,
      "eval_arxiv_emb_top1_equal_sem": 0.03570055125142555,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.6602439880371094,
      "eval_arxiv_n_ngrams_match_1": 13.64,
      "eval_arxiv_n_ngrams_match_2": 2.492,
      "eval_arxiv_n_ngrams_match_3": 0.502,
      "eval_arxiv_num_pred_words": 39.824,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 38.87082573090418,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.32390275596990414,
      "eval_arxiv_runtime": 10.2748,
      "eval_arxiv_samples_per_second": 48.663,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.3177359400014232,
      "eval_arxiv_token_set_f1_sem": 0.00394593972515579,
      "eval_arxiv_token_set_precision": 0.2644396575022485,
      "eval_arxiv_token_set_recall": 0.41493651747201754,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 48750
    },
    {
      "epoch": 9.36,
      "eval_python_code_alpaca_accuracy": 0.1439375,
      "eval_python_code_alpaca_bleu_score": 3.653396042760056,
      "eval_python_code_alpaca_bleu_score_sem": 0.11215840486870118,
      "eval_python_code_alpaca_emb_cos_sim": 0.6965563297271729,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009391043564274986,
      "eval_python_code_alpaca_emb_top1_equal": 0.0859375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02487009666300537,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.1472861766815186,
      "eval_python_code_alpaca_n_ngrams_match_1": 8.22,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.028,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.53,
      "eval_python_code_alpaca_num_pred_words": 40.438,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 23.272820480194248,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.2893894594645693,
      "eval_python_code_alpaca_runtime": 9.9787,
      "eval_python_code_alpaca_samples_per_second": 50.107,
      "eval_python_code_alpaca_steps_per_second": 0.1,
      "eval_python_code_alpaca_token_set_f1": 0.42216432224459405,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005704082989878473,
      "eval_python_code_alpaca_token_set_precision": 0.441826665075511,
      "eval_python_code_alpaca_token_set_recall": 0.4339781565612325,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 48750
    },
    {
      "epoch": 9.36,
      "eval_wikibio_accuracy": 0.29796875,
      "eval_wikibio_bleu_score": 5.128538995564363,
      "eval_wikibio_bleu_score_sem": 0.18220165220355836,
      "eval_wikibio_emb_cos_sim": 0.7084054350852966,
      "eval_wikibio_emb_cos_sim_sem": 0.011019566180599549,
      "eval_wikibio_emb_top1_equal": 0.1796875,
      "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.093959331512451,
      "eval_wikibio_n_ngrams_match_1": 9.522,
      "eval_wikibio_n_ngrams_match_2": 2.992,
      "eval_wikibio_n_ngrams_match_3": 1.01,
      "eval_wikibio_num_pred_words": 36.41,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 59.97689060893038,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3251450937570389,
      "eval_wikibio_runtime": 10.2684,
      "eval_wikibio_samples_per_second": 48.693,
      "eval_wikibio_steps_per_second": 0.097,
      "eval_wikibio_token_set_f1": 0.30141272476178205,
      "eval_wikibio_token_set_f1_sem": 0.0056895415323902476,
      "eval_wikibio_token_set_precision": 0.30743642302499147,
      "eval_wikibio_token_set_recall": 0.31338408345119867,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 48750
    },
    {
      "epoch": 9.36,
      "eval_nq_accuracy": 0.500375,
      "eval_nq_bleu_score": 10.53056712935079,
      "eval_nq_bleu_score_sem": 0.4490580378016007,
      "eval_nq_emb_cos_sim": 0.8015233874320984,
      "eval_nq_emb_cos_sim_sem": 0.00786959830931489,
      "eval_nq_emb_top1_equal": 0.265625,
      "eval_nq_emb_top1_equal_sem": 0.03919146934646163,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.385740280151367,
      "eval_nq_n_ngrams_match_1": 21.868,
      "eval_nq_n_ngrams_match_2": 7.642,
      "eval_nq_n_ngrams_match_3": 3.478,
      "eval_nq_num_pred_words": 48.982,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 10.86710438967117,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.418429712006624,
      "eval_nq_runtime": 11.0633,
      "eval_nq_samples_per_second": 45.195,
      "eval_nq_steps_per_second": 0.09,
      "eval_nq_token_set_f1": 0.4365160381662473,
      "eval_nq_token_set_f1_sem": 0.005191955086409502,
      "eval_nq_token_set_precision": 0.38999808114205353,
      "eval_nq_token_set_recall": 0.5080066668214502,
      "eval_nq_true_num_tokens": 64.0,
      "step": 48750
    },
    {
      "epoch": 9.36,
      "learning_rate": 0.001,
      "loss": 2.7599,
      "step": 48756
    },
    {
      "epoch": 9.36,
      "learning_rate": 0.001,
      "loss": 2.755,
      "step": 48768
    },
    {
      "epoch": 9.37,
      "learning_rate": 0.001,
      "loss": 2.7586,
      "step": 48780
    },
    {
      "epoch": 9.37,
      "learning_rate": 0.001,
      "loss": 2.7593,
      "step": 48792
    },
    {
      "epoch": 9.37,
      "learning_rate": 0.001,
      "loss": 2.7457,
      "step": 48804
    },
    {
      "epoch": 9.37,
      "learning_rate": 0.001,
      "loss": 2.7441,
      "step": 48816
    },
    {
      "epoch": 9.38,
      "learning_rate": 0.001,
      "loss": 2.7489,
      "step": 48828
    },
    {
      "epoch": 9.38,
      "learning_rate": 0.001,
      "loss": 2.7499,
      "step": 48840
    },
    {
      "epoch": 9.38,
      "learning_rate": 0.001,
      "loss": 2.7431,
      "step": 48852
    },
    {
      "epoch": 9.38,
      "learning_rate": 0.001,
      "loss": 2.7586,
      "step": 48864
    },
    {
      "epoch": 9.38,
      "learning_rate": 0.001,
      "loss": 2.7487,
      "step": 48876
    },
    {
      "epoch": 9.39,
      "learning_rate": 0.001,
      "loss": 2.7443,
      "step": 48888
    },
    {
      "epoch": 9.39,
      "learning_rate": 0.001,
      "loss": 2.747,
      "step": 48900
    },
    {
      "epoch": 9.39,
      "learning_rate": 0.001,
      "loss": 2.7525,
      "step": 48912
    },
    {
      "epoch": 9.39,
      "learning_rate": 0.001,
      "loss": 2.754,
      "step": 48924
    },
    {
      "epoch": 9.4,
      "learning_rate": 0.001,
      "loss": 2.7516,
      "step": 48936
    },
    {
      "epoch": 9.4,
      "learning_rate": 0.001,
      "loss": 2.7499,
      "step": 48948
    },
    {
      "epoch": 9.4,
      "learning_rate": 0.001,
      "loss": 2.7482,
      "step": 48960
    },
    {
      "epoch": 9.4,
      "learning_rate": 0.001,
      "loss": 2.7515,
      "step": 48972
    },
    {
      "epoch": 9.41,
      "learning_rate": 0.001,
      "loss": 2.744,
      "step": 48984
    },
    {
      "epoch": 9.41,
      "learning_rate": 0.001,
      "loss": 2.7539,
      "step": 48996
    },
    {
      "epoch": 9.41,
      "learning_rate": 0.001,
      "loss": 2.7483,
      "step": 49008
    },
    {
      "epoch": 9.41,
      "learning_rate": 0.001,
      "loss": 2.7514,
      "step": 49020
    },
    {
      "epoch": 9.41,
      "learning_rate": 0.001,
      "loss": 2.7509,
      "step": 49032
    },
    {
      "epoch": 9.42,
      "learning_rate": 0.001,
      "loss": 2.7539,
      "step": 49044
    },
    {
      "epoch": 9.42,
      "learning_rate": 0.001,
      "loss": 2.7379,
      "step": 49056
    },
    {
      "epoch": 9.42,
      "learning_rate": 0.001,
      "loss": 2.7464,
      "step": 49068
    },
    {
      "epoch": 9.42,
      "learning_rate": 0.001,
      "loss": 2.7512,
      "step": 49080
    },
    {
      "epoch": 9.43,
      "learning_rate": 0.001,
      "loss": 2.7504,
      "step": 49092
    },
    {
      "epoch": 9.43,
      "learning_rate": 0.001,
      "loss": 2.7508,
      "step": 49104
    },
    {
      "epoch": 9.43,
      "learning_rate": 0.001,
      "loss": 2.7522,
      "step": 49116
    },
    {
      "epoch": 9.43,
      "learning_rate": 0.001,
      "loss": 2.7509,
      "step": 49128
    },
    {
      "epoch": 9.44,
      "learning_rate": 0.001,
      "loss": 2.7486,
      "step": 49140
    },
    {
      "epoch": 9.44,
      "learning_rate": 0.001,
      "loss": 2.7575,
      "step": 49152
    },
    {
      "epoch": 9.44,
      "learning_rate": 0.001,
      "loss": 2.7398,
      "step": 49164
    },
    {
      "epoch": 9.44,
      "learning_rate": 0.001,
      "loss": 2.761,
      "step": 49176
    },
    {
      "epoch": 9.44,
      "learning_rate": 0.001,
      "loss": 2.7516,
      "step": 49188
    },
    {
      "epoch": 9.45,
      "learning_rate": 0.001,
      "loss": 2.7523,
      "step": 49200
    },
    {
      "epoch": 9.45,
      "learning_rate": 0.001,
      "loss": 2.7451,
      "step": 49212
    },
    {
      "epoch": 9.45,
      "learning_rate": 0.001,
      "loss": 2.7512,
      "step": 49224
    },
    {
      "epoch": 9.45,
      "learning_rate": 0.001,
      "loss": 2.7526,
      "step": 49236
    },
    {
      "epoch": 9.46,
      "learning_rate": 0.001,
      "loss": 2.7538,
      "step": 49248
    },
    {
      "epoch": 9.46,
      "learning_rate": 0.001,
      "loss": 2.7435,
      "step": 49260
    },
    {
      "epoch": 9.46,
      "learning_rate": 0.001,
      "loss": 2.7519,
      "step": 49272
    },
    {
      "epoch": 9.46,
      "learning_rate": 0.001,
      "loss": 2.7502,
      "step": 49284
    },
    {
      "epoch": 9.47,
      "learning_rate": 0.001,
      "loss": 2.7587,
      "step": 49296
    },
    {
      "epoch": 9.47,
      "learning_rate": 0.001,
      "loss": 2.7557,
      "step": 49308
    },
    {
      "epoch": 9.47,
      "learning_rate": 0.001,
      "loss": 2.7477,
      "step": 49320
    },
    {
      "epoch": 9.47,
      "learning_rate": 0.001,
      "loss": 2.7637,
      "step": 49332
    },
    {
      "epoch": 9.47,
      "learning_rate": 0.001,
      "loss": 2.7625,
      "step": 49344
    },
    {
      "epoch": 9.48,
      "learning_rate": 0.001,
      "loss": 2.7565,
      "step": 49356
    },
    {
      "epoch": 9.48,
      "learning_rate": 0.001,
      "loss": 2.7475,
      "step": 49368
    },
    {
      "epoch": 9.48,
      "eval_ag_news_accuracy": 0.3011875,
      "eval_ag_news_bleu_score": 4.357760725215447,
      "eval_ag_news_bleu_score_sem": 0.14459949443749961,
      "eval_ag_news_emb_cos_sim": 0.776526927947998,
      "eval_ag_news_emb_cos_sim_sem": 0.006940873404586609,
      "eval_ag_news_emb_top1_equal": 0.234375,
      "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.7870981693267822,
      "eval_ag_news_n_ngrams_match_1": 12.952,
      "eval_ag_news_n_ngrams_match_2": 2.692,
      "eval_ag_news_n_ngrams_match_3": 0.726,
      "eval_ag_news_num_pred_words": 46.32,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 44.12816184926364,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.31948391192609604,
      "eval_ag_news_runtime": 11.3224,
      "eval_ag_news_samples_per_second": 44.16,
      "eval_ag_news_steps_per_second": 0.088,
      "eval_ag_news_token_set_f1": 0.3277880997004768,
      "eval_ag_news_token_set_f1_sem": 0.004296509623992959,
      "eval_ag_news_token_set_precision": 0.306916125249126,
      "eval_ag_news_token_set_recall": 0.36826670582017923,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 49375
    },
    {
      "epoch": 9.48,
      "eval_anthropic_toxic_prompts_accuracy": 0.10715625,
      "eval_anthropic_toxic_prompts_bleu_score": 2.8659356446403557,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11802020499974405,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6357718706130981,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00998766513217209,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.4657299518585205,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.614,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.646,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.614,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.482,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 31.999809570448026,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.19273474312050431,
      "eval_anthropic_toxic_prompts_runtime": 12.8921,
      "eval_anthropic_toxic_prompts_samples_per_second": 38.783,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.078,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.33540217317294596,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00617058560226162,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.3952195862486079,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.325109361773473,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 49375
    },
    {
      "epoch": 9.48,
      "eval_arxiv_accuracy": 0.32528125,
      "eval_arxiv_bleu_score": 3.8059983785055898,
      "eval_arxiv_bleu_score_sem": 0.11226245781831974,
      "eval_arxiv_emb_cos_sim": 0.7064568996429443,
      "eval_arxiv_emb_cos_sim_sem": 0.007243781487745389,
      "eval_arxiv_emb_top1_equal": 0.1796875,
      "eval_arxiv_emb_top1_equal_sem": 0.034068008879424266,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.636347770690918,
      "eval_arxiv_n_ngrams_match_1": 13.794,
      "eval_arxiv_n_ngrams_match_2": 2.508,
      "eval_arxiv_n_ngrams_match_3": 0.51,
      "eval_arxiv_num_pred_words": 40.514,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 37.952970343168325,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.32522123159039995,
      "eval_arxiv_runtime": 12.0727,
      "eval_arxiv_samples_per_second": 41.416,
      "eval_arxiv_steps_per_second": 0.083,
      "eval_arxiv_token_set_f1": 0.3223919267568179,
      "eval_arxiv_token_set_f1_sem": 0.003921306275013076,
      "eval_arxiv_token_set_precision": 0.2682886994738338,
      "eval_arxiv_token_set_recall": 0.42271750291043814,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 49375
    },
    {
      "epoch": 9.48,
      "eval_python_code_alpaca_accuracy": 0.14565625,
      "eval_python_code_alpaca_bleu_score": 3.672034122069554,
      "eval_python_code_alpaca_bleu_score_sem": 0.1187501943966979,
      "eval_python_code_alpaca_emb_cos_sim": 0.7004073858261108,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010131539985009107,
      "eval_python_code_alpaca_emb_top1_equal": 0.1484375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.1419548988342285,
      "eval_python_code_alpaca_n_ngrams_match_1": 8.712,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.202,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.618,
      "eval_python_code_alpaca_num_pred_words": 43.778,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 23.149076757101334,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.2891343836772943,
      "eval_python_code_alpaca_runtime": 11.0329,
      "eval_python_code_alpaca_samples_per_second": 45.319,
      "eval_python_code_alpaca_steps_per_second": 0.091,
      "eval_python_code_alpaca_token_set_f1": 0.43220825848463384,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005445043835031553,
      "eval_python_code_alpaca_token_set_precision": 0.46672497782899824,
      "eval_python_code_alpaca_token_set_recall": 0.4240971527630146,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 49375
    },
    {
      "epoch": 9.48,
      "eval_wikibio_accuracy": 0.29778125,
      "eval_wikibio_bleu_score": 5.119697203143855,
      "eval_wikibio_bleu_score_sem": 0.18412999052839607,
      "eval_wikibio_emb_cos_sim": 0.6921262145042419,
      "eval_wikibio_emb_cos_sim_sem": 0.011651315829641467,
      "eval_wikibio_emb_top1_equal": 0.125,
      "eval_wikibio_emb_top1_equal_sem": 0.02934655822437397,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.101794719696045,
      "eval_wikibio_n_ngrams_match_1": 9.74,
      "eval_wikibio_n_ngrams_match_2": 3.076,
      "eval_wikibio_n_ngrams_match_3": 1.02,
      "eval_wikibio_num_pred_words": 38.008,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 60.44867873674017,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.32660946377484895,
      "eval_wikibio_runtime": 10.0789,
      "eval_wikibio_samples_per_second": 49.609,
      "eval_wikibio_steps_per_second": 0.099,
      "eval_wikibio_token_set_f1": 0.3051208709511552,
      "eval_wikibio_token_set_f1_sem": 0.005336039690711099,
      "eval_wikibio_token_set_precision": 0.31516698974788926,
      "eval_wikibio_token_set_recall": 0.3107078789718283,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 49375
    },
    {
      "epoch": 9.48,
      "eval_nq_accuracy": 0.50165625,
      "eval_nq_bleu_score": 10.328163708855888,
      "eval_nq_bleu_score_sem": 0.4448969767087939,
      "eval_nq_emb_cos_sim": 0.8024446964263916,
      "eval_nq_emb_cos_sim_sem": 0.0077750859771627346,
      "eval_nq_emb_top1_equal": 0.234375,
      "eval_nq_emb_top1_equal_sem": 0.03758909358128201,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.3828232288360596,
      "eval_nq_n_ngrams_match_1": 21.804,
      "eval_nq_n_ngrams_match_2": 7.586,
      "eval_nq_n_ngrams_match_3": 3.342,
      "eval_nq_num_pred_words": 49.382,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 10.835450678712899,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.41610867838610055,
      "eval_nq_runtime": 10.7151,
      "eval_nq_samples_per_second": 46.663,
      "eval_nq_steps_per_second": 0.093,
      "eval_nq_token_set_f1": 0.43548373784904204,
      "eval_nq_token_set_f1_sem": 0.005039144897405993,
      "eval_nq_token_set_precision": 0.3877664544571668,
      "eval_nq_token_set_recall": 0.5082956347348438,
      "eval_nq_true_num_tokens": 64.0,
      "step": 49375
    },
    {
      "epoch": 9.48,
      "learning_rate": 0.001,
      "loss": 2.7451,
      "step": 49380
    },
    {
      "epoch": 9.48,
      "learning_rate": 0.001,
      "loss": 2.7433,
      "step": 49392
    },
    {
      "epoch": 9.49,
      "learning_rate": 0.001,
      "loss": 2.7543,
      "step": 49404
    },
    {
      "epoch": 9.49,
      "learning_rate": 0.001,
      "loss": 2.745,
      "step": 49416
    },
    {
      "epoch": 9.49,
      "learning_rate": 0.001,
      "loss": 2.733,
      "step": 49428
    },
    {
      "epoch": 9.49,
      "learning_rate": 0.001,
      "loss": 2.7444,
      "step": 49440
    },
    {
      "epoch": 9.5,
      "learning_rate": 0.001,
      "loss": 2.7471,
      "step": 49452
    },
    {
      "epoch": 9.5,
      "learning_rate": 0.001,
      "loss": 2.7535,
      "step": 49464
    },
    {
      "epoch": 9.5,
      "learning_rate": 0.001,
      "loss": 2.7492,
      "step": 49476
    },
    {
      "epoch": 9.5,
      "learning_rate": 0.001,
      "loss": 2.7472,
      "step": 49488
    },
    {
      "epoch": 9.5,
      "learning_rate": 0.001,
      "loss": 2.7531,
      "step": 49500
    },
    {
      "epoch": 9.51,
      "learning_rate": 0.001,
      "loss": 2.758,
      "step": 49512
    },
    {
      "epoch": 9.51,
      "learning_rate": 0.001,
      "loss": 2.7482,
      "step": 49524
    },
    {
      "epoch": 9.51,
      "learning_rate": 0.001,
      "loss": 2.7561,
      "step": 49536
    },
    {
      "epoch": 9.51,
      "learning_rate": 0.001,
      "loss": 2.7579,
      "step": 49548
    },
    {
      "epoch": 9.52,
      "learning_rate": 0.001,
      "loss": 2.7521,
      "step": 49560
    },
    {
      "epoch": 9.52,
      "learning_rate": 0.001,
      "loss": 2.755,
      "step": 49572
    },
    {
      "epoch": 9.52,
      "learning_rate": 0.001,
      "loss": 2.7384,
      "step": 49584
    },
    {
      "epoch": 9.52,
      "learning_rate": 0.001,
      "loss": 2.7443,
      "step": 49596
    },
    {
      "epoch": 9.53,
      "learning_rate": 0.001,
      "loss": 2.7304,
      "step": 49608
    },
    {
      "epoch": 9.53,
      "learning_rate": 0.001,
      "loss": 2.7553,
      "step": 49620
    },
    {
      "epoch": 9.53,
      "learning_rate": 0.001,
      "loss": 2.7575,
      "step": 49632
    },
    {
      "epoch": 9.53,
      "learning_rate": 0.001,
      "loss": 2.7403,
      "step": 49644
    },
    {
      "epoch": 9.53,
      "learning_rate": 0.001,
      "loss": 2.7566,
      "step": 49656
    },
    {
      "epoch": 9.54,
      "learning_rate": 0.001,
      "loss": 2.742,
      "step": 49668
    },
    {
      "epoch": 9.54,
      "learning_rate": 0.001,
      "loss": 2.7477,
      "step": 49680
    },
    {
      "epoch": 9.54,
      "learning_rate": 0.001,
      "loss": 2.7456,
      "step": 49692
    },
    {
      "epoch": 9.54,
      "learning_rate": 0.001,
      "loss": 2.7419,
      "step": 49704
    },
    {
      "epoch": 9.55,
      "learning_rate": 0.001,
      "loss": 2.7433,
      "step": 49716
    },
    {
      "epoch": 9.55,
      "learning_rate": 0.001,
      "loss": 2.7371,
      "step": 49728
    },
    {
      "epoch": 9.55,
      "learning_rate": 0.001,
      "loss": 2.7544,
      "step": 49740
    },
    {
      "epoch": 9.55,
      "learning_rate": 0.001,
      "loss": 2.747,
      "step": 49752
    },
    {
      "epoch": 9.56,
      "learning_rate": 0.001,
      "loss": 2.7396,
      "step": 49764
    },
    {
      "epoch": 9.56,
      "learning_rate": 0.001,
      "loss": 2.7428,
      "step": 49776
    },
    {
      "epoch": 9.56,
      "learning_rate": 0.001,
      "loss": 2.7432,
      "step": 49788
    },
    {
      "epoch": 9.56,
      "learning_rate": 0.001,
      "loss": 2.748,
      "step": 49800
    },
    {
      "epoch": 9.56,
      "learning_rate": 0.001,
      "loss": 2.7556,
      "step": 49812
    },
    {
      "epoch": 9.57,
      "learning_rate": 0.001,
      "loss": 2.742,
      "step": 49824
    },
    {
      "epoch": 9.57,
      "learning_rate": 0.001,
      "loss": 2.7481,
      "step": 49836
    },
    {
      "epoch": 9.57,
      "learning_rate": 0.001,
      "loss": 2.7448,
      "step": 49848
    },
    {
      "epoch": 9.57,
      "learning_rate": 0.001,
      "loss": 2.7375,
      "step": 49860
    },
    {
      "epoch": 9.58,
      "learning_rate": 0.001,
      "loss": 2.7477,
      "step": 49872
    },
    {
      "epoch": 9.58,
      "learning_rate": 0.001,
      "loss": 2.7526,
      "step": 49884
    },
    {
      "epoch": 9.58,
      "learning_rate": 0.001,
      "loss": 2.7439,
      "step": 49896
    },
    {
      "epoch": 9.58,
      "learning_rate": 0.001,
      "loss": 2.7462,
      "step": 49908
    },
    {
      "epoch": 9.59,
      "learning_rate": 0.001,
      "loss": 2.7546,
      "step": 49920
    },
    {
      "epoch": 9.59,
      "learning_rate": 0.001,
      "loss": 2.7428,
      "step": 49932
    },
    {
      "epoch": 9.59,
      "learning_rate": 0.001,
      "loss": 2.7475,
      "step": 49944
    },
    {
      "epoch": 9.59,
      "learning_rate": 0.001,
      "loss": 2.7541,
      "step": 49956
    },
    {
      "epoch": 9.59,
      "learning_rate": 0.001,
      "loss": 2.7523,
      "step": 49968
    },
    {
      "epoch": 9.6,
      "learning_rate": 0.001,
      "loss": 2.7482,
      "step": 49980
    },
    {
      "epoch": 9.6,
      "learning_rate": 0.001,
      "loss": 2.7436,
      "step": 49992
    },
    {
      "epoch": 9.6,
      "eval_ag_news_accuracy": 0.30128125,
      "eval_ag_news_bleu_score": 4.401626065564337,
      "eval_ag_news_bleu_score_sem": 0.1445457507032042,
      "eval_ag_news_emb_cos_sim": 0.7778183221817017,
      "eval_ag_news_emb_cos_sim_sem": 0.008032925908373208,
      "eval_ag_news_emb_top1_equal": 0.2421875,
      "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.772658109664917,
      "eval_ag_news_n_ngrams_match_1": 13.148,
      "eval_ag_news_n_ngrams_match_2": 2.694,
      "eval_ag_news_n_ngrams_match_3": 0.75,
      "eval_ag_news_num_pred_words": 46.634,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 43.49552719327691,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.32072590060030487,
      "eval_ag_news_runtime": 10.4683,
      "eval_ag_news_samples_per_second": 47.763,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.33126265519725334,
      "eval_ag_news_token_set_f1_sem": 0.004283752556446434,
      "eval_ag_news_token_set_precision": 0.3102873960943213,
      "eval_ag_news_token_set_recall": 0.371257938336186,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 50000
    },
    {
      "epoch": 9.6,
      "eval_anthropic_toxic_prompts_accuracy": 0.10528125,
      "eval_anthropic_toxic_prompts_bleu_score": 2.646041919126248,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10067921182804163,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6425341963768005,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009844991464517635,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.45570969581604,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.612,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.612,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.524,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.558,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 31.680764411817897,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.19188037681201425,
      "eval_anthropic_toxic_prompts_runtime": 9.9395,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.304,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.101,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3319364934553117,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006188616059526924,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.39262972766636134,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3185882702626803,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 50000
    },
    {
      "epoch": 9.6,
      "eval_arxiv_accuracy": 0.32771875,
      "eval_arxiv_bleu_score": 3.7701257023666095,
      "eval_arxiv_bleu_score_sem": 0.11082359700177555,
      "eval_arxiv_emb_cos_sim": 0.7135499715805054,
      "eval_arxiv_emb_cos_sim_sem": 0.008002433980564593,
      "eval_arxiv_emb_top1_equal": 0.203125,
      "eval_arxiv_emb_top1_equal_sem": 0.03570055125142555,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.63419246673584,
      "eval_arxiv_n_ngrams_match_1": 13.586,
      "eval_arxiv_n_ngrams_match_2": 2.446,
      "eval_arxiv_n_ngrams_match_3": 0.502,
      "eval_arxiv_num_pred_words": 39.776,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 37.87125824491667,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.32475878093368343,
      "eval_arxiv_runtime": 10.5645,
      "eval_arxiv_samples_per_second": 47.328,
      "eval_arxiv_steps_per_second": 0.095,
      "eval_arxiv_token_set_f1": 0.32280740944661535,
      "eval_arxiv_token_set_f1_sem": 0.004062348152172144,
      "eval_arxiv_token_set_precision": 0.26694253424269354,
      "eval_arxiv_token_set_recall": 0.42598548702306865,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 50000
    },
    {
      "epoch": 9.6,
      "eval_python_code_alpaca_accuracy": 0.14696875,
      "eval_python_code_alpaca_bleu_score": 3.6439864258715815,
      "eval_python_code_alpaca_bleu_score_sem": 0.11382405744825667,
      "eval_python_code_alpaca_emb_cos_sim": 0.7217139005661011,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007808518964744001,
      "eval_python_code_alpaca_emb_top1_equal": 0.1171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.1127827167510986,
      "eval_python_code_alpaca_n_ngrams_match_1": 8.926,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.31,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.616,
      "eval_python_code_alpaca_num_pred_words": 44.906,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 22.483522705973908,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.29347859700919776,
      "eval_python_code_alpaca_runtime": 9.8308,
      "eval_python_code_alpaca_samples_per_second": 50.86,
      "eval_python_code_alpaca_steps_per_second": 0.102,
      "eval_python_code_alpaca_token_set_f1": 0.44862252730393704,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005301960274074061,
      "eval_python_code_alpaca_token_set_precision": 0.48279054465557647,
      "eval_python_code_alpaca_token_set_recall": 0.4414768495568832,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 50000
    },
    {
      "epoch": 9.6,
      "eval_wikibio_accuracy": 0.30303125,
      "eval_wikibio_bleu_score": 5.343358124895723,
      "eval_wikibio_bleu_score_sem": 0.19038480264481653,
      "eval_wikibio_emb_cos_sim": 0.7051657438278198,
      "eval_wikibio_emb_cos_sim_sem": 0.011177690619500295,
      "eval_wikibio_emb_top1_equal": 0.1640625,
      "eval_wikibio_emb_top1_equal_sem": 0.03286167651298939,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.021156311035156,
      "eval_wikibio_n_ngrams_match_1": 9.58,
      "eval_wikibio_n_ngrams_match_2": 3.068,
      "eval_wikibio_n_ngrams_match_3": 1.038,
      "eval_wikibio_num_pred_words": 36.482,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 55.76555088221737,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3313609812011307,
      "eval_wikibio_runtime": 10.4864,
      "eval_wikibio_samples_per_second": 47.681,
      "eval_wikibio_steps_per_second": 0.095,
      "eval_wikibio_token_set_f1": 0.3034471501935343,
      "eval_wikibio_token_set_f1_sem": 0.005550524035739132,
      "eval_wikibio_token_set_precision": 0.3104102153043557,
      "eval_wikibio_token_set_recall": 0.3109466731385039,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 50000
    },
    {
      "epoch": 9.6,
      "eval_nq_accuracy": 0.501,
      "eval_nq_bleu_score": 10.158360347363876,
      "eval_nq_bleu_score_sem": 0.4273361005121561,
      "eval_nq_emb_cos_sim": 0.7996821403503418,
      "eval_nq_emb_cos_sim_sem": 0.009176689302560135,
      "eval_nq_emb_top1_equal": 0.2421875,
      "eval_nq_emb_top1_equal_sem": 0.038014990119662626,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.3760902881622314,
      "eval_nq_n_ngrams_match_1": 21.654,
      "eval_nq_n_ngrams_match_2": 7.46,
      "eval_nq_n_ngrams_match_3": 3.276,
      "eval_nq_num_pred_words": 48.66,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 10.76274128082477,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4169310345619456,
      "eval_nq_runtime": 10.4174,
      "eval_nq_samples_per_second": 47.996,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.4342935261692336,
      "eval_nq_token_set_f1_sem": 0.005008949251638485,
      "eval_nq_token_set_precision": 0.38577029760949005,
      "eval_nq_token_set_recall": 0.5080714527538779,
      "eval_nq_true_num_tokens": 64.0,
      "step": 50000
    },
    {
      "epoch": 9.6,
      "learning_rate": 0.001,
      "loss": 2.7346,
      "step": 50004
    },
    {
      "epoch": 9.6,
      "learning_rate": 0.001,
      "loss": 2.7357,
      "step": 50016
    },
    {
      "epoch": 9.61,
      "learning_rate": 0.001,
      "loss": 2.7565,
      "step": 50028
    },
    {
      "epoch": 9.61,
      "learning_rate": 0.001,
      "loss": 2.7423,
      "step": 50040
    },
    {
      "epoch": 9.61,
      "learning_rate": 0.001,
      "loss": 2.7457,
      "step": 50052
    },
    {
      "epoch": 9.61,
      "learning_rate": 0.001,
      "loss": 2.7506,
      "step": 50064
    },
    {
      "epoch": 9.62,
      "learning_rate": 0.001,
      "loss": 2.7459,
      "step": 50076
    },
    {
      "epoch": 9.62,
      "learning_rate": 0.001,
      "loss": 2.7411,
      "step": 50088
    },
    {
      "epoch": 9.62,
      "learning_rate": 0.001,
      "loss": 2.7355,
      "step": 50100
    },
    {
      "epoch": 9.62,
      "learning_rate": 0.001,
      "loss": 2.7392,
      "step": 50112
    },
    {
      "epoch": 9.62,
      "learning_rate": 0.001,
      "loss": 2.7471,
      "step": 50124
    },
    {
      "epoch": 9.63,
      "learning_rate": 0.001,
      "loss": 2.7457,
      "step": 50136
    },
    {
      "epoch": 9.63,
      "learning_rate": 0.001,
      "loss": 2.7471,
      "step": 50148
    },
    {
      "epoch": 9.63,
      "learning_rate": 0.001,
      "loss": 2.7486,
      "step": 50160
    },
    {
      "epoch": 9.63,
      "learning_rate": 0.001,
      "loss": 2.7439,
      "step": 50172
    },
    {
      "epoch": 9.64,
      "learning_rate": 0.001,
      "loss": 2.7514,
      "step": 50184
    },
    {
      "epoch": 9.64,
      "learning_rate": 0.001,
      "loss": 2.7474,
      "step": 50196
    },
    {
      "epoch": 9.64,
      "learning_rate": 0.001,
      "loss": 2.7514,
      "step": 50208
    },
    {
      "epoch": 9.64,
      "learning_rate": 0.001,
      "loss": 2.7509,
      "step": 50220
    },
    {
      "epoch": 9.65,
      "learning_rate": 0.001,
      "loss": 2.7359,
      "step": 50232
    },
    {
      "epoch": 9.65,
      "learning_rate": 0.001,
      "loss": 2.7599,
      "step": 50244
    },
    {
      "epoch": 9.65,
      "learning_rate": 0.001,
      "loss": 2.7559,
      "step": 50256
    },
    {
      "epoch": 9.65,
      "learning_rate": 0.001,
      "loss": 2.7367,
      "step": 50268
    },
    {
      "epoch": 9.65,
      "learning_rate": 0.001,
      "loss": 2.7404,
      "step": 50280
    },
    {
      "epoch": 9.66,
      "learning_rate": 0.001,
      "loss": 2.7505,
      "step": 50292
    },
    {
      "epoch": 9.66,
      "learning_rate": 0.001,
      "loss": 2.7478,
      "step": 50304
    },
    {
      "epoch": 9.66,
      "learning_rate": 0.001,
      "loss": 2.7466,
      "step": 50316
    },
    {
      "epoch": 9.66,
      "learning_rate": 0.001,
      "loss": 2.7385,
      "step": 50328
    },
    {
      "epoch": 9.67,
      "learning_rate": 0.001,
      "loss": 2.727,
      "step": 50340
    },
    {
      "epoch": 9.67,
      "learning_rate": 0.001,
      "loss": 2.7526,
      "step": 50352
    },
    {
      "epoch": 9.67,
      "learning_rate": 0.001,
      "loss": 2.7375,
      "step": 50364
    },
    {
      "epoch": 9.67,
      "learning_rate": 0.001,
      "loss": 2.7461,
      "step": 50376
    },
    {
      "epoch": 9.68,
      "learning_rate": 0.001,
      "loss": 2.7515,
      "step": 50388
    },
    {
      "epoch": 9.68,
      "learning_rate": 0.001,
      "loss": 2.7417,
      "step": 50400
    },
    {
      "epoch": 9.68,
      "learning_rate": 0.001,
      "loss": 2.752,
      "step": 50412
    },
    {
      "epoch": 9.68,
      "learning_rate": 0.001,
      "loss": 2.7438,
      "step": 50424
    },
    {
      "epoch": 9.68,
      "learning_rate": 0.001,
      "loss": 2.7407,
      "step": 50436
    },
    {
      "epoch": 9.69,
      "learning_rate": 0.001,
      "loss": 2.7504,
      "step": 50448
    },
    {
      "epoch": 9.69,
      "learning_rate": 0.001,
      "loss": 2.7544,
      "step": 50460
    },
    {
      "epoch": 9.69,
      "learning_rate": 0.001,
      "loss": 2.739,
      "step": 50472
    },
    {
      "epoch": 9.69,
      "learning_rate": 0.001,
      "loss": 2.744,
      "step": 50484
    },
    {
      "epoch": 9.7,
      "learning_rate": 0.001,
      "loss": 2.747,
      "step": 50496
    },
    {
      "epoch": 9.7,
      "learning_rate": 0.001,
      "loss": 2.7388,
      "step": 50508
    },
    {
      "epoch": 9.7,
      "learning_rate": 0.001,
      "loss": 2.7464,
      "step": 50520
    },
    {
      "epoch": 9.7,
      "learning_rate": 0.001,
      "loss": 2.7439,
      "step": 50532
    },
    {
      "epoch": 9.71,
      "learning_rate": 0.001,
      "loss": 2.7371,
      "step": 50544
    },
    {
      "epoch": 9.71,
      "learning_rate": 0.001,
      "loss": 2.7383,
      "step": 50556
    },
    {
      "epoch": 9.71,
      "learning_rate": 0.001,
      "loss": 2.7515,
      "step": 50568
    },
    {
      "epoch": 9.71,
      "learning_rate": 0.001,
      "loss": 2.7446,
      "step": 50580
    },
    {
      "epoch": 9.71,
      "learning_rate": 0.001,
      "loss": 2.7498,
      "step": 50592
    },
    {
      "epoch": 9.72,
      "learning_rate": 0.001,
      "loss": 2.7567,
      "step": 50604
    },
    {
      "epoch": 9.72,
      "learning_rate": 0.001,
      "loss": 2.7446,
      "step": 50616
    },
    {
      "epoch": 9.72,
      "eval_ag_news_accuracy": 0.30315625,
      "eval_ag_news_bleu_score": 4.229310166403921,
      "eval_ag_news_bleu_score_sem": 0.13653759383921774,
      "eval_ag_news_emb_cos_sim": 0.7814064025878906,
      "eval_ag_news_emb_cos_sim_sem": 0.0076000323345952765,
      "eval_ag_news_emb_top1_equal": 0.21875,
      "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.773273229598999,
      "eval_ag_news_n_ngrams_match_1": 12.932,
      "eval_ag_news_n_ngrams_match_2": 2.582,
      "eval_ag_news_n_ngrams_match_3": 0.678,
      "eval_ag_news_num_pred_words": 46.276,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 43.522290389540785,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.31875568997996345,
      "eval_ag_news_runtime": 10.7756,
      "eval_ag_news_samples_per_second": 46.401,
      "eval_ag_news_steps_per_second": 0.093,
      "eval_ag_news_token_set_f1": 0.32706445291604713,
      "eval_ag_news_token_set_f1_sem": 0.004265483933419019,
      "eval_ag_news_token_set_precision": 0.30472529299794976,
      "eval_ag_news_token_set_recall": 0.3699273734078367,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 50625
    },
    {
      "epoch": 9.72,
      "eval_anthropic_toxic_prompts_accuracy": 0.103875,
      "eval_anthropic_toxic_prompts_bleu_score": 2.75102727221293,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11179479286695834,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6350839734077454,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010060457349361961,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.4477925300598145,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.534,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.584,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.544,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.946,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 31.43093283268095,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.1924028521684284,
      "eval_anthropic_toxic_prompts_runtime": 11.0808,
      "eval_anthropic_toxic_prompts_samples_per_second": 45.123,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.09,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.327072750899434,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006390408494524602,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.3914232039580596,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3124122785138938,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 50625
    },
    {
      "epoch": 9.72,
      "eval_arxiv_accuracy": 0.32596875,
      "eval_arxiv_bleu_score": 3.7227792440478975,
      "eval_arxiv_bleu_score_sem": 0.10689662456957807,
      "eval_arxiv_emb_cos_sim": 0.7148361802101135,
      "eval_arxiv_emb_cos_sim_sem": 0.008313577821340205,
      "eval_arxiv_emb_top1_equal": 0.3359375,
      "eval_arxiv_emb_top1_equal_sem": 0.04191137143408563,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.6310336589813232,
      "eval_arxiv_n_ngrams_match_1": 13.58,
      "eval_arxiv_n_ngrams_match_2": 2.41,
      "eval_arxiv_n_ngrams_match_3": 0.502,
      "eval_arxiv_num_pred_words": 39.048,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 37.75181896287888,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3252440222441543,
      "eval_arxiv_runtime": 9.9786,
      "eval_arxiv_samples_per_second": 50.107,
      "eval_arxiv_steps_per_second": 0.1,
      "eval_arxiv_token_set_f1": 0.3204262909242834,
      "eval_arxiv_token_set_f1_sem": 0.004326924854411187,
      "eval_arxiv_token_set_precision": 0.2664551550336275,
      "eval_arxiv_token_set_recall": 0.42331837328961114,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 50625
    },
    {
      "epoch": 9.72,
      "eval_python_code_alpaca_accuracy": 0.1475,
      "eval_python_code_alpaca_bleu_score": 3.7156771148972076,
      "eval_python_code_alpaca_bleu_score_sem": 0.11075926433811897,
      "eval_python_code_alpaca_emb_cos_sim": 0.7210425138473511,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009259779180221332,
      "eval_python_code_alpaca_emb_top1_equal": 0.125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.127523899078369,
      "eval_python_code_alpaca_n_ngrams_match_1": 8.692,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.25,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.618,
      "eval_python_code_alpaca_num_pred_words": 42.658,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 22.81741132382553,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.2960598726055982,
      "eval_python_code_alpaca_runtime": 11.2128,
      "eval_python_code_alpaca_samples_per_second": 44.592,
      "eval_python_code_alpaca_steps_per_second": 0.089,
      "eval_python_code_alpaca_token_set_f1": 0.4352501196633042,
      "eval_python_code_alpaca_token_set_f1_sem": 0.00573300760460424,
      "eval_python_code_alpaca_token_set_precision": 0.47193294741093567,
      "eval_python_code_alpaca_token_set_recall": 0.42933250784160426,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 50625
    },
    {
      "epoch": 9.72,
      "eval_wikibio_accuracy": 0.30403125,
      "eval_wikibio_bleu_score": 5.09087360581446,
      "eval_wikibio_bleu_score_sem": 0.17910898241923462,
      "eval_wikibio_emb_cos_sim": 0.6993159651756287,
      "eval_wikibio_emb_cos_sim_sem": 0.013285735053490507,
      "eval_wikibio_emb_top1_equal": 0.1328125,
      "eval_wikibio_emb_top1_equal_sem": 0.030114394778901498,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.0744476318359375,
      "eval_wikibio_n_ngrams_match_1": 9.562,
      "eval_wikibio_n_ngrams_match_2": 3.03,
      "eval_wikibio_n_ngrams_match_3": 0.972,
      "eval_wikibio_num_pred_words": 36.228,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 58.81798243273018,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3304839476884093,
      "eval_wikibio_runtime": 10.36,
      "eval_wikibio_samples_per_second": 48.263,
      "eval_wikibio_steps_per_second": 0.097,
      "eval_wikibio_token_set_f1": 0.2993221571546114,
      "eval_wikibio_token_set_f1_sem": 0.00557430686192778,
      "eval_wikibio_token_set_precision": 0.3083853823966142,
      "eval_wikibio_token_set_recall": 0.30611439348632996,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 50625
    },
    {
      "epoch": 9.72,
      "eval_nq_accuracy": 0.50015625,
      "eval_nq_bleu_score": 10.113071648166923,
      "eval_nq_bleu_score_sem": 0.4386009372837779,
      "eval_nq_emb_cos_sim": 0.8107307553291321,
      "eval_nq_emb_cos_sim_sem": 0.007513519342581586,
      "eval_nq_emb_top1_equal": 0.234375,
      "eval_nq_emb_top1_equal_sem": 0.03758909358128201,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.3781211376190186,
      "eval_nq_n_ngrams_match_1": 21.788,
      "eval_nq_n_ngrams_match_2": 7.482,
      "eval_nq_n_ngrams_match_3": 3.31,
      "eval_nq_num_pred_words": 48.79,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 10.784620997794168,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4199325728624945,
      "eval_nq_runtime": 10.4835,
      "eval_nq_samples_per_second": 47.694,
      "eval_nq_steps_per_second": 0.095,
      "eval_nq_token_set_f1": 0.436172696277002,
      "eval_nq_token_set_f1_sem": 0.00493042015432282,
      "eval_nq_token_set_precision": 0.38975861990561755,
      "eval_nq_token_set_recall": 0.5055145741123724,
      "eval_nq_true_num_tokens": 64.0,
      "step": 50625
    },
    {
      "epoch": 9.72,
      "learning_rate": 0.001,
      "loss": 2.7509,
      "step": 50628
    },
    {
      "epoch": 9.72,
      "learning_rate": 0.001,
      "loss": 2.7581,
      "step": 50640
    },
    {
      "epoch": 9.73,
      "learning_rate": 0.001,
      "loss": 2.7493,
      "step": 50652
    },
    {
      "epoch": 9.73,
      "learning_rate": 0.001,
      "loss": 2.7315,
      "step": 50664
    },
    {
      "epoch": 9.73,
      "learning_rate": 0.001,
      "loss": 2.7589,
      "step": 50676
    },
    {
      "epoch": 9.73,
      "learning_rate": 0.001,
      "loss": 2.7444,
      "step": 50688
    },
    {
      "epoch": 9.74,
      "learning_rate": 0.001,
      "loss": 2.7568,
      "step": 50700
    },
    {
      "epoch": 9.74,
      "learning_rate": 0.001,
      "loss": 2.747,
      "step": 50712
    },
    {
      "epoch": 9.74,
      "learning_rate": 0.001,
      "loss": 2.7505,
      "step": 50724
    },
    {
      "epoch": 9.74,
      "learning_rate": 0.001,
      "loss": 2.7448,
      "step": 50736
    },
    {
      "epoch": 9.74,
      "learning_rate": 0.001,
      "loss": 2.7422,
      "step": 50748
    },
    {
      "epoch": 9.75,
      "learning_rate": 0.001,
      "loss": 2.7449,
      "step": 50760
    },
    {
      "epoch": 9.75,
      "learning_rate": 0.001,
      "loss": 2.7501,
      "step": 50772
    },
    {
      "epoch": 9.75,
      "learning_rate": 0.001,
      "loss": 2.7481,
      "step": 50784
    },
    {
      "epoch": 9.75,
      "learning_rate": 0.001,
      "loss": 2.7487,
      "step": 50796
    },
    {
      "epoch": 9.76,
      "learning_rate": 0.001,
      "loss": 2.7426,
      "step": 50808
    },
    {
      "epoch": 9.76,
      "learning_rate": 0.001,
      "loss": 2.7384,
      "step": 50820
    },
    {
      "epoch": 9.76,
      "learning_rate": 0.001,
      "loss": 2.7328,
      "step": 50832
    },
    {
      "epoch": 9.76,
      "learning_rate": 0.001,
      "loss": 2.7513,
      "step": 50844
    },
    {
      "epoch": 9.76,
      "learning_rate": 0.001,
      "loss": 2.7395,
      "step": 50856
    },
    {
      "epoch": 9.77,
      "learning_rate": 0.001,
      "loss": 2.7481,
      "step": 50868
    },
    {
      "epoch": 9.77,
      "learning_rate": 0.001,
      "loss": 2.7432,
      "step": 50880
    },
    {
      "epoch": 9.77,
      "learning_rate": 0.001,
      "loss": 2.7414,
      "step": 50892
    },
    {
      "epoch": 9.77,
      "learning_rate": 0.001,
      "loss": 2.7488,
      "step": 50904
    },
    {
      "epoch": 9.78,
      "learning_rate": 0.001,
      "loss": 2.7486,
      "step": 50916
    },
    {
      "epoch": 9.78,
      "learning_rate": 0.001,
      "loss": 2.7469,
      "step": 50928
    },
    {
      "epoch": 9.78,
      "learning_rate": 0.001,
      "loss": 2.7547,
      "step": 50940
    },
    {
      "epoch": 9.78,
      "learning_rate": 0.001,
      "loss": 2.7426,
      "step": 50952
    },
    {
      "epoch": 9.79,
      "learning_rate": 0.001,
      "loss": 2.7471,
      "step": 50964
    },
    {
      "epoch": 9.79,
      "learning_rate": 0.001,
      "loss": 2.7456,
      "step": 50976
    },
    {
      "epoch": 9.79,
      "learning_rate": 0.001,
      "loss": 2.7451,
      "step": 50988
    },
    {
      "epoch": 9.79,
      "learning_rate": 0.001,
      "loss": 2.7454,
      "step": 51000
    },
    {
      "epoch": 9.79,
      "learning_rate": 0.001,
      "loss": 2.7453,
      "step": 51012
    },
    {
      "epoch": 9.8,
      "learning_rate": 0.001,
      "loss": 2.7555,
      "step": 51024
    },
    {
      "epoch": 9.8,
      "learning_rate": 0.001,
      "loss": 2.7391,
      "step": 51036
    },
    {
      "epoch": 9.8,
      "learning_rate": 0.001,
      "loss": 2.7417,
      "step": 51048
    },
    {
      "epoch": 9.8,
      "learning_rate": 0.001,
      "loss": 2.7468,
      "step": 51060
    },
    {
      "epoch": 9.81,
      "learning_rate": 0.001,
      "loss": 2.7449,
      "step": 51072
    },
    {
      "epoch": 9.81,
      "learning_rate": 0.001,
      "loss": 2.744,
      "step": 51084
    },
    {
      "epoch": 9.81,
      "learning_rate": 0.001,
      "loss": 2.749,
      "step": 51096
    },
    {
      "epoch": 9.81,
      "learning_rate": 0.001,
      "loss": 2.741,
      "step": 51108
    },
    {
      "epoch": 9.82,
      "learning_rate": 0.001,
      "loss": 2.7452,
      "step": 51120
    },
    {
      "epoch": 9.82,
      "learning_rate": 0.001,
      "loss": 2.7428,
      "step": 51132
    },
    {
      "epoch": 9.82,
      "learning_rate": 0.001,
      "loss": 2.7408,
      "step": 51144
    },
    {
      "epoch": 9.82,
      "learning_rate": 0.001,
      "loss": 2.7421,
      "step": 51156
    },
    {
      "epoch": 9.82,
      "learning_rate": 0.001,
      "loss": 2.7405,
      "step": 51168
    },
    {
      "epoch": 9.83,
      "learning_rate": 0.001,
      "loss": 2.7512,
      "step": 51180
    },
    {
      "epoch": 9.83,
      "learning_rate": 0.001,
      "loss": 2.7448,
      "step": 51192
    },
    {
      "epoch": 9.83,
      "learning_rate": 0.001,
      "loss": 2.7458,
      "step": 51204
    },
    {
      "epoch": 9.83,
      "learning_rate": 0.001,
      "loss": 2.7465,
      "step": 51216
    },
    {
      "epoch": 9.84,
      "learning_rate": 0.001,
      "loss": 2.7432,
      "step": 51228
    },
    {
      "epoch": 9.84,
      "learning_rate": 0.001,
      "loss": 2.7322,
      "step": 51240
    },
    {
      "epoch": 9.84,
      "eval_ag_news_accuracy": 0.30459375,
      "eval_ag_news_bleu_score": 4.3222048084947815,
      "eval_ag_news_bleu_score_sem": 0.14534535334293044,
      "eval_ag_news_emb_cos_sim": 0.7752635478973389,
      "eval_ag_news_emb_cos_sim_sem": 0.00852406576824869,
      "eval_ag_news_emb_top1_equal": 0.265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.773444890975952,
      "eval_ag_news_n_ngrams_match_1": 12.952,
      "eval_ag_news_n_ngrams_match_2": 2.626,
      "eval_ag_news_n_ngrams_match_3": 0.744,
      "eval_ag_news_num_pred_words": 46.588,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 43.52976212712324,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3192163543966634,
      "eval_ag_news_runtime": 11.1317,
      "eval_ag_news_samples_per_second": 44.917,
      "eval_ag_news_steps_per_second": 0.09,
      "eval_ag_news_token_set_f1": 0.32389958076784425,
      "eval_ag_news_token_set_f1_sem": 0.004484999262216496,
      "eval_ag_news_token_set_precision": 0.30534438193470376,
      "eval_ag_news_token_set_recall": 0.3600787088344293,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 51250
    },
    {
      "epoch": 9.84,
      "eval_anthropic_toxic_prompts_accuracy": 0.10434375,
      "eval_anthropic_toxic_prompts_bleu_score": 2.6621837360314653,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1079963812776486,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6306837797164917,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010964805570582754,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.4620285034179688,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.432,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.528,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.522,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.518,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 31.88158286522493,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.18792841198615146,
      "eval_anthropic_toxic_prompts_runtime": 10.985,
      "eval_anthropic_toxic_prompts_samples_per_second": 45.517,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.091,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3277939466939331,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0064747906918940674,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.38528915315719225,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32018547174095824,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 51250
    },
    {
      "epoch": 9.84,
      "eval_arxiv_accuracy": 0.32771875,
      "eval_arxiv_bleu_score": 3.912059097140368,
      "eval_arxiv_bleu_score_sem": 0.11646477476420701,
      "eval_arxiv_emb_cos_sim": 0.7122216820716858,
      "eval_arxiv_emb_cos_sim_sem": 0.007527273044630805,
      "eval_arxiv_emb_top1_equal": 0.2578125,
      "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.6349117755889893,
      "eval_arxiv_n_ngrams_match_1": 13.87,
      "eval_arxiv_n_ngrams_match_2": 2.586,
      "eval_arxiv_n_ngrams_match_3": 0.558,
      "eval_arxiv_num_pred_words": 40.31,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 37.89850917599516,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.32614259780896493,
      "eval_arxiv_runtime": 11.6154,
      "eval_arxiv_samples_per_second": 43.046,
      "eval_arxiv_steps_per_second": 0.086,
      "eval_arxiv_token_set_f1": 0.32419261198933813,
      "eval_arxiv_token_set_f1_sem": 0.003989121926248758,
      "eval_arxiv_token_set_precision": 0.2711646476675902,
      "eval_arxiv_token_set_recall": 0.42298350778555,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 51250
    },
    {
      "epoch": 9.84,
      "eval_python_code_alpaca_accuracy": 0.14734375,
      "eval_python_code_alpaca_bleu_score": 3.7333977969059293,
      "eval_python_code_alpaca_bleu_score_sem": 0.11868449068358425,
      "eval_python_code_alpaca_emb_cos_sim": 0.7127091884613037,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009569808763689435,
      "eval_python_code_alpaca_emb_top1_equal": 0.125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.1019539833068848,
      "eval_python_code_alpaca_n_ngrams_match_1": 8.934,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.362,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.648,
      "eval_python_code_alpaca_num_pred_words": 43.612,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 22.24136811175948,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.29718029935615586,
      "eval_python_code_alpaca_runtime": 10.7989,
      "eval_python_code_alpaca_samples_per_second": 46.301,
      "eval_python_code_alpaca_steps_per_second": 0.093,
      "eval_python_code_alpaca_token_set_f1": 0.44729144453998904,
      "eval_python_code_alpaca_token_set_f1_sem": 0.00574181211629621,
      "eval_python_code_alpaca_token_set_precision": 0.48234530927173214,
      "eval_python_code_alpaca_token_set_recall": 0.4456196199353644,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 51250
    },
    {
      "epoch": 9.84,
      "eval_wikibio_accuracy": 0.298,
      "eval_wikibio_bleu_score": 5.330900691913495,
      "eval_wikibio_bleu_score_sem": 0.17906310250588,
      "eval_wikibio_emb_cos_sim": 0.7194701433181763,
      "eval_wikibio_emb_cos_sim_sem": 0.011347857203274013,
      "eval_wikibio_emb_top1_equal": 0.203125,
      "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.049874305725098,
      "eval_wikibio_n_ngrams_match_1": 9.856,
      "eval_wikibio_n_ngrams_match_2": 3.172,
      "eval_wikibio_n_ngrams_match_3": 1.062,
      "eval_wikibio_num_pred_words": 37.386,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 57.390242967095304,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.33583352303407343,
      "eval_wikibio_runtime": 10.9786,
      "eval_wikibio_samples_per_second": 45.543,
      "eval_wikibio_steps_per_second": 0.091,
      "eval_wikibio_token_set_f1": 0.30827291045695915,
      "eval_wikibio_token_set_f1_sem": 0.005191835126986878,
      "eval_wikibio_token_set_precision": 0.31814739759521976,
      "eval_wikibio_token_set_recall": 0.3125368036028271,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 51250
    },
    {
      "epoch": 9.84,
      "eval_nq_accuracy": 0.5,
      "eval_nq_bleu_score": 10.506865128841985,
      "eval_nq_bleu_score_sem": 0.43942487607256203,
      "eval_nq_emb_cos_sim": 0.8112521171569824,
      "eval_nq_emb_cos_sim_sem": 0.007949700370543369,
      "eval_nq_emb_top1_equal": 0.2890625,
      "eval_nq_emb_top1_equal_sem": 0.04022626667363519,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.374133825302124,
      "eval_nq_n_ngrams_match_1": 22.0,
      "eval_nq_n_ngrams_match_2": 7.784,
      "eval_nq_n_ngrams_match_3": 3.446,
      "eval_nq_num_pred_words": 49.368,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 10.741704962333953,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.41845153063581353,
      "eval_nq_runtime": 11.0452,
      "eval_nq_samples_per_second": 45.268,
      "eval_nq_steps_per_second": 0.091,
      "eval_nq_token_set_f1": 0.4382135816677673,
      "eval_nq_token_set_f1_sem": 0.005028347832057513,
      "eval_nq_token_set_precision": 0.3926721666486054,
      "eval_nq_token_set_recall": 0.5058197006359141,
      "eval_nq_true_num_tokens": 64.0,
      "step": 51250
    },
    {
      "epoch": 9.84,
      "learning_rate": 0.001,
      "loss": 2.7445,
      "step": 51252
    },
    {
      "epoch": 9.84,
      "learning_rate": 0.001,
      "loss": 2.7517,
      "step": 51264
    },
    {
      "epoch": 9.85,
      "learning_rate": 0.001,
      "loss": 2.7501,
      "step": 51276
    },
    {
      "epoch": 9.85,
      "learning_rate": 0.001,
      "loss": 2.7602,
      "step": 51288
    },
    {
      "epoch": 9.85,
      "learning_rate": 0.001,
      "loss": 2.7477,
      "step": 51300
    },
    {
      "epoch": 9.85,
      "learning_rate": 0.001,
      "loss": 2.7403,
      "step": 51312
    },
    {
      "epoch": 9.85,
      "learning_rate": 0.001,
      "loss": 2.753,
      "step": 51324
    },
    {
      "epoch": 9.86,
      "learning_rate": 0.001,
      "loss": 2.751,
      "step": 51336
    },
    {
      "epoch": 9.86,
      "learning_rate": 0.001,
      "loss": 2.7526,
      "step": 51348
    },
    {
      "epoch": 9.86,
      "learning_rate": 0.001,
      "loss": 2.7449,
      "step": 51360
    },
    {
      "epoch": 9.86,
      "learning_rate": 0.001,
      "loss": 2.7452,
      "step": 51372
    },
    {
      "epoch": 9.87,
      "learning_rate": 0.001,
      "loss": 2.7451,
      "step": 51384
    },
    {
      "epoch": 9.87,
      "learning_rate": 0.001,
      "loss": 2.7486,
      "step": 51396
    },
    {
      "epoch": 9.87,
      "learning_rate": 0.001,
      "loss": 2.7473,
      "step": 51408
    },
    {
      "epoch": 9.87,
      "learning_rate": 0.001,
      "loss": 2.7471,
      "step": 51420
    },
    {
      "epoch": 9.88,
      "learning_rate": 0.001,
      "loss": 2.745,
      "step": 51432
    },
    {
      "epoch": 9.88,
      "learning_rate": 0.001,
      "loss": 2.7584,
      "step": 51444
    },
    {
      "epoch": 9.88,
      "learning_rate": 0.001,
      "loss": 2.7455,
      "step": 51456
    },
    {
      "epoch": 9.88,
      "learning_rate": 0.001,
      "loss": 2.756,
      "step": 51468
    },
    {
      "epoch": 9.88,
      "learning_rate": 0.001,
      "loss": 2.7517,
      "step": 51480
    },
    {
      "epoch": 9.89,
      "learning_rate": 0.001,
      "loss": 2.7384,
      "step": 51492
    },
    {
      "epoch": 9.89,
      "learning_rate": 0.001,
      "loss": 2.7426,
      "step": 51504
    },
    {
      "epoch": 9.89,
      "learning_rate": 0.001,
      "loss": 2.7402,
      "step": 51516
    },
    {
      "epoch": 9.89,
      "learning_rate": 0.001,
      "loss": 2.7514,
      "step": 51528
    },
    {
      "epoch": 9.9,
      "learning_rate": 0.001,
      "loss": 2.7391,
      "step": 51540
    },
    {
      "epoch": 9.9,
      "learning_rate": 0.001,
      "loss": 2.7452,
      "step": 51552
    },
    {
      "epoch": 9.9,
      "learning_rate": 0.001,
      "loss": 2.7423,
      "step": 51564
    },
    {
      "epoch": 9.9,
      "learning_rate": 0.001,
      "loss": 2.7506,
      "step": 51576
    },
    {
      "epoch": 9.91,
      "learning_rate": 0.001,
      "loss": 2.7407,
      "step": 51588
    },
    {
      "epoch": 9.91,
      "learning_rate": 0.001,
      "loss": 2.7451,
      "step": 51600
    },
    {
      "epoch": 9.91,
      "learning_rate": 0.001,
      "loss": 2.7469,
      "step": 51612
    },
    {
      "epoch": 9.91,
      "learning_rate": 0.001,
      "loss": 2.7408,
      "step": 51624
    },
    {
      "epoch": 9.91,
      "learning_rate": 0.001,
      "loss": 2.7512,
      "step": 51636
    },
    {
      "epoch": 9.92,
      "learning_rate": 0.001,
      "loss": 2.7501,
      "step": 51648
    },
    {
      "epoch": 9.92,
      "learning_rate": 0.001,
      "loss": 2.7529,
      "step": 51660
    },
    {
      "epoch": 9.92,
      "learning_rate": 0.001,
      "loss": 2.7455,
      "step": 51672
    },
    {
      "epoch": 9.92,
      "learning_rate": 0.001,
      "loss": 2.7414,
      "step": 51684
    },
    {
      "epoch": 9.93,
      "learning_rate": 0.001,
      "loss": 2.748,
      "step": 51696
    },
    {
      "epoch": 9.93,
      "learning_rate": 0.001,
      "loss": 2.7483,
      "step": 51708
    },
    {
      "epoch": 9.93,
      "learning_rate": 0.001,
      "loss": 2.7284,
      "step": 51720
    },
    {
      "epoch": 9.93,
      "learning_rate": 0.001,
      "loss": 2.7468,
      "step": 51732
    },
    {
      "epoch": 9.94,
      "learning_rate": 0.001,
      "loss": 2.7437,
      "step": 51744
    },
    {
      "epoch": 9.94,
      "learning_rate": 0.001,
      "loss": 2.7439,
      "step": 51756
    },
    {
      "epoch": 9.94,
      "learning_rate": 0.001,
      "loss": 2.7327,
      "step": 51768
    },
    {
      "epoch": 9.94,
      "learning_rate": 0.001,
      "loss": 2.7537,
      "step": 51780
    },
    {
      "epoch": 9.94,
      "learning_rate": 0.001,
      "loss": 2.7393,
      "step": 51792
    },
    {
      "epoch": 9.95,
      "learning_rate": 0.001,
      "loss": 2.7467,
      "step": 51804
    },
    {
      "epoch": 9.95,
      "learning_rate": 0.001,
      "loss": 2.7483,
      "step": 51816
    },
    {
      "epoch": 9.95,
      "learning_rate": 0.001,
      "loss": 2.7492,
      "step": 51828
    },
    {
      "epoch": 9.95,
      "learning_rate": 0.001,
      "loss": 2.7394,
      "step": 51840
    },
    {
      "epoch": 9.96,
      "learning_rate": 0.001,
      "loss": 2.7398,
      "step": 51852
    },
    {
      "epoch": 9.96,
      "learning_rate": 0.001,
      "loss": 2.7456,
      "step": 51864
    },
    {
      "epoch": 9.96,
      "eval_ag_news_accuracy": 0.30215625,
      "eval_ag_news_bleu_score": 4.271787666162469,
      "eval_ag_news_bleu_score_sem": 0.14182126944335566,
      "eval_ag_news_emb_cos_sim": 0.7758315801620483,
      "eval_ag_news_emb_cos_sim_sem": 0.008193741818678081,
      "eval_ag_news_emb_top1_equal": 0.203125,
      "eval_ag_news_emb_top1_equal_sem": 0.03570055125142555,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.7651760578155518,
      "eval_ag_news_n_ngrams_match_1": 12.88,
      "eval_ag_news_n_ngrams_match_2": 2.596,
      "eval_ag_news_n_ngrams_match_3": 0.706,
      "eval_ag_news_num_pred_words": 45.916,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 43.17130583662284,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.317430982857297,
      "eval_ag_news_runtime": 13.6479,
      "eval_ag_news_samples_per_second": 36.636,
      "eval_ag_news_steps_per_second": 0.073,
      "eval_ag_news_token_set_f1": 0.32270702357586584,
      "eval_ag_news_token_set_f1_sem": 0.004427138295392929,
      "eval_ag_news_token_set_precision": 0.30369645832459413,
      "eval_ag_news_token_set_recall": 0.36000126454866765,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 51875
    },
    {
      "epoch": 9.96,
      "eval_anthropic_toxic_prompts_accuracy": 0.10621875,
      "eval_anthropic_toxic_prompts_bleu_score": 2.7063884695025613,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10063318095502631,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6409755945205688,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.01030565520165269,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1640625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.03286167651298939,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.4423282146453857,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.574,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.618,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.56,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.81,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 31.2596526924526,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.19119575819917942,
      "eval_anthropic_toxic_prompts_runtime": 10.1386,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.317,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.099,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.33219675504993484,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006387573394778598,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.39673599179717645,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3154570124504101,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 51875
    },
    {
      "epoch": 9.96,
      "eval_arxiv_accuracy": 0.325625,
      "eval_arxiv_bleu_score": 3.6680001865650946,
      "eval_arxiv_bleu_score_sem": 0.10656478426341325,
      "eval_arxiv_emb_cos_sim": 0.7077811360359192,
      "eval_arxiv_emb_cos_sim_sem": 0.00885183588221734,
      "eval_arxiv_emb_top1_equal": 0.21875,
      "eval_arxiv_emb_top1_equal_sem": 0.03668319712192295,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.630890130996704,
      "eval_arxiv_n_ngrams_match_1": 13.38,
      "eval_arxiv_n_ngrams_match_2": 2.44,
      "eval_arxiv_n_ngrams_match_3": 0.47,
      "eval_arxiv_num_pred_words": 38.496,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 37.7464009092179,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.32117969694031323,
      "eval_arxiv_runtime": 10.7734,
      "eval_arxiv_samples_per_second": 46.41,
      "eval_arxiv_steps_per_second": 0.093,
      "eval_arxiv_token_set_f1": 0.31729348974676336,
      "eval_arxiv_token_set_f1_sem": 0.00419676339136666,
      "eval_arxiv_token_set_precision": 0.2613812140416253,
      "eval_arxiv_token_set_recall": 0.42835992297909065,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 51875
    },
    {
      "epoch": 9.96,
      "eval_python_code_alpaca_accuracy": 0.146375,
      "eval_python_code_alpaca_bleu_score": 3.7233119554006526,
      "eval_python_code_alpaca_bleu_score_sem": 0.12112002098702178,
      "eval_python_code_alpaca_emb_cos_sim": 0.7056361436843872,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010300964389122329,
      "eval_python_code_alpaca_emb_top1_equal": 0.0859375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02487009666300537,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.119572639465332,
      "eval_python_code_alpaca_n_ngrams_match_1": 8.638,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.206,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.636,
      "eval_python_code_alpaca_num_pred_words": 43.086,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 22.636703542001946,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.29043748771936967,
      "eval_python_code_alpaca_runtime": 10.0391,
      "eval_python_code_alpaca_samples_per_second": 49.805,
      "eval_python_code_alpaca_steps_per_second": 0.1,
      "eval_python_code_alpaca_token_set_f1": 0.4345619470500771,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005317728832814514,
      "eval_python_code_alpaca_token_set_precision": 0.46534873501829915,
      "eval_python_code_alpaca_token_set_recall": 0.4354722309610675,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 51875
    },
    {
      "epoch": 9.96,
      "eval_wikibio_accuracy": 0.3028125,
      "eval_wikibio_bleu_score": 5.088166038314854,
      "eval_wikibio_bleu_score_sem": 0.1935509374253117,
      "eval_wikibio_emb_cos_sim": 0.7037593126296997,
      "eval_wikibio_emb_cos_sim_sem": 0.01229855707968917,
      "eval_wikibio_emb_top1_equal": 0.1640625,
      "eval_wikibio_emb_top1_equal_sem": 0.03286167651298939,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.015735149383545,
      "eval_wikibio_n_ngrams_match_1": 9.208,
      "eval_wikibio_n_ngrams_match_2": 2.918,
      "eval_wikibio_n_ngrams_match_3": 1.006,
      "eval_wikibio_num_pred_words": 35.48,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 55.46405478422453,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3185929641889421,
      "eval_wikibio_runtime": 10.2873,
      "eval_wikibio_samples_per_second": 48.604,
      "eval_wikibio_steps_per_second": 0.097,
      "eval_wikibio_token_set_f1": 0.2955220152254472,
      "eval_wikibio_token_set_f1_sem": 0.0056635203579463965,
      "eval_wikibio_token_set_precision": 0.2983115466329409,
      "eval_wikibio_token_set_recall": 0.31063656795951516,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 51875
    },
    {
      "epoch": 9.96,
      "eval_nq_accuracy": 0.50109375,
      "eval_nq_bleu_score": 10.260100703238406,
      "eval_nq_bleu_score_sem": 0.4403658884810294,
      "eval_nq_emb_cos_sim": 0.8059619665145874,
      "eval_nq_emb_cos_sim_sem": 0.00778136849176889,
      "eval_nq_emb_top1_equal": 0.2734375,
      "eval_nq_emb_top1_equal_sem": 0.03955156411760461,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.3714003562927246,
      "eval_nq_n_ngrams_match_1": 21.762,
      "eval_nq_n_ngrams_match_2": 7.568,
      "eval_nq_n_ngrams_match_3": 3.362,
      "eval_nq_num_pred_words": 48.766,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 10.712382938390657,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.41838994266378327,
      "eval_nq_runtime": 10.3401,
      "eval_nq_samples_per_second": 48.356,
      "eval_nq_steps_per_second": 0.097,
      "eval_nq_token_set_f1": 0.4369980999074413,
      "eval_nq_token_set_f1_sem": 0.00503054054533651,
      "eval_nq_token_set_precision": 0.38943443456707744,
      "eval_nq_token_set_recall": 0.5073206099411495,
      "eval_nq_true_num_tokens": 64.0,
      "step": 51875
    },
    {
      "epoch": 9.96,
      "learning_rate": 0.001,
      "loss": 2.7398,
      "step": 51876
    },
    {
      "epoch": 9.96,
      "learning_rate": 0.001,
      "loss": 2.7386,
      "step": 51888
    },
    {
      "epoch": 9.97,
      "learning_rate": 0.001,
      "loss": 2.7403,
      "step": 51900
    },
    {
      "epoch": 9.97,
      "learning_rate": 0.001,
      "loss": 2.7472,
      "step": 51912
    },
    {
      "epoch": 9.97,
      "learning_rate": 0.001,
      "loss": 2.7395,
      "step": 51924
    },
    {
      "epoch": 9.97,
      "learning_rate": 0.001,
      "loss": 2.7367,
      "step": 51936
    },
    {
      "epoch": 9.97,
      "learning_rate": 0.001,
      "loss": 2.7407,
      "step": 51948
    },
    {
      "epoch": 9.98,
      "learning_rate": 0.001,
      "loss": 2.7453,
      "step": 51960
    },
    {
      "epoch": 9.98,
      "learning_rate": 0.001,
      "loss": 2.7436,
      "step": 51972
    },
    {
      "epoch": 9.98,
      "learning_rate": 0.001,
      "loss": 2.7393,
      "step": 51984
    },
    {
      "epoch": 9.98,
      "learning_rate": 0.001,
      "loss": 2.7339,
      "step": 51996
    },
    {
      "epoch": 9.99,
      "learning_rate": 0.001,
      "loss": 2.743,
      "step": 52008
    },
    {
      "epoch": 9.99,
      "learning_rate": 0.001,
      "loss": 2.7382,
      "step": 52020
    },
    {
      "epoch": 9.99,
      "learning_rate": 0.001,
      "loss": 2.7365,
      "step": 52032
    },
    {
      "epoch": 9.99,
      "learning_rate": 0.001,
      "loss": 2.7432,
      "step": 52044
    },
    {
      "epoch": 10.0,
      "learning_rate": 0.001,
      "loss": 2.7373,
      "step": 52056
    },
    {
      "epoch": 10.0,
      "learning_rate": 0.001,
      "loss": 2.7395,
      "step": 52068
    },
    {
      "epoch": 10.0,
      "learning_rate": 0.001,
      "loss": 2.7453,
      "step": 52080
    },
    {
      "epoch": 10.0,
      "learning_rate": 0.001,
      "loss": 2.7269,
      "step": 52092
    },
    {
      "epoch": 10.0,
      "learning_rate": 0.001,
      "loss": 2.7214,
      "step": 52104
    },
    {
      "epoch": 10.01,
      "learning_rate": 0.001,
      "loss": 2.7286,
      "step": 52116
    },
    {
      "epoch": 10.01,
      "learning_rate": 0.001,
      "loss": 2.7339,
      "step": 52128
    },
    {
      "epoch": 10.01,
      "learning_rate": 0.001,
      "loss": 2.7257,
      "step": 52140
    },
    {
      "epoch": 10.01,
      "learning_rate": 0.001,
      "loss": 2.7264,
      "step": 52152
    },
    {
      "epoch": 10.02,
      "learning_rate": 0.001,
      "loss": 2.7313,
      "step": 52164
    },
    {
      "epoch": 10.02,
      "learning_rate": 0.001,
      "loss": 2.7306,
      "step": 52176
    },
    {
      "epoch": 10.02,
      "learning_rate": 0.001,
      "loss": 2.7184,
      "step": 52188
    },
    {
      "epoch": 10.02,
      "learning_rate": 0.001,
      "loss": 2.7267,
      "step": 52200
    },
    {
      "epoch": 10.03,
      "learning_rate": 0.001,
      "loss": 2.7126,
      "step": 52212
    },
    {
      "epoch": 10.03,
      "learning_rate": 0.001,
      "loss": 2.7269,
      "step": 52224
    },
    {
      "epoch": 10.03,
      "learning_rate": 0.001,
      "loss": 2.7272,
      "step": 52236
    },
    {
      "epoch": 10.03,
      "learning_rate": 0.001,
      "loss": 2.7314,
      "step": 52248
    },
    {
      "epoch": 10.03,
      "learning_rate": 0.001,
      "loss": 2.7298,
      "step": 52260
    },
    {
      "epoch": 10.04,
      "learning_rate": 0.001,
      "loss": 2.7256,
      "step": 52272
    },
    {
      "epoch": 10.04,
      "learning_rate": 0.001,
      "loss": 2.7284,
      "step": 52284
    },
    {
      "epoch": 10.04,
      "learning_rate": 0.001,
      "loss": 2.7299,
      "step": 52296
    },
    {
      "epoch": 10.04,
      "learning_rate": 0.001,
      "loss": 2.7293,
      "step": 52308
    },
    {
      "epoch": 10.05,
      "learning_rate": 0.001,
      "loss": 2.7133,
      "step": 52320
    },
    {
      "epoch": 10.05,
      "learning_rate": 0.001,
      "loss": 2.717,
      "step": 52332
    },
    {
      "epoch": 10.05,
      "learning_rate": 0.001,
      "loss": 2.7295,
      "step": 52344
    },
    {
      "epoch": 10.05,
      "learning_rate": 0.001,
      "loss": 2.7225,
      "step": 52356
    },
    {
      "epoch": 10.06,
      "learning_rate": 0.001,
      "loss": 2.726,
      "step": 52368
    },
    {
      "epoch": 10.06,
      "learning_rate": 0.001,
      "loss": 2.7339,
      "step": 52380
    },
    {
      "epoch": 10.06,
      "learning_rate": 0.001,
      "loss": 2.7193,
      "step": 52392
    },
    {
      "epoch": 10.06,
      "learning_rate": 0.001,
      "loss": 2.7301,
      "step": 52404
    },
    {
      "epoch": 10.06,
      "learning_rate": 0.001,
      "loss": 2.7269,
      "step": 52416
    },
    {
      "epoch": 10.07,
      "learning_rate": 0.001,
      "loss": 2.7165,
      "step": 52428
    },
    {
      "epoch": 10.07,
      "learning_rate": 0.001,
      "loss": 2.7266,
      "step": 52440
    },
    {
      "epoch": 10.07,
      "learning_rate": 0.001,
      "loss": 2.7213,
      "step": 52452
    },
    {
      "epoch": 10.07,
      "learning_rate": 0.001,
      "loss": 2.7249,
      "step": 52464
    },
    {
      "epoch": 10.08,
      "learning_rate": 0.001,
      "loss": 2.7216,
      "step": 52476
    },
    {
      "epoch": 10.08,
      "learning_rate": 0.001,
      "loss": 2.7294,
      "step": 52488
    },
    {
      "epoch": 10.08,
      "learning_rate": 0.001,
      "loss": 2.7208,
      "step": 52500
    },
    {
      "epoch": 10.08,
      "eval_ag_news_accuracy": 0.30303125,
      "eval_ag_news_bleu_score": 4.228255478546149,
      "eval_ag_news_bleu_score_sem": 0.13346445109529087,
      "eval_ag_news_emb_cos_sim": 0.7734034657478333,
      "eval_ag_news_emb_cos_sim_sem": 0.00898802815482423,
      "eval_ag_news_emb_top1_equal": 0.1875,
      "eval_ag_news_emb_top1_equal_sem": 0.034634623208270626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.7726082801818848,
      "eval_ag_news_n_ngrams_match_1": 12.894,
      "eval_ag_news_n_ngrams_match_2": 2.572,
      "eval_ag_news_n_ngrams_match_3": 0.69,
      "eval_ag_news_num_pred_words": 45.734,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 43.493359887640956,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3178587687880139,
      "eval_ag_news_runtime": 10.7644,
      "eval_ag_news_samples_per_second": 46.449,
      "eval_ag_news_steps_per_second": 0.093,
      "eval_ag_news_token_set_f1": 0.3262322760940814,
      "eval_ag_news_token_set_f1_sem": 0.004454255222716337,
      "eval_ag_news_token_set_precision": 0.3038468689901446,
      "eval_ag_news_token_set_recall": 0.3698536799096918,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 52500
    },
    {
      "epoch": 10.08,
      "eval_anthropic_toxic_prompts_accuracy": 0.106125,
      "eval_anthropic_toxic_prompts_bleu_score": 2.7343252729613194,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11746437774251844,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6511576771736145,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009511287205987552,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.440284490585327,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.55,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.586,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.538,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.634,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 31.19583182646959,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.190566117399866,
      "eval_anthropic_toxic_prompts_runtime": 10.2776,
      "eval_anthropic_toxic_prompts_samples_per_second": 48.65,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.097,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.33482702497117933,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006264567064827086,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.3930853406408358,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3228208386996512,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 52500
    },
    {
      "epoch": 10.08,
      "eval_arxiv_accuracy": 0.32690625,
      "eval_arxiv_bleu_score": 3.9603489245072288,
      "eval_arxiv_bleu_score_sem": 0.11415442334670534,
      "eval_arxiv_emb_cos_sim": 0.7231601476669312,
      "eval_arxiv_emb_cos_sim_sem": 0.007601504181570617,
      "eval_arxiv_emb_top1_equal": 0.25,
      "eval_arxiv_emb_top1_equal_sem": 0.03842366440207048,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.6353352069854736,
      "eval_arxiv_n_ngrams_match_1": 14.048,
      "eval_arxiv_n_ngrams_match_2": 2.61,
      "eval_arxiv_n_ngrams_match_3": 0.566,
      "eval_arxiv_num_pred_words": 40.07,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 37.91455999263026,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.33180693633247804,
      "eval_arxiv_runtime": 10.5025,
      "eval_arxiv_samples_per_second": 47.608,
      "eval_arxiv_steps_per_second": 0.095,
      "eval_arxiv_token_set_f1": 0.3277091009883879,
      "eval_arxiv_token_set_f1_sem": 0.003975888031076143,
      "eval_arxiv_token_set_precision": 0.2749666758619215,
      "eval_arxiv_token_set_recall": 0.4250449361622123,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 52500
    },
    {
      "epoch": 10.08,
      "eval_python_code_alpaca_accuracy": 0.1483125,
      "eval_python_code_alpaca_bleu_score": 3.781720619656825,
      "eval_python_code_alpaca_bleu_score_sem": 0.11532019879356221,
      "eval_python_code_alpaca_emb_cos_sim": 0.7247380018234253,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008669058829660261,
      "eval_python_code_alpaca_emb_top1_equal": 0.15625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.0973434448242188,
      "eval_python_code_alpaca_n_ngrams_match_1": 8.96,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.308,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.67,
      "eval_python_code_alpaca_num_pred_words": 44.014,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 22.139059458396538,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.2968480787843321,
      "eval_python_code_alpaca_runtime": 11.1174,
      "eval_python_code_alpaca_samples_per_second": 44.974,
      "eval_python_code_alpaca_steps_per_second": 0.09,
      "eval_python_code_alpaca_token_set_f1": 0.44470050077138495,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005326313761115086,
      "eval_python_code_alpaca_token_set_precision": 0.48224593242693986,
      "eval_python_code_alpaca_token_set_recall": 0.43425996236665465,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 52500
    },
    {
      "epoch": 10.08,
      "eval_wikibio_accuracy": 0.30234375,
      "eval_wikibio_bleu_score": 5.503111313402614,
      "eval_wikibio_bleu_score_sem": 0.2092912315387504,
      "eval_wikibio_emb_cos_sim": 0.7226361632347107,
      "eval_wikibio_emb_cos_sim_sem": 0.009754967771731888,
      "eval_wikibio_emb_top1_equal": 0.1953125,
      "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.101844787597656,
      "eval_wikibio_n_ngrams_match_1": 9.926,
      "eval_wikibio_n_ngrams_match_2": 3.202,
      "eval_wikibio_n_ngrams_match_3": 1.12,
      "eval_wikibio_num_pred_words": 36.796,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 60.45170535100718,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3379599294616351,
      "eval_wikibio_runtime": 10.9875,
      "eval_wikibio_samples_per_second": 45.506,
      "eval_wikibio_steps_per_second": 0.091,
      "eval_wikibio_token_set_f1": 0.311808223722134,
      "eval_wikibio_token_set_f1_sem": 0.005279325026010822,
      "eval_wikibio_token_set_precision": 0.3206006788993924,
      "eval_wikibio_token_set_recall": 0.31908003551725816,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 52500
    },
    {
      "epoch": 10.08,
      "eval_nq_accuracy": 0.50184375,
      "eval_nq_bleu_score": 10.447551941160691,
      "eval_nq_bleu_score_sem": 0.45678269801432014,
      "eval_nq_emb_cos_sim": 0.8081899881362915,
      "eval_nq_emb_cos_sim_sem": 0.00814857751923161,
      "eval_nq_emb_top1_equal": 0.2265625,
      "eval_nq_emb_top1_equal_sem": 0.03714537682851538,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.367133855819702,
      "eval_nq_n_ngrams_match_1": 21.864,
      "eval_nq_n_ngrams_match_2": 7.624,
      "eval_nq_n_ngrams_match_3": 3.404,
      "eval_nq_num_pred_words": 49.164,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 10.666775911899004,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4188996555677107,
      "eval_nq_runtime": 10.5209,
      "eval_nq_samples_per_second": 47.525,
      "eval_nq_steps_per_second": 0.095,
      "eval_nq_token_set_f1": 0.441238484716742,
      "eval_nq_token_set_f1_sem": 0.004935353082230968,
      "eval_nq_token_set_precision": 0.3933015622393701,
      "eval_nq_token_set_recall": 0.5134531003887985,
      "eval_nq_true_num_tokens": 64.0,
      "step": 52500
    },
    {
      "epoch": 10.08,
      "learning_rate": 0.001,
      "loss": 2.7165,
      "step": 52512
    },
    {
      "epoch": 10.09,
      "learning_rate": 0.001,
      "loss": 2.7325,
      "step": 52524
    },
    {
      "epoch": 10.09,
      "learning_rate": 0.001,
      "loss": 2.7278,
      "step": 52536
    },
    {
      "epoch": 10.09,
      "learning_rate": 0.001,
      "loss": 2.7407,
      "step": 52548
    },
    {
      "epoch": 10.09,
      "learning_rate": 0.001,
      "loss": 2.7295,
      "step": 52560
    },
    {
      "epoch": 10.09,
      "learning_rate": 0.001,
      "loss": 2.7285,
      "step": 52572
    },
    {
      "epoch": 10.1,
      "learning_rate": 0.001,
      "loss": 2.733,
      "step": 52584
    },
    {
      "epoch": 10.1,
      "learning_rate": 0.001,
      "loss": 2.7146,
      "step": 52596
    },
    {
      "epoch": 10.1,
      "learning_rate": 0.001,
      "loss": 2.7211,
      "step": 52608
    },
    {
      "epoch": 10.1,
      "learning_rate": 0.001,
      "loss": 2.7314,
      "step": 52620
    },
    {
      "epoch": 10.11,
      "learning_rate": 0.001,
      "loss": 2.7417,
      "step": 52632
    },
    {
      "epoch": 10.11,
      "learning_rate": 0.001,
      "loss": 2.7356,
      "step": 52644
    },
    {
      "epoch": 10.11,
      "learning_rate": 0.001,
      "loss": 2.7328,
      "step": 52656
    },
    {
      "epoch": 10.11,
      "learning_rate": 0.001,
      "loss": 2.7277,
      "step": 52668
    },
    {
      "epoch": 10.12,
      "learning_rate": 0.001,
      "loss": 2.7206,
      "step": 52680
    },
    {
      "epoch": 10.12,
      "learning_rate": 0.001,
      "loss": 2.7138,
      "step": 52692
    },
    {
      "epoch": 10.12,
      "learning_rate": 0.001,
      "loss": 2.7233,
      "step": 52704
    },
    {
      "epoch": 10.12,
      "learning_rate": 0.001,
      "loss": 2.7224,
      "step": 52716
    },
    {
      "epoch": 10.12,
      "learning_rate": 0.001,
      "loss": 2.7177,
      "step": 52728
    },
    {
      "epoch": 10.13,
      "learning_rate": 0.001,
      "loss": 2.7225,
      "step": 52740
    },
    {
      "epoch": 10.13,
      "learning_rate": 0.001,
      "loss": 2.7316,
      "step": 52752
    },
    {
      "epoch": 10.13,
      "learning_rate": 0.001,
      "loss": 2.7254,
      "step": 52764
    },
    {
      "epoch": 10.13,
      "learning_rate": 0.001,
      "loss": 2.7318,
      "step": 52776
    },
    {
      "epoch": 10.14,
      "learning_rate": 0.001,
      "loss": 2.7228,
      "step": 52788
    },
    {
      "epoch": 10.14,
      "learning_rate": 0.001,
      "loss": 2.7278,
      "step": 52800
    },
    {
      "epoch": 10.14,
      "learning_rate": 0.001,
      "loss": 2.7185,
      "step": 52812
    },
    {
      "epoch": 10.14,
      "learning_rate": 0.001,
      "loss": 2.7318,
      "step": 52824
    },
    {
      "epoch": 10.15,
      "learning_rate": 0.001,
      "loss": 2.7412,
      "step": 52836
    },
    {
      "epoch": 10.15,
      "learning_rate": 0.001,
      "loss": 2.738,
      "step": 52848
    },
    {
      "epoch": 10.15,
      "learning_rate": 0.001,
      "loss": 2.7196,
      "step": 52860
    },
    {
      "epoch": 10.15,
      "learning_rate": 0.001,
      "loss": 2.7326,
      "step": 52872
    },
    {
      "epoch": 10.15,
      "learning_rate": 0.001,
      "loss": 2.7239,
      "step": 52884
    },
    {
      "epoch": 10.16,
      "learning_rate": 0.001,
      "loss": 2.7201,
      "step": 52896
    },
    {
      "epoch": 10.16,
      "learning_rate": 0.001,
      "loss": 2.717,
      "step": 52908
    },
    {
      "epoch": 10.16,
      "learning_rate": 0.001,
      "loss": 2.7335,
      "step": 52920
    },
    {
      "epoch": 10.16,
      "learning_rate": 0.001,
      "loss": 2.7274,
      "step": 52932
    },
    {
      "epoch": 10.17,
      "learning_rate": 0.001,
      "loss": 2.7241,
      "step": 52944
    },
    {
      "epoch": 10.17,
      "learning_rate": 0.001,
      "loss": 2.733,
      "step": 52956
    },
    {
      "epoch": 10.17,
      "learning_rate": 0.001,
      "loss": 2.7249,
      "step": 52968
    },
    {
      "epoch": 10.17,
      "learning_rate": 0.001,
      "loss": 2.726,
      "step": 52980
    },
    {
      "epoch": 10.18,
      "learning_rate": 0.001,
      "loss": 2.721,
      "step": 52992
    },
    {
      "epoch": 10.18,
      "learning_rate": 0.001,
      "loss": 2.7322,
      "step": 53004
    },
    {
      "epoch": 10.18,
      "learning_rate": 0.001,
      "loss": 2.726,
      "step": 53016
    },
    {
      "epoch": 10.18,
      "learning_rate": 0.001,
      "loss": 2.7226,
      "step": 53028
    },
    {
      "epoch": 10.18,
      "learning_rate": 0.001,
      "loss": 2.7308,
      "step": 53040
    },
    {
      "epoch": 10.19,
      "learning_rate": 0.001,
      "loss": 2.7189,
      "step": 53052
    },
    {
      "epoch": 10.19,
      "learning_rate": 0.001,
      "loss": 2.7168,
      "step": 53064
    },
    {
      "epoch": 10.19,
      "learning_rate": 0.001,
      "loss": 2.719,
      "step": 53076
    },
    {
      "epoch": 10.19,
      "learning_rate": 0.001,
      "loss": 2.7289,
      "step": 53088
    },
    {
      "epoch": 10.2,
      "learning_rate": 0.001,
      "loss": 2.7239,
      "step": 53100
    },
    {
      "epoch": 10.2,
      "learning_rate": 0.001,
      "loss": 2.7317,
      "step": 53112
    },
    {
      "epoch": 10.2,
      "learning_rate": 0.001,
      "loss": 2.725,
      "step": 53124
    },
    {
      "epoch": 10.2,
      "eval_ag_news_accuracy": 0.303,
      "eval_ag_news_bleu_score": 4.32066133713104,
      "eval_ag_news_bleu_score_sem": 0.14242850889569467,
      "eval_ag_news_emb_cos_sim": 0.7658836245536804,
      "eval_ag_news_emb_cos_sim_sem": 0.009419168683121912,
      "eval_ag_news_emb_top1_equal": 0.2265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03714537682851538,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.7735326290130615,
      "eval_ag_news_n_ngrams_match_1": 13.0,
      "eval_ag_news_n_ngrams_match_2": 2.644,
      "eval_ag_news_n_ngrams_match_3": 0.678,
      "eval_ag_news_num_pred_words": 46.192,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 43.533581510558264,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3195773277143591,
      "eval_ag_news_runtime": 11.5613,
      "eval_ag_news_samples_per_second": 43.248,
      "eval_ag_news_steps_per_second": 0.086,
      "eval_ag_news_token_set_f1": 0.3280599121254573,
      "eval_ag_news_token_set_f1_sem": 0.00437620625410379,
      "eval_ag_news_token_set_precision": 0.30763405403202343,
      "eval_ag_news_token_set_recall": 0.368383430379265,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 53125
    },
    {
      "epoch": 10.2,
      "eval_anthropic_toxic_prompts_accuracy": 0.10525,
      "eval_anthropic_toxic_prompts_bleu_score": 2.9140187609364174,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.14611536976309447,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6332710981369019,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010925239557998644,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.4764328002929688,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.718,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.642,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.562,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.748,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 32.344138043014894,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.19679542366114136,
      "eval_anthropic_toxic_prompts_runtime": 12.0322,
      "eval_anthropic_toxic_prompts_samples_per_second": 41.555,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.083,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.33705735741638665,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006431252472007019,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4005826486388677,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32101468312490955,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 53125
    },
    {
      "epoch": 10.2,
      "eval_arxiv_accuracy": 0.3265,
      "eval_arxiv_bleu_score": 3.846087266620141,
      "eval_arxiv_bleu_score_sem": 0.10120441909318453,
      "eval_arxiv_emb_cos_sim": 0.711337685585022,
      "eval_arxiv_emb_cos_sim_sem": 0.007915942061975158,
      "eval_arxiv_emb_top1_equal": 0.2265625,
      "eval_arxiv_emb_top1_equal_sem": 0.03714537682851538,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.626399040222168,
      "eval_arxiv_n_ngrams_match_1": 13.794,
      "eval_arxiv_n_ngrams_match_2": 2.516,
      "eval_arxiv_n_ngrams_match_3": 0.528,
      "eval_arxiv_num_pred_words": 40.156,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 37.57725849758345,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.326970941475356,
      "eval_arxiv_runtime": 10.9574,
      "eval_arxiv_samples_per_second": 45.631,
      "eval_arxiv_steps_per_second": 0.091,
      "eval_arxiv_token_set_f1": 0.3217325130845331,
      "eval_arxiv_token_set_f1_sem": 0.003980271053771427,
      "eval_arxiv_token_set_precision": 0.2674409880644995,
      "eval_arxiv_token_set_recall": 0.4259264503258127,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 53125
    },
    {
      "epoch": 10.2,
      "eval_python_code_alpaca_accuracy": 0.14871875,
      "eval_python_code_alpaca_bleu_score": 3.6781379074422604,
      "eval_python_code_alpaca_bleu_score_sem": 0.1146349535839436,
      "eval_python_code_alpaca_emb_cos_sim": 0.7062475681304932,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010283644668562384,
      "eval_python_code_alpaca_emb_top1_equal": 0.125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.112185001373291,
      "eval_python_code_alpaca_n_ngrams_match_1": 8.638,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.234,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.626,
      "eval_python_code_alpaca_num_pred_words": 42.462,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 22.47008797417813,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.29047458800549775,
      "eval_python_code_alpaca_runtime": 10.1621,
      "eval_python_code_alpaca_samples_per_second": 49.203,
      "eval_python_code_alpaca_steps_per_second": 0.098,
      "eval_python_code_alpaca_token_set_f1": 0.442423761741765,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0059070106684365135,
      "eval_python_code_alpaca_token_set_precision": 0.46515917016020897,
      "eval_python_code_alpaca_token_set_recall": 0.44947245285610093,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 53125
    },
    {
      "epoch": 10.2,
      "eval_wikibio_accuracy": 0.30303125,
      "eval_wikibio_bleu_score": 5.358139248751093,
      "eval_wikibio_bleu_score_sem": 0.19996863304455187,
      "eval_wikibio_emb_cos_sim": 0.7063294053077698,
      "eval_wikibio_emb_cos_sim_sem": 0.011659479147362421,
      "eval_wikibio_emb_top1_equal": 0.125,
      "eval_wikibio_emb_top1_equal_sem": 0.02934655822437397,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.05332612991333,
      "eval_wikibio_n_ngrams_match_1": 9.318,
      "eval_wikibio_n_ngrams_match_2": 3.08,
      "eval_wikibio_n_ngrams_match_3": 1.094,
      "eval_wikibio_num_pred_words": 35.7,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 57.58868629463752,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.32126469204064945,
      "eval_wikibio_runtime": 9.9068,
      "eval_wikibio_samples_per_second": 50.47,
      "eval_wikibio_steps_per_second": 0.101,
      "eval_wikibio_token_set_f1": 0.29740553581817,
      "eval_wikibio_token_set_f1_sem": 0.005631365512427164,
      "eval_wikibio_token_set_precision": 0.30314015268978517,
      "eval_wikibio_token_set_recall": 0.3113002605458895,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 53125
    },
    {
      "epoch": 10.2,
      "eval_nq_accuracy": 0.50228125,
      "eval_nq_bleu_score": 10.442141514660827,
      "eval_nq_bleu_score_sem": 0.43914761813133124,
      "eval_nq_emb_cos_sim": 0.8095685243606567,
      "eval_nq_emb_cos_sim_sem": 0.008172592661804217,
      "eval_nq_emb_top1_equal": 0.25,
      "eval_nq_emb_top1_equal_sem": 0.03842366440207048,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.3626022338867188,
      "eval_nq_n_ngrams_match_1": 21.91,
      "eval_nq_n_ngrams_match_2": 7.606,
      "eval_nq_n_ngrams_match_3": 3.422,
      "eval_nq_num_pred_words": 49.21,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 10.618547475276303,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.42038613630426913,
      "eval_nq_runtime": 11.242,
      "eval_nq_samples_per_second": 44.476,
      "eval_nq_steps_per_second": 0.089,
      "eval_nq_token_set_f1": 0.439474187141627,
      "eval_nq_token_set_f1_sem": 0.004896226509865437,
      "eval_nq_token_set_precision": 0.39184903021724926,
      "eval_nq_token_set_recall": 0.5109066236302392,
      "eval_nq_true_num_tokens": 64.0,
      "step": 53125
    },
    {
      "epoch": 10.2,
      "learning_rate": 0.001,
      "loss": 2.733,
      "step": 53136
    },
    {
      "epoch": 10.21,
      "learning_rate": 0.001,
      "loss": 2.7309,
      "step": 53148
    },
    {
      "epoch": 10.21,
      "learning_rate": 0.001,
      "loss": 2.7305,
      "step": 53160
    },
    {
      "epoch": 10.21,
      "learning_rate": 0.001,
      "loss": 2.7289,
      "step": 53172
    },
    {
      "epoch": 10.21,
      "learning_rate": 0.001,
      "loss": 2.7369,
      "step": 53184
    },
    {
      "epoch": 10.21,
      "learning_rate": 0.001,
      "loss": 2.7297,
      "step": 53196
    },
    {
      "epoch": 10.22,
      "learning_rate": 0.001,
      "loss": 2.7275,
      "step": 53208
    },
    {
      "epoch": 10.22,
      "learning_rate": 0.001,
      "loss": 2.7331,
      "step": 53220
    },
    {
      "epoch": 10.22,
      "learning_rate": 0.001,
      "loss": 2.7264,
      "step": 53232
    },
    {
      "epoch": 10.22,
      "learning_rate": 0.001,
      "loss": 2.7259,
      "step": 53244
    },
    {
      "epoch": 10.23,
      "learning_rate": 0.001,
      "loss": 2.7298,
      "step": 53256
    },
    {
      "epoch": 10.23,
      "learning_rate": 0.001,
      "loss": 2.7327,
      "step": 53268
    },
    {
      "epoch": 10.23,
      "learning_rate": 0.001,
      "loss": 2.7277,
      "step": 53280
    },
    {
      "epoch": 10.23,
      "learning_rate": 0.001,
      "loss": 2.7331,
      "step": 53292
    },
    {
      "epoch": 10.24,
      "learning_rate": 0.001,
      "loss": 2.7362,
      "step": 53304
    },
    {
      "epoch": 10.24,
      "learning_rate": 0.001,
      "loss": 2.7162,
      "step": 53316
    },
    {
      "epoch": 10.24,
      "learning_rate": 0.001,
      "loss": 2.7294,
      "step": 53328
    },
    {
      "epoch": 10.24,
      "learning_rate": 0.001,
      "loss": 2.7228,
      "step": 53340
    },
    {
      "epoch": 10.24,
      "learning_rate": 0.001,
      "loss": 2.713,
      "step": 53352
    },
    {
      "epoch": 10.25,
      "learning_rate": 0.001,
      "loss": 2.721,
      "step": 53364
    },
    {
      "epoch": 10.25,
      "learning_rate": 0.001,
      "loss": 2.7177,
      "step": 53376
    },
    {
      "epoch": 10.25,
      "learning_rate": 0.001,
      "loss": 2.725,
      "step": 53388
    },
    {
      "epoch": 10.25,
      "learning_rate": 0.001,
      "loss": 2.7309,
      "step": 53400
    },
    {
      "epoch": 10.26,
      "learning_rate": 0.001,
      "loss": 2.727,
      "step": 53412
    },
    {
      "epoch": 10.26,
      "learning_rate": 0.001,
      "loss": 2.7242,
      "step": 53424
    },
    {
      "epoch": 10.26,
      "learning_rate": 0.001,
      "loss": 2.7319,
      "step": 53436
    },
    {
      "epoch": 10.26,
      "learning_rate": 0.001,
      "loss": 2.7211,
      "step": 53448
    },
    {
      "epoch": 10.26,
      "learning_rate": 0.001,
      "loss": 2.7268,
      "step": 53460
    },
    {
      "epoch": 10.27,
      "learning_rate": 0.001,
      "loss": 2.7193,
      "step": 53472
    },
    {
      "epoch": 10.27,
      "learning_rate": 0.001,
      "loss": 2.7338,
      "step": 53484
    },
    {
      "epoch": 10.27,
      "learning_rate": 0.001,
      "loss": 2.7304,
      "step": 53496
    },
    {
      "epoch": 10.27,
      "learning_rate": 0.001,
      "loss": 2.7238,
      "step": 53508
    },
    {
      "epoch": 10.28,
      "learning_rate": 0.001,
      "loss": 2.7245,
      "step": 53520
    },
    {
      "epoch": 10.28,
      "learning_rate": 0.001,
      "loss": 2.7304,
      "step": 53532
    },
    {
      "epoch": 10.28,
      "learning_rate": 0.001,
      "loss": 2.7279,
      "step": 53544
    },
    {
      "epoch": 10.28,
      "learning_rate": 0.001,
      "loss": 2.7422,
      "step": 53556
    },
    {
      "epoch": 10.29,
      "learning_rate": 0.001,
      "loss": 2.7271,
      "step": 53568
    },
    {
      "epoch": 10.29,
      "learning_rate": 0.001,
      "loss": 2.7232,
      "step": 53580
    },
    {
      "epoch": 10.29,
      "learning_rate": 0.001,
      "loss": 2.7283,
      "step": 53592
    },
    {
      "epoch": 10.29,
      "learning_rate": 0.001,
      "loss": 2.7264,
      "step": 53604
    },
    {
      "epoch": 10.29,
      "learning_rate": 0.001,
      "loss": 2.7294,
      "step": 53616
    },
    {
      "epoch": 10.3,
      "learning_rate": 0.001,
      "loss": 2.733,
      "step": 53628
    },
    {
      "epoch": 10.3,
      "learning_rate": 0.001,
      "loss": 2.737,
      "step": 53640
    },
    {
      "epoch": 10.3,
      "learning_rate": 0.001,
      "loss": 2.7252,
      "step": 53652
    },
    {
      "epoch": 10.3,
      "learning_rate": 0.001,
      "loss": 2.7191,
      "step": 53664
    },
    {
      "epoch": 10.31,
      "learning_rate": 0.001,
      "loss": 2.734,
      "step": 53676
    },
    {
      "epoch": 10.31,
      "learning_rate": 0.001,
      "loss": 2.7352,
      "step": 53688
    },
    {
      "epoch": 10.31,
      "learning_rate": 0.001,
      "loss": 2.7268,
      "step": 53700
    },
    {
      "epoch": 10.31,
      "learning_rate": 0.001,
      "loss": 2.7347,
      "step": 53712
    },
    {
      "epoch": 10.32,
      "learning_rate": 0.001,
      "loss": 2.7352,
      "step": 53724
    },
    {
      "epoch": 10.32,
      "learning_rate": 0.001,
      "loss": 2.7311,
      "step": 53736
    },
    {
      "epoch": 10.32,
      "learning_rate": 0.001,
      "loss": 2.7269,
      "step": 53748
    },
    {
      "epoch": 10.32,
      "eval_ag_news_accuracy": 0.30275,
      "eval_ag_news_bleu_score": 4.421029722095021,
      "eval_ag_news_bleu_score_sem": 0.14740592281223455,
      "eval_ag_news_emb_cos_sim": 0.7629153728485107,
      "eval_ag_news_emb_cos_sim_sem": 0.009400203918381027,
      "eval_ag_news_emb_top1_equal": 0.171875,
      "eval_ag_news_emb_top1_equal_sem": 0.03347745514062371,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.7627270221710205,
      "eval_ag_news_n_ngrams_match_1": 12.954,
      "eval_ag_news_n_ngrams_match_2": 2.662,
      "eval_ag_news_n_ngrams_match_3": 0.742,
      "eval_ag_news_num_pred_words": 46.282,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 43.06570713008593,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.31879234611824636,
      "eval_ag_news_runtime": 10.8897,
      "eval_ag_news_samples_per_second": 45.915,
      "eval_ag_news_steps_per_second": 0.092,
      "eval_ag_news_token_set_f1": 0.32501083971866607,
      "eval_ag_news_token_set_f1_sem": 0.004351318499970537,
      "eval_ag_news_token_set_precision": 0.30575492319276987,
      "eval_ag_news_token_set_recall": 0.3632878636514925,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 53750
    },
    {
      "epoch": 10.32,
      "eval_anthropic_toxic_prompts_accuracy": 0.10525,
      "eval_anthropic_toxic_prompts_bleu_score": 2.7905245158869563,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12349844543442566,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6437270641326904,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009195974119931902,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.449458122253418,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.514,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.586,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.514,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.612,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 31.48332757106558,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.19282892270001412,
      "eval_anthropic_toxic_prompts_runtime": 9.8181,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.926,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.102,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.32824551154878184,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006253782341111984,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.39086054907139633,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3122785192835911,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 53750
    },
    {
      "epoch": 10.32,
      "eval_arxiv_accuracy": 0.32775,
      "eval_arxiv_bleu_score": 3.889417166811716,
      "eval_arxiv_bleu_score_sem": 0.11068272603824485,
      "eval_arxiv_emb_cos_sim": 0.7147364616394043,
      "eval_arxiv_emb_cos_sim_sem": 0.0077809727065195925,
      "eval_arxiv_emb_top1_equal": 0.2109375,
      "eval_arxiv_emb_top1_equal_sem": 0.03620184850179216,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.6153247356414795,
      "eval_arxiv_n_ngrams_match_1": 13.902,
      "eval_arxiv_n_ngrams_match_2": 2.578,
      "eval_arxiv_n_ngrams_match_3": 0.516,
      "eval_arxiv_num_pred_words": 41.028,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 37.16341225087441,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3262191461958489,
      "eval_arxiv_runtime": 14.3532,
      "eval_arxiv_samples_per_second": 34.836,
      "eval_arxiv_steps_per_second": 0.07,
      "eval_arxiv_token_set_f1": 0.3220345568752402,
      "eval_arxiv_token_set_f1_sem": 0.004156493584493791,
      "eval_arxiv_token_set_precision": 0.2710933886626981,
      "eval_arxiv_token_set_recall": 0.4130692034550771,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 53750
    },
    {
      "epoch": 10.32,
      "eval_python_code_alpaca_accuracy": 0.1473125,
      "eval_python_code_alpaca_bleu_score": 3.819787974852944,
      "eval_python_code_alpaca_bleu_score_sem": 0.11353915389351618,
      "eval_python_code_alpaca_emb_cos_sim": 0.7258760929107666,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.00836633052915216,
      "eval_python_code_alpaca_emb_top1_equal": 0.0859375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02487009666300537,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.1069111824035645,
      "eval_python_code_alpaca_n_ngrams_match_1": 8.964,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.438,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.712,
      "eval_python_code_alpaca_num_pred_words": 45.472,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 22.351896731516554,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.29383534490871266,
      "eval_python_code_alpaca_runtime": 10.9168,
      "eval_python_code_alpaca_samples_per_second": 45.801,
      "eval_python_code_alpaca_steps_per_second": 0.092,
      "eval_python_code_alpaca_token_set_f1": 0.449207088132276,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0051724742569146664,
      "eval_python_code_alpaca_token_set_precision": 0.4855544528671801,
      "eval_python_code_alpaca_token_set_recall": 0.44060734647652167,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 53750
    },
    {
      "epoch": 10.32,
      "eval_wikibio_accuracy": 0.30290625,
      "eval_wikibio_bleu_score": 5.383839361078247,
      "eval_wikibio_bleu_score_sem": 0.17734073960355903,
      "eval_wikibio_emb_cos_sim": 0.7250243425369263,
      "eval_wikibio_emb_cos_sim_sem": 0.00935694412965164,
      "eval_wikibio_emb_top1_equal": 0.125,
      "eval_wikibio_emb_top1_equal_sem": 0.02934655822437397,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.004042148590088,
      "eval_wikibio_n_ngrams_match_1": 9.812,
      "eval_wikibio_n_ngrams_match_2": 3.104,
      "eval_wikibio_n_ngrams_match_3": 1.02,
      "eval_wikibio_num_pred_words": 36.934,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 54.8192905085521,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3401770317967601,
      "eval_wikibio_runtime": 9.6245,
      "eval_wikibio_samples_per_second": 51.951,
      "eval_wikibio_steps_per_second": 0.104,
      "eval_wikibio_token_set_f1": 0.31065285304992185,
      "eval_wikibio_token_set_f1_sem": 0.005027158760670689,
      "eval_wikibio_token_set_precision": 0.31826837412991577,
      "eval_wikibio_token_set_recall": 0.31702885307558276,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 53750
    },
    {
      "epoch": 10.32,
      "eval_nq_accuracy": 0.50265625,
      "eval_nq_bleu_score": 10.488527768048552,
      "eval_nq_bleu_score_sem": 0.44455737401582057,
      "eval_nq_emb_cos_sim": 0.8087708950042725,
      "eval_nq_emb_cos_sim_sem": 0.007787056340684336,
      "eval_nq_emb_top1_equal": 0.2734375,
      "eval_nq_emb_top1_equal_sem": 0.03955156411760461,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.359281539916992,
      "eval_nq_n_ngrams_match_1": 21.896,
      "eval_nq_n_ngrams_match_2": 7.718,
      "eval_nq_n_ngrams_match_3": 3.472,
      "eval_nq_num_pred_words": 49.138,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 10.583345009364187,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.41968451307728977,
      "eval_nq_runtime": 10.5456,
      "eval_nq_samples_per_second": 47.413,
      "eval_nq_steps_per_second": 0.095,
      "eval_nq_token_set_f1": 0.4398026509375775,
      "eval_nq_token_set_f1_sem": 0.004946425865949454,
      "eval_nq_token_set_precision": 0.3917341367311386,
      "eval_nq_token_set_recall": 0.5138229726091218,
      "eval_nq_true_num_tokens": 64.0,
      "step": 53750
    },
    {
      "epoch": 10.32,
      "learning_rate": 0.001,
      "loss": 2.7475,
      "step": 53760
    },
    {
      "epoch": 10.32,
      "learning_rate": 0.001,
      "loss": 2.7249,
      "step": 53772
    },
    {
      "epoch": 10.33,
      "learning_rate": 0.001,
      "loss": 2.7186,
      "step": 53784
    },
    {
      "epoch": 10.33,
      "learning_rate": 0.001,
      "loss": 2.7221,
      "step": 53796
    },
    {
      "epoch": 10.33,
      "learning_rate": 0.001,
      "loss": 2.726,
      "step": 53808
    },
    {
      "epoch": 10.33,
      "learning_rate": 0.001,
      "loss": 2.7207,
      "step": 53820
    },
    {
      "epoch": 10.34,
      "learning_rate": 0.001,
      "loss": 2.7253,
      "step": 53832
    },
    {
      "epoch": 10.34,
      "learning_rate": 0.001,
      "loss": 2.7173,
      "step": 53844
    },
    {
      "epoch": 10.34,
      "learning_rate": 0.001,
      "loss": 2.7344,
      "step": 53856
    },
    {
      "epoch": 10.34,
      "learning_rate": 0.001,
      "loss": 2.7242,
      "step": 53868
    },
    {
      "epoch": 10.35,
      "learning_rate": 0.001,
      "loss": 2.7199,
      "step": 53880
    },
    {
      "epoch": 10.35,
      "learning_rate": 0.001,
      "loss": 2.7364,
      "step": 53892
    },
    {
      "epoch": 10.35,
      "learning_rate": 0.001,
      "loss": 2.7114,
      "step": 53904
    },
    {
      "epoch": 10.35,
      "learning_rate": 0.001,
      "loss": 2.7308,
      "step": 53916
    },
    {
      "epoch": 10.35,
      "learning_rate": 0.001,
      "loss": 2.7228,
      "step": 53928
    },
    {
      "epoch": 10.36,
      "learning_rate": 0.001,
      "loss": 2.7207,
      "step": 53940
    },
    {
      "epoch": 10.36,
      "learning_rate": 0.001,
      "loss": 2.7316,
      "step": 53952
    },
    {
      "epoch": 10.36,
      "learning_rate": 0.001,
      "loss": 2.7201,
      "step": 53964
    },
    {
      "epoch": 10.36,
      "learning_rate": 0.001,
      "loss": 2.731,
      "step": 53976
    },
    {
      "epoch": 10.37,
      "learning_rate": 0.001,
      "loss": 2.7243,
      "step": 53988
    },
    {
      "epoch": 10.37,
      "learning_rate": 0.001,
      "loss": 2.7281,
      "step": 54000
    },
    {
      "epoch": 10.37,
      "learning_rate": 0.001,
      "loss": 2.7357,
      "step": 54012
    },
    {
      "epoch": 10.37,
      "learning_rate": 0.001,
      "loss": 2.7329,
      "step": 54024
    },
    {
      "epoch": 10.38,
      "learning_rate": 0.001,
      "loss": 2.7318,
      "step": 54036
    },
    {
      "epoch": 10.38,
      "learning_rate": 0.001,
      "loss": 2.7176,
      "step": 54048
    },
    {
      "epoch": 10.38,
      "learning_rate": 0.001,
      "loss": 2.73,
      "step": 54060
    },
    {
      "epoch": 10.38,
      "learning_rate": 0.001,
      "loss": 2.7264,
      "step": 54072
    },
    {
      "epoch": 10.38,
      "learning_rate": 0.001,
      "loss": 2.7296,
      "step": 54084
    },
    {
      "epoch": 10.39,
      "learning_rate": 0.001,
      "loss": 2.7206,
      "step": 54096
    },
    {
      "epoch": 10.39,
      "learning_rate": 0.001,
      "loss": 2.7371,
      "step": 54108
    },
    {
      "epoch": 10.39,
      "learning_rate": 0.001,
      "loss": 2.7239,
      "step": 54120
    },
    {
      "epoch": 10.39,
      "learning_rate": 0.001,
      "loss": 2.7242,
      "step": 54132
    },
    {
      "epoch": 10.4,
      "learning_rate": 0.001,
      "loss": 2.7245,
      "step": 54144
    },
    {
      "epoch": 10.4,
      "learning_rate": 0.001,
      "loss": 2.7278,
      "step": 54156
    },
    {
      "epoch": 10.4,
      "learning_rate": 0.001,
      "loss": 2.7218,
      "step": 54168
    },
    {
      "epoch": 10.4,
      "learning_rate": 0.001,
      "loss": 2.7105,
      "step": 54180
    },
    {
      "epoch": 10.41,
      "learning_rate": 0.001,
      "loss": 2.7216,
      "step": 54192
    },
    {
      "epoch": 10.41,
      "learning_rate": 0.001,
      "loss": 2.7307,
      "step": 54204
    },
    {
      "epoch": 10.41,
      "learning_rate": 0.001,
      "loss": 2.7292,
      "step": 54216
    },
    {
      "epoch": 10.41,
      "learning_rate": 0.001,
      "loss": 2.7262,
      "step": 54228
    },
    {
      "epoch": 10.41,
      "learning_rate": 0.001,
      "loss": 2.7245,
      "step": 54240
    },
    {
      "epoch": 10.42,
      "learning_rate": 0.001,
      "loss": 2.7194,
      "step": 54252
    },
    {
      "epoch": 10.42,
      "learning_rate": 0.001,
      "loss": 2.7265,
      "step": 54264
    },
    {
      "epoch": 10.42,
      "learning_rate": 0.001,
      "loss": 2.7261,
      "step": 54276
    },
    {
      "epoch": 10.42,
      "learning_rate": 0.001,
      "loss": 2.7255,
      "step": 54288
    },
    {
      "epoch": 10.43,
      "learning_rate": 0.001,
      "loss": 2.727,
      "step": 54300
    },
    {
      "epoch": 10.43,
      "learning_rate": 0.001,
      "loss": 2.7382,
      "step": 54312
    },
    {
      "epoch": 10.43,
      "learning_rate": 0.001,
      "loss": 2.7307,
      "step": 54324
    },
    {
      "epoch": 10.43,
      "learning_rate": 0.001,
      "loss": 2.7222,
      "step": 54336
    },
    {
      "epoch": 10.44,
      "learning_rate": 0.001,
      "loss": 2.7217,
      "step": 54348
    },
    {
      "epoch": 10.44,
      "learning_rate": 0.001,
      "loss": 2.7219,
      "step": 54360
    },
    {
      "epoch": 10.44,
      "learning_rate": 0.001,
      "loss": 2.7282,
      "step": 54372
    },
    {
      "epoch": 10.44,
      "eval_ag_news_accuracy": 0.30525,
      "eval_ag_news_bleu_score": 4.287151852265607,
      "eval_ag_news_bleu_score_sem": 0.13770535603089282,
      "eval_ag_news_emb_cos_sim": 0.7802896499633789,
      "eval_ag_news_emb_cos_sim_sem": 0.007051109145411361,
      "eval_ag_news_emb_top1_equal": 0.2265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03714537682851538,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.7535603046417236,
      "eval_ag_news_n_ngrams_match_1": 13.038,
      "eval_ag_news_n_ngrams_match_2": 2.666,
      "eval_ag_news_n_ngrams_match_3": 0.716,
      "eval_ag_news_num_pred_words": 46.402,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 42.672739819499704,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3200070490912197,
      "eval_ag_news_runtime": 14.716,
      "eval_ag_news_samples_per_second": 33.977,
      "eval_ag_news_steps_per_second": 0.068,
      "eval_ag_news_token_set_f1": 0.32769701431828646,
      "eval_ag_news_token_set_f1_sem": 0.004279940614437022,
      "eval_ag_news_token_set_precision": 0.30821433798282394,
      "eval_ag_news_token_set_recall": 0.36442300756783574,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 54375
    },
    {
      "epoch": 10.44,
      "eval_anthropic_toxic_prompts_accuracy": 0.10690625,
      "eval_anthropic_toxic_prompts_bleu_score": 2.765943346689468,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10928871291275943,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6335175037384033,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010171045411498483,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.4407944679260254,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.62,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.602,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.554,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.668,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 31.211745051172443,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.19533282020513973,
      "eval_anthropic_toxic_prompts_runtime": 16.5998,
      "eval_anthropic_toxic_prompts_samples_per_second": 30.121,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.06,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3347748405465396,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0066718463553948355,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.3944160218442546,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3202924111931561,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 54375
    },
    {
      "epoch": 10.44,
      "eval_arxiv_accuracy": 0.32890625,
      "eval_arxiv_bleu_score": 3.7970648527332895,
      "eval_arxiv_bleu_score_sem": 0.11049710479893754,
      "eval_arxiv_emb_cos_sim": 0.7082982659339905,
      "eval_arxiv_emb_cos_sim_sem": 0.00937611040901197,
      "eval_arxiv_emb_top1_equal": 0.203125,
      "eval_arxiv_emb_top1_equal_sem": 0.03570055125142555,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.605374574661255,
      "eval_arxiv_n_ngrams_match_1": 13.652,
      "eval_arxiv_n_ngrams_match_2": 2.482,
      "eval_arxiv_n_ngrams_match_3": 0.508,
      "eval_arxiv_num_pred_words": 39.622,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 36.79546392468632,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3226470446286839,
      "eval_arxiv_runtime": 14.4784,
      "eval_arxiv_samples_per_second": 34.534,
      "eval_arxiv_steps_per_second": 0.069,
      "eval_arxiv_token_set_f1": 0.3201853071894984,
      "eval_arxiv_token_set_f1_sem": 0.004213754791068077,
      "eval_arxiv_token_set_precision": 0.2668160522356607,
      "eval_arxiv_token_set_recall": 0.42337221545985365,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 54375
    },
    {
      "epoch": 10.44,
      "eval_python_code_alpaca_accuracy": 0.1485625,
      "eval_python_code_alpaca_bleu_score": 3.941212779791558,
      "eval_python_code_alpaca_bleu_score_sem": 0.129502782958863,
      "eval_python_code_alpaca_emb_cos_sim": 0.7207451462745667,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008943700206902896,
      "eval_python_code_alpaca_emb_top1_equal": 0.109375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.109691858291626,
      "eval_python_code_alpaca_n_ngrams_match_1": 8.948,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.36,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.69,
      "eval_python_code_alpaca_num_pred_words": 43.292,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 22.414136606165638,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.29916473399058563,
      "eval_python_code_alpaca_runtime": 19.5106,
      "eval_python_code_alpaca_samples_per_second": 25.627,
      "eval_python_code_alpaca_steps_per_second": 0.051,
      "eval_python_code_alpaca_token_set_f1": 0.4485665602284746,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005507560376558197,
      "eval_python_code_alpaca_token_set_precision": 0.4817700228563396,
      "eval_python_code_alpaca_token_set_recall": 0.44614202823697596,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 54375
    },
    {
      "epoch": 10.44,
      "eval_wikibio_accuracy": 0.30446875,
      "eval_wikibio_bleu_score": 5.420102411962581,
      "eval_wikibio_bleu_score_sem": 0.19665541423216823,
      "eval_wikibio_emb_cos_sim": 0.6960967183113098,
      "eval_wikibio_emb_cos_sim_sem": 0.011985603558605623,
      "eval_wikibio_emb_top1_equal": 0.1484375,
      "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.005090236663818,
      "eval_wikibio_n_ngrams_match_1": 9.61,
      "eval_wikibio_n_ngrams_match_2": 3.126,
      "eval_wikibio_n_ngrams_match_3": 1.116,
      "eval_wikibio_num_pred_words": 36.826,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 54.87677607284938,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3240339619092947,
      "eval_wikibio_runtime": 13.1997,
      "eval_wikibio_samples_per_second": 37.88,
      "eval_wikibio_steps_per_second": 0.076,
      "eval_wikibio_token_set_f1": 0.30322508384429797,
      "eval_wikibio_token_set_f1_sem": 0.005553227425495123,
      "eval_wikibio_token_set_precision": 0.3115620569302409,
      "eval_wikibio_token_set_recall": 0.3123327368097843,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 54375
    },
    {
      "epoch": 10.44,
      "eval_nq_accuracy": 0.50334375,
      "eval_nq_bleu_score": 10.3531879718149,
      "eval_nq_bleu_score_sem": 0.43948659727354084,
      "eval_nq_emb_cos_sim": 0.8138612508773804,
      "eval_nq_emb_cos_sim_sem": 0.007374245740221845,
      "eval_nq_emb_top1_equal": 0.2265625,
      "eval_nq_emb_top1_equal_sem": 0.03714537682851538,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.3566269874572754,
      "eval_nq_n_ngrams_match_1": 21.62,
      "eval_nq_n_ngrams_match_2": 7.582,
      "eval_nq_n_ngrams_match_3": 3.396,
      "eval_nq_num_pred_words": 48.902,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 10.555288220422122,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4159296706956379,
      "eval_nq_runtime": 16.7176,
      "eval_nq_samples_per_second": 29.909,
      "eval_nq_steps_per_second": 0.06,
      "eval_nq_token_set_f1": 0.43214432415741205,
      "eval_nq_token_set_f1_sem": 0.005086063762030811,
      "eval_nq_token_set_precision": 0.38538407778102635,
      "eval_nq_token_set_recall": 0.5030186085206476,
      "eval_nq_true_num_tokens": 64.0,
      "step": 54375
    },
    {
      "epoch": 10.44,
      "learning_rate": 0.001,
      "loss": 2.7361,
      "step": 54384
    },
    {
      "epoch": 10.44,
      "learning_rate": 0.001,
      "loss": 2.7173,
      "step": 54396
    },
    {
      "epoch": 10.45,
      "learning_rate": 0.001,
      "loss": 2.7239,
      "step": 54408
    },
    {
      "epoch": 10.45,
      "learning_rate": 0.001,
      "loss": 2.7308,
      "step": 54420
    },
    {
      "epoch": 10.45,
      "learning_rate": 0.001,
      "loss": 2.7387,
      "step": 54432
    },
    {
      "epoch": 10.45,
      "learning_rate": 0.001,
      "loss": 2.7256,
      "step": 54444
    },
    {
      "epoch": 10.46,
      "learning_rate": 0.001,
      "loss": 2.7304,
      "step": 54456
    },
    {
      "epoch": 10.46,
      "learning_rate": 0.001,
      "loss": 2.7151,
      "step": 54468
    },
    {
      "epoch": 10.46,
      "learning_rate": 0.001,
      "loss": 2.7263,
      "step": 54480
    },
    {
      "epoch": 10.46,
      "learning_rate": 0.001,
      "loss": 2.7198,
      "step": 54492
    },
    {
      "epoch": 10.47,
      "learning_rate": 0.001,
      "loss": 2.7233,
      "step": 54504
    },
    {
      "epoch": 10.47,
      "learning_rate": 0.001,
      "loss": 2.7281,
      "step": 54516
    },
    {
      "epoch": 10.47,
      "learning_rate": 0.001,
      "loss": 2.7307,
      "step": 54528
    },
    {
      "epoch": 10.47,
      "learning_rate": 0.001,
      "loss": 2.7301,
      "step": 54540
    },
    {
      "epoch": 10.47,
      "learning_rate": 0.001,
      "loss": 2.7309,
      "step": 54552
    },
    {
      "epoch": 10.48,
      "learning_rate": 0.001,
      "loss": 2.7339,
      "step": 54564
    },
    {
      "epoch": 10.48,
      "learning_rate": 0.001,
      "loss": 2.7172,
      "step": 54576
    },
    {
      "epoch": 10.48,
      "learning_rate": 0.001,
      "loss": 2.7176,
      "step": 54588
    },
    {
      "epoch": 10.48,
      "learning_rate": 0.001,
      "loss": 2.7293,
      "step": 54600
    },
    {
      "epoch": 10.49,
      "learning_rate": 0.001,
      "loss": 2.7239,
      "step": 54612
    },
    {
      "epoch": 10.49,
      "learning_rate": 0.001,
      "loss": 2.7284,
      "step": 54624
    },
    {
      "epoch": 10.49,
      "learning_rate": 0.001,
      "loss": 2.7347,
      "step": 54636
    },
    {
      "epoch": 10.49,
      "learning_rate": 0.001,
      "loss": 2.7322,
      "step": 54648
    },
    {
      "epoch": 10.5,
      "learning_rate": 0.001,
      "loss": 2.7157,
      "step": 54660
    },
    {
      "epoch": 10.5,
      "learning_rate": 0.001,
      "loss": 2.7291,
      "step": 54672
    },
    {
      "epoch": 10.5,
      "learning_rate": 0.001,
      "loss": 2.7272,
      "step": 54684
    },
    {
      "epoch": 10.5,
      "learning_rate": 0.001,
      "loss": 2.7334,
      "step": 54696
    },
    {
      "epoch": 10.5,
      "learning_rate": 0.001,
      "loss": 2.7293,
      "step": 54708
    },
    {
      "epoch": 10.51,
      "learning_rate": 0.001,
      "loss": 2.7238,
      "step": 54720
    },
    {
      "epoch": 10.51,
      "learning_rate": 0.001,
      "loss": 2.7248,
      "step": 54732
    },
    {
      "epoch": 10.51,
      "learning_rate": 0.001,
      "loss": 2.721,
      "step": 54744
    },
    {
      "epoch": 10.51,
      "learning_rate": 0.001,
      "loss": 2.7241,
      "step": 54756
    },
    {
      "epoch": 10.52,
      "learning_rate": 0.001,
      "loss": 2.7341,
      "step": 54768
    },
    {
      "epoch": 10.52,
      "learning_rate": 0.001,
      "loss": 2.7279,
      "step": 54780
    },
    {
      "epoch": 10.52,
      "learning_rate": 0.001,
      "loss": 2.7219,
      "step": 54792
    },
    {
      "epoch": 10.52,
      "learning_rate": 0.001,
      "loss": 2.7162,
      "step": 54804
    },
    {
      "epoch": 10.53,
      "learning_rate": 0.001,
      "loss": 2.7217,
      "step": 54816
    },
    {
      "epoch": 10.53,
      "learning_rate": 0.001,
      "loss": 2.7176,
      "step": 54828
    },
    {
      "epoch": 10.53,
      "learning_rate": 0.001,
      "loss": 2.7223,
      "step": 54840
    },
    {
      "epoch": 10.53,
      "learning_rate": 0.001,
      "loss": 2.7306,
      "step": 54852
    },
    {
      "epoch": 10.53,
      "learning_rate": 0.001,
      "loss": 2.7219,
      "step": 54864
    },
    {
      "epoch": 10.54,
      "learning_rate": 0.001,
      "loss": 2.7307,
      "step": 54876
    },
    {
      "epoch": 10.54,
      "learning_rate": 0.001,
      "loss": 2.7334,
      "step": 54888
    },
    {
      "epoch": 10.54,
      "learning_rate": 0.001,
      "loss": 2.7158,
      "step": 54900
    },
    {
      "epoch": 10.54,
      "learning_rate": 0.001,
      "loss": 2.7202,
      "step": 54912
    },
    {
      "epoch": 10.55,
      "learning_rate": 0.001,
      "loss": 2.7263,
      "step": 54924
    },
    {
      "epoch": 10.55,
      "learning_rate": 0.001,
      "loss": 2.73,
      "step": 54936
    },
    {
      "epoch": 10.55,
      "learning_rate": 0.001,
      "loss": 2.7268,
      "step": 54948
    },
    {
      "epoch": 10.55,
      "learning_rate": 0.001,
      "loss": 2.712,
      "step": 54960
    },
    {
      "epoch": 10.56,
      "learning_rate": 0.001,
      "loss": 2.719,
      "step": 54972
    },
    {
      "epoch": 10.56,
      "learning_rate": 0.001,
      "loss": 2.7191,
      "step": 54984
    },
    {
      "epoch": 10.56,
      "learning_rate": 0.001,
      "loss": 2.718,
      "step": 54996
    },
    {
      "epoch": 10.56,
      "eval_ag_news_accuracy": 0.304375,
      "eval_ag_news_bleu_score": 4.258325095410446,
      "eval_ag_news_bleu_score_sem": 0.14707397261193333,
      "eval_ag_news_emb_cos_sim": 0.7857528924942017,
      "eval_ag_news_emb_cos_sim_sem": 0.007713095206992798,
      "eval_ag_news_emb_top1_equal": 0.25,
      "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.7430331707000732,
      "eval_ag_news_n_ngrams_match_1": 13.09,
      "eval_ag_news_n_ngrams_match_2": 2.69,
      "eval_ag_news_n_ngrams_match_3": 0.662,
      "eval_ag_news_num_pred_words": 45.962,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 42.22587440461575,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.32498122167146914,
      "eval_ag_news_runtime": 11.4594,
      "eval_ag_news_samples_per_second": 43.632,
      "eval_ag_news_steps_per_second": 0.087,
      "eval_ag_news_token_set_f1": 0.33116753479249383,
      "eval_ag_news_token_set_f1_sem": 0.004402606812542982,
      "eval_ag_news_token_set_precision": 0.3101122491239984,
      "eval_ag_news_token_set_recall": 0.3707862362369787,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 55000
    },
    {
      "epoch": 10.56,
      "eval_anthropic_toxic_prompts_accuracy": 0.10628125,
      "eval_anthropic_toxic_prompts_bleu_score": 2.7190780770164444,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10927482698393612,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6370939016342163,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009760940770419292,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.445617198944092,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.614,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.642,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.59,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.814,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 31.362634459215002,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.19396980189909335,
      "eval_anthropic_toxic_prompts_runtime": 12.5556,
      "eval_anthropic_toxic_prompts_samples_per_second": 39.823,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.08,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.33535288439937794,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006498362012588391,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.39267813613992103,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3235501114804039,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 55000
    },
    {
      "epoch": 10.56,
      "eval_arxiv_accuracy": 0.33071875,
      "eval_arxiv_bleu_score": 4.0040843338745225,
      "eval_arxiv_bleu_score_sem": 0.12065461584433716,
      "eval_arxiv_emb_cos_sim": 0.7195819616317749,
      "eval_arxiv_emb_cos_sim_sem": 0.007894215361567793,
      "eval_arxiv_emb_top1_equal": 0.21875,
      "eval_arxiv_emb_top1_equal_sem": 0.03668319712192295,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.59499454498291,
      "eval_arxiv_n_ngrams_match_1": 13.928,
      "eval_arxiv_n_ngrams_match_2": 2.64,
      "eval_arxiv_n_ngrams_match_3": 0.578,
      "eval_arxiv_num_pred_words": 40.232,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 36.415501340155096,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3283346123239741,
      "eval_arxiv_runtime": 10.2033,
      "eval_arxiv_samples_per_second": 49.004,
      "eval_arxiv_steps_per_second": 0.098,
      "eval_arxiv_token_set_f1": 0.32425936970857777,
      "eval_arxiv_token_set_f1_sem": 0.004268208409380536,
      "eval_arxiv_token_set_precision": 0.27274057095881565,
      "eval_arxiv_token_set_recall": 0.4169761827083265,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 55000
    },
    {
      "epoch": 10.56,
      "eval_python_code_alpaca_accuracy": 0.1479375,
      "eval_python_code_alpaca_bleu_score": 3.9383014092663715,
      "eval_python_code_alpaca_bleu_score_sem": 0.12581489261409792,
      "eval_python_code_alpaca_emb_cos_sim": 0.7387979626655579,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008078756654861758,
      "eval_python_code_alpaca_emb_top1_equal": 0.0859375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02487009666300537,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.0820183753967285,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.204,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.49,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.712,
      "eval_python_code_alpaca_num_pred_words": 44.498,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 21.80236337021571,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.30338226927866524,
      "eval_python_code_alpaca_runtime": 10.1643,
      "eval_python_code_alpaca_samples_per_second": 49.192,
      "eval_python_code_alpaca_steps_per_second": 0.098,
      "eval_python_code_alpaca_token_set_f1": 0.45299241303695986,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005447650335026252,
      "eval_python_code_alpaca_token_set_precision": 0.5004375215868001,
      "eval_python_code_alpaca_token_set_recall": 0.4384441876125228,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 55000
    },
    {
      "epoch": 10.56,
      "eval_wikibio_accuracy": 0.30415625,
      "eval_wikibio_bleu_score": 5.539772442940585,
      "eval_wikibio_bleu_score_sem": 0.2109495197485314,
      "eval_wikibio_emb_cos_sim": 0.6955208778381348,
      "eval_wikibio_emb_cos_sim_sem": 0.0115707692508701,
      "eval_wikibio_emb_top1_equal": 0.1796875,
      "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.973893165588379,
      "eval_wikibio_n_ngrams_match_1": 9.76,
      "eval_wikibio_n_ngrams_match_2": 3.158,
      "eval_wikibio_n_ngrams_match_3": 1.14,
      "eval_wikibio_num_pred_words": 36.896,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 53.19121044978153,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3320980570092723,
      "eval_wikibio_runtime": 10.3085,
      "eval_wikibio_samples_per_second": 48.504,
      "eval_wikibio_steps_per_second": 0.097,
      "eval_wikibio_token_set_f1": 0.3054922226778108,
      "eval_wikibio_token_set_f1_sem": 0.005688636508994517,
      "eval_wikibio_token_set_precision": 0.3161374752024517,
      "eval_wikibio_token_set_recall": 0.3111209058044092,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 55000
    },
    {
      "epoch": 10.56,
      "eval_nq_accuracy": 0.50303125,
      "eval_nq_bleu_score": 10.267043155676992,
      "eval_nq_bleu_score_sem": 0.4405446619182153,
      "eval_nq_emb_cos_sim": 0.8072665929794312,
      "eval_nq_emb_cos_sim_sem": 0.007750441924585005,
      "eval_nq_emb_top1_equal": 0.2421875,
      "eval_nq_emb_top1_equal_sem": 0.038014990119662626,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.3541324138641357,
      "eval_nq_n_ngrams_match_1": 21.936,
      "eval_nq_n_ngrams_match_2": 7.596,
      "eval_nq_n_ngrams_match_3": 3.342,
      "eval_nq_num_pred_words": 48.97,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 10.528990092105248,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.41986497596618677,
      "eval_nq_runtime": 14.1104,
      "eval_nq_samples_per_second": 35.435,
      "eval_nq_steps_per_second": 0.071,
      "eval_nq_token_set_f1": 0.43854430931609373,
      "eval_nq_token_set_f1_sem": 0.00489290361061998,
      "eval_nq_token_set_precision": 0.39277717501204584,
      "eval_nq_token_set_recall": 0.5078532499681914,
      "eval_nq_true_num_tokens": 64.0,
      "step": 55000
    },
    {
      "epoch": 10.56,
      "learning_rate": 0.001,
      "loss": 2.7156,
      "step": 55008
    },
    {
      "epoch": 10.56,
      "learning_rate": 0.001,
      "loss": 2.7245,
      "step": 55020
    },
    {
      "epoch": 10.57,
      "learning_rate": 0.001,
      "loss": 2.7322,
      "step": 55032
    },
    {
      "epoch": 10.57,
      "learning_rate": 0.001,
      "loss": 2.7225,
      "step": 55044
    },
    {
      "epoch": 10.57,
      "learning_rate": 0.001,
      "loss": 2.7176,
      "step": 55056
    },
    {
      "epoch": 10.57,
      "learning_rate": 0.001,
      "loss": 2.7093,
      "step": 55068
    },
    {
      "epoch": 10.58,
      "learning_rate": 0.001,
      "loss": 2.7231,
      "step": 55080
    },
    {
      "epoch": 10.58,
      "learning_rate": 0.001,
      "loss": 2.7254,
      "step": 55092
    },
    {
      "epoch": 10.58,
      "learning_rate": 0.001,
      "loss": 2.7353,
      "step": 55104
    },
    {
      "epoch": 10.58,
      "learning_rate": 0.001,
      "loss": 2.7335,
      "step": 55116
    },
    {
      "epoch": 10.59,
      "learning_rate": 0.001,
      "loss": 2.7253,
      "step": 55128
    },
    {
      "epoch": 10.59,
      "learning_rate": 0.001,
      "loss": 2.7211,
      "step": 55140
    },
    {
      "epoch": 10.59,
      "learning_rate": 0.001,
      "loss": 2.7252,
      "step": 55152
    },
    {
      "epoch": 10.59,
      "learning_rate": 0.001,
      "loss": 2.729,
      "step": 55164
    },
    {
      "epoch": 10.59,
      "learning_rate": 0.001,
      "loss": 2.7186,
      "step": 55176
    },
    {
      "epoch": 10.6,
      "learning_rate": 0.001,
      "loss": 2.7206,
      "step": 55188
    },
    {
      "epoch": 10.6,
      "learning_rate": 0.001,
      "loss": 2.7251,
      "step": 55200
    },
    {
      "epoch": 10.6,
      "learning_rate": 0.001,
      "loss": 2.7271,
      "step": 55212
    },
    {
      "epoch": 10.6,
      "learning_rate": 0.001,
      "loss": 2.7165,
      "step": 55224
    },
    {
      "epoch": 10.61,
      "learning_rate": 0.001,
      "loss": 2.7217,
      "step": 55236
    },
    {
      "epoch": 10.61,
      "learning_rate": 0.001,
      "loss": 2.7112,
      "step": 55248
    },
    {
      "epoch": 10.61,
      "learning_rate": 0.001,
      "loss": 2.7165,
      "step": 55260
    },
    {
      "epoch": 10.61,
      "learning_rate": 0.001,
      "loss": 2.7359,
      "step": 55272
    },
    {
      "epoch": 10.62,
      "learning_rate": 0.001,
      "loss": 2.7309,
      "step": 55284
    },
    {
      "epoch": 10.62,
      "learning_rate": 0.001,
      "loss": 2.7206,
      "step": 55296
    },
    {
      "epoch": 10.62,
      "learning_rate": 0.001,
      "loss": 2.7322,
      "step": 55308
    },
    {
      "epoch": 10.62,
      "learning_rate": 0.001,
      "loss": 2.7243,
      "step": 55320
    },
    {
      "epoch": 10.62,
      "learning_rate": 0.001,
      "loss": 2.7265,
      "step": 55332
    },
    {
      "epoch": 10.63,
      "learning_rate": 0.001,
      "loss": 2.7333,
      "step": 55344
    },
    {
      "epoch": 10.63,
      "learning_rate": 0.001,
      "loss": 2.7298,
      "step": 55356
    },
    {
      "epoch": 10.63,
      "learning_rate": 0.001,
      "loss": 2.7149,
      "step": 55368
    },
    {
      "epoch": 10.63,
      "learning_rate": 0.001,
      "loss": 2.7209,
      "step": 55380
    },
    {
      "epoch": 10.64,
      "learning_rate": 0.001,
      "loss": 2.7233,
      "step": 55392
    },
    {
      "epoch": 10.64,
      "learning_rate": 0.001,
      "loss": 2.7243,
      "step": 55404
    },
    {
      "epoch": 10.64,
      "learning_rate": 0.001,
      "loss": 2.7217,
      "step": 55416
    },
    {
      "epoch": 10.64,
      "learning_rate": 0.001,
      "loss": 2.7306,
      "step": 55428
    },
    {
      "epoch": 10.65,
      "learning_rate": 0.001,
      "loss": 2.722,
      "step": 55440
    },
    {
      "epoch": 10.65,
      "learning_rate": 0.001,
      "loss": 2.7347,
      "step": 55452
    },
    {
      "epoch": 10.65,
      "learning_rate": 0.001,
      "loss": 2.7195,
      "step": 55464
    },
    {
      "epoch": 10.65,
      "learning_rate": 0.001,
      "loss": 2.7126,
      "step": 55476
    },
    {
      "epoch": 10.65,
      "learning_rate": 0.001,
      "loss": 2.7276,
      "step": 55488
    },
    {
      "epoch": 10.66,
      "learning_rate": 0.001,
      "loss": 2.7262,
      "step": 55500
    },
    {
      "epoch": 10.66,
      "learning_rate": 0.001,
      "loss": 2.7273,
      "step": 55512
    },
    {
      "epoch": 10.66,
      "learning_rate": 0.001,
      "loss": 2.7221,
      "step": 55524
    },
    {
      "epoch": 10.66,
      "learning_rate": 0.001,
      "loss": 2.7267,
      "step": 55536
    },
    {
      "epoch": 10.67,
      "learning_rate": 0.001,
      "loss": 2.7218,
      "step": 55548
    },
    {
      "epoch": 10.67,
      "learning_rate": 0.001,
      "loss": 2.7223,
      "step": 55560
    },
    {
      "epoch": 10.67,
      "learning_rate": 0.001,
      "loss": 2.7192,
      "step": 55572
    },
    {
      "epoch": 10.67,
      "learning_rate": 0.001,
      "loss": 2.7308,
      "step": 55584
    },
    {
      "epoch": 10.68,
      "learning_rate": 0.001,
      "loss": 2.7208,
      "step": 55596
    },
    {
      "epoch": 10.68,
      "learning_rate": 0.001,
      "loss": 2.7272,
      "step": 55608
    },
    {
      "epoch": 10.68,
      "learning_rate": 0.001,
      "loss": 2.7349,
      "step": 55620
    },
    {
      "epoch": 10.68,
      "eval_ag_news_accuracy": 0.3035,
      "eval_ag_news_bleu_score": 4.3693944548121415,
      "eval_ag_news_bleu_score_sem": 0.14171892856740806,
      "eval_ag_news_emb_cos_sim": 0.7833855748176575,
      "eval_ag_news_emb_cos_sim_sem": 0.007463489716310139,
      "eval_ag_news_emb_top1_equal": 0.21875,
      "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.756091833114624,
      "eval_ag_news_n_ngrams_match_1": 13.03,
      "eval_ag_news_n_ngrams_match_2": 2.69,
      "eval_ag_news_n_ngrams_match_3": 0.75,
      "eval_ag_news_num_pred_words": 46.122,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 42.78090392786411,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3218732411680624,
      "eval_ag_news_runtime": 10.4582,
      "eval_ag_news_samples_per_second": 47.81,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.3292143717381226,
      "eval_ag_news_token_set_f1_sem": 0.0043415927175779944,
      "eval_ag_news_token_set_precision": 0.3086045254028964,
      "eval_ag_news_token_set_recall": 0.37102524345139065,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 55625
    },
    {
      "epoch": 10.68,
      "eval_anthropic_toxic_prompts_accuracy": 0.1054375,
      "eval_anthropic_toxic_prompts_bleu_score": 2.674264644140458,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10298624139985543,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6428861618041992,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009882464622655376,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1484375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.031548465007086954,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.4474520683288574,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.62,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.556,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.51,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.36,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 31.420233624321988,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.19516646839807889,
      "eval_anthropic_toxic_prompts_runtime": 10.081,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.598,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.099,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3326299573257753,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006372127303267976,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.39679224681863035,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3165468159861881,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 55625
    },
    {
      "epoch": 10.68,
      "eval_arxiv_accuracy": 0.33090625,
      "eval_arxiv_bleu_score": 3.8295488650601968,
      "eval_arxiv_bleu_score_sem": 0.12001471560226433,
      "eval_arxiv_emb_cos_sim": 0.718084454536438,
      "eval_arxiv_emb_cos_sim_sem": 0.008743845119620302,
      "eval_arxiv_emb_top1_equal": 0.234375,
      "eval_arxiv_emb_top1_equal_sem": 0.03758909358128201,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.5993494987487793,
      "eval_arxiv_n_ngrams_match_1": 13.798,
      "eval_arxiv_n_ngrams_match_2": 2.55,
      "eval_arxiv_n_ngrams_match_3": 0.512,
      "eval_arxiv_num_pred_words": 39.836,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 36.574434988006985,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3248562342422566,
      "eval_arxiv_runtime": 10.6632,
      "eval_arxiv_samples_per_second": 46.89,
      "eval_arxiv_steps_per_second": 0.094,
      "eval_arxiv_token_set_f1": 0.3224257945683061,
      "eval_arxiv_token_set_f1_sem": 0.004321349759941008,
      "eval_arxiv_token_set_precision": 0.27216607546232435,
      "eval_arxiv_token_set_recall": 0.4180924995094326,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 55625
    },
    {
      "epoch": 10.68,
      "eval_python_code_alpaca_accuracy": 0.14796875,
      "eval_python_code_alpaca_bleu_score": 3.8705052380856286,
      "eval_python_code_alpaca_bleu_score_sem": 0.12261524029647448,
      "eval_python_code_alpaca_emb_cos_sim": 0.7215318083763123,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009852892681524412,
      "eval_python_code_alpaca_emb_top1_equal": 0.125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.088245153427124,
      "eval_python_code_alpaca_n_ngrams_match_1": 8.904,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.404,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.682,
      "eval_python_code_alpaca_num_pred_words": 43.458,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 21.93854539506779,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.29954159178261763,
      "eval_python_code_alpaca_runtime": 15.8376,
      "eval_python_code_alpaca_samples_per_second": 31.57,
      "eval_python_code_alpaca_steps_per_second": 0.063,
      "eval_python_code_alpaca_token_set_f1": 0.4425443412975813,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005565341465966108,
      "eval_python_code_alpaca_token_set_precision": 0.4834871944462926,
      "eval_python_code_alpaca_token_set_recall": 0.4343382582916853,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 55625
    },
    {
      "epoch": 10.68,
      "eval_wikibio_accuracy": 0.3059375,
      "eval_wikibio_bleu_score": 5.2086348097576884,
      "eval_wikibio_bleu_score_sem": 0.19022722361901956,
      "eval_wikibio_emb_cos_sim": 0.6870605945587158,
      "eval_wikibio_emb_cos_sim_sem": 0.01231822253578655,
      "eval_wikibio_emb_top1_equal": 0.140625,
      "eval_wikibio_emb_top1_equal_sem": 0.030847557647994725,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.011824131011963,
      "eval_wikibio_n_ngrams_match_1": 9.4,
      "eval_wikibio_n_ngrams_match_2": 3.014,
      "eval_wikibio_n_ngrams_match_3": 1.0,
      "eval_wikibio_num_pred_words": 35.434,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 55.247557485420494,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.32254578911541076,
      "eval_wikibio_runtime": 10.0644,
      "eval_wikibio_samples_per_second": 49.68,
      "eval_wikibio_steps_per_second": 0.099,
      "eval_wikibio_token_set_f1": 0.2984314801837055,
      "eval_wikibio_token_set_f1_sem": 0.005711400051016728,
      "eval_wikibio_token_set_precision": 0.3020520303055395,
      "eval_wikibio_token_set_recall": 0.31569661931425963,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 55625
    },
    {
      "epoch": 10.68,
      "eval_nq_accuracy": 0.5031875,
      "eval_nq_bleu_score": 10.163381954740693,
      "eval_nq_bleu_score_sem": 0.4329978045610566,
      "eval_nq_emb_cos_sim": 0.8052500486373901,
      "eval_nq_emb_cos_sim_sem": 0.008247454025838168,
      "eval_nq_emb_top1_equal": 0.2109375,
      "eval_nq_emb_top1_equal_sem": 0.03620184850179216,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.355693817138672,
      "eval_nq_n_ngrams_match_1": 21.638,
      "eval_nq_n_ngrams_match_2": 7.586,
      "eval_nq_n_ngrams_match_3": 3.298,
      "eval_nq_num_pred_words": 48.234,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 10.545442933129905,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4174892567607175,
      "eval_nq_runtime": 10.1407,
      "eval_nq_samples_per_second": 49.306,
      "eval_nq_steps_per_second": 0.099,
      "eval_nq_token_set_f1": 0.43396102757839755,
      "eval_nq_token_set_f1_sem": 0.005101646300458602,
      "eval_nq_token_set_precision": 0.3870328492241951,
      "eval_nq_token_set_recall": 0.506937030946592,
      "eval_nq_true_num_tokens": 64.0,
      "step": 55625
    },
    {
      "epoch": 10.68,
      "learning_rate": 0.001,
      "loss": 2.7341,
      "step": 55632
    },
    {
      "epoch": 10.68,
      "learning_rate": 0.001,
      "loss": 2.7296,
      "step": 55644
    },
    {
      "epoch": 10.69,
      "learning_rate": 0.001,
      "loss": 2.7253,
      "step": 55656
    },
    {
      "epoch": 10.69,
      "learning_rate": 0.001,
      "loss": 2.7245,
      "step": 55668
    },
    {
      "epoch": 10.69,
      "learning_rate": 0.001,
      "loss": 2.7164,
      "step": 55680
    },
    {
      "epoch": 10.69,
      "learning_rate": 0.001,
      "loss": 2.7168,
      "step": 55692
    },
    {
      "epoch": 10.7,
      "learning_rate": 0.001,
      "loss": 2.7266,
      "step": 55704
    },
    {
      "epoch": 10.7,
      "learning_rate": 0.001,
      "loss": 2.7259,
      "step": 55716
    },
    {
      "epoch": 10.7,
      "learning_rate": 0.001,
      "loss": 2.7299,
      "step": 55728
    },
    {
      "epoch": 10.7,
      "learning_rate": 0.001,
      "loss": 2.7171,
      "step": 55740
    },
    {
      "epoch": 10.71,
      "learning_rate": 0.001,
      "loss": 2.7239,
      "step": 55752
    },
    {
      "epoch": 10.71,
      "learning_rate": 0.001,
      "loss": 2.7133,
      "step": 55764
    },
    {
      "epoch": 10.71,
      "learning_rate": 0.001,
      "loss": 2.7238,
      "step": 55776
    },
    {
      "epoch": 10.71,
      "learning_rate": 0.001,
      "loss": 2.722,
      "step": 55788
    },
    {
      "epoch": 10.71,
      "learning_rate": 0.001,
      "loss": 2.731,
      "step": 55800
    },
    {
      "epoch": 10.72,
      "learning_rate": 0.001,
      "loss": 2.7345,
      "step": 55812
    },
    {
      "epoch": 10.72,
      "learning_rate": 0.001,
      "loss": 2.7314,
      "step": 55824
    },
    {
      "epoch": 10.72,
      "learning_rate": 0.001,
      "loss": 2.7299,
      "step": 55836
    },
    {
      "epoch": 10.72,
      "learning_rate": 0.001,
      "loss": 2.7236,
      "step": 55848
    },
    {
      "epoch": 10.73,
      "learning_rate": 0.001,
      "loss": 2.7177,
      "step": 55860
    },
    {
      "epoch": 10.73,
      "learning_rate": 0.001,
      "loss": 2.712,
      "step": 55872
    },
    {
      "epoch": 10.73,
      "learning_rate": 0.001,
      "loss": 2.7253,
      "step": 55884
    },
    {
      "epoch": 10.73,
      "learning_rate": 0.001,
      "loss": 2.725,
      "step": 55896
    },
    {
      "epoch": 10.74,
      "learning_rate": 0.001,
      "loss": 2.7234,
      "step": 55908
    },
    {
      "epoch": 10.74,
      "learning_rate": 0.001,
      "loss": 2.7313,
      "step": 55920
    },
    {
      "epoch": 10.74,
      "learning_rate": 0.001,
      "loss": 2.7208,
      "step": 55932
    },
    {
      "epoch": 10.74,
      "learning_rate": 0.001,
      "loss": 2.7232,
      "step": 55944
    },
    {
      "epoch": 10.74,
      "learning_rate": 0.001,
      "loss": 2.727,
      "step": 55956
    },
    {
      "epoch": 10.75,
      "learning_rate": 0.001,
      "loss": 2.7235,
      "step": 55968
    },
    {
      "epoch": 10.75,
      "learning_rate": 0.001,
      "loss": 2.7381,
      "step": 55980
    },
    {
      "epoch": 10.75,
      "learning_rate": 0.001,
      "loss": 2.7286,
      "step": 55992
    },
    {
      "epoch": 10.75,
      "learning_rate": 0.001,
      "loss": 2.731,
      "step": 56004
    },
    {
      "epoch": 10.76,
      "learning_rate": 0.001,
      "loss": 2.7321,
      "step": 56016
    },
    {
      "epoch": 10.76,
      "learning_rate": 0.001,
      "loss": 2.7298,
      "step": 56028
    },
    {
      "epoch": 10.76,
      "learning_rate": 0.001,
      "loss": 2.7273,
      "step": 56040
    },
    {
      "epoch": 10.76,
      "learning_rate": 0.001,
      "loss": 2.7356,
      "step": 56052
    },
    {
      "epoch": 10.76,
      "learning_rate": 0.001,
      "loss": 2.7198,
      "step": 56064
    },
    {
      "epoch": 10.77,
      "learning_rate": 0.001,
      "loss": 2.7349,
      "step": 56076
    },
    {
      "epoch": 10.77,
      "learning_rate": 0.001,
      "loss": 2.722,
      "step": 56088
    },
    {
      "epoch": 10.77,
      "learning_rate": 0.001,
      "loss": 2.7354,
      "step": 56100
    },
    {
      "epoch": 10.77,
      "learning_rate": 0.001,
      "loss": 2.7343,
      "step": 56112
    },
    {
      "epoch": 10.78,
      "learning_rate": 0.001,
      "loss": 2.7287,
      "step": 56124
    },
    {
      "epoch": 10.78,
      "learning_rate": 0.001,
      "loss": 2.7372,
      "step": 56136
    },
    {
      "epoch": 10.78,
      "learning_rate": 0.001,
      "loss": 2.7405,
      "step": 56148
    },
    {
      "epoch": 10.78,
      "learning_rate": 0.001,
      "loss": 2.7315,
      "step": 56160
    },
    {
      "epoch": 10.79,
      "learning_rate": 0.001,
      "loss": 2.728,
      "step": 56172
    },
    {
      "epoch": 10.79,
      "learning_rate": 0.001,
      "loss": 2.7276,
      "step": 56184
    },
    {
      "epoch": 10.79,
      "learning_rate": 0.001,
      "loss": 2.7176,
      "step": 56196
    },
    {
      "epoch": 10.79,
      "learning_rate": 0.001,
      "loss": 2.7259,
      "step": 56208
    },
    {
      "epoch": 10.79,
      "learning_rate": 0.001,
      "loss": 2.7232,
      "step": 56220
    },
    {
      "epoch": 10.8,
      "learning_rate": 0.001,
      "loss": 2.722,
      "step": 56232
    },
    {
      "epoch": 10.8,
      "learning_rate": 0.001,
      "loss": 2.7261,
      "step": 56244
    },
    {
      "epoch": 10.8,
      "eval_ag_news_accuracy": 0.30584375,
      "eval_ag_news_bleu_score": 4.252377613985345,
      "eval_ag_news_bleu_score_sem": 0.13956869896846102,
      "eval_ag_news_emb_cos_sim": 0.7772521376609802,
      "eval_ag_news_emb_cos_sim_sem": 0.008338865271902232,
      "eval_ag_news_emb_top1_equal": 0.171875,
      "eval_ag_news_emb_top1_equal_sem": 0.03347745514062371,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.742692232131958,
      "eval_ag_news_n_ngrams_match_1": 12.956,
      "eval_ag_news_n_ngrams_match_2": 2.622,
      "eval_ag_news_n_ngrams_match_3": 0.696,
      "eval_ag_news_num_pred_words": 45.932,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 42.211480429328915,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.31743734148452624,
      "eval_ag_news_runtime": 10.443,
      "eval_ag_news_samples_per_second": 47.879,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.3285705078935257,
      "eval_ag_news_token_set_f1_sem": 0.004423649195070215,
      "eval_ag_news_token_set_precision": 0.30705897796641934,
      "eval_ag_news_token_set_recall": 0.37212458409033633,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 56250
    },
    {
      "epoch": 10.8,
      "eval_anthropic_toxic_prompts_accuracy": 0.10653125,
      "eval_anthropic_toxic_prompts_bleu_score": 2.7686883625140473,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11485083455155894,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6245629191398621,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.01097283125250154,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.4250752925872803,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.71,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.612,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.574,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.024,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 30.724958126006992,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.19806136476059152,
      "eval_anthropic_toxic_prompts_runtime": 9.9973,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.014,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.1,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3381603542203713,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006567382204123827,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.3974096944412263,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3256008528496704,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 56250
    },
    {
      "epoch": 10.8,
      "eval_arxiv_accuracy": 0.32865625,
      "eval_arxiv_bleu_score": 3.9154094006971207,
      "eval_arxiv_bleu_score_sem": 0.11494489811529976,
      "eval_arxiv_emb_cos_sim": 0.7228298783302307,
      "eval_arxiv_emb_cos_sim_sem": 0.009944919008158932,
      "eval_arxiv_emb_top1_equal": 0.21875,
      "eval_arxiv_emb_top1_equal_sem": 0.03668319712192295,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.5995633602142334,
      "eval_arxiv_n_ngrams_match_1": 13.916,
      "eval_arxiv_n_ngrams_match_2": 2.596,
      "eval_arxiv_n_ngrams_match_3": 0.546,
      "eval_arxiv_num_pred_words": 40.058,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 36.58225768672877,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3291134608642131,
      "eval_arxiv_runtime": 9.8718,
      "eval_arxiv_samples_per_second": 50.649,
      "eval_arxiv_steps_per_second": 0.101,
      "eval_arxiv_token_set_f1": 0.32720644769478746,
      "eval_arxiv_token_set_f1_sem": 0.004287680412745275,
      "eval_arxiv_token_set_precision": 0.2743324296845597,
      "eval_arxiv_token_set_recall": 0.42559563997043814,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 56250
    },
    {
      "epoch": 10.8,
      "eval_python_code_alpaca_accuracy": 0.14953125,
      "eval_python_code_alpaca_bleu_score": 4.038940794383992,
      "eval_python_code_alpaca_bleu_score_sem": 0.12327250121728657,
      "eval_python_code_alpaca_emb_cos_sim": 0.7331154346466064,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009374375802777944,
      "eval_python_code_alpaca_emb_top1_equal": 0.1171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.073585033416748,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.072,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.45,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.734,
      "eval_python_code_alpaca_num_pred_words": 43.66,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 21.619269714801074,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.30501259465143193,
      "eval_python_code_alpaca_runtime": 10.0658,
      "eval_python_code_alpaca_samples_per_second": 49.673,
      "eval_python_code_alpaca_steps_per_second": 0.099,
      "eval_python_code_alpaca_token_set_f1": 0.45239319200333894,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005190135137561469,
      "eval_python_code_alpaca_token_set_precision": 0.4918878848712137,
      "eval_python_code_alpaca_token_set_recall": 0.44430162301796994,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 56250
    },
    {
      "epoch": 10.8,
      "eval_wikibio_accuracy": 0.305375,
      "eval_wikibio_bleu_score": 5.394592970302806,
      "eval_wikibio_bleu_score_sem": 0.19926590456356802,
      "eval_wikibio_emb_cos_sim": 0.7010586261749268,
      "eval_wikibio_emb_cos_sim_sem": 0.012143735900040465,
      "eval_wikibio_emb_top1_equal": 0.140625,
      "eval_wikibio_emb_top1_equal_sem": 0.030847557647994725,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.971113681793213,
      "eval_wikibio_n_ngrams_match_1": 9.522,
      "eval_wikibio_n_ngrams_match_2": 3.1,
      "eval_wikibio_n_ngrams_match_3": 1.086,
      "eval_wikibio_num_pred_words": 36.708,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 53.04357161721144,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3291868135137792,
      "eval_wikibio_runtime": 10.798,
      "eval_wikibio_samples_per_second": 46.305,
      "eval_wikibio_steps_per_second": 0.093,
      "eval_wikibio_token_set_f1": 0.3020976043901764,
      "eval_wikibio_token_set_f1_sem": 0.005576644476868077,
      "eval_wikibio_token_set_precision": 0.3091776447519551,
      "eval_wikibio_token_set_recall": 0.31366625996005754,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 56250
    },
    {
      "epoch": 10.8,
      "eval_nq_accuracy": 0.505375,
      "eval_nq_bleu_score": 10.466972194667258,
      "eval_nq_bleu_score_sem": 0.45966750115476035,
      "eval_nq_emb_cos_sim": 0.8111673593521118,
      "eval_nq_emb_cos_sim_sem": 0.008248548526810435,
      "eval_nq_emb_top1_equal": 0.2890625,
      "eval_nq_emb_top1_equal_sem": 0.04022626667363519,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.3507819175720215,
      "eval_nq_n_ngrams_match_1": 21.984,
      "eval_nq_n_ngrams_match_2": 7.676,
      "eval_nq_n_ngrams_match_3": 3.434,
      "eval_nq_num_pred_words": 48.81,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 10.493771782196534,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.42138196083301094,
      "eval_nq_runtime": 10.5808,
      "eval_nq_samples_per_second": 47.255,
      "eval_nq_steps_per_second": 0.095,
      "eval_nq_token_set_f1": 0.440894338645274,
      "eval_nq_token_set_f1_sem": 0.004996597314153814,
      "eval_nq_token_set_precision": 0.3932978113058087,
      "eval_nq_token_set_recall": 0.5121360078769879,
      "eval_nq_true_num_tokens": 64.0,
      "step": 56250
    },
    {
      "epoch": 10.8,
      "learning_rate": 0.001,
      "loss": 2.712,
      "step": 56256
    },
    {
      "epoch": 10.8,
      "learning_rate": 0.001,
      "loss": 2.7287,
      "step": 56268
    },
    {
      "epoch": 10.81,
      "learning_rate": 0.001,
      "loss": 2.7187,
      "step": 56280
    },
    {
      "epoch": 10.81,
      "learning_rate": 0.001,
      "loss": 2.7205,
      "step": 56292
    },
    {
      "epoch": 10.81,
      "learning_rate": 0.001,
      "loss": 2.7198,
      "step": 56304
    },
    {
      "epoch": 10.81,
      "learning_rate": 0.001,
      "loss": 2.7209,
      "step": 56316
    },
    {
      "epoch": 10.82,
      "learning_rate": 0.001,
      "loss": 2.7162,
      "step": 56328
    },
    {
      "epoch": 10.82,
      "learning_rate": 0.001,
      "loss": 2.7213,
      "step": 56340
    },
    {
      "epoch": 10.82,
      "learning_rate": 0.001,
      "loss": 2.7219,
      "step": 56352
    },
    {
      "epoch": 10.82,
      "learning_rate": 0.001,
      "loss": 2.7208,
      "step": 56364
    },
    {
      "epoch": 10.82,
      "learning_rate": 0.001,
      "loss": 2.7237,
      "step": 56376
    },
    {
      "epoch": 10.83,
      "learning_rate": 0.001,
      "loss": 2.7161,
      "step": 56388
    },
    {
      "epoch": 10.83,
      "learning_rate": 0.001,
      "loss": 2.7152,
      "step": 56400
    },
    {
      "epoch": 10.83,
      "learning_rate": 0.001,
      "loss": 2.7306,
      "step": 56412
    },
    {
      "epoch": 10.83,
      "learning_rate": 0.001,
      "loss": 2.7218,
      "step": 56424
    },
    {
      "epoch": 10.84,
      "learning_rate": 0.001,
      "loss": 2.7287,
      "step": 56436
    },
    {
      "epoch": 10.84,
      "learning_rate": 0.001,
      "loss": 2.719,
      "step": 56448
    },
    {
      "epoch": 10.84,
      "learning_rate": 0.001,
      "loss": 2.726,
      "step": 56460
    },
    {
      "epoch": 10.84,
      "learning_rate": 0.001,
      "loss": 2.7154,
      "step": 56472
    },
    {
      "epoch": 10.85,
      "learning_rate": 0.001,
      "loss": 2.7255,
      "step": 56484
    },
    {
      "epoch": 10.85,
      "learning_rate": 0.001,
      "loss": 2.7163,
      "step": 56496
    },
    {
      "epoch": 10.85,
      "learning_rate": 0.001,
      "loss": 2.7176,
      "step": 56508
    },
    {
      "epoch": 10.85,
      "learning_rate": 0.001,
      "loss": 2.7217,
      "step": 56520
    },
    {
      "epoch": 10.85,
      "learning_rate": 0.001,
      "loss": 2.7195,
      "step": 56532
    },
    {
      "epoch": 10.86,
      "learning_rate": 0.001,
      "loss": 2.7172,
      "step": 56544
    },
    {
      "epoch": 10.86,
      "learning_rate": 0.001,
      "loss": 2.7215,
      "step": 56556
    },
    {
      "epoch": 10.86,
      "learning_rate": 0.001,
      "loss": 2.7276,
      "step": 56568
    },
    {
      "epoch": 10.86,
      "learning_rate": 0.001,
      "loss": 2.7087,
      "step": 56580
    },
    {
      "epoch": 10.87,
      "learning_rate": 0.001,
      "loss": 2.7078,
      "step": 56592
    },
    {
      "epoch": 10.87,
      "learning_rate": 0.001,
      "loss": 2.7211,
      "step": 56604
    },
    {
      "epoch": 10.87,
      "learning_rate": 0.001,
      "loss": 2.7201,
      "step": 56616
    },
    {
      "epoch": 10.87,
      "learning_rate": 0.001,
      "loss": 2.7147,
      "step": 56628
    },
    {
      "epoch": 10.88,
      "learning_rate": 0.001,
      "loss": 2.7234,
      "step": 56640
    },
    {
      "epoch": 10.88,
      "learning_rate": 0.001,
      "loss": 2.7252,
      "step": 56652
    },
    {
      "epoch": 10.88,
      "learning_rate": 0.001,
      "loss": 2.7179,
      "step": 56664
    },
    {
      "epoch": 10.88,
      "learning_rate": 0.001,
      "loss": 2.708,
      "step": 56676
    },
    {
      "epoch": 10.88,
      "learning_rate": 0.001,
      "loss": 2.7263,
      "step": 56688
    },
    {
      "epoch": 10.89,
      "learning_rate": 0.001,
      "loss": 2.7226,
      "step": 56700
    },
    {
      "epoch": 10.89,
      "learning_rate": 0.001,
      "loss": 2.7319,
      "step": 56712
    },
    {
      "epoch": 10.89,
      "learning_rate": 0.001,
      "loss": 2.7253,
      "step": 56724
    },
    {
      "epoch": 10.89,
      "learning_rate": 0.001,
      "loss": 2.7224,
      "step": 56736
    },
    {
      "epoch": 10.9,
      "learning_rate": 0.001,
      "loss": 2.7117,
      "step": 56748
    },
    {
      "epoch": 10.9,
      "learning_rate": 0.001,
      "loss": 2.7121,
      "step": 56760
    },
    {
      "epoch": 10.9,
      "learning_rate": 0.001,
      "loss": 2.7125,
      "step": 56772
    },
    {
      "epoch": 10.9,
      "learning_rate": 0.001,
      "loss": 2.7142,
      "step": 56784
    },
    {
      "epoch": 10.91,
      "learning_rate": 0.001,
      "loss": 2.7152,
      "step": 56796
    },
    {
      "epoch": 10.91,
      "learning_rate": 0.001,
      "loss": 2.7201,
      "step": 56808
    },
    {
      "epoch": 10.91,
      "learning_rate": 0.001,
      "loss": 2.711,
      "step": 56820
    },
    {
      "epoch": 10.91,
      "learning_rate": 0.001,
      "loss": 2.7276,
      "step": 56832
    },
    {
      "epoch": 10.91,
      "learning_rate": 0.001,
      "loss": 2.7235,
      "step": 56844
    },
    {
      "epoch": 10.92,
      "learning_rate": 0.001,
      "loss": 2.7286,
      "step": 56856
    },
    {
      "epoch": 10.92,
      "learning_rate": 0.001,
      "loss": 2.7176,
      "step": 56868
    },
    {
      "epoch": 10.92,
      "eval_ag_news_accuracy": 0.3065,
      "eval_ag_news_bleu_score": 4.4303056189521905,
      "eval_ag_news_bleu_score_sem": 0.14546669489836722,
      "eval_ag_news_emb_cos_sim": 0.7756615281105042,
      "eval_ag_news_emb_cos_sim_sem": 0.008071350663324985,
      "eval_ag_news_emb_top1_equal": 0.203125,
      "eval_ag_news_emb_top1_equal_sem": 0.03570055125142555,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.758354425430298,
      "eval_ag_news_n_ngrams_match_1": 12.968,
      "eval_ag_news_n_ngrams_match_2": 2.738,
      "eval_ag_news_n_ngrams_match_3": 0.734,
      "eval_ag_news_num_pred_words": 45.898,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 42.87780925963757,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.32139990658253703,
      "eval_ag_news_runtime": 10.9105,
      "eval_ag_news_samples_per_second": 45.827,
      "eval_ag_news_steps_per_second": 0.092,
      "eval_ag_news_token_set_f1": 0.33007117375531914,
      "eval_ag_news_token_set_f1_sem": 0.004439771625943391,
      "eval_ag_news_token_set_precision": 0.3064925052940842,
      "eval_ag_news_token_set_recall": 0.3724746515241469,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 56875
    },
    {
      "epoch": 10.92,
      "eval_anthropic_toxic_prompts_accuracy": 0.1065,
      "eval_anthropic_toxic_prompts_bleu_score": 2.7096081530635847,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10405677912805264,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6226505041122437,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010911427245488214,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.4365551471710205,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.654,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.646,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.564,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.57,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 31.0797085229193,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.19296408130567488,
      "eval_anthropic_toxic_prompts_runtime": 9.5619,
      "eval_anthropic_toxic_prompts_samples_per_second": 52.291,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.105,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3352031452142971,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006503343690353681,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.39232298518835124,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.320762978128836,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 56875
    },
    {
      "epoch": 10.92,
      "eval_arxiv_accuracy": 0.32825,
      "eval_arxiv_bleu_score": 3.8825158148039156,
      "eval_arxiv_bleu_score_sem": 0.11672451938898767,
      "eval_arxiv_emb_cos_sim": 0.7262111306190491,
      "eval_arxiv_emb_cos_sim_sem": 0.0079256305687766,
      "eval_arxiv_emb_top1_equal": 0.1875,
      "eval_arxiv_emb_top1_equal_sem": 0.034634623208270626,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.6267576217651367,
      "eval_arxiv_n_ngrams_match_1": 14.048,
      "eval_arxiv_n_ngrams_match_2": 2.69,
      "eval_arxiv_n_ngrams_match_3": 0.528,
      "eval_arxiv_num_pred_words": 41.036,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 37.590735425060366,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3298736000502477,
      "eval_arxiv_runtime": 10.3192,
      "eval_arxiv_samples_per_second": 48.453,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.3298888878062372,
      "eval_arxiv_token_set_f1_sem": 0.004159619696913173,
      "eval_arxiv_token_set_precision": 0.27735738660891907,
      "eval_arxiv_token_set_recall": 0.42730287245228055,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 56875
    },
    {
      "epoch": 10.92,
      "eval_python_code_alpaca_accuracy": 0.14990625,
      "eval_python_code_alpaca_bleu_score": 4.0113907569894796,
      "eval_python_code_alpaca_bleu_score_sem": 0.14134455475580787,
      "eval_python_code_alpaca_emb_cos_sim": 0.7198358774185181,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010124257799839834,
      "eval_python_code_alpaca_emb_top1_equal": 0.125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.075366735458374,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.124,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.546,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.788,
      "eval_python_code_alpaca_num_pred_words": 45.142,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 21.65782314695592,
      "eval_python_code_alpaca_pred_num_tokens": 62.96875,
      "eval_python_code_alpaca_rouge_score": 0.2958587589053272,
      "eval_python_code_alpaca_runtime": 10.0032,
      "eval_python_code_alpaca_samples_per_second": 49.984,
      "eval_python_code_alpaca_steps_per_second": 0.1,
      "eval_python_code_alpaca_token_set_f1": 0.450711941235212,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005993237587569222,
      "eval_python_code_alpaca_token_set_precision": 0.49268291832483696,
      "eval_python_code_alpaca_token_set_recall": 0.4402095610325942,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 56875
    },
    {
      "epoch": 10.92,
      "eval_wikibio_accuracy": 0.30471875,
      "eval_wikibio_bleu_score": 5.496054908468534,
      "eval_wikibio_bleu_score_sem": 0.1968930308782966,
      "eval_wikibio_emb_cos_sim": 0.7178844213485718,
      "eval_wikibio_emb_cos_sim_sem": 0.01097977099452666,
      "eval_wikibio_emb_top1_equal": 0.1328125,
      "eval_wikibio_emb_top1_equal_sem": 0.030114394778901498,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.015972137451172,
      "eval_wikibio_n_ngrams_match_1": 9.84,
      "eval_wikibio_n_ngrams_match_2": 3.184,
      "eval_wikibio_n_ngrams_match_3": 1.116,
      "eval_wikibio_num_pred_words": 36.59,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 55.477200661037045,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.34030002023118366,
      "eval_wikibio_runtime": 11.3183,
      "eval_wikibio_samples_per_second": 44.176,
      "eval_wikibio_steps_per_second": 0.088,
      "eval_wikibio_token_set_f1": 0.30973723267275016,
      "eval_wikibio_token_set_f1_sem": 0.005493719360809834,
      "eval_wikibio_token_set_precision": 0.31915044905063195,
      "eval_wikibio_token_set_recall": 0.3163183314609686,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 56875
    },
    {
      "epoch": 10.92,
      "eval_nq_accuracy": 0.504,
      "eval_nq_bleu_score": 10.597470227641582,
      "eval_nq_bleu_score_sem": 0.4344237576175824,
      "eval_nq_emb_cos_sim": 0.8083126544952393,
      "eval_nq_emb_cos_sim_sem": 0.007916100112657075,
      "eval_nq_emb_top1_equal": 0.2265625,
      "eval_nq_emb_top1_equal_sem": 0.03714537682851538,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.3533241748809814,
      "eval_nq_n_ngrams_match_1": 21.944,
      "eval_nq_n_ngrams_match_2": 7.762,
      "eval_nq_n_ngrams_match_3": 3.482,
      "eval_nq_num_pred_words": 48.932,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 10.520483589965957,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.42221122580072257,
      "eval_nq_runtime": 10.5334,
      "eval_nq_samples_per_second": 47.468,
      "eval_nq_steps_per_second": 0.095,
      "eval_nq_token_set_f1": 0.44059985607738433,
      "eval_nq_token_set_f1_sem": 0.005043803944498725,
      "eval_nq_token_set_precision": 0.39206069446057645,
      "eval_nq_token_set_recall": 0.5139015564453835,
      "eval_nq_true_num_tokens": 64.0,
      "step": 56875
    },
    {
      "epoch": 10.92,
      "learning_rate": 0.001,
      "loss": 2.716,
      "step": 56880
    },
    {
      "epoch": 10.92,
      "learning_rate": 0.001,
      "loss": 2.7179,
      "step": 56892
    },
    {
      "epoch": 10.93,
      "learning_rate": 0.001,
      "loss": 2.7112,
      "step": 56904
    },
    {
      "epoch": 10.93,
      "learning_rate": 0.001,
      "loss": 2.7149,
      "step": 56916
    },
    {
      "epoch": 10.93,
      "learning_rate": 0.001,
      "loss": 2.7163,
      "step": 56928
    },
    {
      "epoch": 10.93,
      "learning_rate": 0.001,
      "loss": 2.7175,
      "step": 56940
    },
    {
      "epoch": 10.94,
      "learning_rate": 0.001,
      "loss": 2.7073,
      "step": 56952
    },
    {
      "epoch": 10.94,
      "learning_rate": 0.001,
      "loss": 2.7231,
      "step": 56964
    },
    {
      "epoch": 10.94,
      "learning_rate": 0.001,
      "loss": 2.7157,
      "step": 56976
    },
    {
      "epoch": 10.94,
      "learning_rate": 0.001,
      "loss": 2.7174,
      "step": 56988
    },
    {
      "epoch": 10.94,
      "learning_rate": 0.001,
      "loss": 2.7086,
      "step": 57000
    },
    {
      "epoch": 10.95,
      "learning_rate": 0.001,
      "loss": 2.7204,
      "step": 57012
    },
    {
      "epoch": 10.95,
      "learning_rate": 0.001,
      "loss": 2.7218,
      "step": 57024
    },
    {
      "epoch": 10.95,
      "learning_rate": 0.001,
      "loss": 2.7228,
      "step": 57036
    },
    {
      "epoch": 10.95,
      "learning_rate": 0.001,
      "loss": 2.7219,
      "step": 57048
    },
    {
      "epoch": 10.96,
      "learning_rate": 0.001,
      "loss": 2.719,
      "step": 57060
    },
    {
      "epoch": 10.96,
      "learning_rate": 0.001,
      "loss": 2.7254,
      "step": 57072
    },
    {
      "epoch": 10.96,
      "learning_rate": 0.001,
      "loss": 2.7163,
      "step": 57084
    },
    {
      "epoch": 10.96,
      "learning_rate": 0.001,
      "loss": 2.7272,
      "step": 57096
    },
    {
      "epoch": 10.97,
      "learning_rate": 0.001,
      "loss": 2.7192,
      "step": 57108
    },
    {
      "epoch": 10.97,
      "learning_rate": 0.001,
      "loss": 2.7127,
      "step": 57120
    },
    {
      "epoch": 10.97,
      "learning_rate": 0.001,
      "loss": 2.7104,
      "step": 57132
    },
    {
      "epoch": 10.97,
      "learning_rate": 0.001,
      "loss": 2.7261,
      "step": 57144
    },
    {
      "epoch": 10.97,
      "learning_rate": 0.001,
      "loss": 2.7245,
      "step": 57156
    },
    {
      "epoch": 10.98,
      "learning_rate": 0.001,
      "loss": 2.7209,
      "step": 57168
    },
    {
      "epoch": 10.98,
      "learning_rate": 0.001,
      "loss": 2.7088,
      "step": 57180
    },
    {
      "epoch": 10.98,
      "learning_rate": 0.001,
      "loss": 2.7189,
      "step": 57192
    },
    {
      "epoch": 10.98,
      "learning_rate": 0.001,
      "loss": 2.7283,
      "step": 57204
    },
    {
      "epoch": 10.99,
      "learning_rate": 0.001,
      "loss": 2.7152,
      "step": 57216
    },
    {
      "epoch": 10.99,
      "learning_rate": 0.001,
      "loss": 2.7228,
      "step": 57228
    },
    {
      "epoch": 10.99,
      "learning_rate": 0.001,
      "loss": 2.723,
      "step": 57240
    },
    {
      "epoch": 10.99,
      "learning_rate": 0.001,
      "loss": 2.7193,
      "step": 57252
    },
    {
      "epoch": 11.0,
      "learning_rate": 0.001,
      "loss": 2.7293,
      "step": 57264
    },
    {
      "epoch": 11.0,
      "learning_rate": 0.001,
      "loss": 2.7189,
      "step": 57276
    },
    {
      "epoch": 11.0,
      "learning_rate": 0.001,
      "loss": 2.7243,
      "step": 57288
    },
    {
      "epoch": 11.0,
      "learning_rate": 0.001,
      "loss": 2.7026,
      "step": 57300
    },
    {
      "epoch": 11.0,
      "learning_rate": 0.001,
      "loss": 2.7074,
      "step": 57312
    },
    {
      "epoch": 11.01,
      "learning_rate": 0.001,
      "loss": 2.6989,
      "step": 57324
    },
    {
      "epoch": 11.01,
      "learning_rate": 0.001,
      "loss": 2.7133,
      "step": 57336
    },
    {
      "epoch": 11.01,
      "learning_rate": 0.001,
      "loss": 2.6993,
      "step": 57348
    },
    {
      "epoch": 11.01,
      "learning_rate": 0.001,
      "loss": 2.6971,
      "step": 57360
    },
    {
      "epoch": 11.02,
      "learning_rate": 0.001,
      "loss": 2.7009,
      "step": 57372
    },
    {
      "epoch": 11.02,
      "learning_rate": 0.001,
      "loss": 2.699,
      "step": 57384
    },
    {
      "epoch": 11.02,
      "learning_rate": 0.001,
      "loss": 2.6975,
      "step": 57396
    },
    {
      "epoch": 11.02,
      "learning_rate": 0.001,
      "loss": 2.696,
      "step": 57408
    },
    {
      "epoch": 11.03,
      "learning_rate": 0.001,
      "loss": 2.7059,
      "step": 57420
    },
    {
      "epoch": 11.03,
      "learning_rate": 0.001,
      "loss": 2.7122,
      "step": 57432
    },
    {
      "epoch": 11.03,
      "learning_rate": 0.001,
      "loss": 2.6863,
      "step": 57444
    },
    {
      "epoch": 11.03,
      "learning_rate": 0.001,
      "loss": 2.6929,
      "step": 57456
    },
    {
      "epoch": 11.03,
      "learning_rate": 0.001,
      "loss": 2.7005,
      "step": 57468
    },
    {
      "epoch": 11.04,
      "learning_rate": 0.001,
      "loss": 2.6949,
      "step": 57480
    },
    {
      "epoch": 11.04,
      "learning_rate": 0.001,
      "loss": 2.6917,
      "step": 57492
    },
    {
      "epoch": 11.04,
      "eval_ag_news_accuracy": 0.3069375,
      "eval_ag_news_bleu_score": 4.522254818587462,
      "eval_ag_news_bleu_score_sem": 0.14588636050961132,
      "eval_ag_news_emb_cos_sim": 0.7798112630844116,
      "eval_ag_news_emb_cos_sim_sem": 0.008509723327409333,
      "eval_ag_news_emb_top1_equal": 0.1875,
      "eval_ag_news_emb_top1_equal_sem": 0.034634623208270626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.7473461627960205,
      "eval_ag_news_n_ngrams_match_1": 13.038,
      "eval_ag_news_n_ngrams_match_2": 2.792,
      "eval_ag_news_n_ngrams_match_3": 0.802,
      "eval_ag_news_num_pred_words": 46.37,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 42.408387573169776,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3190959353446634,
      "eval_ag_news_runtime": 9.9401,
      "eval_ag_news_samples_per_second": 50.302,
      "eval_ag_news_steps_per_second": 0.101,
      "eval_ag_news_token_set_f1": 0.32775963799587426,
      "eval_ag_news_token_set_f1_sem": 0.004329120398457251,
      "eval_ag_news_token_set_precision": 0.3089203197509194,
      "eval_ag_news_token_set_recall": 0.36637217420744195,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 57500
    },
    {
      "epoch": 11.04,
      "eval_anthropic_toxic_prompts_accuracy": 0.1070625,
      "eval_anthropic_toxic_prompts_bleu_score": 2.816111668498021,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12483843373506187,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6405113935470581,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.01103784013215156,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.4244728088378906,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.664,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.634,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.578,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.236,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 30.706452413290766,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.19553180148485383,
      "eval_anthropic_toxic_prompts_runtime": 10.2,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.019,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.098,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.33069441523904464,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0063617832518983244,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.3933402184179548,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3127276552208312,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 57500
    },
    {
      "epoch": 11.04,
      "eval_arxiv_accuracy": 0.3309375,
      "eval_arxiv_bleu_score": 3.9364015802778085,
      "eval_arxiv_bleu_score_sem": 0.11594943685436013,
      "eval_arxiv_emb_cos_sim": 0.7262336015701294,
      "eval_arxiv_emb_cos_sim_sem": 0.007992539349332153,
      "eval_arxiv_emb_top1_equal": 0.2421875,
      "eval_arxiv_emb_top1_equal_sem": 0.038014990119662626,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.6041362285614014,
      "eval_arxiv_n_ngrams_match_1": 14.014,
      "eval_arxiv_n_ngrams_match_2": 2.598,
      "eval_arxiv_n_ngrams_match_3": 0.536,
      "eval_arxiv_num_pred_words": 40.616,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 36.74992660674221,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.33120402656786474,
      "eval_arxiv_runtime": 10.3325,
      "eval_arxiv_samples_per_second": 48.391,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.32525667067913205,
      "eval_arxiv_token_set_f1_sem": 0.004185553357486638,
      "eval_arxiv_token_set_precision": 0.2749939755144048,
      "eval_arxiv_token_set_recall": 0.4177945078793143,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 57500
    },
    {
      "epoch": 11.04,
      "eval_python_code_alpaca_accuracy": 0.147875,
      "eval_python_code_alpaca_bleu_score": 3.8843343841007663,
      "eval_python_code_alpaca_bleu_score_sem": 0.12653085280243534,
      "eval_python_code_alpaca_emb_cos_sim": 0.7262643575668335,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008864375886738347,
      "eval_python_code_alpaca_emb_top1_equal": 0.0859375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02487009666300537,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.0930442810058594,
      "eval_python_code_alpaca_n_ngrams_match_1": 8.996,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.416,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.72,
      "eval_python_code_alpaca_num_pred_words": 44.074,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 22.04408431812774,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.2957196143145694,
      "eval_python_code_alpaca_runtime": 10.4063,
      "eval_python_code_alpaca_samples_per_second": 48.048,
      "eval_python_code_alpaca_steps_per_second": 0.096,
      "eval_python_code_alpaca_token_set_f1": 0.445763661886549,
      "eval_python_code_alpaca_token_set_f1_sem": 0.00555026881762446,
      "eval_python_code_alpaca_token_set_precision": 0.49228168618232554,
      "eval_python_code_alpaca_token_set_recall": 0.43314921243799787,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 57500
    },
    {
      "epoch": 11.04,
      "eval_wikibio_accuracy": 0.3026875,
      "eval_wikibio_bleu_score": 5.283167879296112,
      "eval_wikibio_bleu_score_sem": 0.19151798652254653,
      "eval_wikibio_emb_cos_sim": 0.7082334756851196,
      "eval_wikibio_emb_cos_sim_sem": 0.010872272164677085,
      "eval_wikibio_emb_top1_equal": 0.15625,
      "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.0369110107421875,
      "eval_wikibio_n_ngrams_match_1": 9.636,
      "eval_wikibio_n_ngrams_match_2": 3.094,
      "eval_wikibio_n_ngrams_match_3": 1.058,
      "eval_wikibio_num_pred_words": 36.9,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 56.651077678392404,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.32576161499551753,
      "eval_wikibio_runtime": 9.6811,
      "eval_wikibio_samples_per_second": 51.647,
      "eval_wikibio_steps_per_second": 0.103,
      "eval_wikibio_token_set_f1": 0.3044536294926807,
      "eval_wikibio_token_set_f1_sem": 0.005412713035116527,
      "eval_wikibio_token_set_precision": 0.31294918951732376,
      "eval_wikibio_token_set_recall": 0.31493506428632556,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 57500
    },
    {
      "epoch": 11.04,
      "eval_nq_accuracy": 0.5053125,
      "eval_nq_bleu_score": 10.650641997919385,
      "eval_nq_bleu_score_sem": 0.46356261621733846,
      "eval_nq_emb_cos_sim": 0.8047137260437012,
      "eval_nq_emb_cos_sim_sem": 0.008218507043445279,
      "eval_nq_emb_top1_equal": 0.2578125,
      "eval_nq_emb_top1_equal_sem": 0.038815656435002115,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.340418815612793,
      "eval_nq_n_ngrams_match_1": 21.958,
      "eval_nq_n_ngrams_match_2": 7.73,
      "eval_nq_n_ngrams_match_3": 3.472,
      "eval_nq_num_pred_words": 48.902,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 10.38558529727981,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4205499582025919,
      "eval_nq_runtime": 12.9034,
      "eval_nq_samples_per_second": 38.749,
      "eval_nq_steps_per_second": 0.077,
      "eval_nq_token_set_f1": 0.4387483533119308,
      "eval_nq_token_set_f1_sem": 0.005354152301822755,
      "eval_nq_token_set_precision": 0.3933372177800882,
      "eval_nq_token_set_recall": 0.507979614733193,
      "eval_nq_true_num_tokens": 64.0,
      "step": 57500
    },
    {
      "epoch": 11.04,
      "learning_rate": 0.001,
      "loss": 2.7055,
      "step": 57504
    },
    {
      "epoch": 11.04,
      "learning_rate": 0.001,
      "loss": 2.6927,
      "step": 57516
    },
    {
      "epoch": 11.05,
      "learning_rate": 0.001,
      "loss": 2.6993,
      "step": 57528
    },
    {
      "epoch": 11.05,
      "learning_rate": 0.001,
      "loss": 2.7009,
      "step": 57540
    },
    {
      "epoch": 11.05,
      "learning_rate": 0.001,
      "loss": 2.7015,
      "step": 57552
    },
    {
      "epoch": 11.05,
      "learning_rate": 0.001,
      "loss": 2.7043,
      "step": 57564
    },
    {
      "epoch": 11.06,
      "learning_rate": 0.001,
      "loss": 2.7079,
      "step": 57576
    },
    {
      "epoch": 11.06,
      "learning_rate": 0.001,
      "loss": 2.7037,
      "step": 57588
    },
    {
      "epoch": 11.06,
      "learning_rate": 0.001,
      "loss": 2.7006,
      "step": 57600
    },
    {
      "epoch": 11.06,
      "learning_rate": 0.001,
      "loss": 2.7177,
      "step": 57612
    },
    {
      "epoch": 11.06,
      "learning_rate": 0.001,
      "loss": 2.7178,
      "step": 57624
    },
    {
      "epoch": 11.07,
      "learning_rate": 0.001,
      "loss": 2.7079,
      "step": 57636
    },
    {
      "epoch": 11.07,
      "learning_rate": 0.001,
      "loss": 2.6982,
      "step": 57648
    },
    {
      "epoch": 11.07,
      "learning_rate": 0.001,
      "loss": 2.7038,
      "step": 57660
    },
    {
      "epoch": 11.07,
      "learning_rate": 0.001,
      "loss": 2.7082,
      "step": 57672
    },
    {
      "epoch": 11.08,
      "learning_rate": 0.001,
      "loss": 2.7047,
      "step": 57684
    },
    {
      "epoch": 11.08,
      "learning_rate": 0.001,
      "loss": 2.7001,
      "step": 57696
    },
    {
      "epoch": 11.08,
      "learning_rate": 0.001,
      "loss": 2.6992,
      "step": 57708
    },
    {
      "epoch": 11.08,
      "learning_rate": 0.001,
      "loss": 2.7006,
      "step": 57720
    },
    {
      "epoch": 11.09,
      "learning_rate": 0.001,
      "loss": 2.7021,
      "step": 57732
    },
    {
      "epoch": 11.09,
      "learning_rate": 0.001,
      "loss": 2.709,
      "step": 57744
    },
    {
      "epoch": 11.09,
      "learning_rate": 0.001,
      "loss": 2.7006,
      "step": 57756
    },
    {
      "epoch": 11.09,
      "learning_rate": 0.001,
      "loss": 2.6971,
      "step": 57768
    },
    {
      "epoch": 11.09,
      "learning_rate": 0.001,
      "loss": 2.712,
      "step": 57780
    },
    {
      "epoch": 11.1,
      "learning_rate": 0.001,
      "loss": 2.6994,
      "step": 57792
    },
    {
      "epoch": 11.1,
      "learning_rate": 0.001,
      "loss": 2.7093,
      "step": 57804
    },
    {
      "epoch": 11.1,
      "learning_rate": 0.001,
      "loss": 2.695,
      "step": 57816
    },
    {
      "epoch": 11.1,
      "learning_rate": 0.001,
      "loss": 2.7148,
      "step": 57828
    },
    {
      "epoch": 11.11,
      "learning_rate": 0.001,
      "loss": 2.7063,
      "step": 57840
    },
    {
      "epoch": 11.11,
      "learning_rate": 0.001,
      "loss": 2.6923,
      "step": 57852
    },
    {
      "epoch": 11.11,
      "learning_rate": 0.001,
      "loss": 2.7013,
      "step": 57864
    },
    {
      "epoch": 11.11,
      "learning_rate": 0.001,
      "loss": 2.7027,
      "step": 57876
    },
    {
      "epoch": 11.12,
      "learning_rate": 0.001,
      "loss": 2.7069,
      "step": 57888
    },
    {
      "epoch": 11.12,
      "learning_rate": 0.001,
      "loss": 2.6966,
      "step": 57900
    },
    {
      "epoch": 11.12,
      "learning_rate": 0.001,
      "loss": 2.7048,
      "step": 57912
    },
    {
      "epoch": 11.12,
      "learning_rate": 0.001,
      "loss": 2.7071,
      "step": 57924
    },
    {
      "epoch": 11.12,
      "learning_rate": 0.001,
      "loss": 2.698,
      "step": 57936
    },
    {
      "epoch": 11.13,
      "learning_rate": 0.001,
      "loss": 2.6979,
      "step": 57948
    },
    {
      "epoch": 11.13,
      "learning_rate": 0.001,
      "loss": 2.7083,
      "step": 57960
    },
    {
      "epoch": 11.13,
      "learning_rate": 0.001,
      "loss": 2.7057,
      "step": 57972
    },
    {
      "epoch": 11.13,
      "learning_rate": 0.001,
      "loss": 2.7064,
      "step": 57984
    },
    {
      "epoch": 11.14,
      "learning_rate": 0.001,
      "loss": 2.7042,
      "step": 57996
    },
    {
      "epoch": 11.14,
      "learning_rate": 0.001,
      "loss": 2.7079,
      "step": 58008
    },
    {
      "epoch": 11.14,
      "learning_rate": 0.001,
      "loss": 2.7114,
      "step": 58020
    },
    {
      "epoch": 11.14,
      "learning_rate": 0.001,
      "loss": 2.7102,
      "step": 58032
    },
    {
      "epoch": 11.15,
      "learning_rate": 0.001,
      "loss": 2.6982,
      "step": 58044
    },
    {
      "epoch": 11.15,
      "learning_rate": 0.001,
      "loss": 2.6971,
      "step": 58056
    },
    {
      "epoch": 11.15,
      "learning_rate": 0.001,
      "loss": 2.6973,
      "step": 58068
    },
    {
      "epoch": 11.15,
      "learning_rate": 0.001,
      "loss": 2.7083,
      "step": 58080
    },
    {
      "epoch": 11.15,
      "learning_rate": 0.001,
      "loss": 2.6956,
      "step": 58092
    },
    {
      "epoch": 11.16,
      "learning_rate": 0.001,
      "loss": 2.7041,
      "step": 58104
    },
    {
      "epoch": 11.16,
      "learning_rate": 0.001,
      "loss": 2.6981,
      "step": 58116
    },
    {
      "epoch": 11.16,
      "eval_ag_news_accuracy": 0.30684375,
      "eval_ag_news_bleu_score": 4.404854953079719,
      "eval_ag_news_bleu_score_sem": 0.13991467928055173,
      "eval_ag_news_emb_cos_sim": 0.772075891494751,
      "eval_ag_news_emb_cos_sim_sem": 0.009697400445022071,
      "eval_ag_news_emb_top1_equal": 0.2421875,
      "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.7400405406951904,
      "eval_ag_news_n_ngrams_match_1": 13.006,
      "eval_ag_news_n_ngrams_match_2": 2.78,
      "eval_ag_news_n_ngrams_match_3": 0.75,
      "eval_ag_news_num_pred_words": 46.46,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 42.09969688137981,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3234550037144449,
      "eval_ag_news_runtime": 10.4843,
      "eval_ag_news_samples_per_second": 47.69,
      "eval_ag_news_steps_per_second": 0.095,
      "eval_ag_news_token_set_f1": 0.329440871819913,
      "eval_ag_news_token_set_f1_sem": 0.004309609570861408,
      "eval_ag_news_token_set_precision": 0.31012448042069374,
      "eval_ag_news_token_set_recall": 0.3685082984965904,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 58125
    },
    {
      "epoch": 11.16,
      "eval_anthropic_toxic_prompts_accuracy": 0.108625,
      "eval_anthropic_toxic_prompts_bleu_score": 2.7546323647445705,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11088759228191522,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6415292024612427,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010219925219058993,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.4118399620056152,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.676,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.626,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.54,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.426,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 30.320982421253134,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.19711881354469407,
      "eval_anthropic_toxic_prompts_runtime": 10.9118,
      "eval_anthropic_toxic_prompts_samples_per_second": 45.822,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.092,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3362022816654852,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0062065387337703875,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.40087196572619965,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3169868091230833,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 58125
    },
    {
      "epoch": 11.16,
      "eval_arxiv_accuracy": 0.33078125,
      "eval_arxiv_bleu_score": 3.878411244471973,
      "eval_arxiv_bleu_score_sem": 0.10709292701408764,
      "eval_arxiv_emb_cos_sim": 0.7227597832679749,
      "eval_arxiv_emb_cos_sim_sem": 0.007836613131999052,
      "eval_arxiv_emb_top1_equal": 0.2734375,
      "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.5966672897338867,
      "eval_arxiv_n_ngrams_match_1": 13.876,
      "eval_arxiv_n_ngrams_match_2": 2.486,
      "eval_arxiv_n_ngrams_match_3": 0.528,
      "eval_arxiv_num_pred_words": 40.04,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 36.47646615394681,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3311057215710334,
      "eval_arxiv_runtime": 16.8109,
      "eval_arxiv_samples_per_second": 29.743,
      "eval_arxiv_steps_per_second": 0.059,
      "eval_arxiv_token_set_f1": 0.3261414864720136,
      "eval_arxiv_token_set_f1_sem": 0.003999993953918227,
      "eval_arxiv_token_set_precision": 0.27435228024569336,
      "eval_arxiv_token_set_recall": 0.42395166930538597,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 58125
    },
    {
      "epoch": 11.16,
      "eval_python_code_alpaca_accuracy": 0.14975,
      "eval_python_code_alpaca_bleu_score": 3.875740023336171,
      "eval_python_code_alpaca_bleu_score_sem": 0.11476498520429856,
      "eval_python_code_alpaca_emb_cos_sim": 0.7398732900619507,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007619849255929493,
      "eval_python_code_alpaca_emb_top1_equal": 0.0703125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.022687306110270106,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.0788724422454834,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.208,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.468,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.708,
      "eval_python_code_alpaca_num_pred_words": 43.934,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 21.733882367321574,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3066312064796719,
      "eval_python_code_alpaca_runtime": 9.8935,
      "eval_python_code_alpaca_samples_per_second": 50.538,
      "eval_python_code_alpaca_steps_per_second": 0.101,
      "eval_python_code_alpaca_token_set_f1": 0.4491762975567623,
      "eval_python_code_alpaca_token_set_f1_sem": 0.00521366702632569,
      "eval_python_code_alpaca_token_set_precision": 0.498575835264117,
      "eval_python_code_alpaca_token_set_recall": 0.43109428207429284,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 58125
    },
    {
      "epoch": 11.16,
      "eval_wikibio_accuracy": 0.30409375,
      "eval_wikibio_bleu_score": 5.14882176265263,
      "eval_wikibio_bleu_score_sem": 0.18901377929551888,
      "eval_wikibio_emb_cos_sim": 0.7046616673469543,
      "eval_wikibio_emb_cos_sim_sem": 0.01085805748147228,
      "eval_wikibio_emb_top1_equal": 0.140625,
      "eval_wikibio_emb_top1_equal_sem": 0.030847557647994725,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.009243488311768,
      "eval_wikibio_n_ngrams_match_1": 9.352,
      "eval_wikibio_n_ngrams_match_2": 2.99,
      "eval_wikibio_n_ngrams_match_3": 1.022,
      "eval_wikibio_num_pred_words": 36.03,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 55.10516708788541,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3221732509451103,
      "eval_wikibio_runtime": 9.6314,
      "eval_wikibio_samples_per_second": 51.913,
      "eval_wikibio_steps_per_second": 0.104,
      "eval_wikibio_token_set_f1": 0.2962359261040241,
      "eval_wikibio_token_set_f1_sem": 0.005827731273917247,
      "eval_wikibio_token_set_precision": 0.3029042606347466,
      "eval_wikibio_token_set_recall": 0.30852043392029155,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 58125
    },
    {
      "epoch": 11.16,
      "eval_nq_accuracy": 0.50609375,
      "eval_nq_bleu_score": 10.685042045736118,
      "eval_nq_bleu_score_sem": 0.46940046219238923,
      "eval_nq_emb_cos_sim": 0.8148090839385986,
      "eval_nq_emb_cos_sim_sem": 0.007645845958926618,
      "eval_nq_emb_top1_equal": 0.2421875,
      "eval_nq_emb_top1_equal_sem": 0.038014990119662626,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.3385086059570312,
      "eval_nq_n_ngrams_match_1": 22.344,
      "eval_nq_n_ngrams_match_2": 7.904,
      "eval_nq_n_ngrams_match_3": 3.482,
      "eval_nq_num_pred_words": 49.13,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 10.365765587891014,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4287459749369873,
      "eval_nq_runtime": 10.5752,
      "eval_nq_samples_per_second": 47.28,
      "eval_nq_steps_per_second": 0.095,
      "eval_nq_token_set_f1": 0.444798741609737,
      "eval_nq_token_set_f1_sem": 0.005066460286545068,
      "eval_nq_token_set_precision": 0.40014016363801125,
      "eval_nq_token_set_recall": 0.5106069938321136,
      "eval_nq_true_num_tokens": 64.0,
      "step": 58125
    },
    {
      "epoch": 11.16,
      "learning_rate": 0.001,
      "loss": 2.6906,
      "step": 58128
    },
    {
      "epoch": 11.16,
      "learning_rate": 0.001,
      "loss": 2.7075,
      "step": 58140
    },
    {
      "epoch": 11.17,
      "learning_rate": 0.001,
      "loss": 2.7106,
      "step": 58152
    },
    {
      "epoch": 11.17,
      "learning_rate": 0.001,
      "loss": 2.7008,
      "step": 58164
    },
    {
      "epoch": 11.17,
      "learning_rate": 0.001,
      "loss": 2.7016,
      "step": 58176
    },
    {
      "epoch": 11.17,
      "learning_rate": 0.001,
      "loss": 2.697,
      "step": 58188
    },
    {
      "epoch": 11.18,
      "learning_rate": 0.001,
      "loss": 2.6962,
      "step": 58200
    },
    {
      "epoch": 11.18,
      "learning_rate": 0.001,
      "loss": 2.6975,
      "step": 58212
    },
    {
      "epoch": 11.18,
      "learning_rate": 0.001,
      "loss": 2.6966,
      "step": 58224
    },
    {
      "epoch": 11.18,
      "learning_rate": 0.001,
      "loss": 2.6928,
      "step": 58236
    },
    {
      "epoch": 11.18,
      "learning_rate": 0.001,
      "loss": 2.7072,
      "step": 58248
    },
    {
      "epoch": 11.19,
      "learning_rate": 0.001,
      "loss": 2.6941,
      "step": 58260
    },
    {
      "epoch": 11.19,
      "learning_rate": 0.001,
      "loss": 2.7011,
      "step": 58272
    },
    {
      "epoch": 11.19,
      "learning_rate": 0.001,
      "loss": 2.7051,
      "step": 58284
    },
    {
      "epoch": 11.19,
      "learning_rate": 0.001,
      "loss": 2.7027,
      "step": 58296
    },
    {
      "epoch": 11.2,
      "learning_rate": 0.001,
      "loss": 2.7041,
      "step": 58308
    },
    {
      "epoch": 11.2,
      "learning_rate": 0.001,
      "loss": 2.7105,
      "step": 58320
    },
    {
      "epoch": 11.2,
      "learning_rate": 0.001,
      "loss": 2.711,
      "step": 58332
    },
    {
      "epoch": 11.2,
      "learning_rate": 0.001,
      "loss": 2.6981,
      "step": 58344
    },
    {
      "epoch": 11.21,
      "learning_rate": 0.001,
      "loss": 2.7,
      "step": 58356
    },
    {
      "epoch": 11.21,
      "learning_rate": 0.001,
      "loss": 2.705,
      "step": 58368
    },
    {
      "epoch": 11.21,
      "learning_rate": 0.001,
      "loss": 2.711,
      "step": 58380
    },
    {
      "epoch": 11.21,
      "learning_rate": 0.001,
      "loss": 2.7122,
      "step": 58392
    },
    {
      "epoch": 11.21,
      "learning_rate": 0.001,
      "loss": 2.7075,
      "step": 58404
    },
    {
      "epoch": 11.22,
      "learning_rate": 0.001,
      "loss": 2.6989,
      "step": 58416
    },
    {
      "epoch": 11.22,
      "learning_rate": 0.001,
      "loss": 2.7039,
      "step": 58428
    },
    {
      "epoch": 11.22,
      "learning_rate": 0.001,
      "loss": 2.7076,
      "step": 58440
    },
    {
      "epoch": 11.22,
      "learning_rate": 0.001,
      "loss": 2.7064,
      "step": 58452
    },
    {
      "epoch": 11.23,
      "learning_rate": 0.001,
      "loss": 2.7056,
      "step": 58464
    },
    {
      "epoch": 11.23,
      "learning_rate": 0.001,
      "loss": 2.7038,
      "step": 58476
    },
    {
      "epoch": 11.23,
      "learning_rate": 0.001,
      "loss": 2.7,
      "step": 58488
    },
    {
      "epoch": 11.23,
      "learning_rate": 0.001,
      "loss": 2.7025,
      "step": 58500
    },
    {
      "epoch": 11.24,
      "learning_rate": 0.001,
      "loss": 2.6966,
      "step": 58512
    },
    {
      "epoch": 11.24,
      "learning_rate": 0.001,
      "loss": 2.7084,
      "step": 58524
    },
    {
      "epoch": 11.24,
      "learning_rate": 0.001,
      "loss": 2.7168,
      "step": 58536
    },
    {
      "epoch": 11.24,
      "learning_rate": 0.001,
      "loss": 2.7072,
      "step": 58548
    },
    {
      "epoch": 11.24,
      "learning_rate": 0.001,
      "loss": 2.6973,
      "step": 58560
    },
    {
      "epoch": 11.25,
      "learning_rate": 0.001,
      "loss": 2.7022,
      "step": 58572
    },
    {
      "epoch": 11.25,
      "learning_rate": 0.001,
      "loss": 2.699,
      "step": 58584
    },
    {
      "epoch": 11.25,
      "learning_rate": 0.001,
      "loss": 2.7046,
      "step": 58596
    },
    {
      "epoch": 11.25,
      "learning_rate": 0.001,
      "loss": 2.7215,
      "step": 58608
    },
    {
      "epoch": 11.26,
      "learning_rate": 0.001,
      "loss": 2.6986,
      "step": 58620
    },
    {
      "epoch": 11.26,
      "learning_rate": 0.001,
      "loss": 2.7048,
      "step": 58632
    },
    {
      "epoch": 11.26,
      "learning_rate": 0.001,
      "loss": 2.6905,
      "step": 58644
    },
    {
      "epoch": 11.26,
      "learning_rate": 0.001,
      "loss": 2.7061,
      "step": 58656
    },
    {
      "epoch": 11.26,
      "learning_rate": 0.001,
      "loss": 2.7046,
      "step": 58668
    },
    {
      "epoch": 11.27,
      "learning_rate": 0.001,
      "loss": 2.7047,
      "step": 58680
    },
    {
      "epoch": 11.27,
      "learning_rate": 0.001,
      "loss": 2.7048,
      "step": 58692
    },
    {
      "epoch": 11.27,
      "learning_rate": 0.001,
      "loss": 2.7144,
      "step": 58704
    },
    {
      "epoch": 11.27,
      "learning_rate": 0.001,
      "loss": 2.6919,
      "step": 58716
    },
    {
      "epoch": 11.28,
      "learning_rate": 0.001,
      "loss": 2.7014,
      "step": 58728
    },
    {
      "epoch": 11.28,
      "learning_rate": 0.001,
      "loss": 2.7059,
      "step": 58740
    },
    {
      "epoch": 11.28,
      "eval_ag_news_accuracy": 0.30684375,
      "eval_ag_news_bleu_score": 4.5296564200494105,
      "eval_ag_news_bleu_score_sem": 0.15152853801023605,
      "eval_ag_news_emb_cos_sim": 0.7873067855834961,
      "eval_ag_news_emb_cos_sim_sem": 0.008036674343712646,
      "eval_ag_news_emb_top1_equal": 0.21875,
      "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.7307772636413574,
      "eval_ag_news_n_ngrams_match_1": 13.292,
      "eval_ag_news_n_ngrams_match_2": 2.762,
      "eval_ag_news_n_ngrams_match_3": 0.768,
      "eval_ag_news_num_pred_words": 46.774,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 41.711516412654476,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.32425441213985295,
      "eval_ag_news_runtime": 11.3499,
      "eval_ag_news_samples_per_second": 44.053,
      "eval_ag_news_steps_per_second": 0.088,
      "eval_ag_news_token_set_f1": 0.3333053712726195,
      "eval_ag_news_token_set_f1_sem": 0.004264557480845165,
      "eval_ag_news_token_set_precision": 0.31513790073234066,
      "eval_ag_news_token_set_recall": 0.36704357239580815,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 58750
    },
    {
      "epoch": 11.28,
      "eval_anthropic_toxic_prompts_accuracy": 0.1068125,
      "eval_anthropic_toxic_prompts_bleu_score": 2.9010616846179484,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12660661076898555,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6553249955177307,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009287884542733018,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.4395790100097656,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.848,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.704,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.582,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.102,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 31.173831534379364,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20190160930748008,
      "eval_anthropic_toxic_prompts_runtime": 9.4323,
      "eval_anthropic_toxic_prompts_samples_per_second": 53.009,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.106,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.34172570020486376,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006473508333206387,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4151872571029492,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.31911770200456474,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 58750
    },
    {
      "epoch": 11.28,
      "eval_arxiv_accuracy": 0.33109375,
      "eval_arxiv_bleu_score": 3.9621848956276606,
      "eval_arxiv_bleu_score_sem": 0.10961241137745281,
      "eval_arxiv_emb_cos_sim": 0.7240477800369263,
      "eval_arxiv_emb_cos_sim_sem": 0.007706940450021191,
      "eval_arxiv_emb_top1_equal": 0.203125,
      "eval_arxiv_emb_top1_equal_sem": 0.03570055125142555,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.59248685836792,
      "eval_arxiv_n_ngrams_match_1": 14.056,
      "eval_arxiv_n_ngrams_match_2": 2.632,
      "eval_arxiv_n_ngrams_match_3": 0.586,
      "eval_arxiv_num_pred_words": 40.116,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 36.32429707851407,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.33320154360712884,
      "eval_arxiv_runtime": 9.8297,
      "eval_arxiv_samples_per_second": 50.866,
      "eval_arxiv_steps_per_second": 0.102,
      "eval_arxiv_token_set_f1": 0.3285609237336241,
      "eval_arxiv_token_set_f1_sem": 0.00414196208913151,
      "eval_arxiv_token_set_precision": 0.27753659123686825,
      "eval_arxiv_token_set_recall": 0.42119789710111016,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 58750
    },
    {
      "epoch": 11.28,
      "eval_python_code_alpaca_accuracy": 0.1483125,
      "eval_python_code_alpaca_bleu_score": 3.9916814912080887,
      "eval_python_code_alpaca_bleu_score_sem": 0.12696243628711373,
      "eval_python_code_alpaca_emb_cos_sim": 0.7326651811599731,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008663690374831185,
      "eval_python_code_alpaca_emb_top1_equal": 0.1640625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.0883514881134033,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.098,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.432,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.742,
      "eval_python_code_alpaca_num_pred_words": 43.764,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 21.94087834744448,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3029695969529678,
      "eval_python_code_alpaca_runtime": 10.8584,
      "eval_python_code_alpaca_samples_per_second": 46.047,
      "eval_python_code_alpaca_steps_per_second": 0.092,
      "eval_python_code_alpaca_token_set_f1": 0.4474930196277289,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005635936224002089,
      "eval_python_code_alpaca_token_set_precision": 0.49212596973618433,
      "eval_python_code_alpaca_token_set_recall": 0.4378380005002488,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 58750
    },
    {
      "epoch": 11.28,
      "eval_wikibio_accuracy": 0.30625,
      "eval_wikibio_bleu_score": 5.284456659195724,
      "eval_wikibio_bleu_score_sem": 0.19164121299613252,
      "eval_wikibio_emb_cos_sim": 0.7153770327568054,
      "eval_wikibio_emb_cos_sim_sem": 0.011638356990813383,
      "eval_wikibio_emb_top1_equal": 0.2421875,
      "eval_wikibio_emb_top1_equal_sem": 0.038014990119662626,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.9758846759796143,
      "eval_wikibio_n_ngrams_match_1": 9.524,
      "eval_wikibio_n_ngrams_match_2": 3.05,
      "eval_wikibio_n_ngrams_match_3": 1.076,
      "eval_wikibio_num_pred_words": 36.496,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 53.297246849364434,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.32919887920883045,
      "eval_wikibio_runtime": 9.5054,
      "eval_wikibio_samples_per_second": 52.601,
      "eval_wikibio_steps_per_second": 0.105,
      "eval_wikibio_token_set_f1": 0.2996793878624288,
      "eval_wikibio_token_set_f1_sem": 0.005715281630065503,
      "eval_wikibio_token_set_precision": 0.30903652503095136,
      "eval_wikibio_token_set_recall": 0.30777861230276893,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 58750
    },
    {
      "epoch": 11.28,
      "eval_nq_accuracy": 0.50759375,
      "eval_nq_bleu_score": 10.752475591995346,
      "eval_nq_bleu_score_sem": 0.4589472595975013,
      "eval_nq_emb_cos_sim": 0.8130066394805908,
      "eval_nq_emb_cos_sim_sem": 0.007647548955024262,
      "eval_nq_emb_top1_equal": 0.265625,
      "eval_nq_emb_top1_equal_sem": 0.03919146934646163,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.3384242057800293,
      "eval_nq_n_ngrams_match_1": 22.354,
      "eval_nq_n_ngrams_match_2": 7.916,
      "eval_nq_n_ngrams_match_3": 3.52,
      "eval_nq_num_pred_words": 49.122,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 10.36489075235929,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4286441479224601,
      "eval_nq_runtime": 15.4561,
      "eval_nq_samples_per_second": 32.35,
      "eval_nq_steps_per_second": 0.065,
      "eval_nq_token_set_f1": 0.44749500085607663,
      "eval_nq_token_set_f1_sem": 0.004987511294251989,
      "eval_nq_token_set_precision": 0.4021669948134483,
      "eval_nq_token_set_recall": 0.5153262770335792,
      "eval_nq_true_num_tokens": 64.0,
      "step": 58750
    },
    {
      "epoch": 11.28,
      "learning_rate": 0.001,
      "loss": 2.7045,
      "step": 58752
    },
    {
      "epoch": 11.28,
      "learning_rate": 0.001,
      "loss": 2.7014,
      "step": 58764
    },
    {
      "epoch": 11.29,
      "learning_rate": 0.001,
      "loss": 2.7051,
      "step": 58776
    },
    {
      "epoch": 11.29,
      "learning_rate": 0.001,
      "loss": 2.7063,
      "step": 58788
    },
    {
      "epoch": 11.29,
      "learning_rate": 0.001,
      "loss": 2.7027,
      "step": 58800
    },
    {
      "epoch": 11.29,
      "learning_rate": 0.001,
      "loss": 2.6969,
      "step": 58812
    },
    {
      "epoch": 11.29,
      "learning_rate": 0.001,
      "loss": 2.7054,
      "step": 58824
    },
    {
      "epoch": 11.3,
      "learning_rate": 0.001,
      "loss": 2.7125,
      "step": 58836
    },
    {
      "epoch": 11.3,
      "learning_rate": 0.001,
      "loss": 2.6916,
      "step": 58848
    },
    {
      "epoch": 11.3,
      "learning_rate": 0.001,
      "loss": 2.7025,
      "step": 58860
    },
    {
      "epoch": 11.3,
      "learning_rate": 0.001,
      "loss": 2.7124,
      "step": 58872
    },
    {
      "epoch": 11.31,
      "learning_rate": 0.001,
      "loss": 2.7008,
      "step": 58884
    },
    {
      "epoch": 11.31,
      "learning_rate": 0.001,
      "loss": 2.7055,
      "step": 58896
    },
    {
      "epoch": 11.31,
      "learning_rate": 0.001,
      "loss": 2.7092,
      "step": 58908
    },
    {
      "epoch": 11.31,
      "learning_rate": 0.001,
      "loss": 2.7101,
      "step": 58920
    },
    {
      "epoch": 11.32,
      "learning_rate": 0.001,
      "loss": 2.7039,
      "step": 58932
    },
    {
      "epoch": 11.32,
      "learning_rate": 0.001,
      "loss": 2.7004,
      "step": 58944
    },
    {
      "epoch": 11.32,
      "learning_rate": 0.001,
      "loss": 2.719,
      "step": 58956
    },
    {
      "epoch": 11.32,
      "learning_rate": 0.001,
      "loss": 2.7028,
      "step": 58968
    },
    {
      "epoch": 11.32,
      "learning_rate": 0.001,
      "loss": 2.7054,
      "step": 58980
    },
    {
      "epoch": 11.33,
      "learning_rate": 0.001,
      "loss": 2.6897,
      "step": 58992
    },
    {
      "epoch": 11.33,
      "learning_rate": 0.001,
      "loss": 2.6987,
      "step": 59004
    },
    {
      "epoch": 11.33,
      "learning_rate": 0.001,
      "loss": 2.6915,
      "step": 59016
    },
    {
      "epoch": 11.33,
      "learning_rate": 0.001,
      "loss": 2.7117,
      "step": 59028
    },
    {
      "epoch": 11.34,
      "learning_rate": 0.001,
      "loss": 2.704,
      "step": 59040
    },
    {
      "epoch": 11.34,
      "learning_rate": 0.001,
      "loss": 2.7062,
      "step": 59052
    },
    {
      "epoch": 11.34,
      "learning_rate": 0.001,
      "loss": 2.7133,
      "step": 59064
    },
    {
      "epoch": 11.34,
      "learning_rate": 0.001,
      "loss": 2.7066,
      "step": 59076
    },
    {
      "epoch": 11.35,
      "learning_rate": 0.001,
      "loss": 2.7077,
      "step": 59088
    },
    {
      "epoch": 11.35,
      "learning_rate": 0.001,
      "loss": 2.7007,
      "step": 59100
    },
    {
      "epoch": 11.35,
      "learning_rate": 0.001,
      "loss": 2.7012,
      "step": 59112
    },
    {
      "epoch": 11.35,
      "learning_rate": 0.001,
      "loss": 2.7049,
      "step": 59124
    },
    {
      "epoch": 11.35,
      "learning_rate": 0.001,
      "loss": 2.7127,
      "step": 59136
    },
    {
      "epoch": 11.36,
      "learning_rate": 0.001,
      "loss": 2.6985,
      "step": 59148
    },
    {
      "epoch": 11.36,
      "learning_rate": 0.001,
      "loss": 2.6989,
      "step": 59160
    },
    {
      "epoch": 11.36,
      "learning_rate": 0.001,
      "loss": 2.7088,
      "step": 59172
    },
    {
      "epoch": 11.36,
      "learning_rate": 0.001,
      "loss": 2.6962,
      "step": 59184
    },
    {
      "epoch": 11.37,
      "learning_rate": 0.001,
      "loss": 2.7118,
      "step": 59196
    },
    {
      "epoch": 11.37,
      "learning_rate": 0.001,
      "loss": 2.7067,
      "step": 59208
    },
    {
      "epoch": 11.37,
      "learning_rate": 0.001,
      "loss": 2.7033,
      "step": 59220
    },
    {
      "epoch": 11.37,
      "learning_rate": 0.001,
      "loss": 2.7015,
      "step": 59232
    },
    {
      "epoch": 11.38,
      "learning_rate": 0.001,
      "loss": 2.7106,
      "step": 59244
    },
    {
      "epoch": 11.38,
      "learning_rate": 0.001,
      "loss": 2.6929,
      "step": 59256
    },
    {
      "epoch": 11.38,
      "learning_rate": 0.001,
      "loss": 2.6965,
      "step": 59268
    },
    {
      "epoch": 11.38,
      "learning_rate": 0.001,
      "loss": 2.6972,
      "step": 59280
    },
    {
      "epoch": 11.38,
      "learning_rate": 0.001,
      "loss": 2.6967,
      "step": 59292
    },
    {
      "epoch": 11.39,
      "learning_rate": 0.001,
      "loss": 2.7053,
      "step": 59304
    },
    {
      "epoch": 11.39,
      "learning_rate": 0.001,
      "loss": 2.6973,
      "step": 59316
    },
    {
      "epoch": 11.39,
      "learning_rate": 0.001,
      "loss": 2.7055,
      "step": 59328
    },
    {
      "epoch": 11.39,
      "learning_rate": 0.001,
      "loss": 2.7058,
      "step": 59340
    },
    {
      "epoch": 11.4,
      "learning_rate": 0.001,
      "loss": 2.6999,
      "step": 59352
    },
    {
      "epoch": 11.4,
      "learning_rate": 0.001,
      "loss": 2.696,
      "step": 59364
    },
    {
      "epoch": 11.4,
      "eval_ag_news_accuracy": 0.30625,
      "eval_ag_news_bleu_score": 4.431863173132895,
      "eval_ag_news_bleu_score_sem": 0.14530038639315268,
      "eval_ag_news_emb_cos_sim": 0.7869585156440735,
      "eval_ag_news_emb_cos_sim_sem": 0.007882709271924318,
      "eval_ag_news_emb_top1_equal": 0.1953125,
      "eval_ag_news_emb_top1_equal_sem": 0.035178457165496856,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.7377586364746094,
      "eval_ag_news_n_ngrams_match_1": 13.234,
      "eval_ag_news_n_ngrams_match_2": 2.724,
      "eval_ag_news_n_ngrams_match_3": 0.774,
      "eval_ag_news_num_pred_words": 46.442,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 42.00373893044607,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3246625035059656,
      "eval_ag_news_runtime": 9.7782,
      "eval_ag_news_samples_per_second": 51.134,
      "eval_ag_news_steps_per_second": 0.102,
      "eval_ag_news_token_set_f1": 0.33345497884835773,
      "eval_ag_news_token_set_f1_sem": 0.004357801549242252,
      "eval_ag_news_token_set_precision": 0.3132578723680101,
      "eval_ag_news_token_set_recall": 0.37357079520019015,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 59375
    },
    {
      "epoch": 11.4,
      "eval_anthropic_toxic_prompts_accuracy": 0.10546875,
      "eval_anthropic_toxic_prompts_bleu_score": 2.7997797982849897,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10897744782255879,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6400052309036255,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009610886794463874,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.4370694160461426,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.654,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.656,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.588,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.252,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 31.095695960228852,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.19881881976881854,
      "eval_anthropic_toxic_prompts_runtime": 9.9887,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.057,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.1,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3414458554745744,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006301672437152909,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.40264846430668666,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.327145834464341,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 59375
    },
    {
      "epoch": 11.4,
      "eval_arxiv_accuracy": 0.33096875,
      "eval_arxiv_bleu_score": 3.8984078397780997,
      "eval_arxiv_bleu_score_sem": 0.1136108171750166,
      "eval_arxiv_emb_cos_sim": 0.7153143882751465,
      "eval_arxiv_emb_cos_sim_sem": 0.008298240978293795,
      "eval_arxiv_emb_top1_equal": 0.234375,
      "eval_arxiv_emb_top1_equal_sem": 0.03758909358128201,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.5796284675598145,
      "eval_arxiv_n_ngrams_match_1": 13.746,
      "eval_arxiv_n_ngrams_match_2": 2.56,
      "eval_arxiv_n_ngrams_match_3": 0.548,
      "eval_arxiv_num_pred_words": 39.7,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 35.860215138513546,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.32793192745391275,
      "eval_arxiv_runtime": 9.7033,
      "eval_arxiv_samples_per_second": 51.529,
      "eval_arxiv_steps_per_second": 0.103,
      "eval_arxiv_token_set_f1": 0.3250715510328737,
      "eval_arxiv_token_set_f1_sem": 0.0042098734068675625,
      "eval_arxiv_token_set_precision": 0.27413619203129114,
      "eval_arxiv_token_set_recall": 0.42157927825665653,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 59375
    },
    {
      "epoch": 11.4,
      "eval_python_code_alpaca_accuracy": 0.1486875,
      "eval_python_code_alpaca_bleu_score": 3.9670491992170747,
      "eval_python_code_alpaca_bleu_score_sem": 0.12656978363115465,
      "eval_python_code_alpaca_emb_cos_sim": 0.7320708632469177,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008293936731389619,
      "eval_python_code_alpaca_emb_top1_equal": 0.140625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.0828518867492676,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.064,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.458,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.692,
      "eval_python_code_alpaca_num_pred_words": 43.398,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 21.82054346320137,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3076708404043146,
      "eval_python_code_alpaca_runtime": 9.6604,
      "eval_python_code_alpaca_samples_per_second": 51.757,
      "eval_python_code_alpaca_steps_per_second": 0.104,
      "eval_python_code_alpaca_token_set_f1": 0.45303357487297796,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005460600680863856,
      "eval_python_code_alpaca_token_set_precision": 0.4877758500785313,
      "eval_python_code_alpaca_token_set_recall": 0.4481852572607296,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 59375
    },
    {
      "epoch": 11.4,
      "eval_wikibio_accuracy": 0.30703125,
      "eval_wikibio_bleu_score": 5.251511457631601,
      "eval_wikibio_bleu_score_sem": 0.18454730950711926,
      "eval_wikibio_emb_cos_sim": 0.7073708772659302,
      "eval_wikibio_emb_cos_sim_sem": 0.011683425142761689,
      "eval_wikibio_emb_top1_equal": 0.171875,
      "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 4.022034645080566,
      "eval_wikibio_n_ngrams_match_1": 9.514,
      "eval_wikibio_n_ngrams_match_2": 3.102,
      "eval_wikibio_n_ngrams_match_3": 1.084,
      "eval_wikibio_num_pred_words": 36.598,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 55.814553181161685,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3289108956112218,
      "eval_wikibio_runtime": 15.262,
      "eval_wikibio_samples_per_second": 32.761,
      "eval_wikibio_steps_per_second": 0.066,
      "eval_wikibio_token_set_f1": 0.2975795067165946,
      "eval_wikibio_token_set_f1_sem": 0.005461913478978885,
      "eval_wikibio_token_set_precision": 0.3073213589436206,
      "eval_wikibio_token_set_recall": 0.3042370580948697,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 59375
    },
    {
      "epoch": 11.4,
      "eval_nq_accuracy": 0.506875,
      "eval_nq_bleu_score": 10.225693623198634,
      "eval_nq_bleu_score_sem": 0.44159420530919286,
      "eval_nq_emb_cos_sim": 0.8122824430465698,
      "eval_nq_emb_cos_sim_sem": 0.00771513142661148,
      "eval_nq_emb_top1_equal": 0.296875,
      "eval_nq_emb_top1_equal_sem": 0.04054163310179599,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.340015411376953,
      "eval_nq_n_ngrams_match_1": 21.684,
      "eval_nq_n_ngrams_match_2": 7.558,
      "eval_nq_n_ngrams_match_3": 3.32,
      "eval_nq_num_pred_words": 48.564,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 10.381396553114586,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4160662133024913,
      "eval_nq_runtime": 10.4983,
      "eval_nq_samples_per_second": 47.627,
      "eval_nq_steps_per_second": 0.095,
      "eval_nq_token_set_f1": 0.4354222019950613,
      "eval_nq_token_set_f1_sem": 0.004983935344112309,
      "eval_nq_token_set_precision": 0.3897972927517024,
      "eval_nq_token_set_recall": 0.5056233853328723,
      "eval_nq_true_num_tokens": 64.0,
      "step": 59375
    },
    {
      "epoch": 11.4,
      "learning_rate": 0.001,
      "loss": 2.6991,
      "step": 59376
    },
    {
      "epoch": 11.4,
      "learning_rate": 0.001,
      "loss": 2.6989,
      "step": 59388
    },
    {
      "epoch": 11.41,
      "learning_rate": 0.001,
      "loss": 2.7054,
      "step": 59400
    },
    {
      "epoch": 11.41,
      "learning_rate": 0.001,
      "loss": 2.7147,
      "step": 59412
    },
    {
      "epoch": 11.41,
      "learning_rate": 0.001,
      "loss": 2.7055,
      "step": 59424
    },
    {
      "epoch": 11.41,
      "learning_rate": 0.001,
      "loss": 2.7133,
      "step": 59436
    },
    {
      "epoch": 11.41,
      "learning_rate": 0.001,
      "loss": 2.7147,
      "step": 59448
    },
    {
      "epoch": 11.42,
      "learning_rate": 0.001,
      "loss": 2.7069,
      "step": 59460
    },
    {
      "epoch": 11.42,
      "learning_rate": 0.001,
      "loss": 2.7052,
      "step": 59472
    },
    {
      "epoch": 11.42,
      "learning_rate": 0.001,
      "loss": 2.6999,
      "step": 59484
    },
    {
      "epoch": 11.42,
      "learning_rate": 0.001,
      "loss": 2.7046,
      "step": 59496
    },
    {
      "epoch": 11.43,
      "learning_rate": 0.001,
      "loss": 2.6984,
      "step": 59508
    },
    {
      "epoch": 11.43,
      "learning_rate": 0.001,
      "loss": 2.7066,
      "step": 59520
    },
    {
      "epoch": 11.43,
      "learning_rate": 0.001,
      "loss": 2.6957,
      "step": 59532
    },
    {
      "epoch": 11.43,
      "learning_rate": 0.001,
      "loss": 2.7047,
      "step": 59544
    },
    {
      "epoch": 11.44,
      "learning_rate": 0.001,
      "loss": 2.6984,
      "step": 59556
    },
    {
      "epoch": 11.44,
      "learning_rate": 0.001,
      "loss": 2.6994,
      "step": 59568
    },
    {
      "epoch": 11.44,
      "learning_rate": 0.001,
      "loss": 2.7041,
      "step": 59580
    },
    {
      "epoch": 11.44,
      "learning_rate": 0.001,
      "loss": 2.7035,
      "step": 59592
    },
    {
      "epoch": 11.44,
      "learning_rate": 0.001,
      "loss": 2.7135,
      "step": 59604
    },
    {
      "epoch": 11.45,
      "learning_rate": 0.001,
      "loss": 2.7163,
      "step": 59616
    },
    {
      "epoch": 11.45,
      "learning_rate": 0.001,
      "loss": 2.7157,
      "step": 59628
    },
    {
      "epoch": 11.45,
      "learning_rate": 0.001,
      "loss": 2.7114,
      "step": 59640
    },
    {
      "epoch": 11.45,
      "learning_rate": 0.001,
      "loss": 2.704,
      "step": 59652
    },
    {
      "epoch": 11.46,
      "learning_rate": 0.001,
      "loss": 2.7019,
      "step": 59664
    },
    {
      "epoch": 11.46,
      "learning_rate": 0.001,
      "loss": 2.7115,
      "step": 59676
    },
    {
      "epoch": 11.46,
      "learning_rate": 0.001,
      "loss": 2.7079,
      "step": 59688
    },
    {
      "epoch": 11.46,
      "learning_rate": 0.001,
      "loss": 2.6948,
      "step": 59700
    },
    {
      "epoch": 11.47,
      "learning_rate": 0.001,
      "loss": 2.6998,
      "step": 59712
    },
    {
      "epoch": 11.47,
      "learning_rate": 0.001,
      "loss": 2.6991,
      "step": 59724
    },
    {
      "epoch": 11.47,
      "learning_rate": 0.001,
      "loss": 2.7023,
      "step": 59736
    },
    {
      "epoch": 11.47,
      "learning_rate": 0.001,
      "loss": 2.7074,
      "step": 59748
    },
    {
      "epoch": 11.47,
      "learning_rate": 0.001,
      "loss": 2.707,
      "step": 59760
    },
    {
      "epoch": 11.48,
      "learning_rate": 0.001,
      "loss": 2.7075,
      "step": 59772
    },
    {
      "epoch": 11.48,
      "learning_rate": 0.001,
      "loss": 2.7085,
      "step": 59784
    },
    {
      "epoch": 11.48,
      "learning_rate": 0.001,
      "loss": 2.7083,
      "step": 59796
    },
    {
      "epoch": 11.48,
      "learning_rate": 0.001,
      "loss": 2.7009,
      "step": 59808
    },
    {
      "epoch": 11.49,
      "learning_rate": 0.001,
      "loss": 2.7135,
      "step": 59820
    },
    {
      "epoch": 11.49,
      "learning_rate": 0.001,
      "loss": 2.7014,
      "step": 59832
    },
    {
      "epoch": 11.49,
      "learning_rate": 0.001,
      "loss": 2.7063,
      "step": 59844
    },
    {
      "epoch": 11.49,
      "learning_rate": 0.001,
      "loss": 2.701,
      "step": 59856
    },
    {
      "epoch": 11.5,
      "learning_rate": 0.001,
      "loss": 2.7035,
      "step": 59868
    },
    {
      "epoch": 11.5,
      "learning_rate": 0.001,
      "loss": 2.6916,
      "step": 59880
    },
    {
      "epoch": 11.5,
      "learning_rate": 0.001,
      "loss": 2.7103,
      "step": 59892
    },
    {
      "epoch": 11.5,
      "learning_rate": 0.001,
      "loss": 2.7037,
      "step": 59904
    },
    {
      "epoch": 11.5,
      "learning_rate": 0.001,
      "loss": 2.705,
      "step": 59916
    },
    {
      "epoch": 11.51,
      "learning_rate": 0.001,
      "loss": 2.6916,
      "step": 59928
    },
    {
      "epoch": 11.51,
      "learning_rate": 0.001,
      "loss": 2.7114,
      "step": 59940
    },
    {
      "epoch": 11.51,
      "learning_rate": 0.001,
      "loss": 2.7017,
      "step": 59952
    },
    {
      "epoch": 11.51,
      "learning_rate": 0.001,
      "loss": 2.6981,
      "step": 59964
    },
    {
      "epoch": 11.52,
      "learning_rate": 0.001,
      "loss": 2.7116,
      "step": 59976
    },
    {
      "epoch": 11.52,
      "learning_rate": 0.001,
      "loss": 2.7032,
      "step": 59988
    },
    {
      "epoch": 11.52,
      "learning_rate": 0.001,
      "loss": 2.7045,
      "step": 60000
    },
    {
      "epoch": 11.52,
      "eval_ag_news_accuracy": 0.30728125,
      "eval_ag_news_bleu_score": 4.530503729114923,
      "eval_ag_news_bleu_score_sem": 0.14781067904570572,
      "eval_ag_news_emb_cos_sim": 0.7807636260986328,
      "eval_ag_news_emb_cos_sim_sem": 0.008243288731825177,
      "eval_ag_news_emb_top1_equal": 0.171875,
      "eval_ag_news_emb_top1_equal_sem": 0.03347745514062371,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.7231216430664062,
      "eval_ag_news_n_ngrams_match_1": 13.174,
      "eval_ag_news_n_ngrams_match_2": 2.712,
      "eval_ag_news_n_ngrams_match_3": 0.76,
      "eval_ag_news_num_pred_words": 46.072,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 41.393408081389815,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.32593310766267536,
      "eval_ag_news_runtime": 9.8832,
      "eval_ag_news_samples_per_second": 50.591,
      "eval_ag_news_steps_per_second": 0.101,
      "eval_ag_news_token_set_f1": 0.3318015587234676,
      "eval_ag_news_token_set_f1_sem": 0.004388282865926565,
      "eval_ag_news_token_set_precision": 0.31256188361838566,
      "eval_ag_news_token_set_recall": 0.36788600917976305,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 60000
    },
    {
      "epoch": 11.52,
      "eval_anthropic_toxic_prompts_accuracy": 0.10790625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.011933463772888,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1307588179861984,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6478198170661926,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009703891718237257,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.398049831390381,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.824,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.762,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.658,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.456,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 29.90572194214503,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20052986491482772,
      "eval_anthropic_toxic_prompts_runtime": 9.2695,
      "eval_anthropic_toxic_prompts_samples_per_second": 53.94,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.108,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.34510985636689057,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006500321372036515,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.41112777818840385,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32562636260486505,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 60000
    },
    {
      "epoch": 11.52,
      "eval_arxiv_accuracy": 0.33259375,
      "eval_arxiv_bleu_score": 3.9454065017135553,
      "eval_arxiv_bleu_score_sem": 0.10428243358706764,
      "eval_arxiv_emb_cos_sim": 0.7328290939331055,
      "eval_arxiv_emb_cos_sim_sem": 0.008083398735098528,
      "eval_arxiv_emb_top1_equal": 0.203125,
      "eval_arxiv_emb_top1_equal_sem": 0.03570055125142555,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.584223747253418,
      "eval_arxiv_n_ngrams_match_1": 13.966,
      "eval_arxiv_n_ngrams_match_2": 2.624,
      "eval_arxiv_n_ngrams_match_3": 0.568,
      "eval_arxiv_num_pred_words": 40.098,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 36.025382060402094,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.33173521287868857,
      "eval_arxiv_runtime": 9.8245,
      "eval_arxiv_samples_per_second": 50.893,
      "eval_arxiv_steps_per_second": 0.102,
      "eval_arxiv_token_set_f1": 0.32662150356028635,
      "eval_arxiv_token_set_f1_sem": 0.004007506383550244,
      "eval_arxiv_token_set_precision": 0.2751492498071853,
      "eval_arxiv_token_set_recall": 0.41868338660273136,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 60000
    },
    {
      "epoch": 11.52,
      "eval_python_code_alpaca_accuracy": 0.1535625,
      "eval_python_code_alpaca_bleu_score": 4.149384697311357,
      "eval_python_code_alpaca_bleu_score_sem": 0.1355033560597435,
      "eval_python_code_alpaca_emb_cos_sim": 0.7412024736404419,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009297430145376227,
      "eval_python_code_alpaca_emb_top1_equal": 0.1171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.050917148590088,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.282,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.57,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.802,
      "eval_python_code_alpaca_num_pred_words": 43.734,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 21.134719214330342,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3068012458484429,
      "eval_python_code_alpaca_runtime": 9.572,
      "eval_python_code_alpaca_samples_per_second": 52.236,
      "eval_python_code_alpaca_steps_per_second": 0.104,
      "eval_python_code_alpaca_token_set_f1": 0.4570120815552914,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005492166368443662,
      "eval_python_code_alpaca_token_set_precision": 0.5080212430649951,
      "eval_python_code_alpaca_token_set_recall": 0.4389553213692472,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 60000
    },
    {
      "epoch": 11.52,
      "eval_wikibio_accuracy": 0.310625,
      "eval_wikibio_bleu_score": 5.562129065968864,
      "eval_wikibio_bleu_score_sem": 0.20208377637207953,
      "eval_wikibio_emb_cos_sim": 0.7162641882896423,
      "eval_wikibio_emb_cos_sim_sem": 0.011005607671208346,
      "eval_wikibio_emb_top1_equal": 0.1796875,
      "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.9331448078155518,
      "eval_wikibio_n_ngrams_match_1": 9.568,
      "eval_wikibio_n_ngrams_match_2": 3.188,
      "eval_wikibio_n_ngrams_match_3": 1.16,
      "eval_wikibio_num_pred_words": 35.648,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 51.067322324856555,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.33113779740325056,
      "eval_wikibio_runtime": 9.6015,
      "eval_wikibio_samples_per_second": 52.075,
      "eval_wikibio_steps_per_second": 0.104,
      "eval_wikibio_token_set_f1": 0.3064295331814496,
      "eval_wikibio_token_set_f1_sem": 0.005774884058421604,
      "eval_wikibio_token_set_precision": 0.31151168254344447,
      "eval_wikibio_token_set_recall": 0.3203673551916625,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 60000
    },
    {
      "epoch": 11.52,
      "eval_nq_accuracy": 0.5083125,
      "eval_nq_bleu_score": 10.859162359100454,
      "eval_nq_bleu_score_sem": 0.4618880507067924,
      "eval_nq_emb_cos_sim": 0.8117635250091553,
      "eval_nq_emb_cos_sim_sem": 0.007694013879873998,
      "eval_nq_emb_top1_equal": 0.2109375,
      "eval_nq_emb_top1_equal_sem": 0.03620184850179216,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.335266590118408,
      "eval_nq_n_ngrams_match_1": 22.096,
      "eval_nq_n_ngrams_match_2": 7.9,
      "eval_nq_n_ngrams_match_3": 3.592,
      "eval_nq_num_pred_words": 49.06,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 10.332214028406678,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.42602497138570844,
      "eval_nq_runtime": 14.6595,
      "eval_nq_samples_per_second": 34.108,
      "eval_nq_steps_per_second": 0.068,
      "eval_nq_token_set_f1": 0.4409636716216321,
      "eval_nq_token_set_f1_sem": 0.004927524126723718,
      "eval_nq_token_set_precision": 0.3954249872690946,
      "eval_nq_token_set_recall": 0.5090542184669634,
      "eval_nq_true_num_tokens": 64.0,
      "step": 60000
    },
    {
      "epoch": 11.52,
      "learning_rate": 0.001,
      "loss": 2.7084,
      "step": 60012
    },
    {
      "epoch": 11.53,
      "learning_rate": 0.001,
      "loss": 2.7043,
      "step": 60024
    },
    {
      "epoch": 11.53,
      "learning_rate": 0.001,
      "loss": 2.6994,
      "step": 60036
    },
    {
      "epoch": 11.53,
      "learning_rate": 0.001,
      "loss": 2.7005,
      "step": 60048
    },
    {
      "epoch": 11.53,
      "learning_rate": 0.001,
      "loss": 2.7175,
      "step": 60060
    },
    {
      "epoch": 11.53,
      "learning_rate": 0.001,
      "loss": 2.7024,
      "step": 60072
    },
    {
      "epoch": 11.54,
      "learning_rate": 0.001,
      "loss": 2.6967,
      "step": 60084
    },
    {
      "epoch": 11.54,
      "learning_rate": 0.001,
      "loss": 2.7021,
      "step": 60096
    },
    {
      "epoch": 11.54,
      "learning_rate": 0.001,
      "loss": 2.698,
      "step": 60108
    },
    {
      "epoch": 11.54,
      "learning_rate": 0.001,
      "loss": 2.7012,
      "step": 60120
    },
    {
      "epoch": 11.55,
      "learning_rate": 0.001,
      "loss": 2.7036,
      "step": 60132
    },
    {
      "epoch": 11.55,
      "learning_rate": 0.001,
      "loss": 2.7048,
      "step": 60144
    },
    {
      "epoch": 11.55,
      "learning_rate": 0.001,
      "loss": 2.6945,
      "step": 60156
    },
    {
      "epoch": 11.55,
      "learning_rate": 0.001,
      "loss": 2.6945,
      "step": 60168
    },
    {
      "epoch": 11.56,
      "learning_rate": 0.001,
      "loss": 2.6921,
      "step": 60180
    },
    {
      "epoch": 11.56,
      "learning_rate": 0.001,
      "loss": 2.7001,
      "step": 60192
    },
    {
      "epoch": 11.56,
      "learning_rate": 0.001,
      "loss": 2.7035,
      "step": 60204
    },
    {
      "epoch": 11.56,
      "learning_rate": 0.001,
      "loss": 2.7077,
      "step": 60216
    },
    {
      "epoch": 11.56,
      "learning_rate": 0.001,
      "loss": 2.7006,
      "step": 60228
    },
    {
      "epoch": 11.57,
      "learning_rate": 0.001,
      "loss": 2.7012,
      "step": 60240
    },
    {
      "epoch": 11.57,
      "learning_rate": 0.001,
      "loss": 2.6988,
      "step": 60252
    },
    {
      "epoch": 11.57,
      "learning_rate": 0.001,
      "loss": 2.6986,
      "step": 60264
    },
    {
      "epoch": 11.57,
      "learning_rate": 0.001,
      "loss": 2.7102,
      "step": 60276
    },
    {
      "epoch": 11.58,
      "learning_rate": 0.001,
      "loss": 2.6988,
      "step": 60288
    },
    {
      "epoch": 11.58,
      "learning_rate": 0.001,
      "loss": 2.7014,
      "step": 60300
    },
    {
      "epoch": 11.58,
      "learning_rate": 0.001,
      "loss": 2.7105,
      "step": 60312
    },
    {
      "epoch": 11.58,
      "learning_rate": 0.001,
      "loss": 2.7154,
      "step": 60324
    },
    {
      "epoch": 11.59,
      "learning_rate": 0.001,
      "loss": 2.7112,
      "step": 60336
    },
    {
      "epoch": 11.59,
      "learning_rate": 0.001,
      "loss": 2.7088,
      "step": 60348
    },
    {
      "epoch": 11.59,
      "learning_rate": 0.001,
      "loss": 2.716,
      "step": 60360
    },
    {
      "epoch": 11.59,
      "learning_rate": 0.001,
      "loss": 2.703,
      "step": 60372
    },
    {
      "epoch": 11.59,
      "learning_rate": 0.001,
      "loss": 2.6966,
      "step": 60384
    },
    {
      "epoch": 11.6,
      "learning_rate": 0.001,
      "loss": 2.705,
      "step": 60396
    },
    {
      "epoch": 11.6,
      "learning_rate": 0.001,
      "loss": 2.7108,
      "step": 60408
    },
    {
      "epoch": 11.6,
      "learning_rate": 0.001,
      "loss": 2.7055,
      "step": 60420
    },
    {
      "epoch": 11.6,
      "learning_rate": 0.001,
      "loss": 2.702,
      "step": 60432
    },
    {
      "epoch": 11.61,
      "learning_rate": 0.001,
      "loss": 2.6863,
      "step": 60444
    },
    {
      "epoch": 11.61,
      "learning_rate": 0.001,
      "loss": 2.6877,
      "step": 60456
    },
    {
      "epoch": 11.61,
      "learning_rate": 0.001,
      "loss": 2.7015,
      "step": 60468
    },
    {
      "epoch": 11.61,
      "learning_rate": 0.001,
      "loss": 2.7064,
      "step": 60480
    },
    {
      "epoch": 11.62,
      "learning_rate": 0.001,
      "loss": 2.7014,
      "step": 60492
    },
    {
      "epoch": 11.62,
      "learning_rate": 0.001,
      "loss": 2.6982,
      "step": 60504
    },
    {
      "epoch": 11.62,
      "learning_rate": 0.001,
      "loss": 2.713,
      "step": 60516
    },
    {
      "epoch": 11.62,
      "learning_rate": 0.001,
      "loss": 2.7092,
      "step": 60528
    },
    {
      "epoch": 11.62,
      "learning_rate": 0.001,
      "loss": 2.6951,
      "step": 60540
    },
    {
      "epoch": 11.63,
      "learning_rate": 0.001,
      "loss": 2.7033,
      "step": 60552
    },
    {
      "epoch": 11.63,
      "learning_rate": 0.001,
      "loss": 2.6947,
      "step": 60564
    },
    {
      "epoch": 11.63,
      "learning_rate": 0.001,
      "loss": 2.7093,
      "step": 60576
    },
    {
      "epoch": 11.63,
      "learning_rate": 0.001,
      "loss": 2.7026,
      "step": 60588
    },
    {
      "epoch": 11.64,
      "learning_rate": 0.001,
      "loss": 2.6972,
      "step": 60600
    },
    {
      "epoch": 11.64,
      "learning_rate": 0.001,
      "loss": 2.7068,
      "step": 60612
    },
    {
      "epoch": 11.64,
      "learning_rate": 0.001,
      "loss": 2.6871,
      "step": 60624
    },
    {
      "epoch": 11.64,
      "eval_ag_news_accuracy": 0.30771875,
      "eval_ag_news_bleu_score": 4.562737182773448,
      "eval_ag_news_bleu_score_sem": 0.15363674128184648,
      "eval_ag_news_emb_cos_sim": 0.7819575071334839,
      "eval_ag_news_emb_cos_sim_sem": 0.007952146204846017,
      "eval_ag_news_emb_top1_equal": 0.203125,
      "eval_ag_news_emb_top1_equal_sem": 0.03570055125142555,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.730755090713501,
      "eval_ag_news_n_ngrams_match_1": 13.17,
      "eval_ag_news_n_ngrams_match_2": 2.812,
      "eval_ag_news_n_ngrams_match_3": 0.764,
      "eval_ag_news_num_pred_words": 46.214,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 41.7105915564637,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3255476877533653,
      "eval_ag_news_runtime": 10.181,
      "eval_ag_news_samples_per_second": 49.111,
      "eval_ag_news_steps_per_second": 0.098,
      "eval_ag_news_token_set_f1": 0.3323323333511247,
      "eval_ag_news_token_set_f1_sem": 0.004345378378307004,
      "eval_ag_news_token_set_precision": 0.3133151639567446,
      "eval_ag_news_token_set_recall": 0.3708701655661347,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 60625
    },
    {
      "epoch": 11.64,
      "eval_anthropic_toxic_prompts_accuracy": 0.10853125,
      "eval_anthropic_toxic_prompts_bleu_score": 2.7090491693140275,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.09938297862119239,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6559830904006958,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.0097251297786197,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.36966872215271,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.698,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.62,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.574,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.876,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 29.06889558138647,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.19495095790542527,
      "eval_anthropic_toxic_prompts_runtime": 11.653,
      "eval_anthropic_toxic_prompts_samples_per_second": 42.907,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.086,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3354171332896632,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006217683903982814,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4025546215448638,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3150708848694828,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 60625
    },
    {
      "epoch": 11.64,
      "eval_arxiv_accuracy": 0.3343125,
      "eval_arxiv_bleu_score": 3.975372086597374,
      "eval_arxiv_bleu_score_sem": 0.11754027122992618,
      "eval_arxiv_emb_cos_sim": 0.7152624726295471,
      "eval_arxiv_emb_cos_sim_sem": 0.009203334671897625,
      "eval_arxiv_emb_top1_equal": 0.1875,
      "eval_arxiv_emb_top1_equal_sem": 0.034634623208270626,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.5604097843170166,
      "eval_arxiv_n_ngrams_match_1": 13.812,
      "eval_arxiv_n_ngrams_match_2": 2.642,
      "eval_arxiv_n_ngrams_match_3": 0.58,
      "eval_arxiv_num_pred_words": 39.996,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 35.177609424594216,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3280746894186754,
      "eval_arxiv_runtime": 10.9874,
      "eval_arxiv_samples_per_second": 45.507,
      "eval_arxiv_steps_per_second": 0.091,
      "eval_arxiv_token_set_f1": 0.3278418572741925,
      "eval_arxiv_token_set_f1_sem": 0.004322233929830818,
      "eval_arxiv_token_set_precision": 0.2739703220111268,
      "eval_arxiv_token_set_recall": 0.4314235969696949,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 60625
    },
    {
      "epoch": 11.64,
      "eval_python_code_alpaca_accuracy": 0.1534375,
      "eval_python_code_alpaca_bleu_score": 4.274866366482068,
      "eval_python_code_alpaca_bleu_score_sem": 0.14024241858965333,
      "eval_python_code_alpaca_emb_cos_sim": 0.7309517860412598,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008365820815702982,
      "eval_python_code_alpaca_emb_top1_equal": 0.1328125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.050659418106079,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.262,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.628,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.842,
      "eval_python_code_alpaca_num_pred_words": 44.104,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 21.12927285479443,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3082043485761443,
      "eval_python_code_alpaca_runtime": 10.9758,
      "eval_python_code_alpaca_samples_per_second": 45.555,
      "eval_python_code_alpaca_steps_per_second": 0.091,
      "eval_python_code_alpaca_token_set_f1": 0.46014841933317135,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005359291934462503,
      "eval_python_code_alpaca_token_set_precision": 0.5040148341849664,
      "eval_python_code_alpaca_token_set_recall": 0.44620450056910027,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 60625
    },
    {
      "epoch": 11.64,
      "eval_wikibio_accuracy": 0.30828125,
      "eval_wikibio_bleu_score": 5.299619657800155,
      "eval_wikibio_bleu_score_sem": 0.1901970456602861,
      "eval_wikibio_emb_cos_sim": 0.7095686197280884,
      "eval_wikibio_emb_cos_sim_sem": 0.011874961495819446,
      "eval_wikibio_emb_top1_equal": 0.125,
      "eval_wikibio_emb_top1_equal_sem": 0.02934655822437397,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.980469226837158,
      "eval_wikibio_n_ngrams_match_1": 9.584,
      "eval_wikibio_n_ngrams_match_2": 3.102,
      "eval_wikibio_n_ngrams_match_3": 1.068,
      "eval_wikibio_num_pred_words": 36.272,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 53.54215174864247,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3315543218178374,
      "eval_wikibio_runtime": 9.6104,
      "eval_wikibio_samples_per_second": 52.027,
      "eval_wikibio_steps_per_second": 0.104,
      "eval_wikibio_token_set_f1": 0.3016552652215996,
      "eval_wikibio_token_set_f1_sem": 0.005721542587219132,
      "eval_wikibio_token_set_precision": 0.3112307697493472,
      "eval_wikibio_token_set_recall": 0.3090902051571282,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 60625
    },
    {
      "epoch": 11.64,
      "eval_nq_accuracy": 0.50734375,
      "eval_nq_bleu_score": 10.698290321386462,
      "eval_nq_bleu_score_sem": 0.45302670300933406,
      "eval_nq_emb_cos_sim": 0.8153901696205139,
      "eval_nq_emb_cos_sim_sem": 0.007834353665792163,
      "eval_nq_emb_top1_equal": 0.2421875,
      "eval_nq_emb_top1_equal_sem": 0.038014990119662626,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.3290982246398926,
      "eval_nq_n_ngrams_match_1": 22.254,
      "eval_nq_n_ngrams_match_2": 7.862,
      "eval_nq_n_ngrams_match_3": 3.494,
      "eval_nq_num_pred_words": 49.402,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 10.268677316365293,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4264352290778385,
      "eval_nq_runtime": 18.7127,
      "eval_nq_samples_per_second": 26.72,
      "eval_nq_steps_per_second": 0.053,
      "eval_nq_token_set_f1": 0.4434893634480182,
      "eval_nq_token_set_f1_sem": 0.0050043689965380625,
      "eval_nq_token_set_precision": 0.39965925507967776,
      "eval_nq_token_set_recall": 0.5075817085698917,
      "eval_nq_true_num_tokens": 64.0,
      "step": 60625
    },
    {
      "epoch": 11.64,
      "learning_rate": 0.001,
      "loss": 2.6982,
      "step": 60636
    },
    {
      "epoch": 11.65,
      "learning_rate": 0.001,
      "loss": 2.6959,
      "step": 60648
    },
    {
      "epoch": 11.65,
      "learning_rate": 0.001,
      "loss": 2.7022,
      "step": 60660
    },
    {
      "epoch": 11.65,
      "learning_rate": 0.001,
      "loss": 2.7051,
      "step": 60672
    },
    {
      "epoch": 11.65,
      "learning_rate": 0.001,
      "loss": 2.6916,
      "step": 60684
    },
    {
      "epoch": 11.65,
      "learning_rate": 0.001,
      "loss": 2.7054,
      "step": 60696
    },
    {
      "epoch": 11.66,
      "learning_rate": 0.001,
      "loss": 2.6958,
      "step": 60708
    },
    {
      "epoch": 11.66,
      "learning_rate": 0.001,
      "loss": 2.7033,
      "step": 60720
    },
    {
      "epoch": 11.66,
      "learning_rate": 0.001,
      "loss": 2.7103,
      "step": 60732
    },
    {
      "epoch": 11.66,
      "learning_rate": 0.001,
      "loss": 2.703,
      "step": 60744
    },
    {
      "epoch": 11.67,
      "learning_rate": 0.001,
      "loss": 2.6931,
      "step": 60756
    },
    {
      "epoch": 11.67,
      "learning_rate": 0.001,
      "loss": 2.7027,
      "step": 60768
    },
    {
      "epoch": 11.67,
      "learning_rate": 0.001,
      "loss": 2.7043,
      "step": 60780
    },
    {
      "epoch": 11.67,
      "learning_rate": 0.001,
      "loss": 2.709,
      "step": 60792
    },
    {
      "epoch": 11.68,
      "learning_rate": 0.001,
      "loss": 2.7027,
      "step": 60804
    },
    {
      "epoch": 11.68,
      "learning_rate": 0.001,
      "loss": 2.703,
      "step": 60816
    },
    {
      "epoch": 11.68,
      "learning_rate": 0.001,
      "loss": 2.7132,
      "step": 60828
    },
    {
      "epoch": 11.68,
      "learning_rate": 0.001,
      "loss": 2.6955,
      "step": 60840
    },
    {
      "epoch": 11.68,
      "learning_rate": 0.001,
      "loss": 2.7021,
      "step": 60852
    },
    {
      "epoch": 11.69,
      "learning_rate": 0.001,
      "loss": 2.6981,
      "step": 60864
    },
    {
      "epoch": 11.69,
      "learning_rate": 0.001,
      "loss": 2.704,
      "step": 60876
    },
    {
      "epoch": 11.69,
      "learning_rate": 0.001,
      "loss": 2.7075,
      "step": 60888
    },
    {
      "epoch": 11.69,
      "learning_rate": 0.001,
      "loss": 2.6954,
      "step": 60900
    },
    {
      "epoch": 11.7,
      "learning_rate": 0.001,
      "loss": 2.6977,
      "step": 60912
    },
    {
      "epoch": 11.7,
      "learning_rate": 0.001,
      "loss": 2.7029,
      "step": 60924
    },
    {
      "epoch": 11.7,
      "learning_rate": 0.001,
      "loss": 2.6989,
      "step": 60936
    },
    {
      "epoch": 11.7,
      "learning_rate": 0.001,
      "loss": 2.7017,
      "step": 60948
    },
    {
      "epoch": 11.71,
      "learning_rate": 0.001,
      "loss": 2.6938,
      "step": 60960
    },
    {
      "epoch": 11.71,
      "learning_rate": 0.001,
      "loss": 2.7032,
      "step": 60972
    },
    {
      "epoch": 11.71,
      "learning_rate": 0.001,
      "loss": 2.7023,
      "step": 60984
    },
    {
      "epoch": 11.71,
      "learning_rate": 0.001,
      "loss": 2.6931,
      "step": 60996
    },
    {
      "epoch": 11.71,
      "learning_rate": 0.001,
      "loss": 2.698,
      "step": 61008
    },
    {
      "epoch": 11.72,
      "learning_rate": 0.001,
      "loss": 2.7055,
      "step": 61020
    },
    {
      "epoch": 11.72,
      "learning_rate": 0.001,
      "loss": 2.694,
      "step": 61032
    },
    {
      "epoch": 11.72,
      "learning_rate": 0.001,
      "loss": 2.6919,
      "step": 61044
    },
    {
      "epoch": 11.72,
      "learning_rate": 0.001,
      "loss": 2.6991,
      "step": 61056
    },
    {
      "epoch": 11.73,
      "learning_rate": 0.001,
      "loss": 2.6986,
      "step": 61068
    },
    {
      "epoch": 11.73,
      "learning_rate": 0.001,
      "loss": 2.7055,
      "step": 61080
    },
    {
      "epoch": 11.73,
      "learning_rate": 0.001,
      "loss": 2.6917,
      "step": 61092
    },
    {
      "epoch": 11.73,
      "learning_rate": 0.001,
      "loss": 2.7058,
      "step": 61104
    },
    {
      "epoch": 11.74,
      "learning_rate": 0.001,
      "loss": 2.7035,
      "step": 61116
    },
    {
      "epoch": 11.74,
      "learning_rate": 0.001,
      "loss": 2.6925,
      "step": 61128
    },
    {
      "epoch": 11.74,
      "learning_rate": 0.001,
      "loss": 2.6927,
      "step": 61140
    },
    {
      "epoch": 11.74,
      "learning_rate": 0.001,
      "loss": 2.7005,
      "step": 61152
    },
    {
      "epoch": 11.74,
      "learning_rate": 0.001,
      "loss": 2.7083,
      "step": 61164
    },
    {
      "epoch": 11.75,
      "learning_rate": 0.001,
      "loss": 2.7073,
      "step": 61176
    },
    {
      "epoch": 11.75,
      "learning_rate": 0.001,
      "loss": 2.6965,
      "step": 61188
    },
    {
      "epoch": 11.75,
      "learning_rate": 0.001,
      "loss": 2.7074,
      "step": 61200
    },
    {
      "epoch": 11.75,
      "learning_rate": 0.001,
      "loss": 2.7072,
      "step": 61212
    },
    {
      "epoch": 11.76,
      "learning_rate": 0.001,
      "loss": 2.6894,
      "step": 61224
    },
    {
      "epoch": 11.76,
      "learning_rate": 0.001,
      "loss": 2.6974,
      "step": 61236
    },
    {
      "epoch": 11.76,
      "learning_rate": 0.001,
      "loss": 2.6879,
      "step": 61248
    },
    {
      "epoch": 11.76,
      "eval_ag_news_accuracy": 0.30709375,
      "eval_ag_news_bleu_score": 4.689805000042943,
      "eval_ag_news_bleu_score_sem": 0.1576112924153491,
      "eval_ag_news_emb_cos_sim": 0.7899467349052429,
      "eval_ag_news_emb_cos_sim_sem": 0.007180146990483527,
      "eval_ag_news_emb_top1_equal": 0.2578125,
      "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.720097303390503,
      "eval_ag_news_n_ngrams_match_1": 13.334,
      "eval_ag_news_n_ngrams_match_2": 2.868,
      "eval_ag_news_n_ngrams_match_3": 0.86,
      "eval_ag_news_num_pred_words": 46.058,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 41.26840946941555,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.32650227925978337,
      "eval_ag_news_runtime": 13.6229,
      "eval_ag_news_samples_per_second": 36.703,
      "eval_ag_news_steps_per_second": 0.073,
      "eval_ag_news_token_set_f1": 0.33495844107529343,
      "eval_ag_news_token_set_f1_sem": 0.004348004716266094,
      "eval_ag_news_token_set_precision": 0.315982404487058,
      "eval_ag_news_token_set_recall": 0.3709444094386155,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 61250
    },
    {
      "epoch": 11.76,
      "eval_anthropic_toxic_prompts_accuracy": 0.10734375,
      "eval_anthropic_toxic_prompts_bleu_score": 2.9840282458278726,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11763155063083452,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6570160388946533,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00968382059872296,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0546875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.020175758285348722,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.4051480293273926,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.916,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.788,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.656,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.062,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 30.118753852817456,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20254738923714144,
      "eval_anthropic_toxic_prompts_runtime": 14.6549,
      "eval_anthropic_toxic_prompts_samples_per_second": 34.118,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.068,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.33981432530372735,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006538309906611984,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4115871533611798,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3179448991673162,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 61250
    },
    {
      "epoch": 11.76,
      "eval_arxiv_accuracy": 0.33190625,
      "eval_arxiv_bleu_score": 3.9654714994858113,
      "eval_arxiv_bleu_score_sem": 0.11570126845297382,
      "eval_arxiv_emb_cos_sim": 0.7267646193504333,
      "eval_arxiv_emb_cos_sim_sem": 0.008554846138551802,
      "eval_arxiv_emb_top1_equal": 0.28125,
      "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.5788960456848145,
      "eval_arxiv_n_ngrams_match_1": 14.028,
      "eval_arxiv_n_ngrams_match_2": 2.65,
      "eval_arxiv_n_ngrams_match_3": 0.562,
      "eval_arxiv_num_pred_words": 39.778,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 35.8339599486153,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.33284767659463765,
      "eval_arxiv_runtime": 9.7037,
      "eval_arxiv_samples_per_second": 51.527,
      "eval_arxiv_steps_per_second": 0.103,
      "eval_arxiv_token_set_f1": 0.3288644919866805,
      "eval_arxiv_token_set_f1_sem": 0.004227176318217579,
      "eval_arxiv_token_set_precision": 0.2775316020689215,
      "eval_arxiv_token_set_recall": 0.4197042043157584,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 61250
    },
    {
      "epoch": 11.76,
      "eval_python_code_alpaca_accuracy": 0.15603125,
      "eval_python_code_alpaca_bleu_score": 4.129517836489619,
      "eval_python_code_alpaca_bleu_score_sem": 0.1286746757690754,
      "eval_python_code_alpaca_emb_cos_sim": 0.7468237280845642,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007903011540560919,
      "eval_python_code_alpaca_emb_top1_equal": 0.15625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.044473171234131,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.546,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.644,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.78,
      "eval_python_code_alpaca_num_pred_words": 44.426,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 20.99896542920991,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3143834940007143,
      "eval_python_code_alpaca_runtime": 12.1683,
      "eval_python_code_alpaca_samples_per_second": 41.09,
      "eval_python_code_alpaca_steps_per_second": 0.082,
      "eval_python_code_alpaca_token_set_f1": 0.4651287559114941,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005103564231543329,
      "eval_python_code_alpaca_token_set_precision": 0.5216447939587832,
      "eval_python_code_alpaca_token_set_recall": 0.44219869577637055,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 61250
    },
    {
      "epoch": 11.76,
      "eval_wikibio_accuracy": 0.30684375,
      "eval_wikibio_bleu_score": 5.5379691885995,
      "eval_wikibio_bleu_score_sem": 0.18660298241574955,
      "eval_wikibio_emb_cos_sim": 0.7317771315574646,
      "eval_wikibio_emb_cos_sim_sem": 0.008684989021308362,
      "eval_wikibio_emb_top1_equal": 0.15625,
      "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.9599239826202393,
      "eval_wikibio_n_ngrams_match_1": 9.894,
      "eval_wikibio_n_ngrams_match_2": 3.242,
      "eval_wikibio_n_ngrams_match_3": 1.138,
      "eval_wikibio_num_pred_words": 37.094,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 52.453338432193355,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3388163818003451,
      "eval_wikibio_runtime": 10.1905,
      "eval_wikibio_samples_per_second": 49.065,
      "eval_wikibio_steps_per_second": 0.098,
      "eval_wikibio_token_set_f1": 0.3122920384071842,
      "eval_wikibio_token_set_f1_sem": 0.0052821681371611915,
      "eval_wikibio_token_set_precision": 0.3227349334154516,
      "eval_wikibio_token_set_recall": 0.3209974433271776,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 61250
    },
    {
      "epoch": 11.76,
      "eval_nq_accuracy": 0.5088125,
      "eval_nq_bleu_score": 10.784771667148,
      "eval_nq_bleu_score_sem": 0.4582363601111698,
      "eval_nq_emb_cos_sim": 0.8136206865310669,
      "eval_nq_emb_cos_sim_sem": 0.007102759449716953,
      "eval_nq_emb_top1_equal": 0.234375,
      "eval_nq_emb_top1_equal_sem": 0.03758909358128201,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.322532892227173,
      "eval_nq_n_ngrams_match_1": 22.012,
      "eval_nq_n_ngrams_match_2": 7.862,
      "eval_nq_n_ngrams_match_3": 3.542,
      "eval_nq_num_pred_words": 48.514,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 10.201480861244011,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4232035793316797,
      "eval_nq_runtime": 15.5388,
      "eval_nq_samples_per_second": 32.177,
      "eval_nq_steps_per_second": 0.064,
      "eval_nq_token_set_f1": 0.4425579124455575,
      "eval_nq_token_set_f1_sem": 0.005151415693224257,
      "eval_nq_token_set_precision": 0.3955402571592704,
      "eval_nq_token_set_recall": 0.5140255837623238,
      "eval_nq_true_num_tokens": 64.0,
      "step": 61250
    },
    {
      "epoch": 11.76,
      "learning_rate": 0.001,
      "loss": 2.6992,
      "step": 61260
    },
    {
      "epoch": 11.76,
      "learning_rate": 0.001,
      "loss": 2.7053,
      "step": 61272
    },
    {
      "epoch": 11.77,
      "learning_rate": 0.001,
      "loss": 2.6998,
      "step": 61284
    },
    {
      "epoch": 11.77,
      "learning_rate": 0.001,
      "loss": 2.6949,
      "step": 61296
    },
    {
      "epoch": 11.77,
      "learning_rate": 0.001,
      "loss": 2.6952,
      "step": 61308
    },
    {
      "epoch": 11.77,
      "learning_rate": 0.001,
      "loss": 2.7109,
      "step": 61320
    },
    {
      "epoch": 11.78,
      "learning_rate": 0.001,
      "loss": 2.71,
      "step": 61332
    },
    {
      "epoch": 11.78,
      "learning_rate": 0.001,
      "loss": 2.7152,
      "step": 61344
    },
    {
      "epoch": 11.78,
      "learning_rate": 0.001,
      "loss": 2.6965,
      "step": 61356
    },
    {
      "epoch": 11.78,
      "learning_rate": 0.001,
      "loss": 2.7132,
      "step": 61368
    },
    {
      "epoch": 11.79,
      "learning_rate": 0.001,
      "loss": 2.6985,
      "step": 61380
    },
    {
      "epoch": 11.79,
      "learning_rate": 0.001,
      "loss": 2.7132,
      "step": 61392
    },
    {
      "epoch": 11.79,
      "learning_rate": 0.001,
      "loss": 2.6961,
      "step": 61404
    },
    {
      "epoch": 11.79,
      "learning_rate": 0.001,
      "loss": 2.696,
      "step": 61416
    },
    {
      "epoch": 11.79,
      "learning_rate": 0.001,
      "loss": 2.6998,
      "step": 61428
    },
    {
      "epoch": 11.8,
      "learning_rate": 0.001,
      "loss": 2.6926,
      "step": 61440
    },
    {
      "epoch": 11.8,
      "learning_rate": 0.001,
      "loss": 2.7006,
      "step": 61452
    },
    {
      "epoch": 11.8,
      "learning_rate": 0.001,
      "loss": 2.6966,
      "step": 61464
    },
    {
      "epoch": 11.8,
      "learning_rate": 0.001,
      "loss": 2.7035,
      "step": 61476
    },
    {
      "epoch": 11.81,
      "learning_rate": 0.001,
      "loss": 2.6908,
      "step": 61488
    },
    {
      "epoch": 11.81,
      "learning_rate": 0.001,
      "loss": 2.7116,
      "step": 61500
    },
    {
      "epoch": 11.81,
      "learning_rate": 0.001,
      "loss": 2.6934,
      "step": 61512
    },
    {
      "epoch": 11.81,
      "learning_rate": 0.001,
      "loss": 2.7128,
      "step": 61524
    },
    {
      "epoch": 11.82,
      "learning_rate": 0.001,
      "loss": 2.7034,
      "step": 61536
    },
    {
      "epoch": 11.82,
      "learning_rate": 0.001,
      "loss": 2.7041,
      "step": 61548
    },
    {
      "epoch": 11.82,
      "learning_rate": 0.001,
      "loss": 2.7115,
      "step": 61560
    },
    {
      "epoch": 11.82,
      "learning_rate": 0.001,
      "loss": 2.6964,
      "step": 61572
    },
    {
      "epoch": 11.82,
      "learning_rate": 0.001,
      "loss": 2.7105,
      "step": 61584
    },
    {
      "epoch": 11.83,
      "learning_rate": 0.001,
      "loss": 2.7027,
      "step": 61596
    },
    {
      "epoch": 11.83,
      "learning_rate": 0.001,
      "loss": 2.7099,
      "step": 61608
    },
    {
      "epoch": 11.83,
      "learning_rate": 0.001,
      "loss": 2.7063,
      "step": 61620
    },
    {
      "epoch": 11.83,
      "learning_rate": 0.001,
      "loss": 2.7002,
      "step": 61632
    },
    {
      "epoch": 11.84,
      "learning_rate": 0.001,
      "loss": 2.703,
      "step": 61644
    },
    {
      "epoch": 11.84,
      "learning_rate": 0.001,
      "loss": 2.7044,
      "step": 61656
    },
    {
      "epoch": 11.84,
      "learning_rate": 0.001,
      "loss": 2.7097,
      "step": 61668
    },
    {
      "epoch": 11.84,
      "learning_rate": 0.001,
      "loss": 2.7104,
      "step": 61680
    },
    {
      "epoch": 11.85,
      "learning_rate": 0.001,
      "loss": 2.7011,
      "step": 61692
    },
    {
      "epoch": 11.85,
      "learning_rate": 0.001,
      "loss": 2.7054,
      "step": 61704
    },
    {
      "epoch": 11.85,
      "learning_rate": 0.001,
      "loss": 2.7075,
      "step": 61716
    },
    {
      "epoch": 11.85,
      "learning_rate": 0.001,
      "loss": 2.7001,
      "step": 61728
    },
    {
      "epoch": 11.85,
      "learning_rate": 0.001,
      "loss": 2.716,
      "step": 61740
    },
    {
      "epoch": 11.86,
      "learning_rate": 0.001,
      "loss": 2.7019,
      "step": 61752
    },
    {
      "epoch": 11.86,
      "learning_rate": 0.001,
      "loss": 2.6996,
      "step": 61764
    },
    {
      "epoch": 11.86,
      "learning_rate": 0.001,
      "loss": 2.7065,
      "step": 61776
    },
    {
      "epoch": 11.86,
      "learning_rate": 0.001,
      "loss": 2.7055,
      "step": 61788
    },
    {
      "epoch": 11.87,
      "learning_rate": 0.001,
      "loss": 2.7105,
      "step": 61800
    },
    {
      "epoch": 11.87,
      "learning_rate": 0.001,
      "loss": 2.6977,
      "step": 61812
    },
    {
      "epoch": 11.87,
      "learning_rate": 0.001,
      "loss": 2.7003,
      "step": 61824
    },
    {
      "epoch": 11.87,
      "learning_rate": 0.001,
      "loss": 2.708,
      "step": 61836
    },
    {
      "epoch": 11.88,
      "learning_rate": 0.001,
      "loss": 2.7062,
      "step": 61848
    },
    {
      "epoch": 11.88,
      "learning_rate": 0.001,
      "loss": 2.7097,
      "step": 61860
    },
    {
      "epoch": 11.88,
      "learning_rate": 0.001,
      "loss": 2.7201,
      "step": 61872
    },
    {
      "epoch": 11.88,
      "eval_ag_news_accuracy": 0.30621875,
      "eval_ag_news_bleu_score": 4.41548510755679,
      "eval_ag_news_bleu_score_sem": 0.13996736875655358,
      "eval_ag_news_emb_cos_sim": 0.7804596424102783,
      "eval_ag_news_emb_cos_sim_sem": 0.007677897978674645,
      "eval_ag_news_emb_top1_equal": 0.21875,
      "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.7277870178222656,
      "eval_ag_news_n_ngrams_match_1": 13.118,
      "eval_ag_news_n_ngrams_match_2": 2.752,
      "eval_ag_news_n_ngrams_match_3": 0.738,
      "eval_ag_news_num_pred_words": 45.822,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 41.58697502257866,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3249208746053033,
      "eval_ag_news_runtime": 10.4504,
      "eval_ag_news_samples_per_second": 47.845,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.33131955345454434,
      "eval_ag_news_token_set_f1_sem": 0.004387695599378025,
      "eval_ag_news_token_set_precision": 0.30903356028252893,
      "eval_ag_news_token_set_recall": 0.3726080987666195,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 61875
    },
    {
      "epoch": 11.88,
      "eval_anthropic_toxic_prompts_accuracy": 0.1073125,
      "eval_anthropic_toxic_prompts_bleu_score": 2.894107461354934,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11269438968144171,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6332647800445557,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011704065244730883,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.40533709526062,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.76,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.73,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.632,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.328,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 30.124448821467606,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.19550674397571874,
      "eval_anthropic_toxic_prompts_runtime": 10.4005,
      "eval_anthropic_toxic_prompts_samples_per_second": 48.075,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.096,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.340139873600569,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00640672044392126,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4060256077091581,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3206711305859175,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 61875
    },
    {
      "epoch": 11.88,
      "eval_arxiv_accuracy": 0.33240625,
      "eval_arxiv_bleu_score": 3.6591203070377176,
      "eval_arxiv_bleu_score_sem": 0.09933155752252189,
      "eval_arxiv_emb_cos_sim": 0.7171277403831482,
      "eval_arxiv_emb_cos_sim_sem": 0.007953261120698031,
      "eval_arxiv_emb_top1_equal": 0.234375,
      "eval_arxiv_emb_top1_equal_sem": 0.03758909358128201,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.576629400253296,
      "eval_arxiv_n_ngrams_match_1": 13.694,
      "eval_arxiv_n_ngrams_match_2": 2.41,
      "eval_arxiv_n_ngrams_match_3": 0.468,
      "eval_arxiv_num_pred_words": 39.536,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 35.75282904923117,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3262480438072417,
      "eval_arxiv_runtime": 9.6108,
      "eval_arxiv_samples_per_second": 52.025,
      "eval_arxiv_steps_per_second": 0.104,
      "eval_arxiv_token_set_f1": 0.3218910054823966,
      "eval_arxiv_token_set_f1_sem": 0.004263475240578928,
      "eval_arxiv_token_set_precision": 0.27046558492367223,
      "eval_arxiv_token_set_recall": 0.4189519441963203,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 61875
    },
    {
      "epoch": 11.88,
      "eval_python_code_alpaca_accuracy": 0.1515625,
      "eval_python_code_alpaca_bleu_score": 4.19423223548987,
      "eval_python_code_alpaca_bleu_score_sem": 0.14827497792935423,
      "eval_python_code_alpaca_emb_cos_sim": 0.7249279022216797,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.01093777363561908,
      "eval_python_code_alpaca_emb_top1_equal": 0.1328125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.065692663192749,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.096,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.548,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.816,
      "eval_python_code_alpaca_num_pred_words": 43.448,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 21.449313993181665,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3050871355268122,
      "eval_python_code_alpaca_runtime": 9.6769,
      "eval_python_code_alpaca_samples_per_second": 51.67,
      "eval_python_code_alpaca_steps_per_second": 0.103,
      "eval_python_code_alpaca_token_set_f1": 0.44668352804230854,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005622410141082624,
      "eval_python_code_alpaca_token_set_precision": 0.49594354684894876,
      "eval_python_code_alpaca_token_set_recall": 0.43204885732612086,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 61875
    },
    {
      "epoch": 11.88,
      "eval_wikibio_accuracy": 0.31084375,
      "eval_wikibio_bleu_score": 5.605452686263078,
      "eval_wikibio_bleu_score_sem": 0.2040778845346227,
      "eval_wikibio_emb_cos_sim": 0.7090040445327759,
      "eval_wikibio_emb_cos_sim_sem": 0.011306478217659429,
      "eval_wikibio_emb_top1_equal": 0.1953125,
      "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.9229464530944824,
      "eval_wikibio_n_ngrams_match_1": 9.508,
      "eval_wikibio_n_ngrams_match_2": 3.138,
      "eval_wikibio_n_ngrams_match_3": 1.156,
      "eval_wikibio_num_pred_words": 35.69,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 50.549166317470224,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3254971435215501,
      "eval_wikibio_runtime": 9.7933,
      "eval_wikibio_samples_per_second": 51.055,
      "eval_wikibio_steps_per_second": 0.102,
      "eval_wikibio_token_set_f1": 0.3017743562233243,
      "eval_wikibio_token_set_f1_sem": 0.005772128999832789,
      "eval_wikibio_token_set_precision": 0.30794965832482085,
      "eval_wikibio_token_set_recall": 0.31451164666341436,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 61875
    },
    {
      "epoch": 11.88,
      "eval_nq_accuracy": 0.50828125,
      "eval_nq_bleu_score": 10.529073664813733,
      "eval_nq_bleu_score_sem": 0.45185408175615854,
      "eval_nq_emb_cos_sim": 0.8136910796165466,
      "eval_nq_emb_cos_sim_sem": 0.007126814763504546,
      "eval_nq_emb_top1_equal": 0.2578125,
      "eval_nq_emb_top1_equal_sem": 0.038815656435002115,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.3268489837646484,
      "eval_nq_n_ngrams_match_1": 21.9,
      "eval_nq_n_ngrams_match_2": 7.634,
      "eval_nq_n_ngrams_match_3": 3.426,
      "eval_nq_num_pred_words": 48.524,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 10.245606543200061,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4226750559246117,
      "eval_nq_runtime": 9.7738,
      "eval_nq_samples_per_second": 51.157,
      "eval_nq_steps_per_second": 0.102,
      "eval_nq_token_set_f1": 0.4407978412452672,
      "eval_nq_token_set_f1_sem": 0.004932712350449909,
      "eval_nq_token_set_precision": 0.3935349524457392,
      "eval_nq_token_set_recall": 0.5115663170727578,
      "eval_nq_true_num_tokens": 64.0,
      "step": 61875
    },
    {
      "epoch": 11.88,
      "learning_rate": 0.001,
      "loss": 2.693,
      "step": 61884
    },
    {
      "epoch": 11.88,
      "learning_rate": 0.001,
      "loss": 2.7015,
      "step": 61896
    },
    {
      "epoch": 11.89,
      "learning_rate": 0.001,
      "loss": 2.7013,
      "step": 61908
    },
    {
      "epoch": 11.89,
      "learning_rate": 0.001,
      "loss": 2.7005,
      "step": 61920
    },
    {
      "epoch": 11.89,
      "learning_rate": 0.001,
      "loss": 2.7085,
      "step": 61932
    },
    {
      "epoch": 11.89,
      "learning_rate": 0.001,
      "loss": 2.7053,
      "step": 61944
    },
    {
      "epoch": 11.9,
      "learning_rate": 0.001,
      "loss": 2.7046,
      "step": 61956
    },
    {
      "epoch": 11.9,
      "learning_rate": 0.001,
      "loss": 2.6943,
      "step": 61968
    },
    {
      "epoch": 11.9,
      "learning_rate": 0.001,
      "loss": 2.6963,
      "step": 61980
    },
    {
      "epoch": 11.9,
      "learning_rate": 0.001,
      "loss": 2.6982,
      "step": 61992
    },
    {
      "epoch": 11.91,
      "learning_rate": 0.001,
      "loss": 2.7007,
      "step": 62004
    },
    {
      "epoch": 11.91,
      "learning_rate": 0.001,
      "loss": 2.7096,
      "step": 62016
    },
    {
      "epoch": 11.91,
      "learning_rate": 0.001,
      "loss": 2.6999,
      "step": 62028
    },
    {
      "epoch": 11.91,
      "learning_rate": 0.001,
      "loss": 2.7005,
      "step": 62040
    },
    {
      "epoch": 11.91,
      "learning_rate": 0.001,
      "loss": 2.7057,
      "step": 62052
    },
    {
      "epoch": 11.92,
      "learning_rate": 0.001,
      "loss": 2.7108,
      "step": 62064
    },
    {
      "epoch": 11.92,
      "learning_rate": 0.001,
      "loss": 2.7008,
      "step": 62076
    },
    {
      "epoch": 11.92,
      "learning_rate": 0.001,
      "loss": 2.7008,
      "step": 62088
    },
    {
      "epoch": 11.92,
      "learning_rate": 0.001,
      "loss": 2.7064,
      "step": 62100
    },
    {
      "epoch": 11.93,
      "learning_rate": 0.001,
      "loss": 2.7059,
      "step": 62112
    },
    {
      "epoch": 11.93,
      "learning_rate": 0.001,
      "loss": 2.6984,
      "step": 62124
    },
    {
      "epoch": 11.93,
      "learning_rate": 0.001,
      "loss": 2.6981,
      "step": 62136
    },
    {
      "epoch": 11.93,
      "learning_rate": 0.001,
      "loss": 2.7013,
      "step": 62148
    },
    {
      "epoch": 11.94,
      "learning_rate": 0.001,
      "loss": 2.696,
      "step": 62160
    },
    {
      "epoch": 11.94,
      "learning_rate": 0.001,
      "loss": 2.6973,
      "step": 62172
    },
    {
      "epoch": 11.94,
      "learning_rate": 0.001,
      "loss": 2.6913,
      "step": 62184
    },
    {
      "epoch": 11.94,
      "learning_rate": 0.001,
      "loss": 2.703,
      "step": 62196
    },
    {
      "epoch": 11.94,
      "learning_rate": 0.001,
      "loss": 2.6943,
      "step": 62208
    },
    {
      "epoch": 11.95,
      "learning_rate": 0.001,
      "loss": 2.699,
      "step": 62220
    },
    {
      "epoch": 11.95,
      "learning_rate": 0.001,
      "loss": 2.6943,
      "step": 62232
    },
    {
      "epoch": 11.95,
      "learning_rate": 0.001,
      "loss": 2.6976,
      "step": 62244
    },
    {
      "epoch": 11.95,
      "learning_rate": 0.001,
      "loss": 2.7005,
      "step": 62256
    },
    {
      "epoch": 11.96,
      "learning_rate": 0.001,
      "loss": 2.6908,
      "step": 62268
    },
    {
      "epoch": 11.96,
      "learning_rate": 0.001,
      "loss": 2.6996,
      "step": 62280
    },
    {
      "epoch": 11.96,
      "learning_rate": 0.001,
      "loss": 2.6979,
      "step": 62292
    },
    {
      "epoch": 11.96,
      "learning_rate": 0.001,
      "loss": 2.7052,
      "step": 62304
    },
    {
      "epoch": 11.97,
      "learning_rate": 0.001,
      "loss": 2.6867,
      "step": 62316
    },
    {
      "epoch": 11.97,
      "learning_rate": 0.001,
      "loss": 2.7018,
      "step": 62328
    },
    {
      "epoch": 11.97,
      "learning_rate": 0.001,
      "loss": 2.6935,
      "step": 62340
    },
    {
      "epoch": 11.97,
      "learning_rate": 0.001,
      "loss": 2.6965,
      "step": 62352
    },
    {
      "epoch": 11.97,
      "learning_rate": 0.001,
      "loss": 2.6965,
      "step": 62364
    },
    {
      "epoch": 11.98,
      "learning_rate": 0.001,
      "loss": 2.7123,
      "step": 62376
    },
    {
      "epoch": 11.98,
      "learning_rate": 0.001,
      "loss": 2.7131,
      "step": 62388
    },
    {
      "epoch": 11.98,
      "learning_rate": 0.001,
      "loss": 2.7006,
      "step": 62400
    },
    {
      "epoch": 11.98,
      "learning_rate": 0.001,
      "loss": 2.7059,
      "step": 62412
    },
    {
      "epoch": 11.99,
      "learning_rate": 0.001,
      "loss": 2.695,
      "step": 62424
    },
    {
      "epoch": 11.99,
      "learning_rate": 0.001,
      "loss": 2.697,
      "step": 62436
    },
    {
      "epoch": 11.99,
      "learning_rate": 0.001,
      "loss": 2.6974,
      "step": 62448
    },
    {
      "epoch": 11.99,
      "learning_rate": 0.001,
      "loss": 2.6975,
      "step": 62460
    },
    {
      "epoch": 12.0,
      "learning_rate": 0.001,
      "loss": 2.7018,
      "step": 62472
    },
    {
      "epoch": 12.0,
      "learning_rate": 0.001,
      "loss": 2.7047,
      "step": 62484
    },
    {
      "epoch": 12.0,
      "learning_rate": 0.001,
      "loss": 2.6964,
      "step": 62496
    },
    {
      "epoch": 12.0,
      "eval_ag_news_accuracy": 0.307625,
      "eval_ag_news_bleu_score": 4.68007545837061,
      "eval_ag_news_bleu_score_sem": 0.16188845797945378,
      "eval_ag_news_emb_cos_sim": 0.7910676002502441,
      "eval_ag_news_emb_cos_sim_sem": 0.007531064285363262,
      "eval_ag_news_emb_top1_equal": 0.2578125,
      "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.719710111618042,
      "eval_ag_news_n_ngrams_match_1": 13.346,
      "eval_ag_news_n_ngrams_match_2": 2.89,
      "eval_ag_news_n_ngrams_match_3": 0.874,
      "eval_ag_news_num_pred_words": 46.434,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 41.25243377383498,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3275251362113679,
      "eval_ag_news_runtime": 10.14,
      "eval_ag_news_samples_per_second": 49.31,
      "eval_ag_news_steps_per_second": 0.099,
      "eval_ag_news_token_set_f1": 0.3330696441627122,
      "eval_ag_news_token_set_f1_sem": 0.004532917870886484,
      "eval_ag_news_token_set_precision": 0.31643165089656067,
      "eval_ag_news_token_set_recall": 0.36630463158880894,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 62500
    },
    {
      "epoch": 12.0,
      "eval_anthropic_toxic_prompts_accuracy": 0.107375,
      "eval_anthropic_toxic_prompts_bleu_score": 2.803321481154465,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10794850811278522,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6432708501815796,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010721092736157525,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.3977503776550293,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.78,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.694,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.582,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.292,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 29.896767902728254,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.19830859470201195,
      "eval_anthropic_toxic_prompts_runtime": 18.9495,
      "eval_anthropic_toxic_prompts_samples_per_second": 26.386,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.053,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.343403365396251,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006609813134501554,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.40842633033296516,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32896706560018374,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 62500
    },
    {
      "epoch": 12.0,
      "eval_arxiv_accuracy": 0.33053125,
      "eval_arxiv_bleu_score": 3.914448073952949,
      "eval_arxiv_bleu_score_sem": 0.10625763105012422,
      "eval_arxiv_emb_cos_sim": 0.734485924243927,
      "eval_arxiv_emb_cos_sim_sem": 0.008628724295635335,
      "eval_arxiv_emb_top1_equal": 0.2109375,
      "eval_arxiv_emb_top1_equal_sem": 0.03620184850179216,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.58801531791687,
      "eval_arxiv_n_ngrams_match_1": 14.176,
      "eval_arxiv_n_ngrams_match_2": 2.602,
      "eval_arxiv_n_ngrams_match_3": 0.55,
      "eval_arxiv_num_pred_words": 41.322,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 36.162234120341,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3328358601305305,
      "eval_arxiv_runtime": 9.933,
      "eval_arxiv_samples_per_second": 50.337,
      "eval_arxiv_steps_per_second": 0.101,
      "eval_arxiv_token_set_f1": 0.329522240663839,
      "eval_arxiv_token_set_f1_sem": 0.004276015650791637,
      "eval_arxiv_token_set_precision": 0.27988477502890413,
      "eval_arxiv_token_set_recall": 0.4206770548598357,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 62500
    },
    {
      "epoch": 12.0,
      "eval_python_code_alpaca_accuracy": 0.15496875,
      "eval_python_code_alpaca_bleu_score": 4.102021700501079,
      "eval_python_code_alpaca_bleu_score_sem": 0.11913653363901756,
      "eval_python_code_alpaca_emb_cos_sim": 0.7436040639877319,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007712727739157343,
      "eval_python_code_alpaca_emb_top1_equal": 0.09375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.025864720141013958,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.028447151184082,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.478,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.656,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.83,
      "eval_python_code_alpaca_num_pred_words": 44.92,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 20.66511785738537,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3096689734305772,
      "eval_python_code_alpaca_runtime": 9.5782,
      "eval_python_code_alpaca_samples_per_second": 52.202,
      "eval_python_code_alpaca_steps_per_second": 0.104,
      "eval_python_code_alpaca_token_set_f1": 0.46491441225281094,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005213825815394596,
      "eval_python_code_alpaca_token_set_precision": 0.5197417528040283,
      "eval_python_code_alpaca_token_set_recall": 0.4421537842720535,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 62500
    },
    {
      "epoch": 12.0,
      "eval_wikibio_accuracy": 0.3054375,
      "eval_wikibio_bleu_score": 5.592110166892952,
      "eval_wikibio_bleu_score_sem": 0.18362691976756512,
      "eval_wikibio_emb_cos_sim": 0.7340899705886841,
      "eval_wikibio_emb_cos_sim_sem": 0.009408251990813084,
      "eval_wikibio_emb_top1_equal": 0.1796875,
      "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.9633853435516357,
      "eval_wikibio_n_ngrams_match_1": 10.1,
      "eval_wikibio_n_ngrams_match_2": 3.288,
      "eval_wikibio_n_ngrams_match_3": 1.154,
      "eval_wikibio_num_pred_words": 37.666,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 52.63521295365857,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3465202796454441,
      "eval_wikibio_runtime": 10.7323,
      "eval_wikibio_samples_per_second": 46.588,
      "eval_wikibio_steps_per_second": 0.093,
      "eval_wikibio_token_set_f1": 0.3123881070814429,
      "eval_wikibio_token_set_f1_sem": 0.005081583372173606,
      "eval_wikibio_token_set_precision": 0.3261076819238875,
      "eval_wikibio_token_set_recall": 0.3126992653024321,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 62500
    },
    {
      "epoch": 12.0,
      "eval_nq_accuracy": 0.5085,
      "eval_nq_bleu_score": 10.936657635862593,
      "eval_nq_bleu_score_sem": 0.4680075246123519,
      "eval_nq_emb_cos_sim": 0.8158445358276367,
      "eval_nq_emb_cos_sim_sem": 0.007262077171266165,
      "eval_nq_emb_top1_equal": 0.2421875,
      "eval_nq_emb_top1_equal_sem": 0.038014990119662626,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.3249571323394775,
      "eval_nq_n_ngrams_match_1": 22.22,
      "eval_nq_n_ngrams_match_2": 7.848,
      "eval_nq_n_ngrams_match_3": 3.62,
      "eval_nq_num_pred_words": 48.78,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 10.226241701337134,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.42811813612744265,
      "eval_nq_runtime": 10.037,
      "eval_nq_samples_per_second": 49.816,
      "eval_nq_steps_per_second": 0.1,
      "eval_nq_token_set_f1": 0.44453237326395706,
      "eval_nq_token_set_f1_sem": 0.0050346187589386995,
      "eval_nq_token_set_precision": 0.39949567037331113,
      "eval_nq_token_set_recall": 0.5110357294680288,
      "eval_nq_true_num_tokens": 64.0,
      "step": 62500
    },
    {
      "epoch": 12.0,
      "learning_rate": 0.001,
      "loss": 2.6933,
      "step": 62508
    },
    {
      "epoch": 12.0,
      "learning_rate": 0.001,
      "loss": 2.6819,
      "step": 62520
    },
    {
      "epoch": 12.01,
      "learning_rate": 0.001,
      "loss": 2.6879,
      "step": 62532
    },
    {
      "epoch": 12.01,
      "learning_rate": 0.001,
      "loss": 2.6913,
      "step": 62544
    },
    {
      "epoch": 12.01,
      "learning_rate": 0.001,
      "loss": 2.6883,
      "step": 62556
    },
    {
      "epoch": 12.01,
      "learning_rate": 0.001,
      "loss": 2.6839,
      "step": 62568
    },
    {
      "epoch": 12.02,
      "learning_rate": 0.001,
      "loss": 2.6847,
      "step": 62580
    },
    {
      "epoch": 12.02,
      "learning_rate": 0.001,
      "loss": 2.6856,
      "step": 62592
    },
    {
      "epoch": 12.02,
      "learning_rate": 0.001,
      "loss": 2.6807,
      "step": 62604
    },
    {
      "epoch": 12.02,
      "learning_rate": 0.001,
      "loss": 2.6815,
      "step": 62616
    },
    {
      "epoch": 12.03,
      "learning_rate": 0.001,
      "loss": 2.6906,
      "step": 62628
    },
    {
      "epoch": 12.03,
      "learning_rate": 0.001,
      "loss": 2.668,
      "step": 62640
    },
    {
      "epoch": 12.03,
      "learning_rate": 0.001,
      "loss": 2.6897,
      "step": 62652
    },
    {
      "epoch": 12.03,
      "learning_rate": 0.001,
      "loss": 2.6769,
      "step": 62664
    },
    {
      "epoch": 12.03,
      "learning_rate": 0.001,
      "loss": 2.6791,
      "step": 62676
    },
    {
      "epoch": 12.04,
      "learning_rate": 0.001,
      "loss": 2.6782,
      "step": 62688
    },
    {
      "epoch": 12.04,
      "learning_rate": 0.001,
      "loss": 2.6762,
      "step": 62700
    },
    {
      "epoch": 12.04,
      "learning_rate": 0.001,
      "loss": 2.6817,
      "step": 62712
    },
    {
      "epoch": 12.04,
      "learning_rate": 0.001,
      "loss": 2.6954,
      "step": 62724
    },
    {
      "epoch": 12.05,
      "learning_rate": 0.001,
      "loss": 2.6888,
      "step": 62736
    },
    {
      "epoch": 12.05,
      "learning_rate": 0.001,
      "loss": 2.6779,
      "step": 62748
    },
    {
      "epoch": 12.05,
      "learning_rate": 0.001,
      "loss": 2.6978,
      "step": 62760
    },
    {
      "epoch": 12.05,
      "learning_rate": 0.001,
      "loss": 2.6851,
      "step": 62772
    },
    {
      "epoch": 12.06,
      "learning_rate": 0.001,
      "loss": 2.6896,
      "step": 62784
    },
    {
      "epoch": 12.06,
      "learning_rate": 0.001,
      "loss": 2.6792,
      "step": 62796
    },
    {
      "epoch": 12.06,
      "learning_rate": 0.001,
      "loss": 2.6835,
      "step": 62808
    },
    {
      "epoch": 12.06,
      "learning_rate": 0.001,
      "loss": 2.6743,
      "step": 62820
    },
    {
      "epoch": 12.06,
      "learning_rate": 0.001,
      "loss": 2.6724,
      "step": 62832
    },
    {
      "epoch": 12.07,
      "learning_rate": 0.001,
      "loss": 2.6899,
      "step": 62844
    },
    {
      "epoch": 12.07,
      "learning_rate": 0.001,
      "loss": 2.6925,
      "step": 62856
    },
    {
      "epoch": 12.07,
      "learning_rate": 0.001,
      "loss": 2.6993,
      "step": 62868
    },
    {
      "epoch": 12.07,
      "learning_rate": 0.001,
      "loss": 2.6914,
      "step": 62880
    },
    {
      "epoch": 12.08,
      "learning_rate": 0.001,
      "loss": 2.6853,
      "step": 62892
    },
    {
      "epoch": 12.08,
      "learning_rate": 0.001,
      "loss": 2.6852,
      "step": 62904
    },
    {
      "epoch": 12.08,
      "learning_rate": 0.001,
      "loss": 2.6885,
      "step": 62916
    },
    {
      "epoch": 12.08,
      "learning_rate": 0.001,
      "loss": 2.6994,
      "step": 62928
    },
    {
      "epoch": 12.09,
      "learning_rate": 0.001,
      "loss": 2.6807,
      "step": 62940
    },
    {
      "epoch": 12.09,
      "learning_rate": 0.001,
      "loss": 2.6899,
      "step": 62952
    },
    {
      "epoch": 12.09,
      "learning_rate": 0.001,
      "loss": 2.6872,
      "step": 62964
    },
    {
      "epoch": 12.09,
      "learning_rate": 0.001,
      "loss": 2.6832,
      "step": 62976
    },
    {
      "epoch": 12.09,
      "learning_rate": 0.001,
      "loss": 2.6891,
      "step": 62988
    },
    {
      "epoch": 12.1,
      "learning_rate": 0.001,
      "loss": 2.6916,
      "step": 63000
    },
    {
      "epoch": 12.1,
      "learning_rate": 0.001,
      "loss": 2.6931,
      "step": 63012
    },
    {
      "epoch": 12.1,
      "learning_rate": 0.001,
      "loss": 2.6927,
      "step": 63024
    },
    {
      "epoch": 12.1,
      "learning_rate": 0.001,
      "loss": 2.6822,
      "step": 63036
    },
    {
      "epoch": 12.11,
      "learning_rate": 0.001,
      "loss": 2.6804,
      "step": 63048
    },
    {
      "epoch": 12.11,
      "learning_rate": 0.001,
      "loss": 2.6849,
      "step": 63060
    },
    {
      "epoch": 12.11,
      "learning_rate": 0.001,
      "loss": 2.6888,
      "step": 63072
    },
    {
      "epoch": 12.11,
      "learning_rate": 0.001,
      "loss": 2.6885,
      "step": 63084
    },
    {
      "epoch": 12.12,
      "learning_rate": 0.001,
      "loss": 2.6834,
      "step": 63096
    },
    {
      "epoch": 12.12,
      "learning_rate": 0.001,
      "loss": 2.6814,
      "step": 63108
    },
    {
      "epoch": 12.12,
      "learning_rate": 0.001,
      "loss": 2.6914,
      "step": 63120
    },
    {
      "epoch": 12.12,
      "eval_ag_news_accuracy": 0.30765625,
      "eval_ag_news_bleu_score": 4.500774113097772,
      "eval_ag_news_bleu_score_sem": 0.14991835824230856,
      "eval_ag_news_emb_cos_sim": 0.7676204442977905,
      "eval_ag_news_emb_cos_sim_sem": 0.009299637586566985,
      "eval_ag_news_emb_top1_equal": 0.234375,
      "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.72332501411438,
      "eval_ag_news_n_ngrams_match_1": 13.008,
      "eval_ag_news_n_ngrams_match_2": 2.758,
      "eval_ag_news_n_ngrams_match_3": 0.82,
      "eval_ag_news_num_pred_words": 46.208,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 41.401827158239755,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3203681514124064,
      "eval_ag_news_runtime": 9.9019,
      "eval_ag_news_samples_per_second": 50.495,
      "eval_ag_news_steps_per_second": 0.101,
      "eval_ag_news_token_set_f1": 0.32805322593426034,
      "eval_ag_news_token_set_f1_sem": 0.004449823759491081,
      "eval_ag_news_token_set_precision": 0.30871942973786576,
      "eval_ag_news_token_set_recall": 0.36458571455171096,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 63125
    },
    {
      "epoch": 12.12,
      "eval_anthropic_toxic_prompts_accuracy": 0.10815625,
      "eval_anthropic_toxic_prompts_bleu_score": 2.7453645696437197,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1045175466995382,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6418386697769165,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00901131099590396,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.3990821838378906,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.688,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.646,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.57,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.066,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 29.936611128906975,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.19742908877305798,
      "eval_anthropic_toxic_prompts_runtime": 9.6915,
      "eval_anthropic_toxic_prompts_samples_per_second": 51.592,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.103,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3343427336566926,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006309426140482984,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.40185734774643495,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.31494107345323447,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 63125
    },
    {
      "epoch": 12.12,
      "eval_arxiv_accuracy": 0.334375,
      "eval_arxiv_bleu_score": 4.050790247736062,
      "eval_arxiv_bleu_score_sem": 0.11821522845329553,
      "eval_arxiv_emb_cos_sim": 0.7343416213989258,
      "eval_arxiv_emb_cos_sim_sem": 0.007763726742944541,
      "eval_arxiv_emb_top1_equal": 0.2109375,
      "eval_arxiv_emb_top1_equal_sem": 0.03620184850179216,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.566161870956421,
      "eval_arxiv_n_ngrams_match_1": 14.202,
      "eval_arxiv_n_ngrams_match_2": 2.712,
      "eval_arxiv_n_ngrams_match_3": 0.584,
      "eval_arxiv_num_pred_words": 40.534,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 35.38053715119034,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.33711308866143963,
      "eval_arxiv_runtime": 10.412,
      "eval_arxiv_samples_per_second": 48.022,
      "eval_arxiv_steps_per_second": 0.096,
      "eval_arxiv_token_set_f1": 0.3304784644388723,
      "eval_arxiv_token_set_f1_sem": 0.004335269346582855,
      "eval_arxiv_token_set_precision": 0.27954310258673626,
      "eval_arxiv_token_set_recall": 0.4211488600504229,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 63125
    },
    {
      "epoch": 12.12,
      "eval_python_code_alpaca_accuracy": 0.15228125,
      "eval_python_code_alpaca_bleu_score": 3.9767444781405454,
      "eval_python_code_alpaca_bleu_score_sem": 0.12338299527737924,
      "eval_python_code_alpaca_emb_cos_sim": 0.7335104942321777,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008337650257285004,
      "eval_python_code_alpaca_emb_top1_equal": 0.0859375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02487009666300537,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.047879695892334,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.212,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.51,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.706,
      "eval_python_code_alpaca_num_pred_words": 43.646,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 21.07062090151464,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.30927741127756464,
      "eval_python_code_alpaca_runtime": 9.7784,
      "eval_python_code_alpaca_samples_per_second": 51.133,
      "eval_python_code_alpaca_steps_per_second": 0.102,
      "eval_python_code_alpaca_token_set_f1": 0.45001914824801587,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005346833320944668,
      "eval_python_code_alpaca_token_set_precision": 0.49766660024009,
      "eval_python_code_alpaca_token_set_recall": 0.43293383592227264,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 63125
    },
    {
      "epoch": 12.12,
      "eval_wikibio_accuracy": 0.3104375,
      "eval_wikibio_bleu_score": 5.333561104118382,
      "eval_wikibio_bleu_score_sem": 0.19206275419922791,
      "eval_wikibio_emb_cos_sim": 0.7087751626968384,
      "eval_wikibio_emb_cos_sim_sem": 0.010300922900818327,
      "eval_wikibio_emb_top1_equal": 0.1875,
      "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.9345104694366455,
      "eval_wikibio_n_ngrams_match_1": 9.556,
      "eval_wikibio_n_ngrams_match_2": 3.09,
      "eval_wikibio_n_ngrams_match_3": 1.082,
      "eval_wikibio_num_pred_words": 36.578,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 51.13711064981968,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.32927955702236456,
      "eval_wikibio_runtime": 10.4402,
      "eval_wikibio_samples_per_second": 47.892,
      "eval_wikibio_steps_per_second": 0.096,
      "eval_wikibio_token_set_f1": 0.3007899495159135,
      "eval_wikibio_token_set_f1_sem": 0.005768116716028562,
      "eval_wikibio_token_set_precision": 0.3113908594225259,
      "eval_wikibio_token_set_recall": 0.3073077821857895,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 63125
    },
    {
      "epoch": 12.12,
      "eval_nq_accuracy": 0.50765625,
      "eval_nq_bleu_score": 10.826749900962302,
      "eval_nq_bleu_score_sem": 0.45546872919723896,
      "eval_nq_emb_cos_sim": 0.8152284622192383,
      "eval_nq_emb_cos_sim_sem": 0.007648327354632697,
      "eval_nq_emb_top1_equal": 0.3046875,
      "eval_nq_emb_top1_equal_sem": 0.04084279867618665,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.325967788696289,
      "eval_nq_n_ngrams_match_1": 22.304,
      "eval_nq_n_ngrams_match_2": 7.932,
      "eval_nq_n_ngrams_match_3": 3.534,
      "eval_nq_num_pred_words": 48.744,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 10.236582141954736,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.43105537576589414,
      "eval_nq_runtime": 10.9677,
      "eval_nq_samples_per_second": 45.589,
      "eval_nq_steps_per_second": 0.091,
      "eval_nq_token_set_f1": 0.4460166092159248,
      "eval_nq_token_set_f1_sem": 0.004898522956187676,
      "eval_nq_token_set_precision": 0.40118708490219224,
      "eval_nq_token_set_recall": 0.5097889655947345,
      "eval_nq_true_num_tokens": 64.0,
      "step": 63125
    },
    {
      "epoch": 12.12,
      "learning_rate": 0.001,
      "loss": 2.6712,
      "step": 63132
    },
    {
      "epoch": 12.12,
      "learning_rate": 0.001,
      "loss": 2.6883,
      "step": 63144
    },
    {
      "epoch": 12.13,
      "learning_rate": 0.001,
      "loss": 2.6891,
      "step": 63156
    },
    {
      "epoch": 12.13,
      "learning_rate": 0.001,
      "loss": 2.6835,
      "step": 63168
    },
    {
      "epoch": 12.13,
      "learning_rate": 0.001,
      "loss": 2.6864,
      "step": 63180
    },
    {
      "epoch": 12.13,
      "learning_rate": 0.001,
      "loss": 2.6788,
      "step": 63192
    },
    {
      "epoch": 12.14,
      "learning_rate": 0.001,
      "loss": 2.674,
      "step": 63204
    },
    {
      "epoch": 12.14,
      "learning_rate": 0.001,
      "loss": 2.6813,
      "step": 63216
    },
    {
      "epoch": 12.14,
      "learning_rate": 0.001,
      "loss": 2.6812,
      "step": 63228
    },
    {
      "epoch": 12.14,
      "learning_rate": 0.001,
      "loss": 2.6826,
      "step": 63240
    },
    {
      "epoch": 12.15,
      "learning_rate": 0.001,
      "loss": 2.6801,
      "step": 63252
    },
    {
      "epoch": 12.15,
      "learning_rate": 0.001,
      "loss": 2.6855,
      "step": 63264
    },
    {
      "epoch": 12.15,
      "learning_rate": 0.001,
      "loss": 2.6982,
      "step": 63276
    },
    {
      "epoch": 12.15,
      "learning_rate": 0.001,
      "loss": 2.6845,
      "step": 63288
    },
    {
      "epoch": 12.15,
      "learning_rate": 0.001,
      "loss": 2.6793,
      "step": 63300
    },
    {
      "epoch": 12.16,
      "learning_rate": 0.001,
      "loss": 2.6855,
      "step": 63312
    },
    {
      "epoch": 12.16,
      "learning_rate": 0.001,
      "loss": 2.6841,
      "step": 63324
    },
    {
      "epoch": 12.16,
      "learning_rate": 0.001,
      "loss": 2.6852,
      "step": 63336
    },
    {
      "epoch": 12.16,
      "learning_rate": 0.001,
      "loss": 2.6797,
      "step": 63348
    },
    {
      "epoch": 12.17,
      "learning_rate": 0.001,
      "loss": 2.6775,
      "step": 63360
    },
    {
      "epoch": 12.17,
      "learning_rate": 0.001,
      "loss": 2.6876,
      "step": 63372
    },
    {
      "epoch": 12.17,
      "learning_rate": 0.001,
      "loss": 2.6904,
      "step": 63384
    },
    {
      "epoch": 12.17,
      "learning_rate": 0.001,
      "loss": 2.6826,
      "step": 63396
    },
    {
      "epoch": 12.18,
      "learning_rate": 0.001,
      "loss": 2.6827,
      "step": 63408
    },
    {
      "epoch": 12.18,
      "learning_rate": 0.001,
      "loss": 2.6751,
      "step": 63420
    },
    {
      "epoch": 12.18,
      "learning_rate": 0.001,
      "loss": 2.6682,
      "step": 63432
    },
    {
      "epoch": 12.18,
      "learning_rate": 0.001,
      "loss": 2.6868,
      "step": 63444
    },
    {
      "epoch": 12.18,
      "learning_rate": 0.001,
      "loss": 2.6698,
      "step": 63456
    },
    {
      "epoch": 12.19,
      "learning_rate": 0.001,
      "loss": 2.6884,
      "step": 63468
    },
    {
      "epoch": 12.19,
      "learning_rate": 0.001,
      "loss": 2.678,
      "step": 63480
    },
    {
      "epoch": 12.19,
      "learning_rate": 0.001,
      "loss": 2.6907,
      "step": 63492
    },
    {
      "epoch": 12.19,
      "learning_rate": 0.001,
      "loss": 2.693,
      "step": 63504
    },
    {
      "epoch": 12.2,
      "learning_rate": 0.001,
      "loss": 2.6939,
      "step": 63516
    },
    {
      "epoch": 12.2,
      "learning_rate": 0.001,
      "loss": 2.682,
      "step": 63528
    },
    {
      "epoch": 12.2,
      "learning_rate": 0.001,
      "loss": 2.6824,
      "step": 63540
    },
    {
      "epoch": 12.2,
      "learning_rate": 0.001,
      "loss": 2.6738,
      "step": 63552
    },
    {
      "epoch": 12.21,
      "learning_rate": 0.001,
      "loss": 2.6951,
      "step": 63564
    },
    {
      "epoch": 12.21,
      "learning_rate": 0.001,
      "loss": 2.6999,
      "step": 63576
    },
    {
      "epoch": 12.21,
      "learning_rate": 0.001,
      "loss": 2.6889,
      "step": 63588
    },
    {
      "epoch": 12.21,
      "learning_rate": 0.001,
      "loss": 2.685,
      "step": 63600
    },
    {
      "epoch": 12.21,
      "learning_rate": 0.001,
      "loss": 2.6817,
      "step": 63612
    },
    {
      "epoch": 12.22,
      "learning_rate": 0.001,
      "loss": 2.6827,
      "step": 63624
    },
    {
      "epoch": 12.22,
      "learning_rate": 0.001,
      "loss": 2.6764,
      "step": 63636
    },
    {
      "epoch": 12.22,
      "learning_rate": 0.001,
      "loss": 2.6874,
      "step": 63648
    },
    {
      "epoch": 12.22,
      "learning_rate": 0.001,
      "loss": 2.6754,
      "step": 63660
    },
    {
      "epoch": 12.23,
      "learning_rate": 0.001,
      "loss": 2.6842,
      "step": 63672
    },
    {
      "epoch": 12.23,
      "learning_rate": 0.001,
      "loss": 2.6911,
      "step": 63684
    },
    {
      "epoch": 12.23,
      "learning_rate": 0.001,
      "loss": 2.688,
      "step": 63696
    },
    {
      "epoch": 12.23,
      "learning_rate": 0.001,
      "loss": 2.6793,
      "step": 63708
    },
    {
      "epoch": 12.24,
      "learning_rate": 0.001,
      "loss": 2.6845,
      "step": 63720
    },
    {
      "epoch": 12.24,
      "learning_rate": 0.001,
      "loss": 2.6913,
      "step": 63732
    },
    {
      "epoch": 12.24,
      "learning_rate": 0.001,
      "loss": 2.681,
      "step": 63744
    },
    {
      "epoch": 12.24,
      "eval_ag_news_accuracy": 0.308125,
      "eval_ag_news_bleu_score": 4.526321771088117,
      "eval_ag_news_bleu_score_sem": 0.14523036454130234,
      "eval_ag_news_emb_cos_sim": 0.7819414734840393,
      "eval_ag_news_emb_cos_sim_sem": 0.008212183699079629,
      "eval_ag_news_emb_top1_equal": 0.2109375,
      "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.7093119621276855,
      "eval_ag_news_n_ngrams_match_1": 13.472,
      "eval_ag_news_n_ngrams_match_2": 2.864,
      "eval_ag_news_n_ngrams_match_3": 0.84,
      "eval_ag_news_num_pred_words": 47.08,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 40.825707228670716,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3281437382031718,
      "eval_ag_news_runtime": 10.5789,
      "eval_ag_news_samples_per_second": 47.264,
      "eval_ag_news_steps_per_second": 0.095,
      "eval_ag_news_token_set_f1": 0.33616763176934533,
      "eval_ag_news_token_set_f1_sem": 0.004305632092358061,
      "eval_ag_news_token_set_precision": 0.3182175047987883,
      "eval_ag_news_token_set_recall": 0.37064655863085927,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 63750
    },
    {
      "epoch": 12.24,
      "eval_anthropic_toxic_prompts_accuracy": 0.10715625,
      "eval_anthropic_toxic_prompts_bleu_score": 2.853472604684086,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12340967297437792,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6327046751976013,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010196941357185888,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.4093167781829834,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.742,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.678,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.604,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.192,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 30.24457344640401,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.19734087615842322,
      "eval_anthropic_toxic_prompts_runtime": 10.112,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.446,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.099,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.33552012610640597,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00636690026165165,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.39787839870433245,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3181068143485201,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 63750
    },
    {
      "epoch": 12.24,
      "eval_arxiv_accuracy": 0.3333125,
      "eval_arxiv_bleu_score": 3.912833552030698,
      "eval_arxiv_bleu_score_sem": 0.11089804161981101,
      "eval_arxiv_emb_cos_sim": 0.734173059463501,
      "eval_arxiv_emb_cos_sim_sem": 0.008368075013553807,
      "eval_arxiv_emb_top1_equal": 0.28125,
      "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.5636889934539795,
      "eval_arxiv_n_ngrams_match_1": 14.122,
      "eval_arxiv_n_ngrams_match_2": 2.602,
      "eval_arxiv_n_ngrams_match_3": 0.548,
      "eval_arxiv_num_pred_words": 39.988,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 35.29315350590027,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3341879957728653,
      "eval_arxiv_runtime": 10.7211,
      "eval_arxiv_samples_per_second": 46.637,
      "eval_arxiv_steps_per_second": 0.093,
      "eval_arxiv_token_set_f1": 0.3280883759585713,
      "eval_arxiv_token_set_f1_sem": 0.004342192611877894,
      "eval_arxiv_token_set_precision": 0.2769834131744744,
      "eval_arxiv_token_set_recall": 0.42032526692521877,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 63750
    },
    {
      "epoch": 12.24,
      "eval_python_code_alpaca_accuracy": 0.152375,
      "eval_python_code_alpaca_bleu_score": 4.195882689869561,
      "eval_python_code_alpaca_bleu_score_sem": 0.12388838000149487,
      "eval_python_code_alpaca_emb_cos_sim": 0.7241489887237549,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010407032860300567,
      "eval_python_code_alpaca_emb_top1_equal": 0.125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.031872034072876,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.258,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.616,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.816,
      "eval_python_code_alpaca_num_pred_words": 42.832,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 20.736014803498136,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.31347888850576733,
      "eval_python_code_alpaca_runtime": 9.7159,
      "eval_python_code_alpaca_samples_per_second": 51.462,
      "eval_python_code_alpaca_steps_per_second": 0.103,
      "eval_python_code_alpaca_token_set_f1": 0.4565831525685715,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005788586833613359,
      "eval_python_code_alpaca_token_set_precision": 0.5025236524526253,
      "eval_python_code_alpaca_token_set_recall": 0.44148507126294895,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 63750
    },
    {
      "epoch": 12.24,
      "eval_wikibio_accuracy": 0.3104375,
      "eval_wikibio_bleu_score": 5.340486692945748,
      "eval_wikibio_bleu_score_sem": 0.19693909310777077,
      "eval_wikibio_emb_cos_sim": 0.7221648693084717,
      "eval_wikibio_emb_cos_sim_sem": 0.010366201783716621,
      "eval_wikibio_emb_top1_equal": 0.1796875,
      "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.9429478645324707,
      "eval_wikibio_n_ngrams_match_1": 9.544,
      "eval_wikibio_n_ngrams_match_2": 3.122,
      "eval_wikibio_n_ngrams_match_3": 1.092,
      "eval_wikibio_num_pred_words": 36.26,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 51.570400002688736,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.329705227825844,
      "eval_wikibio_runtime": 10.2723,
      "eval_wikibio_samples_per_second": 48.674,
      "eval_wikibio_steps_per_second": 0.097,
      "eval_wikibio_token_set_f1": 0.3011053410463612,
      "eval_wikibio_token_set_f1_sem": 0.005862336197062749,
      "eval_wikibio_token_set_precision": 0.30746018767556144,
      "eval_wikibio_token_set_recall": 0.31231974359214926,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 63750
    },
    {
      "epoch": 12.24,
      "eval_nq_accuracy": 0.50884375,
      "eval_nq_bleu_score": 10.856529369101684,
      "eval_nq_bleu_score_sem": 0.4462073304957313,
      "eval_nq_emb_cos_sim": 0.8193342685699463,
      "eval_nq_emb_cos_sim_sem": 0.007123159182940734,
      "eval_nq_emb_top1_equal": 0.21875,
      "eval_nq_emb_top1_equal_sem": 0.03668319712192295,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.320971965789795,
      "eval_nq_n_ngrams_match_1": 22.382,
      "eval_nq_n_ngrams_match_2": 7.954,
      "eval_nq_n_ngrams_match_3": 3.542,
      "eval_nq_num_pred_words": 48.964,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 10.185569521513363,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.42934264662724003,
      "eval_nq_runtime": 12.2828,
      "eval_nq_samples_per_second": 40.707,
      "eval_nq_steps_per_second": 0.081,
      "eval_nq_token_set_f1": 0.4469274221234845,
      "eval_nq_token_set_f1_sem": 0.004985473419810356,
      "eval_nq_token_set_precision": 0.401762783571906,
      "eval_nq_token_set_recall": 0.5118534430117191,
      "eval_nq_true_num_tokens": 64.0,
      "step": 63750
    },
    {
      "epoch": 12.24,
      "learning_rate": 0.001,
      "loss": 2.6862,
      "step": 63756
    },
    {
      "epoch": 12.24,
      "learning_rate": 0.001,
      "loss": 2.6789,
      "step": 63768
    },
    {
      "epoch": 12.25,
      "learning_rate": 0.001,
      "loss": 2.6905,
      "step": 63780
    },
    {
      "epoch": 12.25,
      "learning_rate": 0.001,
      "loss": 2.6861,
      "step": 63792
    },
    {
      "epoch": 12.25,
      "learning_rate": 0.001,
      "loss": 2.6941,
      "step": 63804
    },
    {
      "epoch": 12.25,
      "learning_rate": 0.001,
      "loss": 2.6741,
      "step": 63816
    },
    {
      "epoch": 12.26,
      "learning_rate": 0.001,
      "loss": 2.6913,
      "step": 63828
    },
    {
      "epoch": 12.26,
      "learning_rate": 0.001,
      "loss": 2.6862,
      "step": 63840
    },
    {
      "epoch": 12.26,
      "learning_rate": 0.001,
      "loss": 2.6954,
      "step": 63852
    },
    {
      "epoch": 12.26,
      "learning_rate": 0.001,
      "loss": 2.6804,
      "step": 63864
    },
    {
      "epoch": 12.26,
      "learning_rate": 0.001,
      "loss": 2.6827,
      "step": 63876
    },
    {
      "epoch": 12.27,
      "learning_rate": 0.001,
      "loss": 2.6885,
      "step": 63888
    },
    {
      "epoch": 12.27,
      "learning_rate": 0.001,
      "loss": 2.6724,
      "step": 63900
    },
    {
      "epoch": 12.27,
      "learning_rate": 0.001,
      "loss": 2.6847,
      "step": 63912
    },
    {
      "epoch": 12.27,
      "learning_rate": 0.001,
      "loss": 2.6827,
      "step": 63924
    },
    {
      "epoch": 12.28,
      "learning_rate": 0.001,
      "loss": 2.6838,
      "step": 63936
    },
    {
      "epoch": 12.28,
      "learning_rate": 0.001,
      "loss": 2.676,
      "step": 63948
    },
    {
      "epoch": 12.28,
      "learning_rate": 0.001,
      "loss": 2.6865,
      "step": 63960
    },
    {
      "epoch": 12.28,
      "learning_rate": 0.001,
      "loss": 2.6863,
      "step": 63972
    },
    {
      "epoch": 12.29,
      "learning_rate": 0.001,
      "loss": 2.6908,
      "step": 63984
    },
    {
      "epoch": 12.29,
      "learning_rate": 0.001,
      "loss": 2.6884,
      "step": 63996
    },
    {
      "epoch": 12.29,
      "learning_rate": 0.001,
      "loss": 2.6836,
      "step": 64008
    },
    {
      "epoch": 12.29,
      "learning_rate": 0.001,
      "loss": 2.6993,
      "step": 64020
    },
    {
      "epoch": 12.29,
      "learning_rate": 0.001,
      "loss": 2.6807,
      "step": 64032
    },
    {
      "epoch": 12.3,
      "learning_rate": 0.001,
      "loss": 2.6832,
      "step": 64044
    },
    {
      "epoch": 12.3,
      "learning_rate": 0.001,
      "loss": 2.6927,
      "step": 64056
    },
    {
      "epoch": 12.3,
      "learning_rate": 0.001,
      "loss": 2.6902,
      "step": 64068
    },
    {
      "epoch": 12.3,
      "learning_rate": 0.001,
      "loss": 2.6935,
      "step": 64080
    },
    {
      "epoch": 12.31,
      "learning_rate": 0.001,
      "loss": 2.6822,
      "step": 64092
    },
    {
      "epoch": 12.31,
      "learning_rate": 0.001,
      "loss": 2.6848,
      "step": 64104
    },
    {
      "epoch": 12.31,
      "learning_rate": 0.001,
      "loss": 2.6864,
      "step": 64116
    },
    {
      "epoch": 12.31,
      "learning_rate": 0.001,
      "loss": 2.6906,
      "step": 64128
    },
    {
      "epoch": 12.32,
      "learning_rate": 0.001,
      "loss": 2.6806,
      "step": 64140
    },
    {
      "epoch": 12.32,
      "learning_rate": 0.001,
      "loss": 2.6799,
      "step": 64152
    },
    {
      "epoch": 12.32,
      "learning_rate": 0.001,
      "loss": 2.6771,
      "step": 64164
    },
    {
      "epoch": 12.32,
      "learning_rate": 0.001,
      "loss": 2.6919,
      "step": 64176
    },
    {
      "epoch": 12.32,
      "learning_rate": 0.001,
      "loss": 2.6954,
      "step": 64188
    },
    {
      "epoch": 12.33,
      "learning_rate": 0.001,
      "loss": 2.6839,
      "step": 64200
    },
    {
      "epoch": 12.33,
      "learning_rate": 0.001,
      "loss": 2.6893,
      "step": 64212
    },
    {
      "epoch": 12.33,
      "learning_rate": 0.001,
      "loss": 2.6927,
      "step": 64224
    },
    {
      "epoch": 12.33,
      "learning_rate": 0.001,
      "loss": 2.6867,
      "step": 64236
    },
    {
      "epoch": 12.34,
      "learning_rate": 0.001,
      "loss": 2.6939,
      "step": 64248
    },
    {
      "epoch": 12.34,
      "learning_rate": 0.001,
      "loss": 2.6916,
      "step": 64260
    },
    {
      "epoch": 12.34,
      "learning_rate": 0.001,
      "loss": 2.6978,
      "step": 64272
    },
    {
      "epoch": 12.34,
      "learning_rate": 0.001,
      "loss": 2.6904,
      "step": 64284
    },
    {
      "epoch": 12.35,
      "learning_rate": 0.001,
      "loss": 2.6879,
      "step": 64296
    },
    {
      "epoch": 12.35,
      "learning_rate": 0.001,
      "loss": 2.6893,
      "step": 64308
    },
    {
      "epoch": 12.35,
      "learning_rate": 0.001,
      "loss": 2.687,
      "step": 64320
    },
    {
      "epoch": 12.35,
      "learning_rate": 0.001,
      "loss": 2.7007,
      "step": 64332
    },
    {
      "epoch": 12.35,
      "learning_rate": 0.001,
      "loss": 2.6968,
      "step": 64344
    },
    {
      "epoch": 12.36,
      "learning_rate": 0.001,
      "loss": 2.6867,
      "step": 64356
    },
    {
      "epoch": 12.36,
      "learning_rate": 0.001,
      "loss": 2.6807,
      "step": 64368
    },
    {
      "epoch": 12.36,
      "eval_ag_news_accuracy": 0.3079375,
      "eval_ag_news_bleu_score": 4.3634836705640465,
      "eval_ag_news_bleu_score_sem": 0.14205787801373654,
      "eval_ag_news_emb_cos_sim": 0.77836012840271,
      "eval_ag_news_emb_cos_sim_sem": 0.008324039459394018,
      "eval_ag_news_emb_top1_equal": 0.2578125,
      "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.7089154720306396,
      "eval_ag_news_n_ngrams_match_1": 13.274,
      "eval_ag_news_n_ngrams_match_2": 2.708,
      "eval_ag_news_n_ngrams_match_3": 0.692,
      "eval_ag_news_num_pred_words": 46.342,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 40.80952344861593,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.325656206203789,
      "eval_ag_news_runtime": 10.1924,
      "eval_ag_news_samples_per_second": 49.056,
      "eval_ag_news_steps_per_second": 0.098,
      "eval_ag_news_token_set_f1": 0.33253575499116284,
      "eval_ag_news_token_set_f1_sem": 0.004387361291739367,
      "eval_ag_news_token_set_precision": 0.3129516059175739,
      "eval_ag_news_token_set_recall": 0.3715027590816579,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 64375
    },
    {
      "epoch": 12.36,
      "eval_anthropic_toxic_prompts_accuracy": 0.1069375,
      "eval_anthropic_toxic_prompts_bleu_score": 2.8773458552155606,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12007661660760621,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6492405533790588,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009462986917594066,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.3939576148986816,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.794,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.684,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.586,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.1,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 29.783591316532988,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20117097610108545,
      "eval_anthropic_toxic_prompts_runtime": 9.3237,
      "eval_anthropic_toxic_prompts_samples_per_second": 53.627,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.107,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3419428027184945,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006382811990095415,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4058209300151164,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32233275173455844,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 64375
    },
    {
      "epoch": 12.36,
      "eval_arxiv_accuracy": 0.3319375,
      "eval_arxiv_bleu_score": 4.02360358169393,
      "eval_arxiv_bleu_score_sem": 0.1192204689385041,
      "eval_arxiv_emb_cos_sim": 0.7298744916915894,
      "eval_arxiv_emb_cos_sim_sem": 0.009141886542402264,
      "eval_arxiv_emb_top1_equal": 0.1953125,
      "eval_arxiv_emb_top1_equal_sem": 0.035178457165496856,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.575408697128296,
      "eval_arxiv_n_ngrams_match_1": 14.124,
      "eval_arxiv_n_ngrams_match_2": 2.664,
      "eval_arxiv_n_ngrams_match_3": 0.588,
      "eval_arxiv_num_pred_words": 40.322,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 35.70921208618092,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.33405053184170247,
      "eval_arxiv_runtime": 9.6852,
      "eval_arxiv_samples_per_second": 51.625,
      "eval_arxiv_steps_per_second": 0.103,
      "eval_arxiv_token_set_f1": 0.3324400308353056,
      "eval_arxiv_token_set_f1_sem": 0.004194391940256961,
      "eval_arxiv_token_set_precision": 0.28065966296273387,
      "eval_arxiv_token_set_recall": 0.4290765053135769,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 64375
    },
    {
      "epoch": 12.36,
      "eval_python_code_alpaca_accuracy": 0.15390625,
      "eval_python_code_alpaca_bleu_score": 4.005114341325162,
      "eval_python_code_alpaca_bleu_score_sem": 0.12485640490112654,
      "eval_python_code_alpaca_emb_cos_sim": 0.7366974949836731,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009228837466513764,
      "eval_python_code_alpaca_emb_top1_equal": 0.1171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.0346732139587402,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.072,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.47,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.756,
      "eval_python_code_alpaca_num_pred_words": 43.536,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 20.794181540791648,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.30534934416496107,
      "eval_python_code_alpaca_runtime": 9.5481,
      "eval_python_code_alpaca_samples_per_second": 52.366,
      "eval_python_code_alpaca_steps_per_second": 0.105,
      "eval_python_code_alpaca_token_set_f1": 0.4553981555124146,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005578453409727007,
      "eval_python_code_alpaca_token_set_precision": 0.49367564897478905,
      "eval_python_code_alpaca_token_set_recall": 0.44832925593773626,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 64375
    },
    {
      "epoch": 12.36,
      "eval_wikibio_accuracy": 0.30709375,
      "eval_wikibio_bleu_score": 5.458663661379506,
      "eval_wikibio_bleu_score_sem": 0.20056702303355825,
      "eval_wikibio_emb_cos_sim": 0.7180017828941345,
      "eval_wikibio_emb_cos_sim_sem": 0.010004883436877807,
      "eval_wikibio_emb_top1_equal": 0.1953125,
      "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.9794116020202637,
      "eval_wikibio_n_ngrams_match_1": 9.59,
      "eval_wikibio_n_ngrams_match_2": 3.124,
      "eval_wikibio_n_ngrams_match_3": 1.096,
      "eval_wikibio_num_pred_words": 36.012,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 53.485554174978105,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.33128895330977126,
      "eval_wikibio_runtime": 13.0042,
      "eval_wikibio_samples_per_second": 38.449,
      "eval_wikibio_steps_per_second": 0.077,
      "eval_wikibio_token_set_f1": 0.3070327763150943,
      "eval_wikibio_token_set_f1_sem": 0.00568952249062667,
      "eval_wikibio_token_set_precision": 0.3138791538788945,
      "eval_wikibio_token_set_recall": 0.3200629318680838,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 64375
    },
    {
      "epoch": 12.36,
      "eval_nq_accuracy": 0.50953125,
      "eval_nq_bleu_score": 10.862770280777887,
      "eval_nq_bleu_score_sem": 0.45677810651998757,
      "eval_nq_emb_cos_sim": 0.8177816867828369,
      "eval_nq_emb_cos_sim_sem": 0.007354519698029731,
      "eval_nq_emb_top1_equal": 0.25,
      "eval_nq_emb_top1_equal_sem": 0.03842366440207048,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.322997808456421,
      "eval_nq_n_ngrams_match_1": 21.96,
      "eval_nq_n_ngrams_match_2": 7.796,
      "eval_nq_n_ngrams_match_3": 3.55,
      "eval_nq_num_pred_words": 48.716,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 10.206224797939898,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4223990898538695,
      "eval_nq_runtime": 10.7991,
      "eval_nq_samples_per_second": 46.3,
      "eval_nq_steps_per_second": 0.093,
      "eval_nq_token_set_f1": 0.4442592377819496,
      "eval_nq_token_set_f1_sem": 0.004963257871358416,
      "eval_nq_token_set_precision": 0.3972643729584669,
      "eval_nq_token_set_recall": 0.5160101037245501,
      "eval_nq_true_num_tokens": 64.0,
      "step": 64375
    },
    {
      "epoch": 12.36,
      "learning_rate": 0.001,
      "loss": 2.6904,
      "step": 64380
    },
    {
      "epoch": 12.36,
      "learning_rate": 0.001,
      "loss": 2.6856,
      "step": 64392
    },
    {
      "epoch": 12.37,
      "learning_rate": 0.001,
      "loss": 2.6892,
      "step": 64404
    },
    {
      "epoch": 12.37,
      "learning_rate": 0.001,
      "loss": 2.6934,
      "step": 64416
    },
    {
      "epoch": 12.37,
      "learning_rate": 0.001,
      "loss": 2.6824,
      "step": 64428
    },
    {
      "epoch": 12.37,
      "learning_rate": 0.001,
      "loss": 2.6914,
      "step": 64440
    },
    {
      "epoch": 12.38,
      "learning_rate": 0.001,
      "loss": 2.688,
      "step": 64452
    },
    {
      "epoch": 12.38,
      "learning_rate": 0.001,
      "loss": 2.69,
      "step": 64464
    },
    {
      "epoch": 12.38,
      "learning_rate": 0.001,
      "loss": 2.6853,
      "step": 64476
    },
    {
      "epoch": 12.38,
      "learning_rate": 0.001,
      "loss": 2.6774,
      "step": 64488
    },
    {
      "epoch": 12.38,
      "learning_rate": 0.001,
      "loss": 2.6925,
      "step": 64500
    },
    {
      "epoch": 12.39,
      "learning_rate": 0.001,
      "loss": 2.6839,
      "step": 64512
    },
    {
      "epoch": 12.39,
      "learning_rate": 0.001,
      "loss": 2.6878,
      "step": 64524
    },
    {
      "epoch": 12.39,
      "learning_rate": 0.001,
      "loss": 2.6896,
      "step": 64536
    },
    {
      "epoch": 12.39,
      "learning_rate": 0.001,
      "loss": 2.6886,
      "step": 64548
    },
    {
      "epoch": 12.4,
      "learning_rate": 0.001,
      "loss": 2.6863,
      "step": 64560
    },
    {
      "epoch": 12.4,
      "learning_rate": 0.001,
      "loss": 2.6873,
      "step": 64572
    },
    {
      "epoch": 12.4,
      "learning_rate": 0.001,
      "loss": 2.681,
      "step": 64584
    },
    {
      "epoch": 12.4,
      "learning_rate": 0.001,
      "loss": 2.6856,
      "step": 64596
    },
    {
      "epoch": 12.41,
      "learning_rate": 0.001,
      "loss": 2.6789,
      "step": 64608
    },
    {
      "epoch": 12.41,
      "learning_rate": 0.001,
      "loss": 2.6911,
      "step": 64620
    },
    {
      "epoch": 12.41,
      "learning_rate": 0.001,
      "loss": 2.6839,
      "step": 64632
    },
    {
      "epoch": 12.41,
      "learning_rate": 0.001,
      "loss": 2.7002,
      "step": 64644
    },
    {
      "epoch": 12.41,
      "learning_rate": 0.001,
      "loss": 2.6871,
      "step": 64656
    },
    {
      "epoch": 12.42,
      "learning_rate": 0.001,
      "loss": 2.6924,
      "step": 64668
    },
    {
      "epoch": 12.42,
      "learning_rate": 0.001,
      "loss": 2.6904,
      "step": 64680
    },
    {
      "epoch": 12.42,
      "learning_rate": 0.001,
      "loss": 2.6838,
      "step": 64692
    },
    {
      "epoch": 12.42,
      "learning_rate": 0.001,
      "loss": 2.6814,
      "step": 64704
    },
    {
      "epoch": 12.43,
      "learning_rate": 0.001,
      "loss": 2.6835,
      "step": 64716
    },
    {
      "epoch": 12.43,
      "learning_rate": 0.001,
      "loss": 2.6906,
      "step": 64728
    },
    {
      "epoch": 12.43,
      "learning_rate": 0.001,
      "loss": 2.6931,
      "step": 64740
    },
    {
      "epoch": 12.43,
      "learning_rate": 0.001,
      "loss": 2.6826,
      "step": 64752
    },
    {
      "epoch": 12.44,
      "learning_rate": 0.001,
      "loss": 2.6874,
      "step": 64764
    },
    {
      "epoch": 12.44,
      "learning_rate": 0.001,
      "loss": 2.6944,
      "step": 64776
    },
    {
      "epoch": 12.44,
      "learning_rate": 0.001,
      "loss": 2.6914,
      "step": 64788
    },
    {
      "epoch": 12.44,
      "learning_rate": 0.001,
      "loss": 2.6891,
      "step": 64800
    },
    {
      "epoch": 12.44,
      "learning_rate": 0.001,
      "loss": 2.6803,
      "step": 64812
    },
    {
      "epoch": 12.45,
      "learning_rate": 0.001,
      "loss": 2.6768,
      "step": 64824
    },
    {
      "epoch": 12.45,
      "learning_rate": 0.001,
      "loss": 2.6961,
      "step": 64836
    },
    {
      "epoch": 12.45,
      "learning_rate": 0.001,
      "loss": 2.6856,
      "step": 64848
    },
    {
      "epoch": 12.45,
      "learning_rate": 0.001,
      "loss": 2.6857,
      "step": 64860
    },
    {
      "epoch": 12.46,
      "learning_rate": 0.001,
      "loss": 2.6954,
      "step": 64872
    },
    {
      "epoch": 12.46,
      "learning_rate": 0.001,
      "loss": 2.7019,
      "step": 64884
    },
    {
      "epoch": 12.46,
      "learning_rate": 0.001,
      "loss": 2.6918,
      "step": 64896
    },
    {
      "epoch": 12.46,
      "learning_rate": 0.001,
      "loss": 2.6831,
      "step": 64908
    },
    {
      "epoch": 12.47,
      "learning_rate": 0.001,
      "loss": 2.6823,
      "step": 64920
    },
    {
      "epoch": 12.47,
      "learning_rate": 0.001,
      "loss": 2.6848,
      "step": 64932
    },
    {
      "epoch": 12.47,
      "learning_rate": 0.001,
      "loss": 2.6873,
      "step": 64944
    },
    {
      "epoch": 12.47,
      "learning_rate": 0.001,
      "loss": 2.6795,
      "step": 64956
    },
    {
      "epoch": 12.47,
      "learning_rate": 0.001,
      "loss": 2.6887,
      "step": 64968
    },
    {
      "epoch": 12.48,
      "learning_rate": 0.001,
      "loss": 2.6906,
      "step": 64980
    },
    {
      "epoch": 12.48,
      "learning_rate": 0.001,
      "loss": 2.6797,
      "step": 64992
    },
    {
      "epoch": 12.48,
      "eval_ag_news_accuracy": 0.308,
      "eval_ag_news_bleu_score": 4.374347537843929,
      "eval_ag_news_bleu_score_sem": 0.13721734946471748,
      "eval_ag_news_emb_cos_sim": 0.7834879159927368,
      "eval_ag_news_emb_cos_sim_sem": 0.007201536516102805,
      "eval_ag_news_emb_top1_equal": 0.21875,
      "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.706989049911499,
      "eval_ag_news_n_ngrams_match_1": 13.07,
      "eval_ag_news_n_ngrams_match_2": 2.74,
      "eval_ag_news_n_ngrams_match_3": 0.746,
      "eval_ag_news_num_pred_words": 45.996,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 40.73098275552651,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3213701484002274,
      "eval_ag_news_runtime": 10.0694,
      "eval_ag_news_samples_per_second": 49.656,
      "eval_ag_news_steps_per_second": 0.099,
      "eval_ag_news_token_set_f1": 0.3311656501783919,
      "eval_ag_news_token_set_f1_sem": 0.004351692895305975,
      "eval_ag_news_token_set_precision": 0.3101803701591917,
      "eval_ag_news_token_set_recall": 0.3718769469196561,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 65000
    },
    {
      "epoch": 12.48,
      "eval_anthropic_toxic_prompts_accuracy": 0.1079375,
      "eval_anthropic_toxic_prompts_bleu_score": 2.855755712661894,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11784352219996941,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6488279104232788,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00961289272103519,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.3516156673431396,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.782,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.696,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.592,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.336,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 28.548821801245555,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.1992352239985944,
      "eval_anthropic_toxic_prompts_runtime": 9.5057,
      "eval_anthropic_toxic_prompts_samples_per_second": 52.6,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.105,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3429785052921255,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006453345361771795,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4021736002401946,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.326806010723115,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 65000
    },
    {
      "epoch": 12.48,
      "eval_arxiv_accuracy": 0.3331875,
      "eval_arxiv_bleu_score": 4.019372762429713,
      "eval_arxiv_bleu_score_sem": 0.1164797190885806,
      "eval_arxiv_emb_cos_sim": 0.7241116762161255,
      "eval_arxiv_emb_cos_sim_sem": 0.010409958115005025,
      "eval_arxiv_emb_top1_equal": 0.2421875,
      "eval_arxiv_emb_top1_equal_sem": 0.038014990119662626,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.5568759441375732,
      "eval_arxiv_n_ngrams_match_1": 13.894,
      "eval_arxiv_n_ngrams_match_2": 2.618,
      "eval_arxiv_n_ngrams_match_3": 0.578,
      "eval_arxiv_num_pred_words": 39.56,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 35.05351676594371,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3335906106363149,
      "eval_arxiv_runtime": 9.7275,
      "eval_arxiv_samples_per_second": 51.4,
      "eval_arxiv_steps_per_second": 0.103,
      "eval_arxiv_token_set_f1": 0.3264062659970621,
      "eval_arxiv_token_set_f1_sem": 0.004235282809990203,
      "eval_arxiv_token_set_precision": 0.27493075307382425,
      "eval_arxiv_token_set_recall": 0.42214591942239144,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 65000
    },
    {
      "epoch": 12.48,
      "eval_python_code_alpaca_accuracy": 0.15196875,
      "eval_python_code_alpaca_bleu_score": 4.036770555605847,
      "eval_python_code_alpaca_bleu_score_sem": 0.12870676757623517,
      "eval_python_code_alpaca_emb_cos_sim": 0.7205531001091003,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010142604849832234,
      "eval_python_code_alpaca_emb_top1_equal": 0.109375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.0249199867248535,
      "eval_python_code_alpaca_n_ngrams_match_1": 8.704,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.4,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.72,
      "eval_python_code_alpaca_num_pred_words": 41.566,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 20.592356983351635,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.30164487567760767,
      "eval_python_code_alpaca_runtime": 9.6549,
      "eval_python_code_alpaca_samples_per_second": 51.787,
      "eval_python_code_alpaca_steps_per_second": 0.104,
      "eval_python_code_alpaca_token_set_f1": 0.43931315354708417,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005913209374507224,
      "eval_python_code_alpaca_token_set_precision": 0.4701370718763214,
      "eval_python_code_alpaca_token_set_recall": 0.4419615733144766,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 65000
    },
    {
      "epoch": 12.48,
      "eval_wikibio_accuracy": 0.31025,
      "eval_wikibio_bleu_score": 5.440243916062389,
      "eval_wikibio_bleu_score_sem": 0.19468922014442588,
      "eval_wikibio_emb_cos_sim": 0.7150151133537292,
      "eval_wikibio_emb_cos_sim_sem": 0.009565028389105988,
      "eval_wikibio_emb_top1_equal": 0.15625,
      "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.914822816848755,
      "eval_wikibio_n_ngrams_match_1": 9.504,
      "eval_wikibio_n_ngrams_match_2": 3.104,
      "eval_wikibio_n_ngrams_match_3": 1.09,
      "eval_wikibio_num_pred_words": 35.788,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 50.14018672765468,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.33271435348160244,
      "eval_wikibio_runtime": 9.1647,
      "eval_wikibio_samples_per_second": 54.557,
      "eval_wikibio_steps_per_second": 0.109,
      "eval_wikibio_token_set_f1": 0.30462833691382407,
      "eval_wikibio_token_set_f1_sem": 0.005561194622509602,
      "eval_wikibio_token_set_precision": 0.308499690201193,
      "eval_wikibio_token_set_recall": 0.32106598372663964,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 65000
    },
    {
      "epoch": 12.48,
      "eval_nq_accuracy": 0.509125,
      "eval_nq_bleu_score": 10.812410941434521,
      "eval_nq_bleu_score_sem": 0.4540598803301628,
      "eval_nq_emb_cos_sim": 0.8146822452545166,
      "eval_nq_emb_cos_sim_sem": 0.00762139749406776,
      "eval_nq_emb_top1_equal": 0.234375,
      "eval_nq_emb_top1_equal_sem": 0.03758909358128201,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.317577838897705,
      "eval_nq_n_ngrams_match_1": 22.112,
      "eval_nq_n_ngrams_match_2": 7.936,
      "eval_nq_n_ngrams_match_3": 3.556,
      "eval_nq_num_pred_words": 49.076,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 10.151057009144601,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4239611719301859,
      "eval_nq_runtime": 10.3809,
      "eval_nq_samples_per_second": 48.166,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.44562993003973433,
      "eval_nq_token_set_f1_sem": 0.004944169031038659,
      "eval_nq_token_set_precision": 0.3981133594800232,
      "eval_nq_token_set_recall": 0.5164183346225726,
      "eval_nq_true_num_tokens": 64.0,
      "step": 65000
    },
    {
      "epoch": 12.48,
      "learning_rate": 0.001,
      "loss": 2.6821,
      "step": 65004
    },
    {
      "epoch": 12.48,
      "learning_rate": 0.001,
      "loss": 2.6806,
      "step": 65016
    },
    {
      "epoch": 12.49,
      "learning_rate": 0.001,
      "loss": 2.6801,
      "step": 65028
    },
    {
      "epoch": 12.49,
      "learning_rate": 0.001,
      "loss": 2.681,
      "step": 65040
    },
    {
      "epoch": 12.49,
      "learning_rate": 0.001,
      "loss": 2.6947,
      "step": 65052
    },
    {
      "epoch": 12.49,
      "learning_rate": 0.001,
      "loss": 2.6935,
      "step": 65064
    },
    {
      "epoch": 12.5,
      "learning_rate": 0.001,
      "loss": 2.6879,
      "step": 65076
    },
    {
      "epoch": 12.5,
      "learning_rate": 0.001,
      "loss": 2.6819,
      "step": 65088
    },
    {
      "epoch": 12.5,
      "learning_rate": 0.001,
      "loss": 2.6909,
      "step": 65100
    },
    {
      "epoch": 12.5,
      "learning_rate": 0.001,
      "loss": 2.6952,
      "step": 65112
    },
    {
      "epoch": 12.5,
      "learning_rate": 0.001,
      "loss": 2.6819,
      "step": 65124
    },
    {
      "epoch": 12.51,
      "learning_rate": 0.001,
      "loss": 2.687,
      "step": 65136
    },
    {
      "epoch": 12.51,
      "learning_rate": 0.001,
      "loss": 2.6737,
      "step": 65148
    },
    {
      "epoch": 12.51,
      "learning_rate": 0.001,
      "loss": 2.6802,
      "step": 65160
    },
    {
      "epoch": 12.51,
      "learning_rate": 0.001,
      "loss": 2.6811,
      "step": 65172
    },
    {
      "epoch": 12.52,
      "learning_rate": 0.001,
      "loss": 2.6866,
      "step": 65184
    },
    {
      "epoch": 12.52,
      "learning_rate": 0.001,
      "loss": 2.6879,
      "step": 65196
    },
    {
      "epoch": 12.52,
      "learning_rate": 0.001,
      "loss": 2.6738,
      "step": 65208
    },
    {
      "epoch": 12.52,
      "learning_rate": 0.001,
      "loss": 2.6868,
      "step": 65220
    },
    {
      "epoch": 12.53,
      "learning_rate": 0.001,
      "loss": 2.6861,
      "step": 65232
    },
    {
      "epoch": 12.53,
      "learning_rate": 0.001,
      "loss": 2.6764,
      "step": 65244
    },
    {
      "epoch": 12.53,
      "learning_rate": 0.001,
      "loss": 2.6772,
      "step": 65256
    },
    {
      "epoch": 12.53,
      "learning_rate": 0.001,
      "loss": 2.6941,
      "step": 65268
    },
    {
      "epoch": 12.53,
      "learning_rate": 0.001,
      "loss": 2.6784,
      "step": 65280
    },
    {
      "epoch": 12.54,
      "learning_rate": 0.001,
      "loss": 2.6809,
      "step": 65292
    },
    {
      "epoch": 12.54,
      "learning_rate": 0.001,
      "loss": 2.6854,
      "step": 65304
    },
    {
      "epoch": 12.54,
      "learning_rate": 0.001,
      "loss": 2.6852,
      "step": 65316
    },
    {
      "epoch": 12.54,
      "learning_rate": 0.001,
      "loss": 2.6801,
      "step": 65328
    },
    {
      "epoch": 12.55,
      "learning_rate": 0.001,
      "loss": 2.6873,
      "step": 65340
    },
    {
      "epoch": 12.55,
      "learning_rate": 0.001,
      "loss": 2.6878,
      "step": 65352
    },
    {
      "epoch": 12.55,
      "learning_rate": 0.001,
      "loss": 2.6828,
      "step": 65364
    },
    {
      "epoch": 12.55,
      "learning_rate": 0.001,
      "loss": 2.6776,
      "step": 65376
    },
    {
      "epoch": 12.56,
      "learning_rate": 0.001,
      "loss": 2.6807,
      "step": 65388
    },
    {
      "epoch": 12.56,
      "learning_rate": 0.001,
      "loss": 2.6886,
      "step": 65400
    },
    {
      "epoch": 12.56,
      "learning_rate": 0.001,
      "loss": 2.6701,
      "step": 65412
    },
    {
      "epoch": 12.56,
      "learning_rate": 0.001,
      "loss": 2.6794,
      "step": 65424
    },
    {
      "epoch": 12.56,
      "learning_rate": 0.001,
      "loss": 2.6799,
      "step": 65436
    },
    {
      "epoch": 12.57,
      "learning_rate": 0.001,
      "loss": 2.6858,
      "step": 65448
    },
    {
      "epoch": 12.57,
      "learning_rate": 0.001,
      "loss": 2.6834,
      "step": 65460
    },
    {
      "epoch": 12.57,
      "learning_rate": 0.001,
      "loss": 2.6737,
      "step": 65472
    },
    {
      "epoch": 12.57,
      "learning_rate": 0.001,
      "loss": 2.6833,
      "step": 65484
    },
    {
      "epoch": 12.58,
      "learning_rate": 0.001,
      "loss": 2.6794,
      "step": 65496
    },
    {
      "epoch": 12.58,
      "learning_rate": 0.001,
      "loss": 2.6803,
      "step": 65508
    },
    {
      "epoch": 12.58,
      "learning_rate": 0.001,
      "loss": 2.6815,
      "step": 65520
    },
    {
      "epoch": 12.58,
      "learning_rate": 0.001,
      "loss": 2.6901,
      "step": 65532
    },
    {
      "epoch": 12.59,
      "learning_rate": 0.001,
      "loss": 2.6797,
      "step": 65544
    },
    {
      "epoch": 12.59,
      "learning_rate": 0.001,
      "loss": 2.6786,
      "step": 65556
    },
    {
      "epoch": 12.59,
      "learning_rate": 0.001,
      "loss": 2.6901,
      "step": 65568
    },
    {
      "epoch": 12.59,
      "learning_rate": 0.001,
      "loss": 2.6897,
      "step": 65580
    },
    {
      "epoch": 12.59,
      "learning_rate": 0.001,
      "loss": 2.6794,
      "step": 65592
    },
    {
      "epoch": 12.6,
      "learning_rate": 0.001,
      "loss": 2.6954,
      "step": 65604
    },
    {
      "epoch": 12.6,
      "learning_rate": 0.001,
      "loss": 2.693,
      "step": 65616
    },
    {
      "epoch": 12.6,
      "eval_ag_news_accuracy": 0.3091875,
      "eval_ag_news_bleu_score": 4.52125321349297,
      "eval_ag_news_bleu_score_sem": 0.14446214421973427,
      "eval_ag_news_emb_cos_sim": 0.7879672646522522,
      "eval_ag_news_emb_cos_sim_sem": 0.007861516651112926,
      "eval_ag_news_emb_top1_equal": 0.2109375,
      "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.7005038261413574,
      "eval_ag_news_n_ngrams_match_1": 13.25,
      "eval_ag_news_n_ngrams_match_2": 2.798,
      "eval_ag_news_n_ngrams_match_3": 0.79,
      "eval_ag_news_num_pred_words": 45.912,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 40.46768790380134,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3276524051288814,
      "eval_ag_news_runtime": 10.0003,
      "eval_ag_news_samples_per_second": 49.998,
      "eval_ag_news_steps_per_second": 0.1,
      "eval_ag_news_token_set_f1": 0.3330800950225034,
      "eval_ag_news_token_set_f1_sem": 0.004447843565181526,
      "eval_ag_news_token_set_precision": 0.31251921517980064,
      "eval_ag_news_token_set_recall": 0.3719246932783074,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 65625
    },
    {
      "epoch": 12.6,
      "eval_anthropic_toxic_prompts_accuracy": 0.10803125,
      "eval_anthropic_toxic_prompts_bleu_score": 2.878082959644404,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1155829724994168,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6497258543968201,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009816536414872208,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1328125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030114394778901498,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.39844012260437,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.762,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.746,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.63,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.546,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 29.917396160691325,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.19520056254722135,
      "eval_anthropic_toxic_prompts_runtime": 17.2126,
      "eval_anthropic_toxic_prompts_samples_per_second": 29.048,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.058,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.34596108229662076,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006492018951235808,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.40347523374858574,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3318174650246588,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 65625
    },
    {
      "epoch": 12.6,
      "eval_arxiv_accuracy": 0.33315625,
      "eval_arxiv_bleu_score": 3.974489017421528,
      "eval_arxiv_bleu_score_sem": 0.11660001088546988,
      "eval_arxiv_emb_cos_sim": 0.7308480143547058,
      "eval_arxiv_emb_cos_sim_sem": 0.009328456152780816,
      "eval_arxiv_emb_top1_equal": 0.2265625,
      "eval_arxiv_emb_top1_equal_sem": 0.03714537682851538,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.56752347946167,
      "eval_arxiv_n_ngrams_match_1": 14.306,
      "eval_arxiv_n_ngrams_match_2": 2.68,
      "eval_arxiv_n_ngrams_match_3": 0.558,
      "eval_arxiv_num_pred_words": 40.606,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 35.4287444037503,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.33742940953837974,
      "eval_arxiv_runtime": 10.8206,
      "eval_arxiv_samples_per_second": 46.208,
      "eval_arxiv_steps_per_second": 0.092,
      "eval_arxiv_token_set_f1": 0.3349212228296785,
      "eval_arxiv_token_set_f1_sem": 0.004246400294438401,
      "eval_arxiv_token_set_precision": 0.28452112061955465,
      "eval_arxiv_token_set_recall": 0.42511080807128715,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 65625
    },
    {
      "epoch": 12.6,
      "eval_python_code_alpaca_accuracy": 0.15303125,
      "eval_python_code_alpaca_bleu_score": 3.881755119300968,
      "eval_python_code_alpaca_bleu_score_sem": 0.11477316286635027,
      "eval_python_code_alpaca_emb_cos_sim": 0.7408278584480286,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008647342007420514,
      "eval_python_code_alpaca_emb_top1_equal": 0.1328125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.027876138687134,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.184,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.442,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.666,
      "eval_python_code_alpaca_num_pred_words": 44.004,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 20.653321185182048,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3065611845554336,
      "eval_python_code_alpaca_runtime": 10.5685,
      "eval_python_code_alpaca_samples_per_second": 47.31,
      "eval_python_code_alpaca_steps_per_second": 0.095,
      "eval_python_code_alpaca_token_set_f1": 0.4558760969528058,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0054846314345116335,
      "eval_python_code_alpaca_token_set_precision": 0.4970199148987927,
      "eval_python_code_alpaca_token_set_recall": 0.44236831507373053,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 65625
    },
    {
      "epoch": 12.6,
      "eval_wikibio_accuracy": 0.309625,
      "eval_wikibio_bleu_score": 5.386481420932874,
      "eval_wikibio_bleu_score_sem": 0.20129296594179516,
      "eval_wikibio_emb_cos_sim": 0.7220320701599121,
      "eval_wikibio_emb_cos_sim_sem": 0.009761132406871115,
      "eval_wikibio_emb_top1_equal": 0.1484375,
      "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.9536080360412598,
      "eval_wikibio_n_ngrams_match_1": 9.648,
      "eval_wikibio_n_ngrams_match_2": 3.08,
      "eval_wikibio_n_ngrams_match_3": 1.062,
      "eval_wikibio_num_pred_words": 36.232,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 52.12308996244558,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3337593941987438,
      "eval_wikibio_runtime": 15.9833,
      "eval_wikibio_samples_per_second": 31.283,
      "eval_wikibio_steps_per_second": 0.063,
      "eval_wikibio_token_set_f1": 0.3018513600041814,
      "eval_wikibio_token_set_f1_sem": 0.005631970735854331,
      "eval_wikibio_token_set_precision": 0.3113866945580489,
      "eval_wikibio_token_set_recall": 0.3093409905871538,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 65625
    },
    {
      "epoch": 12.6,
      "eval_nq_accuracy": 0.5074375,
      "eval_nq_bleu_score": 10.758401580059093,
      "eval_nq_bleu_score_sem": 0.46143902103771645,
      "eval_nq_emb_cos_sim": 0.8136475086212158,
      "eval_nq_emb_cos_sim_sem": 0.007764021770884117,
      "eval_nq_emb_top1_equal": 0.265625,
      "eval_nq_emb_top1_equal_sem": 0.03919146934646163,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.320363759994507,
      "eval_nq_n_ngrams_match_1": 22.06,
      "eval_nq_n_ngrams_match_2": 7.76,
      "eval_nq_n_ngrams_match_3": 3.518,
      "eval_nq_num_pred_words": 48.952,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 10.179376482614051,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.42556207563774306,
      "eval_nq_runtime": 10.3468,
      "eval_nq_samples_per_second": 48.324,
      "eval_nq_steps_per_second": 0.097,
      "eval_nq_token_set_f1": 0.4420851953351193,
      "eval_nq_token_set_f1_sem": 0.005058979697250073,
      "eval_nq_token_set_precision": 0.39631357733695816,
      "eval_nq_token_set_recall": 0.5104212615548354,
      "eval_nq_true_num_tokens": 64.0,
      "step": 65625
    },
    {
      "epoch": 12.6,
      "learning_rate": 0.001,
      "loss": 2.6874,
      "step": 65628
    },
    {
      "epoch": 12.6,
      "learning_rate": 0.001,
      "loss": 2.6819,
      "step": 65640
    },
    {
      "epoch": 12.61,
      "learning_rate": 0.001,
      "loss": 2.6784,
      "step": 65652
    },
    {
      "epoch": 12.61,
      "learning_rate": 0.001,
      "loss": 2.692,
      "step": 65664
    },
    {
      "epoch": 12.61,
      "learning_rate": 0.001,
      "loss": 2.6899,
      "step": 65676
    },
    {
      "epoch": 12.61,
      "learning_rate": 0.001,
      "loss": 2.6817,
      "step": 65688
    },
    {
      "epoch": 12.62,
      "learning_rate": 0.001,
      "loss": 2.68,
      "step": 65700
    },
    {
      "epoch": 12.62,
      "learning_rate": 0.001,
      "loss": 2.6841,
      "step": 65712
    },
    {
      "epoch": 12.62,
      "learning_rate": 0.001,
      "loss": 2.6906,
      "step": 65724
    },
    {
      "epoch": 12.62,
      "learning_rate": 0.001,
      "loss": 2.6877,
      "step": 65736
    },
    {
      "epoch": 12.62,
      "learning_rate": 0.001,
      "loss": 2.6877,
      "step": 65748
    },
    {
      "epoch": 12.63,
      "learning_rate": 0.001,
      "loss": 2.6867,
      "step": 65760
    },
    {
      "epoch": 12.63,
      "learning_rate": 0.001,
      "loss": 2.6872,
      "step": 65772
    },
    {
      "epoch": 12.63,
      "learning_rate": 0.001,
      "loss": 2.681,
      "step": 65784
    },
    {
      "epoch": 12.63,
      "learning_rate": 0.001,
      "loss": 2.6837,
      "step": 65796
    },
    {
      "epoch": 12.64,
      "learning_rate": 0.001,
      "loss": 2.6825,
      "step": 65808
    },
    {
      "epoch": 12.64,
      "learning_rate": 0.001,
      "loss": 2.678,
      "step": 65820
    },
    {
      "epoch": 12.64,
      "learning_rate": 0.001,
      "loss": 2.6822,
      "step": 65832
    },
    {
      "epoch": 12.64,
      "learning_rate": 0.001,
      "loss": 2.6865,
      "step": 65844
    },
    {
      "epoch": 12.65,
      "learning_rate": 0.001,
      "loss": 2.6712,
      "step": 65856
    },
    {
      "epoch": 12.65,
      "learning_rate": 0.001,
      "loss": 2.6836,
      "step": 65868
    },
    {
      "epoch": 12.65,
      "learning_rate": 0.001,
      "loss": 2.6695,
      "step": 65880
    },
    {
      "epoch": 12.65,
      "learning_rate": 0.001,
      "loss": 2.6868,
      "step": 65892
    },
    {
      "epoch": 12.65,
      "learning_rate": 0.001,
      "loss": 2.6885,
      "step": 65904
    },
    {
      "epoch": 12.66,
      "learning_rate": 0.001,
      "loss": 2.6816,
      "step": 65916
    },
    {
      "epoch": 12.66,
      "learning_rate": 0.001,
      "loss": 2.6727,
      "step": 65928
    },
    {
      "epoch": 12.66,
      "learning_rate": 0.001,
      "loss": 2.686,
      "step": 65940
    },
    {
      "epoch": 12.66,
      "learning_rate": 0.001,
      "loss": 2.6903,
      "step": 65952
    },
    {
      "epoch": 12.67,
      "learning_rate": 0.001,
      "loss": 2.6858,
      "step": 65964
    },
    {
      "epoch": 12.67,
      "learning_rate": 0.001,
      "loss": 2.6777,
      "step": 65976
    },
    {
      "epoch": 12.67,
      "learning_rate": 0.001,
      "loss": 2.6715,
      "step": 65988
    },
    {
      "epoch": 12.67,
      "learning_rate": 0.001,
      "loss": 2.6947,
      "step": 66000
    },
    {
      "epoch": 12.68,
      "learning_rate": 0.001,
      "loss": 2.6762,
      "step": 66012
    },
    {
      "epoch": 12.68,
      "learning_rate": 0.001,
      "loss": 2.6789,
      "step": 66024
    },
    {
      "epoch": 12.68,
      "learning_rate": 0.001,
      "loss": 2.6846,
      "step": 66036
    },
    {
      "epoch": 12.68,
      "learning_rate": 0.001,
      "loss": 2.6804,
      "step": 66048
    },
    {
      "epoch": 12.68,
      "learning_rate": 0.001,
      "loss": 2.6946,
      "step": 66060
    },
    {
      "epoch": 12.69,
      "learning_rate": 0.001,
      "loss": 2.6971,
      "step": 66072
    },
    {
      "epoch": 12.69,
      "learning_rate": 0.001,
      "loss": 2.6944,
      "step": 66084
    },
    {
      "epoch": 12.69,
      "learning_rate": 0.001,
      "loss": 2.6882,
      "step": 66096
    },
    {
      "epoch": 12.69,
      "learning_rate": 0.001,
      "loss": 2.6861,
      "step": 66108
    },
    {
      "epoch": 12.7,
      "learning_rate": 0.001,
      "loss": 2.699,
      "step": 66120
    },
    {
      "epoch": 12.7,
      "learning_rate": 0.001,
      "loss": 2.685,
      "step": 66132
    },
    {
      "epoch": 12.7,
      "learning_rate": 0.001,
      "loss": 2.686,
      "step": 66144
    },
    {
      "epoch": 12.7,
      "learning_rate": 0.001,
      "loss": 2.6768,
      "step": 66156
    },
    {
      "epoch": 12.71,
      "learning_rate": 0.001,
      "loss": 2.6897,
      "step": 66168
    },
    {
      "epoch": 12.71,
      "learning_rate": 0.001,
      "loss": 2.6843,
      "step": 66180
    },
    {
      "epoch": 12.71,
      "learning_rate": 0.001,
      "loss": 2.6797,
      "step": 66192
    },
    {
      "epoch": 12.71,
      "learning_rate": 0.001,
      "loss": 2.6775,
      "step": 66204
    },
    {
      "epoch": 12.71,
      "learning_rate": 0.001,
      "loss": 2.6889,
      "step": 66216
    },
    {
      "epoch": 12.72,
      "learning_rate": 0.001,
      "loss": 2.6816,
      "step": 66228
    },
    {
      "epoch": 12.72,
      "learning_rate": 0.001,
      "loss": 2.6813,
      "step": 66240
    },
    {
      "epoch": 12.72,
      "eval_ag_news_accuracy": 0.30821875,
      "eval_ag_news_bleu_score": 4.442264679828784,
      "eval_ag_news_bleu_score_sem": 0.14079742645729434,
      "eval_ag_news_emb_cos_sim": 0.7865789532661438,
      "eval_ag_news_emb_cos_sim_sem": 0.00901036071617894,
      "eval_ag_news_emb_top1_equal": 0.265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.714829683303833,
      "eval_ag_news_n_ngrams_match_1": 13.166,
      "eval_ag_news_n_ngrams_match_2": 2.81,
      "eval_ag_news_n_ngrams_match_3": 0.772,
      "eval_ag_news_num_pred_words": 46.096,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 41.051594716966605,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.32317659516122965,
      "eval_ag_news_runtime": 9.6051,
      "eval_ag_news_samples_per_second": 52.056,
      "eval_ag_news_steps_per_second": 0.104,
      "eval_ag_news_token_set_f1": 0.3328451876967112,
      "eval_ag_news_token_set_f1_sem": 0.004605688669783893,
      "eval_ag_news_token_set_precision": 0.31148388027314167,
      "eval_ag_news_token_set_recall": 0.37319743833185304,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 66250
    },
    {
      "epoch": 12.72,
      "eval_anthropic_toxic_prompts_accuracy": 0.109125,
      "eval_anthropic_toxic_prompts_bleu_score": 2.858608667803098,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10941246782311037,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6308979392051697,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010247484648173561,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.3844096660614014,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.772,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.71,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.578,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.188,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 29.500572385663904,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.19990366830034043,
      "eval_anthropic_toxic_prompts_runtime": 9.5142,
      "eval_anthropic_toxic_prompts_samples_per_second": 52.553,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.105,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.341990617461851,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00657963308209183,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4026880005881458,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.327636712255066,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 66250
    },
    {
      "epoch": 12.72,
      "eval_arxiv_accuracy": 0.33315625,
      "eval_arxiv_bleu_score": 3.8363732637314643,
      "eval_arxiv_bleu_score_sem": 0.11065596222794323,
      "eval_arxiv_emb_cos_sim": 0.7208958864212036,
      "eval_arxiv_emb_cos_sim_sem": 0.009142144033304886,
      "eval_arxiv_emb_top1_equal": 0.2265625,
      "eval_arxiv_emb_top1_equal_sem": 0.03714537682851538,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.55238676071167,
      "eval_arxiv_n_ngrams_match_1": 13.842,
      "eval_arxiv_n_ngrams_match_2": 2.624,
      "eval_arxiv_n_ngrams_match_3": 0.52,
      "eval_arxiv_num_pred_words": 39.604,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 34.89650778419807,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3293064781497945,
      "eval_arxiv_runtime": 14.1947,
      "eval_arxiv_samples_per_second": 35.224,
      "eval_arxiv_steps_per_second": 0.07,
      "eval_arxiv_token_set_f1": 0.32895586797041415,
      "eval_arxiv_token_set_f1_sem": 0.0042815217620811455,
      "eval_arxiv_token_set_precision": 0.273232899376966,
      "eval_arxiv_token_set_recall": 0.4384903963321714,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 66250
    },
    {
      "epoch": 12.72,
      "eval_python_code_alpaca_accuracy": 0.1526875,
      "eval_python_code_alpaca_bleu_score": 4.138481210565817,
      "eval_python_code_alpaca_bleu_score_sem": 0.13264847535511604,
      "eval_python_code_alpaca_emb_cos_sim": 0.735192060470581,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008064912073670438,
      "eval_python_code_alpaca_emb_top1_equal": 0.125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.03822660446167,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.11,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.51,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.782,
      "eval_python_code_alpaca_num_pred_words": 42.886,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 20.868202823369845,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3079078065785898,
      "eval_python_code_alpaca_runtime": 10.467,
      "eval_python_code_alpaca_samples_per_second": 47.769,
      "eval_python_code_alpaca_steps_per_second": 0.096,
      "eval_python_code_alpaca_token_set_f1": 0.4511836295390964,
      "eval_python_code_alpaca_token_set_f1_sem": 0.00555212428173263,
      "eval_python_code_alpaca_token_set_precision": 0.4930051052844708,
      "eval_python_code_alpaca_token_set_recall": 0.43687350950886256,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 66250
    },
    {
      "epoch": 12.72,
      "eval_wikibio_accuracy": 0.31246875,
      "eval_wikibio_bleu_score": 5.573293920063445,
      "eval_wikibio_bleu_score_sem": 0.21811539733169838,
      "eval_wikibio_emb_cos_sim": 0.7096949815750122,
      "eval_wikibio_emb_cos_sim_sem": 0.011197715640899034,
      "eval_wikibio_emb_top1_equal": 0.1484375,
      "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.9283409118652344,
      "eval_wikibio_n_ngrams_match_1": 9.36,
      "eval_wikibio_n_ngrams_match_2": 3.042,
      "eval_wikibio_n_ngrams_match_3": 1.106,
      "eval_wikibio_num_pred_words": 35.092,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 50.82258853044036,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.32281070223805103,
      "eval_wikibio_runtime": 12.0996,
      "eval_wikibio_samples_per_second": 41.324,
      "eval_wikibio_steps_per_second": 0.083,
      "eval_wikibio_token_set_f1": 0.29821455820854004,
      "eval_wikibio_token_set_f1_sem": 0.005784116259377843,
      "eval_wikibio_token_set_precision": 0.30255744755681513,
      "eval_wikibio_token_set_recall": 0.31359513218539303,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 66250
    },
    {
      "epoch": 12.72,
      "eval_nq_accuracy": 0.509125,
      "eval_nq_bleu_score": 10.913670274634493,
      "eval_nq_bleu_score_sem": 0.47292821120609474,
      "eval_nq_emb_cos_sim": 0.8075646162033081,
      "eval_nq_emb_cos_sim_sem": 0.007970604548860264,
      "eval_nq_emb_top1_equal": 0.2265625,
      "eval_nq_emb_top1_equal_sem": 0.03714537682851538,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.315042018890381,
      "eval_nq_n_ngrams_match_1": 21.996,
      "eval_nq_n_ngrams_match_2": 7.912,
      "eval_nq_n_ngrams_match_3": 3.584,
      "eval_nq_num_pred_words": 48.52,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 10.125348365707886,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4221893049153954,
      "eval_nq_runtime": 9.8319,
      "eval_nq_samples_per_second": 50.855,
      "eval_nq_steps_per_second": 0.102,
      "eval_nq_token_set_f1": 0.4429696630094818,
      "eval_nq_token_set_f1_sem": 0.005257962618743179,
      "eval_nq_token_set_precision": 0.39488895617080366,
      "eval_nq_token_set_recall": 0.5162034044853563,
      "eval_nq_true_num_tokens": 64.0,
      "step": 66250
    },
    {
      "epoch": 12.72,
      "learning_rate": 0.001,
      "loss": 2.682,
      "step": 66252
    },
    {
      "epoch": 12.72,
      "learning_rate": 0.001,
      "loss": 2.6797,
      "step": 66264
    },
    {
      "epoch": 12.73,
      "learning_rate": 0.001,
      "loss": 2.6733,
      "step": 66276
    },
    {
      "epoch": 12.73,
      "learning_rate": 0.001,
      "loss": 2.6888,
      "step": 66288
    },
    {
      "epoch": 12.73,
      "learning_rate": 0.001,
      "loss": 2.6859,
      "step": 66300
    },
    {
      "epoch": 12.73,
      "learning_rate": 0.001,
      "loss": 2.684,
      "step": 66312
    },
    {
      "epoch": 12.74,
      "learning_rate": 0.001,
      "loss": 2.6831,
      "step": 66324
    },
    {
      "epoch": 12.74,
      "learning_rate": 0.001,
      "loss": 2.678,
      "step": 66336
    },
    {
      "epoch": 12.74,
      "learning_rate": 0.001,
      "loss": 2.6855,
      "step": 66348
    },
    {
      "epoch": 12.74,
      "learning_rate": 0.001,
      "loss": 2.6827,
      "step": 66360
    },
    {
      "epoch": 12.74,
      "learning_rate": 0.001,
      "loss": 2.6789,
      "step": 66372
    },
    {
      "epoch": 12.75,
      "learning_rate": 0.001,
      "loss": 2.6834,
      "step": 66384
    },
    {
      "epoch": 12.75,
      "learning_rate": 0.001,
      "loss": 2.6793,
      "step": 66396
    },
    {
      "epoch": 12.75,
      "learning_rate": 0.001,
      "loss": 2.6647,
      "step": 66408
    },
    {
      "epoch": 12.75,
      "learning_rate": 0.001,
      "loss": 2.6817,
      "step": 66420
    },
    {
      "epoch": 12.76,
      "learning_rate": 0.001,
      "loss": 2.6787,
      "step": 66432
    },
    {
      "epoch": 12.76,
      "learning_rate": 0.001,
      "loss": 2.6916,
      "step": 66444
    },
    {
      "epoch": 12.76,
      "learning_rate": 0.001,
      "loss": 2.6769,
      "step": 66456
    },
    {
      "epoch": 12.76,
      "learning_rate": 0.001,
      "loss": 2.6812,
      "step": 66468
    },
    {
      "epoch": 12.76,
      "learning_rate": 0.001,
      "loss": 2.6789,
      "step": 66480
    },
    {
      "epoch": 12.77,
      "learning_rate": 0.001,
      "loss": 2.6803,
      "step": 66492
    },
    {
      "epoch": 12.77,
      "learning_rate": 0.001,
      "loss": 2.6928,
      "step": 66504
    },
    {
      "epoch": 12.77,
      "learning_rate": 0.001,
      "loss": 2.6873,
      "step": 66516
    },
    {
      "epoch": 12.77,
      "learning_rate": 0.001,
      "loss": 2.6802,
      "step": 66528
    },
    {
      "epoch": 12.78,
      "learning_rate": 0.001,
      "loss": 2.6918,
      "step": 66540
    },
    {
      "epoch": 12.78,
      "learning_rate": 0.001,
      "loss": 2.6771,
      "step": 66552
    },
    {
      "epoch": 12.78,
      "learning_rate": 0.001,
      "loss": 2.6758,
      "step": 66564
    },
    {
      "epoch": 12.78,
      "learning_rate": 0.001,
      "loss": 2.6857,
      "step": 66576
    },
    {
      "epoch": 12.79,
      "learning_rate": 0.001,
      "loss": 2.6764,
      "step": 66588
    },
    {
      "epoch": 12.79,
      "learning_rate": 0.001,
      "loss": 2.6821,
      "step": 66600
    },
    {
      "epoch": 12.79,
      "learning_rate": 0.001,
      "loss": 2.6706,
      "step": 66612
    },
    {
      "epoch": 12.79,
      "learning_rate": 0.001,
      "loss": 2.6728,
      "step": 66624
    },
    {
      "epoch": 12.79,
      "learning_rate": 0.001,
      "loss": 2.6798,
      "step": 66636
    },
    {
      "epoch": 12.8,
      "learning_rate": 0.001,
      "loss": 2.6915,
      "step": 66648
    },
    {
      "epoch": 12.8,
      "learning_rate": 0.001,
      "loss": 2.6825,
      "step": 66660
    },
    {
      "epoch": 12.8,
      "learning_rate": 0.001,
      "loss": 2.6724,
      "step": 66672
    },
    {
      "epoch": 12.8,
      "learning_rate": 0.001,
      "loss": 2.6862,
      "step": 66684
    },
    {
      "epoch": 12.81,
      "learning_rate": 0.001,
      "loss": 2.6827,
      "step": 66696
    },
    {
      "epoch": 12.81,
      "learning_rate": 0.001,
      "loss": 2.6874,
      "step": 66708
    },
    {
      "epoch": 12.81,
      "learning_rate": 0.001,
      "loss": 2.6923,
      "step": 66720
    },
    {
      "epoch": 12.81,
      "learning_rate": 0.001,
      "loss": 2.6744,
      "step": 66732
    },
    {
      "epoch": 12.82,
      "learning_rate": 0.001,
      "loss": 2.678,
      "step": 66744
    },
    {
      "epoch": 12.82,
      "learning_rate": 0.001,
      "loss": 2.6865,
      "step": 66756
    },
    {
      "epoch": 12.82,
      "learning_rate": 0.001,
      "loss": 2.6837,
      "step": 66768
    },
    {
      "epoch": 12.82,
      "learning_rate": 0.001,
      "loss": 2.695,
      "step": 66780
    },
    {
      "epoch": 12.82,
      "learning_rate": 0.001,
      "loss": 2.6833,
      "step": 66792
    },
    {
      "epoch": 12.83,
      "learning_rate": 0.001,
      "loss": 2.6914,
      "step": 66804
    },
    {
      "epoch": 12.83,
      "learning_rate": 0.001,
      "loss": 2.6772,
      "step": 66816
    },
    {
      "epoch": 12.83,
      "learning_rate": 0.001,
      "loss": 2.6745,
      "step": 66828
    },
    {
      "epoch": 12.83,
      "learning_rate": 0.001,
      "loss": 2.6764,
      "step": 66840
    },
    {
      "epoch": 12.84,
      "learning_rate": 0.001,
      "loss": 2.6876,
      "step": 66852
    },
    {
      "epoch": 12.84,
      "learning_rate": 0.001,
      "loss": 2.6884,
      "step": 66864
    },
    {
      "epoch": 12.84,
      "eval_ag_news_accuracy": 0.30990625,
      "eval_ag_news_bleu_score": 4.591833444851182,
      "eval_ag_news_bleu_score_sem": 0.14933293000068457,
      "eval_ag_news_emb_cos_sim": 0.7915327548980713,
      "eval_ag_news_emb_cos_sim_sem": 0.007402045538081828,
      "eval_ag_news_emb_top1_equal": 0.21875,
      "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.6918368339538574,
      "eval_ag_news_n_ngrams_match_1": 13.414,
      "eval_ag_news_n_ngrams_match_2": 2.868,
      "eval_ag_news_n_ngrams_match_3": 0.854,
      "eval_ag_news_num_pred_words": 46.658,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 40.118470288071606,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3289175244151389,
      "eval_ag_news_runtime": 11.2562,
      "eval_ag_news_samples_per_second": 44.42,
      "eval_ag_news_steps_per_second": 0.089,
      "eval_ag_news_token_set_f1": 0.33717054342571146,
      "eval_ag_news_token_set_f1_sem": 0.004481826732358835,
      "eval_ag_news_token_set_precision": 0.31705917946290574,
      "eval_ag_news_token_set_recall": 0.37557800892346177,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 66875
    },
    {
      "epoch": 12.84,
      "eval_anthropic_toxic_prompts_accuracy": 0.10896875,
      "eval_anthropic_toxic_prompts_bleu_score": 2.837071483032518,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10972539959970126,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.649604320526123,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009455121261990721,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.3737759590148926,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.646,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.676,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.608,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.306,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 29.18853394422275,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.19521387073120994,
      "eval_anthropic_toxic_prompts_runtime": 9.5167,
      "eval_anthropic_toxic_prompts_samples_per_second": 52.539,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.105,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.33675572830619005,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006248050830522037,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.3964869716059104,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32680392516684714,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 66875
    },
    {
      "epoch": 12.84,
      "eval_arxiv_accuracy": 0.3318125,
      "eval_arxiv_bleu_score": 4.0579457856255505,
      "eval_arxiv_bleu_score_sem": 0.11436450405212448,
      "eval_arxiv_emb_cos_sim": 0.725619912147522,
      "eval_arxiv_emb_cos_sim_sem": 0.009693216711762858,
      "eval_arxiv_emb_top1_equal": 0.21875,
      "eval_arxiv_emb_top1_equal_sem": 0.03668319712192295,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.549726724624634,
      "eval_arxiv_n_ngrams_match_1": 14.132,
      "eval_arxiv_n_ngrams_match_2": 2.762,
      "eval_arxiv_n_ngrams_match_3": 0.608,
      "eval_arxiv_num_pred_words": 39.788,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 34.80380516499881,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3340143393950715,
      "eval_arxiv_runtime": 10.1627,
      "eval_arxiv_samples_per_second": 49.199,
      "eval_arxiv_steps_per_second": 0.098,
      "eval_arxiv_token_set_f1": 0.33056309315273186,
      "eval_arxiv_token_set_f1_sem": 0.0042826263102017665,
      "eval_arxiv_token_set_precision": 0.27711654656472556,
      "eval_arxiv_token_set_recall": 0.4354247871721328,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 66875
    },
    {
      "epoch": 12.84,
      "eval_python_code_alpaca_accuracy": 0.15496875,
      "eval_python_code_alpaca_bleu_score": 4.203211920627831,
      "eval_python_code_alpaca_bleu_score_sem": 0.1376860840598939,
      "eval_python_code_alpaca_emb_cos_sim": 0.7402169704437256,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008571539583284654,
      "eval_python_code_alpaca_emb_top1_equal": 0.140625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.9985663890838623,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.232,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.598,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.82,
      "eval_python_code_alpaca_num_pred_words": 43.698,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 20.0567627086403,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.30921974570170274,
      "eval_python_code_alpaca_runtime": 9.3537,
      "eval_python_code_alpaca_samples_per_second": 53.455,
      "eval_python_code_alpaca_steps_per_second": 0.107,
      "eval_python_code_alpaca_token_set_f1": 0.4596053739933328,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005531879858469444,
      "eval_python_code_alpaca_token_set_precision": 0.5029333867277892,
      "eval_python_code_alpaca_token_set_recall": 0.4452142632244949,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 66875
    },
    {
      "epoch": 12.84,
      "eval_wikibio_accuracy": 0.31234375,
      "eval_wikibio_bleu_score": 5.632277098470768,
      "eval_wikibio_bleu_score_sem": 0.19903619332642364,
      "eval_wikibio_emb_cos_sim": 0.7091946601867676,
      "eval_wikibio_emb_cos_sim_sem": 0.011196256306269345,
      "eval_wikibio_emb_top1_equal": 0.140625,
      "eval_wikibio_emb_top1_equal_sem": 0.030847557647994725,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.8549296855926514,
      "eval_wikibio_n_ngrams_match_1": 9.93,
      "eval_wikibio_n_ngrams_match_2": 3.22,
      "eval_wikibio_n_ngrams_match_3": 1.116,
      "eval_wikibio_num_pred_words": 36.456,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 47.22529620581375,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.34000471228423174,
      "eval_wikibio_runtime": 9.3876,
      "eval_wikibio_samples_per_second": 53.262,
      "eval_wikibio_steps_per_second": 0.107,
      "eval_wikibio_token_set_f1": 0.31407373977693276,
      "eval_wikibio_token_set_f1_sem": 0.005409696911181756,
      "eval_wikibio_token_set_precision": 0.32044028303480515,
      "eval_wikibio_token_set_recall": 0.32712047689466467,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 66875
    },
    {
      "epoch": 12.84,
      "eval_nq_accuracy": 0.51165625,
      "eval_nq_bleu_score": 11.062017085381326,
      "eval_nq_bleu_score_sem": 0.46683574358760765,
      "eval_nq_emb_cos_sim": 0.8149175047874451,
      "eval_nq_emb_cos_sim_sem": 0.007250901670965698,
      "eval_nq_emb_top1_equal": 0.25,
      "eval_nq_emb_top1_equal_sem": 0.03842366440207048,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.3130195140838623,
      "eval_nq_n_ngrams_match_1": 22.3,
      "eval_nq_n_ngrams_match_2": 7.954,
      "eval_nq_n_ngrams_match_3": 3.638,
      "eval_nq_num_pred_words": 48.986,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 10.10489049501509,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4281655217571667,
      "eval_nq_runtime": 10.1168,
      "eval_nq_samples_per_second": 49.423,
      "eval_nq_steps_per_second": 0.099,
      "eval_nq_token_set_f1": 0.44807086562864745,
      "eval_nq_token_set_f1_sem": 0.004996636239231897,
      "eval_nq_token_set_precision": 0.40002648637197,
      "eval_nq_token_set_recall": 0.5194673376425163,
      "eval_nq_true_num_tokens": 64.0,
      "step": 66875
    },
    {
      "epoch": 12.84,
      "learning_rate": 0.001,
      "loss": 2.6864,
      "step": 66876
    },
    {
      "epoch": 12.84,
      "learning_rate": 0.001,
      "loss": 2.6706,
      "step": 66888
    },
    {
      "epoch": 12.85,
      "learning_rate": 0.001,
      "loss": 2.6789,
      "step": 66900
    },
    {
      "epoch": 12.85,
      "learning_rate": 0.001,
      "loss": 2.6782,
      "step": 66912
    },
    {
      "epoch": 12.85,
      "learning_rate": 0.001,
      "loss": 2.6924,
      "step": 66924
    },
    {
      "epoch": 12.85,
      "learning_rate": 0.001,
      "loss": 2.6715,
      "step": 66936
    },
    {
      "epoch": 12.85,
      "learning_rate": 0.001,
      "loss": 2.6778,
      "step": 66948
    },
    {
      "epoch": 12.86,
      "learning_rate": 0.001,
      "loss": 2.671,
      "step": 66960
    },
    {
      "epoch": 12.86,
      "learning_rate": 0.001,
      "loss": 2.6823,
      "step": 66972
    },
    {
      "epoch": 12.86,
      "learning_rate": 0.001,
      "loss": 2.6806,
      "step": 66984
    },
    {
      "epoch": 12.86,
      "learning_rate": 0.001,
      "loss": 2.6845,
      "step": 66996
    },
    {
      "epoch": 12.87,
      "learning_rate": 0.001,
      "loss": 2.6824,
      "step": 67008
    },
    {
      "epoch": 12.87,
      "learning_rate": 0.001,
      "loss": 2.6784,
      "step": 67020
    },
    {
      "epoch": 12.87,
      "learning_rate": 0.001,
      "loss": 2.6751,
      "step": 67032
    },
    {
      "epoch": 12.87,
      "learning_rate": 0.001,
      "loss": 2.6705,
      "step": 67044
    },
    {
      "epoch": 12.88,
      "learning_rate": 0.001,
      "loss": 2.6841,
      "step": 67056
    },
    {
      "epoch": 12.88,
      "learning_rate": 0.001,
      "loss": 2.6756,
      "step": 67068
    },
    {
      "epoch": 12.88,
      "learning_rate": 0.001,
      "loss": 2.6812,
      "step": 67080
    },
    {
      "epoch": 12.88,
      "learning_rate": 0.001,
      "loss": 2.6815,
      "step": 67092
    },
    {
      "epoch": 12.88,
      "learning_rate": 0.001,
      "loss": 2.6835,
      "step": 67104
    },
    {
      "epoch": 12.89,
      "learning_rate": 0.001,
      "loss": 2.6718,
      "step": 67116
    },
    {
      "epoch": 12.89,
      "learning_rate": 0.001,
      "loss": 2.6845,
      "step": 67128
    },
    {
      "epoch": 12.89,
      "learning_rate": 0.001,
      "loss": 2.6866,
      "step": 67140
    },
    {
      "epoch": 12.89,
      "learning_rate": 0.001,
      "loss": 2.6847,
      "step": 67152
    },
    {
      "epoch": 12.9,
      "learning_rate": 0.001,
      "loss": 2.6763,
      "step": 67164
    },
    {
      "epoch": 12.9,
      "learning_rate": 0.001,
      "loss": 2.68,
      "step": 67176
    },
    {
      "epoch": 12.9,
      "learning_rate": 0.001,
      "loss": 2.6935,
      "step": 67188
    },
    {
      "epoch": 12.9,
      "learning_rate": 0.001,
      "loss": 2.6817,
      "step": 67200
    },
    {
      "epoch": 12.91,
      "learning_rate": 0.001,
      "loss": 2.6757,
      "step": 67212
    },
    {
      "epoch": 12.91,
      "learning_rate": 0.001,
      "loss": 2.6912,
      "step": 67224
    },
    {
      "epoch": 12.91,
      "learning_rate": 0.001,
      "loss": 2.6834,
      "step": 67236
    },
    {
      "epoch": 12.91,
      "learning_rate": 0.001,
      "loss": 2.6749,
      "step": 67248
    },
    {
      "epoch": 12.91,
      "learning_rate": 0.001,
      "loss": 2.6718,
      "step": 67260
    },
    {
      "epoch": 12.92,
      "learning_rate": 0.001,
      "loss": 2.6803,
      "step": 67272
    },
    {
      "epoch": 12.92,
      "learning_rate": 0.001,
      "loss": 2.6856,
      "step": 67284
    },
    {
      "epoch": 12.92,
      "learning_rate": 0.001,
      "loss": 2.6763,
      "step": 67296
    },
    {
      "epoch": 12.92,
      "learning_rate": 0.001,
      "loss": 2.6815,
      "step": 67308
    },
    {
      "epoch": 12.93,
      "learning_rate": 0.001,
      "loss": 2.6859,
      "step": 67320
    },
    {
      "epoch": 12.93,
      "learning_rate": 0.001,
      "loss": 2.6765,
      "step": 67332
    },
    {
      "epoch": 12.93,
      "learning_rate": 0.001,
      "loss": 2.6863,
      "step": 67344
    },
    {
      "epoch": 12.93,
      "learning_rate": 0.001,
      "loss": 2.6805,
      "step": 67356
    },
    {
      "epoch": 12.94,
      "learning_rate": 0.001,
      "loss": 2.6925,
      "step": 67368
    },
    {
      "epoch": 12.94,
      "learning_rate": 0.001,
      "loss": 2.6823,
      "step": 67380
    },
    {
      "epoch": 12.94,
      "learning_rate": 0.001,
      "loss": 2.6786,
      "step": 67392
    },
    {
      "epoch": 12.94,
      "learning_rate": 0.001,
      "loss": 2.6854,
      "step": 67404
    },
    {
      "epoch": 12.94,
      "learning_rate": 0.001,
      "loss": 2.6788,
      "step": 67416
    },
    {
      "epoch": 12.95,
      "learning_rate": 0.001,
      "loss": 2.6905,
      "step": 67428
    },
    {
      "epoch": 12.95,
      "learning_rate": 0.001,
      "loss": 2.6842,
      "step": 67440
    },
    {
      "epoch": 12.95,
      "learning_rate": 0.001,
      "loss": 2.6868,
      "step": 67452
    },
    {
      "epoch": 12.95,
      "learning_rate": 0.001,
      "loss": 2.6771,
      "step": 67464
    },
    {
      "epoch": 12.96,
      "learning_rate": 0.001,
      "loss": 2.6762,
      "step": 67476
    },
    {
      "epoch": 12.96,
      "learning_rate": 0.001,
      "loss": 2.6791,
      "step": 67488
    },
    {
      "epoch": 12.96,
      "learning_rate": 0.001,
      "loss": 2.6745,
      "step": 67500
    },
    {
      "epoch": 12.96,
      "eval_ag_news_accuracy": 0.31034375,
      "eval_ag_news_bleu_score": 4.9097837775554565,
      "eval_ag_news_bleu_score_sem": 0.17102450069994085,
      "eval_ag_news_emb_cos_sim": 0.7790735960006714,
      "eval_ag_news_emb_cos_sim_sem": 0.00912392671658056,
      "eval_ag_news_emb_top1_equal": 0.265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.695110559463501,
      "eval_ag_news_n_ngrams_match_1": 13.518,
      "eval_ag_news_n_ngrams_match_2": 3.026,
      "eval_ag_news_n_ngrams_match_3": 0.934,
      "eval_ag_news_num_pred_words": 46.508,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 40.25002236286309,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3329665298968007,
      "eval_ag_news_runtime": 9.8951,
      "eval_ag_news_samples_per_second": 50.53,
      "eval_ag_news_steps_per_second": 0.101,
      "eval_ag_news_token_set_f1": 0.3412295947698137,
      "eval_ag_news_token_set_f1_sem": 0.00443394131806274,
      "eval_ag_news_token_set_precision": 0.3225903596697039,
      "eval_ag_news_token_set_recall": 0.37946670280096373,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 67500
    },
    {
      "epoch": 12.96,
      "eval_anthropic_toxic_prompts_accuracy": 0.108625,
      "eval_anthropic_toxic_prompts_bleu_score": 2.8250688312923695,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10556560110337446,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6539926528930664,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00944554009794408,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.400261163711548,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.864,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.726,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.574,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.578,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 29.971926604940506,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.1998104341574898,
      "eval_anthropic_toxic_prompts_runtime": 10.228,
      "eval_anthropic_toxic_prompts_samples_per_second": 48.885,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.098,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3479491034903623,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006443801240407627,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.41329762194510244,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3311717031414803,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 67500
    },
    {
      "epoch": 12.96,
      "eval_arxiv_accuracy": 0.33596875,
      "eval_arxiv_bleu_score": 3.9865242934319967,
      "eval_arxiv_bleu_score_sem": 0.11830888829685494,
      "eval_arxiv_emb_cos_sim": 0.7328368425369263,
      "eval_arxiv_emb_cos_sim_sem": 0.007983401385739384,
      "eval_arxiv_emb_top1_equal": 0.125,
      "eval_arxiv_emb_top1_equal_sem": 0.02934655822437397,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.544830083847046,
      "eval_arxiv_n_ngrams_match_1": 14.01,
      "eval_arxiv_n_ngrams_match_2": 2.712,
      "eval_arxiv_n_ngrams_match_3": 0.59,
      "eval_arxiv_num_pred_words": 39.814,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 34.633800000209455,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.33420571012969413,
      "eval_arxiv_runtime": 9.7206,
      "eval_arxiv_samples_per_second": 51.437,
      "eval_arxiv_steps_per_second": 0.103,
      "eval_arxiv_token_set_f1": 0.32996026233340675,
      "eval_arxiv_token_set_f1_sem": 0.004316633847777857,
      "eval_arxiv_token_set_precision": 0.2782812022263219,
      "eval_arxiv_token_set_recall": 0.4296221216268673,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 67500
    },
    {
      "epoch": 12.96,
      "eval_python_code_alpaca_accuracy": 0.1529375,
      "eval_python_code_alpaca_bleu_score": 4.071551624261715,
      "eval_python_code_alpaca_bleu_score_sem": 0.1336623014443639,
      "eval_python_code_alpaca_emb_cos_sim": 0.7235852479934692,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010651427288291557,
      "eval_python_code_alpaca_emb_top1_equal": 0.1171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.029174327850342,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.048,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.522,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.804,
      "eval_python_code_alpaca_num_pred_words": 44.054,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 20.68015051393288,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.2987197686221755,
      "eval_python_code_alpaca_runtime": 9.5597,
      "eval_python_code_alpaca_samples_per_second": 52.303,
      "eval_python_code_alpaca_steps_per_second": 0.105,
      "eval_python_code_alpaca_token_set_f1": 0.45357819135085164,
      "eval_python_code_alpaca_token_set_f1_sem": 0.00574702628741405,
      "eval_python_code_alpaca_token_set_precision": 0.489936989337317,
      "eval_python_code_alpaca_token_set_recall": 0.44687850692105435,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 67500
    },
    {
      "epoch": 12.96,
      "eval_wikibio_accuracy": 0.30984375,
      "eval_wikibio_bleu_score": 5.2291804459236655,
      "eval_wikibio_bleu_score_sem": 0.18658081950377017,
      "eval_wikibio_emb_cos_sim": 0.7154478430747986,
      "eval_wikibio_emb_cos_sim_sem": 0.00980031119528453,
      "eval_wikibio_emb_top1_equal": 0.21875,
      "eval_wikibio_emb_top1_equal_sem": 0.03668319712192295,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.892882823944092,
      "eval_wikibio_n_ngrams_match_1": 9.324,
      "eval_wikibio_n_ngrams_match_2": 3.004,
      "eval_wikibio_n_ngrams_match_3": 1.032,
      "eval_wikibio_num_pred_words": 35.456,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 49.052091435186824,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3233041867106038,
      "eval_wikibio_runtime": 9.5184,
      "eval_wikibio_samples_per_second": 52.53,
      "eval_wikibio_steps_per_second": 0.105,
      "eval_wikibio_token_set_f1": 0.29772587721299637,
      "eval_wikibio_token_set_f1_sem": 0.005766830164159475,
      "eval_wikibio_token_set_precision": 0.30238157146024425,
      "eval_wikibio_token_set_recall": 0.3147604126563878,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 67500
    },
    {
      "epoch": 12.96,
      "eval_nq_accuracy": 0.51184375,
      "eval_nq_bleu_score": 11.119146236730009,
      "eval_nq_bleu_score_sem": 0.4754835950562762,
      "eval_nq_emb_cos_sim": 0.8124707341194153,
      "eval_nq_emb_cos_sim_sem": 0.00822658343329118,
      "eval_nq_emb_top1_equal": 0.3203125,
      "eval_nq_emb_top1_equal_sem": 0.041403754790620424,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.3081138134002686,
      "eval_nq_n_ngrams_match_1": 22.21,
      "eval_nq_n_ngrams_match_2": 8.04,
      "eval_nq_n_ngrams_match_3": 3.716,
      "eval_nq_num_pred_words": 48.796,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 10.055440319856965,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.42727853031246865,
      "eval_nq_runtime": 9.8735,
      "eval_nq_samples_per_second": 50.641,
      "eval_nq_steps_per_second": 0.101,
      "eval_nq_token_set_f1": 0.44714704357924,
      "eval_nq_token_set_f1_sem": 0.005114017566815793,
      "eval_nq_token_set_precision": 0.4008724627693947,
      "eval_nq_token_set_recall": 0.5164713116086179,
      "eval_nq_true_num_tokens": 64.0,
      "step": 67500
    },
    {
      "epoch": 12.96,
      "learning_rate": 0.001,
      "loss": 2.6721,
      "step": 67512
    },
    {
      "epoch": 12.97,
      "learning_rate": 0.001,
      "loss": 2.674,
      "step": 67524
    },
    {
      "epoch": 12.97,
      "learning_rate": 0.001,
      "loss": 2.6899,
      "step": 67536
    },
    {
      "epoch": 12.97,
      "learning_rate": 0.001,
      "loss": 2.6842,
      "step": 67548
    },
    {
      "epoch": 12.97,
      "learning_rate": 0.001,
      "loss": 2.6837,
      "step": 67560
    },
    {
      "epoch": 12.97,
      "learning_rate": 0.001,
      "loss": 2.6807,
      "step": 67572
    },
    {
      "epoch": 12.98,
      "learning_rate": 0.001,
      "loss": 2.6744,
      "step": 67584
    },
    {
      "epoch": 12.98,
      "learning_rate": 0.001,
      "loss": 2.6846,
      "step": 67596
    },
    {
      "epoch": 12.98,
      "learning_rate": 0.001,
      "loss": 2.6828,
      "step": 67608
    },
    {
      "epoch": 12.98,
      "learning_rate": 0.001,
      "loss": 2.6809,
      "step": 67620
    },
    {
      "epoch": 12.99,
      "learning_rate": 0.001,
      "loss": 2.6746,
      "step": 67632
    },
    {
      "epoch": 12.99,
      "learning_rate": 0.001,
      "loss": 2.6822,
      "step": 67644
    },
    {
      "epoch": 12.99,
      "learning_rate": 0.001,
      "loss": 2.6836,
      "step": 67656
    },
    {
      "epoch": 12.99,
      "learning_rate": 0.001,
      "loss": 2.6772,
      "step": 67668
    },
    {
      "epoch": 13.0,
      "learning_rate": 0.001,
      "loss": 2.6781,
      "step": 67680
    },
    {
      "epoch": 13.0,
      "learning_rate": 0.001,
      "loss": 2.6709,
      "step": 67692
    },
    {
      "epoch": 13.0,
      "learning_rate": 0.001,
      "loss": 2.6831,
      "step": 67704
    },
    {
      "epoch": 13.0,
      "learning_rate": 0.001,
      "loss": 2.6599,
      "step": 67716
    },
    {
      "epoch": 13.0,
      "learning_rate": 0.001,
      "loss": 2.6605,
      "step": 67728
    },
    {
      "epoch": 13.01,
      "learning_rate": 0.001,
      "loss": 2.6638,
      "step": 67740
    },
    {
      "epoch": 13.01,
      "learning_rate": 0.001,
      "loss": 2.6605,
      "step": 67752
    },
    {
      "epoch": 13.01,
      "learning_rate": 0.001,
      "loss": 2.6564,
      "step": 67764
    },
    {
      "epoch": 13.01,
      "learning_rate": 0.001,
      "loss": 2.6639,
      "step": 67776
    },
    {
      "epoch": 13.02,
      "learning_rate": 0.001,
      "loss": 2.665,
      "step": 67788
    },
    {
      "epoch": 13.02,
      "learning_rate": 0.001,
      "loss": 2.663,
      "step": 67800
    },
    {
      "epoch": 13.02,
      "learning_rate": 0.001,
      "loss": 2.6563,
      "step": 67812
    },
    {
      "epoch": 13.02,
      "learning_rate": 0.001,
      "loss": 2.6696,
      "step": 67824
    },
    {
      "epoch": 13.03,
      "learning_rate": 0.001,
      "loss": 2.6653,
      "step": 67836
    },
    {
      "epoch": 13.03,
      "learning_rate": 0.001,
      "loss": 2.6685,
      "step": 67848
    },
    {
      "epoch": 13.03,
      "learning_rate": 0.001,
      "loss": 2.6593,
      "step": 67860
    },
    {
      "epoch": 13.03,
      "learning_rate": 0.001,
      "loss": 2.6686,
      "step": 67872
    },
    {
      "epoch": 13.03,
      "learning_rate": 0.001,
      "loss": 2.6686,
      "step": 67884
    },
    {
      "epoch": 13.04,
      "learning_rate": 0.001,
      "loss": 2.6651,
      "step": 67896
    },
    {
      "epoch": 13.04,
      "learning_rate": 0.001,
      "loss": 2.6659,
      "step": 67908
    },
    {
      "epoch": 13.04,
      "learning_rate": 0.001,
      "loss": 2.6617,
      "step": 67920
    },
    {
      "epoch": 13.04,
      "learning_rate": 0.001,
      "loss": 2.6548,
      "step": 67932
    },
    {
      "epoch": 13.05,
      "learning_rate": 0.001,
      "loss": 2.672,
      "step": 67944
    },
    {
      "epoch": 13.05,
      "learning_rate": 0.001,
      "loss": 2.6747,
      "step": 67956
    },
    {
      "epoch": 13.05,
      "learning_rate": 0.001,
      "loss": 2.6748,
      "step": 67968
    },
    {
      "epoch": 13.05,
      "learning_rate": 0.001,
      "loss": 2.6701,
      "step": 67980
    },
    {
      "epoch": 13.06,
      "learning_rate": 0.001,
      "loss": 2.673,
      "step": 67992
    },
    {
      "epoch": 13.06,
      "learning_rate": 0.001,
      "loss": 2.6646,
      "step": 68004
    },
    {
      "epoch": 13.06,
      "learning_rate": 0.001,
      "loss": 2.6697,
      "step": 68016
    },
    {
      "epoch": 13.06,
      "learning_rate": 0.001,
      "loss": 2.6748,
      "step": 68028
    },
    {
      "epoch": 13.06,
      "learning_rate": 0.001,
      "loss": 2.6532,
      "step": 68040
    },
    {
      "epoch": 13.07,
      "learning_rate": 0.001,
      "loss": 2.6662,
      "step": 68052
    },
    {
      "epoch": 13.07,
      "learning_rate": 0.001,
      "loss": 2.6549,
      "step": 68064
    },
    {
      "epoch": 13.07,
      "learning_rate": 0.001,
      "loss": 2.6803,
      "step": 68076
    },
    {
      "epoch": 13.07,
      "learning_rate": 0.001,
      "loss": 2.6693,
      "step": 68088
    },
    {
      "epoch": 13.08,
      "learning_rate": 0.001,
      "loss": 2.6576,
      "step": 68100
    },
    {
      "epoch": 13.08,
      "learning_rate": 0.001,
      "loss": 2.6649,
      "step": 68112
    },
    {
      "epoch": 13.08,
      "learning_rate": 0.001,
      "loss": 2.6607,
      "step": 68124
    },
    {
      "epoch": 13.08,
      "eval_ag_news_accuracy": 0.309625,
      "eval_ag_news_bleu_score": 4.7448037431196735,
      "eval_ag_news_bleu_score_sem": 0.160589362417903,
      "eval_ag_news_emb_cos_sim": 0.7907446026802063,
      "eval_ag_news_emb_cos_sim_sem": 0.007665262485199981,
      "eval_ag_news_emb_top1_equal": 0.203125,
      "eval_ag_news_emb_top1_equal_sem": 0.03570055125142555,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.701359748840332,
      "eval_ag_news_n_ngrams_match_1": 13.568,
      "eval_ag_news_n_ngrams_match_2": 2.956,
      "eval_ag_news_n_ngrams_match_3": 0.882,
      "eval_ag_news_num_pred_words": 46.826,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 40.50233994407163,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3312926724207253,
      "eval_ag_news_runtime": 9.7446,
      "eval_ag_news_samples_per_second": 51.311,
      "eval_ag_news_steps_per_second": 0.103,
      "eval_ag_news_token_set_f1": 0.33965231379733674,
      "eval_ag_news_token_set_f1_sem": 0.004385185917675816,
      "eval_ag_news_token_set_precision": 0.3213535078222124,
      "eval_ag_news_token_set_recall": 0.37530165885678113,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 68125
    },
    {
      "epoch": 13.08,
      "eval_anthropic_toxic_prompts_accuracy": 0.10921875,
      "eval_anthropic_toxic_prompts_bleu_score": 2.8766572105496078,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12002093063810151,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6464630365371704,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009594136714903334,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.3689913749694824,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.648,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.682,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.582,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.502,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 29.04921251373265,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.19510746121215689,
      "eval_anthropic_toxic_prompts_runtime": 10.6897,
      "eval_anthropic_toxic_prompts_samples_per_second": 46.774,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.094,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.33532634323936267,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006168976290780714,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.40212052115917796,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32078323402372705,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 68125
    },
    {
      "epoch": 13.08,
      "eval_arxiv_accuracy": 0.33575,
      "eval_arxiv_bleu_score": 4.04035494667653,
      "eval_arxiv_bleu_score_sem": 0.11102423848290659,
      "eval_arxiv_emb_cos_sim": 0.7298687100410461,
      "eval_arxiv_emb_cos_sim_sem": 0.009560354698732836,
      "eval_arxiv_emb_top1_equal": 0.234375,
      "eval_arxiv_emb_top1_equal_sem": 0.03758909358128201,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.5450620651245117,
      "eval_arxiv_n_ngrams_match_1": 14.162,
      "eval_arxiv_n_ngrams_match_2": 2.762,
      "eval_arxiv_n_ngrams_match_3": 0.596,
      "eval_arxiv_num_pred_words": 40.256,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 34.64183532536346,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3356571932925493,
      "eval_arxiv_runtime": 10.9475,
      "eval_arxiv_samples_per_second": 45.672,
      "eval_arxiv_steps_per_second": 0.091,
      "eval_arxiv_token_set_f1": 0.3320950244764943,
      "eval_arxiv_token_set_f1_sem": 0.004129027109094983,
      "eval_arxiv_token_set_precision": 0.2800544516277869,
      "eval_arxiv_token_set_recall": 0.4308921783799877,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 68125
    },
    {
      "epoch": 13.08,
      "eval_python_code_alpaca_accuracy": 0.1523125,
      "eval_python_code_alpaca_bleu_score": 3.807709214154026,
      "eval_python_code_alpaca_bleu_score_sem": 0.11969278969621434,
      "eval_python_code_alpaca_emb_cos_sim": 0.7322779297828674,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008263473779539355,
      "eval_python_code_alpaca_emb_top1_equal": 0.140625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.0557587146759033,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.094,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.458,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.742,
      "eval_python_code_alpaca_num_pred_words": 45.214,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 21.237292461322472,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.29347032422015207,
      "eval_python_code_alpaca_runtime": 10.2936,
      "eval_python_code_alpaca_samples_per_second": 48.574,
      "eval_python_code_alpaca_steps_per_second": 0.097,
      "eval_python_code_alpaca_token_set_f1": 0.4547427611928941,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0058399147795201055,
      "eval_python_code_alpaca_token_set_precision": 0.48975050999984454,
      "eval_python_code_alpaca_token_set_recall": 0.44880822517963664,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 68125
    },
    {
      "epoch": 13.08,
      "eval_wikibio_accuracy": 0.30815625,
      "eval_wikibio_bleu_score": 5.54789595361968,
      "eval_wikibio_bleu_score_sem": 0.20018210865217148,
      "eval_wikibio_emb_cos_sim": 0.7136615514755249,
      "eval_wikibio_emb_cos_sim_sem": 0.009998222259179563,
      "eval_wikibio_emb_top1_equal": 0.15625,
      "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.8820266723632812,
      "eval_wikibio_n_ngrams_match_1": 9.576,
      "eval_wikibio_n_ngrams_match_2": 3.124,
      "eval_wikibio_n_ngrams_match_3": 1.126,
      "eval_wikibio_num_pred_words": 35.922,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 48.522454605792824,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.33374879998906337,
      "eval_wikibio_runtime": 9.7629,
      "eval_wikibio_samples_per_second": 51.214,
      "eval_wikibio_steps_per_second": 0.102,
      "eval_wikibio_token_set_f1": 0.30359156176689833,
      "eval_wikibio_token_set_f1_sem": 0.005773896479971032,
      "eval_wikibio_token_set_precision": 0.3112087128395294,
      "eval_wikibio_token_set_recall": 0.31372348842232456,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 68125
    },
    {
      "epoch": 13.08,
      "eval_nq_accuracy": 0.51140625,
      "eval_nq_bleu_score": 10.867103186483988,
      "eval_nq_bleu_score_sem": 0.4541071049699152,
      "eval_nq_emb_cos_sim": 0.821334958076477,
      "eval_nq_emb_cos_sim_sem": 0.007155670208210825,
      "eval_nq_emb_top1_equal": 0.25,
      "eval_nq_emb_top1_equal_sem": 0.03842366440207048,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.3038601875305176,
      "eval_nq_n_ngrams_match_1": 22.416,
      "eval_nq_n_ngrams_match_2": 7.946,
      "eval_nq_n_ngrams_match_3": 3.558,
      "eval_nq_num_pred_words": 49.132,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 10.012759078151428,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.43032634348292514,
      "eval_nq_runtime": 10.1803,
      "eval_nq_samples_per_second": 49.115,
      "eval_nq_steps_per_second": 0.098,
      "eval_nq_token_set_f1": 0.448369981182756,
      "eval_nq_token_set_f1_sem": 0.004900509370407529,
      "eval_nq_token_set_precision": 0.40386325250842453,
      "eval_nq_token_set_recall": 0.5126012702131447,
      "eval_nq_true_num_tokens": 64.0,
      "step": 68125
    },
    {
      "epoch": 13.08,
      "learning_rate": 0.001,
      "loss": 2.6699,
      "step": 68136
    },
    {
      "epoch": 13.09,
      "learning_rate": 0.001,
      "loss": 2.6603,
      "step": 68148
    },
    {
      "epoch": 13.09,
      "learning_rate": 0.001,
      "loss": 2.6621,
      "step": 68160
    },
    {
      "epoch": 13.09,
      "learning_rate": 0.001,
      "loss": 2.6612,
      "step": 68172
    },
    {
      "epoch": 13.09,
      "learning_rate": 0.001,
      "loss": 2.6615,
      "step": 68184
    },
    {
      "epoch": 13.09,
      "learning_rate": 0.001,
      "loss": 2.6636,
      "step": 68196
    },
    {
      "epoch": 13.1,
      "learning_rate": 0.001,
      "loss": 2.6679,
      "step": 68208
    },
    {
      "epoch": 13.1,
      "learning_rate": 0.001,
      "loss": 2.6694,
      "step": 68220
    },
    {
      "epoch": 13.1,
      "learning_rate": 0.001,
      "loss": 2.6671,
      "step": 68232
    },
    {
      "epoch": 13.1,
      "learning_rate": 0.001,
      "loss": 2.668,
      "step": 68244
    },
    {
      "epoch": 13.11,
      "learning_rate": 0.001,
      "loss": 2.67,
      "step": 68256
    },
    {
      "epoch": 13.11,
      "learning_rate": 0.001,
      "loss": 2.6634,
      "step": 68268
    },
    {
      "epoch": 13.11,
      "learning_rate": 0.001,
      "loss": 2.6695,
      "step": 68280
    },
    {
      "epoch": 13.11,
      "learning_rate": 0.001,
      "loss": 2.6688,
      "step": 68292
    },
    {
      "epoch": 13.12,
      "learning_rate": 0.001,
      "loss": 2.6745,
      "step": 68304
    },
    {
      "epoch": 13.12,
      "learning_rate": 0.001,
      "loss": 2.6649,
      "step": 68316
    },
    {
      "epoch": 13.12,
      "learning_rate": 0.001,
      "loss": 2.6722,
      "step": 68328
    },
    {
      "epoch": 13.12,
      "learning_rate": 0.001,
      "loss": 2.6601,
      "step": 68340
    },
    {
      "epoch": 13.12,
      "learning_rate": 0.001,
      "loss": 2.6636,
      "step": 68352
    },
    {
      "epoch": 13.13,
      "learning_rate": 0.001,
      "loss": 2.6732,
      "step": 68364
    },
    {
      "epoch": 13.13,
      "learning_rate": 0.001,
      "loss": 2.6771,
      "step": 68376
    },
    {
      "epoch": 13.13,
      "learning_rate": 0.001,
      "loss": 2.6675,
      "step": 68388
    },
    {
      "epoch": 13.13,
      "learning_rate": 0.001,
      "loss": 2.668,
      "step": 68400
    },
    {
      "epoch": 13.14,
      "learning_rate": 0.001,
      "loss": 2.6699,
      "step": 68412
    },
    {
      "epoch": 13.14,
      "learning_rate": 0.001,
      "loss": 2.6767,
      "step": 68424
    },
    {
      "epoch": 13.14,
      "learning_rate": 0.001,
      "loss": 2.6762,
      "step": 68436
    },
    {
      "epoch": 13.14,
      "learning_rate": 0.001,
      "loss": 2.6668,
      "step": 68448
    },
    {
      "epoch": 13.15,
      "learning_rate": 0.001,
      "loss": 2.6665,
      "step": 68460
    },
    {
      "epoch": 13.15,
      "learning_rate": 0.001,
      "loss": 2.6708,
      "step": 68472
    },
    {
      "epoch": 13.15,
      "learning_rate": 0.001,
      "loss": 2.6719,
      "step": 68484
    },
    {
      "epoch": 13.15,
      "learning_rate": 0.001,
      "loss": 2.6707,
      "step": 68496
    },
    {
      "epoch": 13.15,
      "learning_rate": 0.001,
      "loss": 2.6661,
      "step": 68508
    },
    {
      "epoch": 13.16,
      "learning_rate": 0.001,
      "loss": 2.6641,
      "step": 68520
    },
    {
      "epoch": 13.16,
      "learning_rate": 0.001,
      "loss": 2.6714,
      "step": 68532
    },
    {
      "epoch": 13.16,
      "learning_rate": 0.001,
      "loss": 2.6817,
      "step": 68544
    },
    {
      "epoch": 13.16,
      "learning_rate": 0.001,
      "loss": 2.6605,
      "step": 68556
    },
    {
      "epoch": 13.17,
      "learning_rate": 0.001,
      "loss": 2.6603,
      "step": 68568
    },
    {
      "epoch": 13.17,
      "learning_rate": 0.001,
      "loss": 2.6605,
      "step": 68580
    },
    {
      "epoch": 13.17,
      "learning_rate": 0.001,
      "loss": 2.6548,
      "step": 68592
    },
    {
      "epoch": 13.17,
      "learning_rate": 0.001,
      "loss": 2.6625,
      "step": 68604
    },
    {
      "epoch": 13.18,
      "learning_rate": 0.001,
      "loss": 2.668,
      "step": 68616
    },
    {
      "epoch": 13.18,
      "learning_rate": 0.001,
      "loss": 2.6748,
      "step": 68628
    },
    {
      "epoch": 13.18,
      "learning_rate": 0.001,
      "loss": 2.6626,
      "step": 68640
    },
    {
      "epoch": 13.18,
      "learning_rate": 0.001,
      "loss": 2.6631,
      "step": 68652
    },
    {
      "epoch": 13.18,
      "learning_rate": 0.001,
      "loss": 2.6686,
      "step": 68664
    },
    {
      "epoch": 13.19,
      "learning_rate": 0.001,
      "loss": 2.6647,
      "step": 68676
    },
    {
      "epoch": 13.19,
      "learning_rate": 0.001,
      "loss": 2.6649,
      "step": 68688
    },
    {
      "epoch": 13.19,
      "learning_rate": 0.001,
      "loss": 2.6665,
      "step": 68700
    },
    {
      "epoch": 13.19,
      "learning_rate": 0.001,
      "loss": 2.6709,
      "step": 68712
    },
    {
      "epoch": 13.2,
      "learning_rate": 0.001,
      "loss": 2.6636,
      "step": 68724
    },
    {
      "epoch": 13.2,
      "learning_rate": 0.001,
      "loss": 2.6579,
      "step": 68736
    },
    {
      "epoch": 13.2,
      "learning_rate": 0.001,
      "loss": 2.6651,
      "step": 68748
    },
    {
      "epoch": 13.2,
      "eval_ag_news_accuracy": 0.310625,
      "eval_ag_news_bleu_score": 4.6155978527157675,
      "eval_ag_news_bleu_score_sem": 0.1552693501920158,
      "eval_ag_news_emb_cos_sim": 0.7810306549072266,
      "eval_ag_news_emb_cos_sim_sem": 0.008831619882911286,
      "eval_ag_news_emb_top1_equal": 0.2265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03714537682851538,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.696045160293579,
      "eval_ag_news_n_ngrams_match_1": 13.328,
      "eval_ag_news_n_ngrams_match_2": 2.858,
      "eval_ag_news_n_ngrams_match_3": 0.83,
      "eval_ag_news_num_pred_words": 46.072,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 40.28765765142058,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3283812898810733,
      "eval_ag_news_runtime": 9.9896,
      "eval_ag_news_samples_per_second": 50.052,
      "eval_ag_news_steps_per_second": 0.1,
      "eval_ag_news_token_set_f1": 0.3341037532538496,
      "eval_ag_news_token_set_f1_sem": 0.004429807422104174,
      "eval_ag_news_token_set_precision": 0.3158248782973446,
      "eval_ag_news_token_set_recall": 0.3715378578222612,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 68750
    },
    {
      "epoch": 13.2,
      "eval_anthropic_toxic_prompts_accuracy": 0.10865625,
      "eval_anthropic_toxic_prompts_bleu_score": 2.8600628603223934,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11390856853425778,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6436045169830322,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011180452555166774,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.362196207046509,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.908,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.74,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.62,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.882,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 28.852487383787974,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20083028314594234,
      "eval_anthropic_toxic_prompts_runtime": 10.4342,
      "eval_anthropic_toxic_prompts_samples_per_second": 47.919,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.096,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3457827278665309,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006685697604567867,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4139586110775843,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3258544196661757,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 68750
    },
    {
      "epoch": 13.2,
      "eval_arxiv_accuracy": 0.3361875,
      "eval_arxiv_bleu_score": 4.030972821543451,
      "eval_arxiv_bleu_score_sem": 0.11366599754262402,
      "eval_arxiv_emb_cos_sim": 0.7315679788589478,
      "eval_arxiv_emb_cos_sim_sem": 0.008143049038087095,
      "eval_arxiv_emb_top1_equal": 0.2109375,
      "eval_arxiv_emb_top1_equal_sem": 0.03620184850179216,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.5502352714538574,
      "eval_arxiv_n_ngrams_match_1": 14.044,
      "eval_arxiv_n_ngrams_match_2": 2.714,
      "eval_arxiv_n_ngrams_match_3": 0.556,
      "eval_arxiv_num_pred_words": 39.36,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 34.821509031001305,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3381924420260421,
      "eval_arxiv_runtime": 9.4521,
      "eval_arxiv_samples_per_second": 52.899,
      "eval_arxiv_steps_per_second": 0.106,
      "eval_arxiv_token_set_f1": 0.3302554859161884,
      "eval_arxiv_token_set_f1_sem": 0.004087113864123824,
      "eval_arxiv_token_set_precision": 0.2769591943860666,
      "eval_arxiv_token_set_recall": 0.42862873582586186,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 68750
    },
    {
      "epoch": 13.2,
      "eval_python_code_alpaca_accuracy": 0.15275,
      "eval_python_code_alpaca_bleu_score": 3.9426884987931303,
      "eval_python_code_alpaca_bleu_score_sem": 0.12794384498963288,
      "eval_python_code_alpaca_emb_cos_sim": 0.7394332885742188,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008318306829452029,
      "eval_python_code_alpaca_emb_top1_equal": 0.1015625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.026804565886848545,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.0522122383117676,
      "eval_python_code_alpaca_n_ngrams_match_1": 8.932,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.364,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.688,
      "eval_python_code_alpaca_num_pred_words": 42.928,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 21.162108303791317,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.30114324266018666,
      "eval_python_code_alpaca_runtime": 9.5793,
      "eval_python_code_alpaca_samples_per_second": 52.196,
      "eval_python_code_alpaca_steps_per_second": 0.104,
      "eval_python_code_alpaca_token_set_f1": 0.4469390430769559,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005585685118982306,
      "eval_python_code_alpaca_token_set_precision": 0.4918287461633235,
      "eval_python_code_alpaca_token_set_recall": 0.4340804132285679,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 68750
    },
    {
      "epoch": 13.2,
      "eval_wikibio_accuracy": 0.31175,
      "eval_wikibio_bleu_score": 5.472833364398747,
      "eval_wikibio_bleu_score_sem": 0.2225170765844248,
      "eval_wikibio_emb_cos_sim": 0.6997572779655457,
      "eval_wikibio_emb_cos_sim_sem": 0.011623450177330687,
      "eval_wikibio_emb_top1_equal": 0.1484375,
      "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.897064447402954,
      "eval_wikibio_n_ngrams_match_1": 9.448,
      "eval_wikibio_n_ngrams_match_2": 3.07,
      "eval_wikibio_n_ngrams_match_3": 1.076,
      "eval_wikibio_num_pred_words": 35.5,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 49.25763827165962,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3291576614584369,
      "eval_wikibio_runtime": 10.0306,
      "eval_wikibio_samples_per_second": 49.847,
      "eval_wikibio_steps_per_second": 0.1,
      "eval_wikibio_token_set_f1": 0.3053627003733574,
      "eval_wikibio_token_set_f1_sem": 0.005795180778443444,
      "eval_wikibio_token_set_precision": 0.30806992090280705,
      "eval_wikibio_token_set_recall": 0.3209511001358559,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 68750
    },
    {
      "epoch": 13.2,
      "eval_nq_accuracy": 0.5131875,
      "eval_nq_bleu_score": 11.00189847339383,
      "eval_nq_bleu_score_sem": 0.44652311316152377,
      "eval_nq_emb_cos_sim": 0.8179467916488647,
      "eval_nq_emb_cos_sim_sem": 0.0073945690822827,
      "eval_nq_emb_top1_equal": 0.28125,
      "eval_nq_emb_top1_equal_sem": 0.039896367485272234,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.3040249347686768,
      "eval_nq_n_ngrams_match_1": 22.306,
      "eval_nq_n_ngrams_match_2": 7.938,
      "eval_nq_n_ngrams_match_3": 3.58,
      "eval_nq_num_pred_words": 48.882,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 10.014408788444781,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.42907752370581764,
      "eval_nq_runtime": 10.1256,
      "eval_nq_samples_per_second": 49.38,
      "eval_nq_steps_per_second": 0.099,
      "eval_nq_token_set_f1": 0.4464199475182248,
      "eval_nq_token_set_f1_sem": 0.004902545794572234,
      "eval_nq_token_set_precision": 0.3995018268823006,
      "eval_nq_token_set_recall": 0.5150652449000738,
      "eval_nq_true_num_tokens": 64.0,
      "step": 68750
    },
    {
      "epoch": 13.2,
      "learning_rate": 0.001,
      "loss": 2.6642,
      "step": 68760
    },
    {
      "epoch": 13.21,
      "learning_rate": 0.001,
      "loss": 2.6662,
      "step": 68772
    },
    {
      "epoch": 13.21,
      "learning_rate": 0.001,
      "loss": 2.6725,
      "step": 68784
    },
    {
      "epoch": 13.21,
      "learning_rate": 0.001,
      "loss": 2.6595,
      "step": 68796
    },
    {
      "epoch": 13.21,
      "learning_rate": 0.001,
      "loss": 2.6711,
      "step": 68808
    },
    {
      "epoch": 13.21,
      "learning_rate": 0.001,
      "loss": 2.6715,
      "step": 68820
    },
    {
      "epoch": 13.22,
      "learning_rate": 0.001,
      "loss": 2.6571,
      "step": 68832
    },
    {
      "epoch": 13.22,
      "learning_rate": 0.001,
      "loss": 2.6644,
      "step": 68844
    },
    {
      "epoch": 13.22,
      "learning_rate": 0.001,
      "loss": 2.6647,
      "step": 68856
    },
    {
      "epoch": 13.22,
      "learning_rate": 0.001,
      "loss": 2.6854,
      "step": 68868
    },
    {
      "epoch": 13.23,
      "learning_rate": 0.001,
      "loss": 2.6669,
      "step": 68880
    },
    {
      "epoch": 13.23,
      "learning_rate": 0.001,
      "loss": 2.6665,
      "step": 68892
    },
    {
      "epoch": 13.23,
      "learning_rate": 0.001,
      "loss": 2.6722,
      "step": 68904
    },
    {
      "epoch": 13.23,
      "learning_rate": 0.001,
      "loss": 2.6685,
      "step": 68916
    },
    {
      "epoch": 13.24,
      "learning_rate": 0.001,
      "loss": 2.6637,
      "step": 68928
    },
    {
      "epoch": 13.24,
      "learning_rate": 0.001,
      "loss": 2.6648,
      "step": 68940
    },
    {
      "epoch": 13.24,
      "learning_rate": 0.001,
      "loss": 2.6674,
      "step": 68952
    },
    {
      "epoch": 13.24,
      "learning_rate": 0.001,
      "loss": 2.6636,
      "step": 68964
    },
    {
      "epoch": 13.24,
      "learning_rate": 0.001,
      "loss": 2.6746,
      "step": 68976
    },
    {
      "epoch": 13.25,
      "learning_rate": 0.001,
      "loss": 2.6598,
      "step": 68988
    },
    {
      "epoch": 13.25,
      "learning_rate": 0.001,
      "loss": 2.6762,
      "step": 69000
    },
    {
      "epoch": 13.25,
      "learning_rate": 0.001,
      "loss": 2.6702,
      "step": 69012
    },
    {
      "epoch": 13.25,
      "learning_rate": 0.001,
      "loss": 2.6623,
      "step": 69024
    },
    {
      "epoch": 13.26,
      "learning_rate": 0.001,
      "loss": 2.6636,
      "step": 69036
    },
    {
      "epoch": 13.26,
      "learning_rate": 0.001,
      "loss": 2.664,
      "step": 69048
    },
    {
      "epoch": 13.26,
      "learning_rate": 0.001,
      "loss": 2.6669,
      "step": 69060
    },
    {
      "epoch": 13.26,
      "learning_rate": 0.001,
      "loss": 2.6669,
      "step": 69072
    },
    {
      "epoch": 13.26,
      "learning_rate": 0.001,
      "loss": 2.6624,
      "step": 69084
    },
    {
      "epoch": 13.27,
      "learning_rate": 0.001,
      "loss": 2.6658,
      "step": 69096
    },
    {
      "epoch": 13.27,
      "learning_rate": 0.001,
      "loss": 2.6583,
      "step": 69108
    },
    {
      "epoch": 13.27,
      "learning_rate": 0.001,
      "loss": 2.671,
      "step": 69120
    },
    {
      "epoch": 13.27,
      "learning_rate": 0.001,
      "loss": 2.6678,
      "step": 69132
    },
    {
      "epoch": 13.28,
      "learning_rate": 0.001,
      "loss": 2.6701,
      "step": 69144
    },
    {
      "epoch": 13.28,
      "learning_rate": 0.001,
      "loss": 2.6681,
      "step": 69156
    },
    {
      "epoch": 13.28,
      "learning_rate": 0.001,
      "loss": 2.6664,
      "step": 69168
    },
    {
      "epoch": 13.28,
      "learning_rate": 0.001,
      "loss": 2.6621,
      "step": 69180
    },
    {
      "epoch": 13.29,
      "learning_rate": 0.001,
      "loss": 2.6734,
      "step": 69192
    },
    {
      "epoch": 13.29,
      "learning_rate": 0.001,
      "loss": 2.6707,
      "step": 69204
    },
    {
      "epoch": 13.29,
      "learning_rate": 0.001,
      "loss": 2.6621,
      "step": 69216
    },
    {
      "epoch": 13.29,
      "learning_rate": 0.001,
      "loss": 2.6667,
      "step": 69228
    },
    {
      "epoch": 13.29,
      "learning_rate": 0.001,
      "loss": 2.6536,
      "step": 69240
    },
    {
      "epoch": 13.3,
      "learning_rate": 0.001,
      "loss": 2.6631,
      "step": 69252
    },
    {
      "epoch": 13.3,
      "learning_rate": 0.001,
      "loss": 2.6661,
      "step": 69264
    },
    {
      "epoch": 13.3,
      "learning_rate": 0.001,
      "loss": 2.6621,
      "step": 69276
    },
    {
      "epoch": 13.3,
      "learning_rate": 0.001,
      "loss": 2.6574,
      "step": 69288
    },
    {
      "epoch": 13.31,
      "learning_rate": 0.001,
      "loss": 2.6754,
      "step": 69300
    },
    {
      "epoch": 13.31,
      "learning_rate": 0.001,
      "loss": 2.6827,
      "step": 69312
    },
    {
      "epoch": 13.31,
      "learning_rate": 0.001,
      "loss": 2.6756,
      "step": 69324
    },
    {
      "epoch": 13.31,
      "learning_rate": 0.001,
      "loss": 2.6699,
      "step": 69336
    },
    {
      "epoch": 13.32,
      "learning_rate": 0.001,
      "loss": 2.6684,
      "step": 69348
    },
    {
      "epoch": 13.32,
      "learning_rate": 0.001,
      "loss": 2.6604,
      "step": 69360
    },
    {
      "epoch": 13.32,
      "learning_rate": 0.001,
      "loss": 2.6678,
      "step": 69372
    },
    {
      "epoch": 13.32,
      "eval_ag_news_accuracy": 0.310125,
      "eval_ag_news_bleu_score": 4.581901813509112,
      "eval_ag_news_bleu_score_sem": 0.15242013163267312,
      "eval_ag_news_emb_cos_sim": 0.7905258536338806,
      "eval_ag_news_emb_cos_sim_sem": 0.0077469239798233615,
      "eval_ag_news_emb_top1_equal": 0.25,
      "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.6883883476257324,
      "eval_ag_news_n_ngrams_match_1": 13.472,
      "eval_ag_news_n_ngrams_match_2": 2.842,
      "eval_ag_news_n_ngrams_match_3": 0.822,
      "eval_ag_news_num_pred_words": 46.368,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 39.980360563393944,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.33098513498113397,
      "eval_ag_news_runtime": 10.1598,
      "eval_ag_news_samples_per_second": 49.214,
      "eval_ag_news_steps_per_second": 0.098,
      "eval_ag_news_token_set_f1": 0.33922392287556286,
      "eval_ag_news_token_set_f1_sem": 0.004505298905916181,
      "eval_ag_news_token_set_precision": 0.3201332667459879,
      "eval_ag_news_token_set_recall": 0.3767736654439968,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 69375
    },
    {
      "epoch": 13.32,
      "eval_anthropic_toxic_prompts_accuracy": 0.10884375,
      "eval_anthropic_toxic_prompts_bleu_score": 2.913702358280627,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11300536149323476,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6561837196350098,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009399365591222364,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.140625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030847557647994725,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.375993490219116,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.992,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.77,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.658,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.786,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 29.2533322486271,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20566045886685008,
      "eval_anthropic_toxic_prompts_runtime": 10.0332,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.835,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.1,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3432457286718833,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0062829507629327,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.42217432608134114,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.31544592796649684,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 69375
    },
    {
      "epoch": 13.32,
      "eval_arxiv_accuracy": 0.33503125,
      "eval_arxiv_bleu_score": 4.044976229554206,
      "eval_arxiv_bleu_score_sem": 0.10695914419101472,
      "eval_arxiv_emb_cos_sim": 0.7321943044662476,
      "eval_arxiv_emb_cos_sim_sem": 0.009260307991461242,
      "eval_arxiv_emb_top1_equal": 0.2734375,
      "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.5358974933624268,
      "eval_arxiv_n_ngrams_match_1": 14.3,
      "eval_arxiv_n_ngrams_match_2": 2.784,
      "eval_arxiv_n_ngrams_match_3": 0.604,
      "eval_arxiv_num_pred_words": 40.908,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 34.325808078652294,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.33809581914191467,
      "eval_arxiv_runtime": 10.3364,
      "eval_arxiv_samples_per_second": 48.373,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.3347097694629011,
      "eval_arxiv_token_set_f1_sem": 0.004218430772697236,
      "eval_arxiv_token_set_precision": 0.2829724458053254,
      "eval_arxiv_token_set_recall": 0.4284973973118188,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 69375
    },
    {
      "epoch": 13.32,
      "eval_python_code_alpaca_accuracy": 0.1530625,
      "eval_python_code_alpaca_bleu_score": 3.998613167735816,
      "eval_python_code_alpaca_bleu_score_sem": 0.12504761548996998,
      "eval_python_code_alpaca_emb_cos_sim": 0.7307700514793396,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009256189454108317,
      "eval_python_code_alpaca_emb_top1_equal": 0.1484375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.031555652618408,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.092,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.464,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.706,
      "eval_python_code_alpaca_num_pred_words": 42.87,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 20.72945535067407,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3084186231728019,
      "eval_python_code_alpaca_runtime": 10.1751,
      "eval_python_code_alpaca_samples_per_second": 49.139,
      "eval_python_code_alpaca_steps_per_second": 0.098,
      "eval_python_code_alpaca_token_set_f1": 0.44877587607077674,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005642050858873661,
      "eval_python_code_alpaca_token_set_precision": 0.4951120972017968,
      "eval_python_code_alpaca_token_set_recall": 0.43973178737615537,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 69375
    },
    {
      "epoch": 13.32,
      "eval_wikibio_accuracy": 0.31128125,
      "eval_wikibio_bleu_score": 5.684142259335463,
      "eval_wikibio_bleu_score_sem": 0.20882331101485738,
      "eval_wikibio_emb_cos_sim": 0.719696581363678,
      "eval_wikibio_emb_cos_sim_sem": 0.010332893919591872,
      "eval_wikibio_emb_top1_equal": 0.203125,
      "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.928328037261963,
      "eval_wikibio_n_ngrams_match_1": 9.95,
      "eval_wikibio_n_ngrams_match_2": 3.33,
      "eval_wikibio_n_ngrams_match_3": 1.2,
      "eval_wikibio_num_pred_words": 37.204,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 50.821934213987845,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.34082381137321227,
      "eval_wikibio_runtime": 9.5101,
      "eval_wikibio_samples_per_second": 52.576,
      "eval_wikibio_steps_per_second": 0.105,
      "eval_wikibio_token_set_f1": 0.31031726279902844,
      "eval_wikibio_token_set_f1_sem": 0.005588077460700187,
      "eval_wikibio_token_set_precision": 0.3217614520503324,
      "eval_wikibio_token_set_recall": 0.3139498032925318,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 69375
    },
    {
      "epoch": 13.32,
      "eval_nq_accuracy": 0.510875,
      "eval_nq_bleu_score": 10.918513871697016,
      "eval_nq_bleu_score_sem": 0.45210457420266803,
      "eval_nq_emb_cos_sim": 0.8174967765808105,
      "eval_nq_emb_cos_sim_sem": 0.007653251291918884,
      "eval_nq_emb_top1_equal": 0.234375,
      "eval_nq_emb_top1_equal_sem": 0.03758909358128201,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.300996780395508,
      "eval_nq_n_ngrams_match_1": 22.268,
      "eval_nq_n_ngrams_match_2": 7.998,
      "eval_nq_n_ngrams_match_3": 3.62,
      "eval_nq_num_pred_words": 49.196,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.984129481023668,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.42666553562390397,
      "eval_nq_runtime": 9.8801,
      "eval_nq_samples_per_second": 50.607,
      "eval_nq_steps_per_second": 0.101,
      "eval_nq_token_set_f1": 0.44503201543651233,
      "eval_nq_token_set_f1_sem": 0.005089316551111861,
      "eval_nq_token_set_precision": 0.40013100551289443,
      "eval_nq_token_set_recall": 0.5114309493525862,
      "eval_nq_true_num_tokens": 64.0,
      "step": 69375
    },
    {
      "epoch": 13.32,
      "learning_rate": 0.001,
      "loss": 2.6693,
      "step": 69384
    },
    {
      "epoch": 13.32,
      "learning_rate": 0.001,
      "loss": 2.6669,
      "step": 69396
    },
    {
      "epoch": 13.33,
      "learning_rate": 0.001,
      "loss": 2.6726,
      "step": 69408
    },
    {
      "epoch": 13.33,
      "learning_rate": 0.001,
      "loss": 2.6701,
      "step": 69420
    },
    {
      "epoch": 13.33,
      "learning_rate": 0.001,
      "loss": 2.6811,
      "step": 69432
    },
    {
      "epoch": 13.33,
      "learning_rate": 0.001,
      "loss": 2.6685,
      "step": 69444
    },
    {
      "epoch": 13.34,
      "learning_rate": 0.001,
      "loss": 2.6592,
      "step": 69456
    },
    {
      "epoch": 13.34,
      "learning_rate": 0.001,
      "loss": 2.6515,
      "step": 69468
    },
    {
      "epoch": 13.34,
      "learning_rate": 0.001,
      "loss": 2.6728,
      "step": 69480
    },
    {
      "epoch": 13.34,
      "learning_rate": 0.001,
      "loss": 2.6761,
      "step": 69492
    },
    {
      "epoch": 13.35,
      "learning_rate": 0.001,
      "loss": 2.6652,
      "step": 69504
    },
    {
      "epoch": 13.35,
      "learning_rate": 0.001,
      "loss": 2.6584,
      "step": 69516
    },
    {
      "epoch": 13.35,
      "learning_rate": 0.001,
      "loss": 2.6581,
      "step": 69528
    },
    {
      "epoch": 13.35,
      "learning_rate": 0.001,
      "loss": 2.6735,
      "step": 69540
    },
    {
      "epoch": 13.35,
      "learning_rate": 0.001,
      "loss": 2.6673,
      "step": 69552
    },
    {
      "epoch": 13.36,
      "learning_rate": 0.001,
      "loss": 2.6744,
      "step": 69564
    },
    {
      "epoch": 13.36,
      "learning_rate": 0.001,
      "loss": 2.6665,
      "step": 69576
    },
    {
      "epoch": 13.36,
      "learning_rate": 0.001,
      "loss": 2.6696,
      "step": 69588
    },
    {
      "epoch": 13.36,
      "learning_rate": 0.001,
      "loss": 2.6674,
      "step": 69600
    },
    {
      "epoch": 13.37,
      "learning_rate": 0.001,
      "loss": 2.655,
      "step": 69612
    },
    {
      "epoch": 13.37,
      "learning_rate": 0.001,
      "loss": 2.6592,
      "step": 69624
    },
    {
      "epoch": 13.37,
      "learning_rate": 0.001,
      "loss": 2.6639,
      "step": 69636
    },
    {
      "epoch": 13.37,
      "learning_rate": 0.001,
      "loss": 2.674,
      "step": 69648
    },
    {
      "epoch": 13.38,
      "learning_rate": 0.001,
      "loss": 2.6659,
      "step": 69660
    },
    {
      "epoch": 13.38,
      "learning_rate": 0.001,
      "loss": 2.6624,
      "step": 69672
    },
    {
      "epoch": 13.38,
      "learning_rate": 0.001,
      "loss": 2.6572,
      "step": 69684
    },
    {
      "epoch": 13.38,
      "learning_rate": 0.001,
      "loss": 2.6686,
      "step": 69696
    },
    {
      "epoch": 13.38,
      "learning_rate": 0.001,
      "loss": 2.6699,
      "step": 69708
    },
    {
      "epoch": 13.39,
      "learning_rate": 0.001,
      "loss": 2.6658,
      "step": 69720
    },
    {
      "epoch": 13.39,
      "learning_rate": 0.001,
      "loss": 2.6625,
      "step": 69732
    },
    {
      "epoch": 13.39,
      "learning_rate": 0.001,
      "loss": 2.6666,
      "step": 69744
    },
    {
      "epoch": 13.39,
      "learning_rate": 0.001,
      "loss": 2.6587,
      "step": 69756
    },
    {
      "epoch": 13.4,
      "learning_rate": 0.001,
      "loss": 2.6698,
      "step": 69768
    },
    {
      "epoch": 13.4,
      "learning_rate": 0.001,
      "loss": 2.6676,
      "step": 69780
    },
    {
      "epoch": 13.4,
      "learning_rate": 0.001,
      "loss": 2.6656,
      "step": 69792
    },
    {
      "epoch": 13.4,
      "learning_rate": 0.001,
      "loss": 2.6658,
      "step": 69804
    },
    {
      "epoch": 13.41,
      "learning_rate": 0.001,
      "loss": 2.6683,
      "step": 69816
    },
    {
      "epoch": 13.41,
      "learning_rate": 0.001,
      "loss": 2.6737,
      "step": 69828
    },
    {
      "epoch": 13.41,
      "learning_rate": 0.001,
      "loss": 2.6731,
      "step": 69840
    },
    {
      "epoch": 13.41,
      "learning_rate": 0.001,
      "loss": 2.6654,
      "step": 69852
    },
    {
      "epoch": 13.41,
      "learning_rate": 0.001,
      "loss": 2.6848,
      "step": 69864
    },
    {
      "epoch": 13.42,
      "learning_rate": 0.001,
      "loss": 2.6713,
      "step": 69876
    },
    {
      "epoch": 13.42,
      "learning_rate": 0.001,
      "loss": 2.6709,
      "step": 69888
    },
    {
      "epoch": 13.42,
      "learning_rate": 0.001,
      "loss": 2.6807,
      "step": 69900
    },
    {
      "epoch": 13.42,
      "learning_rate": 0.001,
      "loss": 2.6669,
      "step": 69912
    },
    {
      "epoch": 13.43,
      "learning_rate": 0.001,
      "loss": 2.6664,
      "step": 69924
    },
    {
      "epoch": 13.43,
      "learning_rate": 0.001,
      "loss": 2.6708,
      "step": 69936
    },
    {
      "epoch": 13.43,
      "learning_rate": 0.001,
      "loss": 2.6699,
      "step": 69948
    },
    {
      "epoch": 13.43,
      "learning_rate": 0.001,
      "loss": 2.674,
      "step": 69960
    },
    {
      "epoch": 13.44,
      "learning_rate": 0.001,
      "loss": 2.6645,
      "step": 69972
    },
    {
      "epoch": 13.44,
      "learning_rate": 0.001,
      "loss": 2.6635,
      "step": 69984
    },
    {
      "epoch": 13.44,
      "learning_rate": 0.001,
      "loss": 2.6682,
      "step": 69996
    },
    {
      "epoch": 13.44,
      "eval_ag_news_accuracy": 0.31046875,
      "eval_ag_news_bleu_score": 4.573999983592996,
      "eval_ag_news_bleu_score_sem": 0.14607512891896027,
      "eval_ag_news_emb_cos_sim": 0.7891677618026733,
      "eval_ag_news_emb_cos_sim_sem": 0.007683784049486994,
      "eval_ag_news_emb_top1_equal": 0.1953125,
      "eval_ag_news_emb_top1_equal_sem": 0.035178457165496856,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.693073272705078,
      "eval_ag_news_n_ngrams_match_1": 13.536,
      "eval_ag_news_n_ngrams_match_2": 2.978,
      "eval_ag_news_n_ngrams_match_3": 0.842,
      "eval_ag_news_num_pred_words": 46.744,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 40.16810499819159,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.331465413135851,
      "eval_ag_news_runtime": 9.8732,
      "eval_ag_news_samples_per_second": 50.642,
      "eval_ag_news_steps_per_second": 0.101,
      "eval_ag_news_token_set_f1": 0.3381465570788198,
      "eval_ag_news_token_set_f1_sem": 0.0045067678996548325,
      "eval_ag_news_token_set_precision": 0.31930283102408513,
      "eval_ag_news_token_set_recall": 0.37345723278823784,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 70000
    },
    {
      "epoch": 13.44,
      "eval_anthropic_toxic_prompts_accuracy": 0.1089375,
      "eval_anthropic_toxic_prompts_bleu_score": 2.8453579870685455,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11041804017860082,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6511286497116089,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00983327990898767,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1328125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030114394778901498,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.3859524726867676,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.91,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.716,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.63,
      "eval_anthropic_toxic_prompts_num_pred_words": 48.428,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 29.54612119165763,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 62.9765625,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20057807952564702,
      "eval_anthropic_toxic_prompts_runtime": 9.388,
      "eval_anthropic_toxic_prompts_samples_per_second": 53.26,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.107,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3402589784710454,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006507210742780276,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.41184701315785754,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.31321571923591446,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 70000
    },
    {
      "epoch": 13.44,
      "eval_arxiv_accuracy": 0.33609375,
      "eval_arxiv_bleu_score": 4.107066981436177,
      "eval_arxiv_bleu_score_sem": 0.11677242817579854,
      "eval_arxiv_emb_cos_sim": 0.734245777130127,
      "eval_arxiv_emb_cos_sim_sem": 0.008520608409581779,
      "eval_arxiv_emb_top1_equal": 0.21875,
      "eval_arxiv_emb_top1_equal_sem": 0.03668319712192295,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.5367684364318848,
      "eval_arxiv_n_ngrams_match_1": 14.502,
      "eval_arxiv_n_ngrams_match_2": 2.758,
      "eval_arxiv_n_ngrams_match_3": 0.63,
      "eval_arxiv_num_pred_words": 40.782,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 34.35571692586295,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3392695386136979,
      "eval_arxiv_runtime": 10.1355,
      "eval_arxiv_samples_per_second": 49.332,
      "eval_arxiv_steps_per_second": 0.099,
      "eval_arxiv_token_set_f1": 0.336308068725055,
      "eval_arxiv_token_set_f1_sem": 0.004249186490275496,
      "eval_arxiv_token_set_precision": 0.2877258366278578,
      "eval_arxiv_token_set_recall": 0.42504069226446467,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 70000
    },
    {
      "epoch": 13.44,
      "eval_python_code_alpaca_accuracy": 0.15359375,
      "eval_python_code_alpaca_bleu_score": 4.035798468787673,
      "eval_python_code_alpaca_bleu_score_sem": 0.13010785153561488,
      "eval_python_code_alpaca_emb_cos_sim": 0.7340409755706787,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007725683285262887,
      "eval_python_code_alpaca_emb_top1_equal": 0.09375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.025864720141013958,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.0539517402648926,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.184,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.476,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.724,
      "eval_python_code_alpaca_num_pred_words": 43.642,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 21.198951867953607,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.31022072658054345,
      "eval_python_code_alpaca_runtime": 10.0354,
      "eval_python_code_alpaca_samples_per_second": 49.823,
      "eval_python_code_alpaca_steps_per_second": 0.1,
      "eval_python_code_alpaca_token_set_f1": 0.4504469738901408,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005308590798637979,
      "eval_python_code_alpaca_token_set_precision": 0.49919039450371916,
      "eval_python_code_alpaca_token_set_recall": 0.4351724132169895,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 70000
    },
    {
      "epoch": 13.44,
      "eval_wikibio_accuracy": 0.31009375,
      "eval_wikibio_bleu_score": 5.706691967496832,
      "eval_wikibio_bleu_score_sem": 0.21379787769856498,
      "eval_wikibio_emb_cos_sim": 0.706870436668396,
      "eval_wikibio_emb_cos_sim_sem": 0.011782312186080373,
      "eval_wikibio_emb_top1_equal": 0.1171875,
      "eval_wikibio_emb_top1_equal_sem": 0.02854125312152025,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.9185338020324707,
      "eval_wikibio_n_ngrams_match_1": 9.928,
      "eval_wikibio_n_ngrams_match_2": 3.302,
      "eval_wikibio_n_ngrams_match_3": 1.16,
      "eval_wikibio_num_pred_words": 37.016,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 50.32660189573987,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3437490115546855,
      "eval_wikibio_runtime": 11.5447,
      "eval_wikibio_samples_per_second": 43.31,
      "eval_wikibio_steps_per_second": 0.087,
      "eval_wikibio_token_set_f1": 0.31203896200058007,
      "eval_wikibio_token_set_f1_sem": 0.005555645412727883,
      "eval_wikibio_token_set_precision": 0.3215287833226996,
      "eval_wikibio_token_set_recall": 0.3168401352548879,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 70000
    },
    {
      "epoch": 13.44,
      "eval_nq_accuracy": 0.51159375,
      "eval_nq_bleu_score": 10.973708463687188,
      "eval_nq_bleu_score_sem": 0.4487555880509462,
      "eval_nq_emb_cos_sim": 0.8272305130958557,
      "eval_nq_emb_cos_sim_sem": 0.007815281558296475,
      "eval_nq_emb_top1_equal": 0.2734375,
      "eval_nq_emb_top1_equal_sem": 0.03955156411760461,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.295389175415039,
      "eval_nq_n_ngrams_match_1": 22.356,
      "eval_nq_n_ngrams_match_2": 7.938,
      "eval_nq_n_ngrams_match_3": 3.572,
      "eval_nq_num_pred_words": 48.956,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.928299110452118,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.43195680494780875,
      "eval_nq_runtime": 10.5117,
      "eval_nq_samples_per_second": 47.566,
      "eval_nq_steps_per_second": 0.095,
      "eval_nq_token_set_f1": 0.4467195710626159,
      "eval_nq_token_set_f1_sem": 0.004996189643011763,
      "eval_nq_token_set_precision": 0.402965767620914,
      "eval_nq_token_set_recall": 0.5103217045561563,
      "eval_nq_true_num_tokens": 64.0,
      "step": 70000
    },
    {
      "epoch": 13.44,
      "learning_rate": 0.001,
      "loss": 2.6702,
      "step": 70008
    },
    {
      "epoch": 13.44,
      "learning_rate": 0.001,
      "loss": 2.6688,
      "step": 70020
    },
    {
      "epoch": 13.45,
      "learning_rate": 0.001,
      "loss": 2.6746,
      "step": 70032
    },
    {
      "epoch": 13.45,
      "learning_rate": 0.001,
      "loss": 2.6556,
      "step": 70044
    },
    {
      "epoch": 13.45,
      "learning_rate": 0.001,
      "loss": 2.6615,
      "step": 70056
    },
    {
      "epoch": 13.45,
      "learning_rate": 0.001,
      "loss": 2.6685,
      "step": 70068
    },
    {
      "epoch": 13.46,
      "learning_rate": 0.001,
      "loss": 2.6616,
      "step": 70080
    },
    {
      "epoch": 13.46,
      "learning_rate": 0.001,
      "loss": 2.6663,
      "step": 70092
    },
    {
      "epoch": 13.46,
      "learning_rate": 0.001,
      "loss": 2.6654,
      "step": 70104
    },
    {
      "epoch": 13.46,
      "learning_rate": 0.001,
      "loss": 2.6641,
      "step": 70116
    },
    {
      "epoch": 13.47,
      "learning_rate": 0.001,
      "loss": 2.6651,
      "step": 70128
    },
    {
      "epoch": 13.47,
      "learning_rate": 0.001,
      "loss": 2.6752,
      "step": 70140
    },
    {
      "epoch": 13.47,
      "learning_rate": 0.001,
      "loss": 2.6638,
      "step": 70152
    },
    {
      "epoch": 13.47,
      "learning_rate": 0.001,
      "loss": 2.6674,
      "step": 70164
    },
    {
      "epoch": 13.47,
      "learning_rate": 0.001,
      "loss": 2.665,
      "step": 70176
    },
    {
      "epoch": 13.48,
      "learning_rate": 0.001,
      "loss": 2.6732,
      "step": 70188
    },
    {
      "epoch": 13.48,
      "learning_rate": 0.001,
      "loss": 2.6699,
      "step": 70200
    },
    {
      "epoch": 13.48,
      "learning_rate": 0.001,
      "loss": 2.6709,
      "step": 70212
    },
    {
      "epoch": 13.48,
      "learning_rate": 0.001,
      "loss": 2.6648,
      "step": 70224
    },
    {
      "epoch": 13.49,
      "learning_rate": 0.001,
      "loss": 2.6688,
      "step": 70236
    },
    {
      "epoch": 13.49,
      "learning_rate": 0.001,
      "loss": 2.6604,
      "step": 70248
    },
    {
      "epoch": 13.49,
      "learning_rate": 0.001,
      "loss": 2.6731,
      "step": 70260
    },
    {
      "epoch": 13.49,
      "learning_rate": 0.001,
      "loss": 2.6625,
      "step": 70272
    },
    {
      "epoch": 13.5,
      "learning_rate": 0.001,
      "loss": 2.6643,
      "step": 70284
    },
    {
      "epoch": 13.5,
      "learning_rate": 0.001,
      "loss": 2.6589,
      "step": 70296
    },
    {
      "epoch": 13.5,
      "learning_rate": 0.001,
      "loss": 2.6574,
      "step": 70308
    },
    {
      "epoch": 13.5,
      "learning_rate": 0.001,
      "loss": 2.6668,
      "step": 70320
    },
    {
      "epoch": 13.5,
      "learning_rate": 0.001,
      "loss": 2.6688,
      "step": 70332
    },
    {
      "epoch": 13.51,
      "learning_rate": 0.001,
      "loss": 2.6654,
      "step": 70344
    },
    {
      "epoch": 13.51,
      "learning_rate": 0.001,
      "loss": 2.6758,
      "step": 70356
    },
    {
      "epoch": 13.51,
      "learning_rate": 0.001,
      "loss": 2.6628,
      "step": 70368
    },
    {
      "epoch": 13.51,
      "learning_rate": 0.001,
      "loss": 2.6644,
      "step": 70380
    },
    {
      "epoch": 13.52,
      "learning_rate": 0.001,
      "loss": 2.6664,
      "step": 70392
    },
    {
      "epoch": 13.52,
      "learning_rate": 0.001,
      "loss": 2.6609,
      "step": 70404
    },
    {
      "epoch": 13.52,
      "learning_rate": 0.001,
      "loss": 2.6575,
      "step": 70416
    },
    {
      "epoch": 13.52,
      "learning_rate": 0.001,
      "loss": 2.6786,
      "step": 70428
    },
    {
      "epoch": 13.53,
      "learning_rate": 0.001,
      "loss": 2.6648,
      "step": 70440
    },
    {
      "epoch": 13.53,
      "learning_rate": 0.001,
      "loss": 2.6582,
      "step": 70452
    },
    {
      "epoch": 13.53,
      "learning_rate": 0.001,
      "loss": 2.6648,
      "step": 70464
    },
    {
      "epoch": 13.53,
      "learning_rate": 0.001,
      "loss": 2.6545,
      "step": 70476
    },
    {
      "epoch": 13.53,
      "learning_rate": 0.001,
      "loss": 2.6709,
      "step": 70488
    },
    {
      "epoch": 13.54,
      "learning_rate": 0.001,
      "loss": 2.6563,
      "step": 70500
    },
    {
      "epoch": 13.54,
      "learning_rate": 0.001,
      "loss": 2.6644,
      "step": 70512
    },
    {
      "epoch": 13.54,
      "learning_rate": 0.001,
      "loss": 2.6627,
      "step": 70524
    },
    {
      "epoch": 13.54,
      "learning_rate": 0.001,
      "loss": 2.6724,
      "step": 70536
    },
    {
      "epoch": 13.55,
      "learning_rate": 0.001,
      "loss": 2.6639,
      "step": 70548
    },
    {
      "epoch": 13.55,
      "learning_rate": 0.001,
      "loss": 2.6661,
      "step": 70560
    },
    {
      "epoch": 13.55,
      "learning_rate": 0.001,
      "loss": 2.6663,
      "step": 70572
    },
    {
      "epoch": 13.55,
      "learning_rate": 0.001,
      "loss": 2.6606,
      "step": 70584
    },
    {
      "epoch": 13.56,
      "learning_rate": 0.001,
      "loss": 2.6688,
      "step": 70596
    },
    {
      "epoch": 13.56,
      "learning_rate": 0.001,
      "loss": 2.6743,
      "step": 70608
    },
    {
      "epoch": 13.56,
      "learning_rate": 0.001,
      "loss": 2.6646,
      "step": 70620
    },
    {
      "epoch": 13.56,
      "eval_ag_news_accuracy": 0.311125,
      "eval_ag_news_bleu_score": 4.603646543310611,
      "eval_ag_news_bleu_score_sem": 0.1554574416001967,
      "eval_ag_news_emb_cos_sim": 0.7936511039733887,
      "eval_ag_news_emb_cos_sim_sem": 0.007636620408914288,
      "eval_ag_news_emb_top1_equal": 0.234375,
      "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.6890857219696045,
      "eval_ag_news_n_ngrams_match_1": 13.344,
      "eval_ag_news_n_ngrams_match_2": 2.88,
      "eval_ag_news_n_ngrams_match_3": 0.83,
      "eval_ag_news_num_pred_words": 46.178,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 40.008251565213804,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3304506415259759,
      "eval_ag_news_runtime": 9.7421,
      "eval_ag_news_samples_per_second": 51.323,
      "eval_ag_news_steps_per_second": 0.103,
      "eval_ag_news_token_set_f1": 0.3346660807683896,
      "eval_ag_news_token_set_f1_sem": 0.004521146964380114,
      "eval_ag_news_token_set_precision": 0.31481641540733385,
      "eval_ag_news_token_set_recall": 0.37520403392083945,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 70625
    },
    {
      "epoch": 13.56,
      "eval_anthropic_toxic_prompts_accuracy": 0.1094375,
      "eval_anthropic_toxic_prompts_bleu_score": 2.95346093583356,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11451633694222511,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6503604650497437,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008357328225728036,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.3711435794830322,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.978,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.766,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.648,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.566,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 29.111799686058593,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20569145427292096,
      "eval_anthropic_toxic_prompts_runtime": 9.373,
      "eval_anthropic_toxic_prompts_samples_per_second": 53.345,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.107,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3484633256637001,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006319512916057749,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.420448466137835,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3235423783033733,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 70625
    },
    {
      "epoch": 13.56,
      "eval_arxiv_accuracy": 0.335,
      "eval_arxiv_bleu_score": 4.043129903314623,
      "eval_arxiv_bleu_score_sem": 0.11305360510649545,
      "eval_arxiv_emb_cos_sim": 0.7320790886878967,
      "eval_arxiv_emb_cos_sim_sem": 0.007941114925792797,
      "eval_arxiv_emb_top1_equal": 0.25,
      "eval_arxiv_emb_top1_equal_sem": 0.03842366440207048,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.544421911239624,
      "eval_arxiv_n_ngrams_match_1": 14.282,
      "eval_arxiv_n_ngrams_match_2": 2.666,
      "eval_arxiv_n_ngrams_match_3": 0.572,
      "eval_arxiv_num_pred_words": 40.854,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 34.619666316445944,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.33588146033544647,
      "eval_arxiv_runtime": 9.9187,
      "eval_arxiv_samples_per_second": 50.41,
      "eval_arxiv_steps_per_second": 0.101,
      "eval_arxiv_token_set_f1": 0.33192611120256144,
      "eval_arxiv_token_set_f1_sem": 0.00402073157478049,
      "eval_arxiv_token_set_precision": 0.28117471575818076,
      "eval_arxiv_token_set_recall": 0.4201901092794416,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 70625
    },
    {
      "epoch": 13.56,
      "eval_python_code_alpaca_accuracy": 0.15175,
      "eval_python_code_alpaca_bleu_score": 3.978865267523391,
      "eval_python_code_alpaca_bleu_score_sem": 0.12158400088243494,
      "eval_python_code_alpaca_emb_cos_sim": 0.7289148569107056,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009450559524183924,
      "eval_python_code_alpaca_emb_top1_equal": 0.15625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.0616848468780518,
      "eval_python_code_alpaca_n_ngrams_match_1": 8.994,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.426,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.69,
      "eval_python_code_alpaca_num_pred_words": 42.596,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 21.363521118500106,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.30567471782513955,
      "eval_python_code_alpaca_runtime": 10.6029,
      "eval_python_code_alpaca_samples_per_second": 47.157,
      "eval_python_code_alpaca_steps_per_second": 0.094,
      "eval_python_code_alpaca_token_set_f1": 0.4440848260978374,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005813810310104646,
      "eval_python_code_alpaca_token_set_precision": 0.48757103106215893,
      "eval_python_code_alpaca_token_set_recall": 0.4310197499467643,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 70625
    },
    {
      "epoch": 13.56,
      "eval_wikibio_accuracy": 0.30753125,
      "eval_wikibio_bleu_score": 5.573081484533731,
      "eval_wikibio_bleu_score_sem": 0.20239320777496794,
      "eval_wikibio_emb_cos_sim": 0.7133356332778931,
      "eval_wikibio_emb_cos_sim_sem": 0.011062724553474705,
      "eval_wikibio_emb_top1_equal": 0.1484375,
      "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.9174001216888428,
      "eval_wikibio_n_ngrams_match_1": 9.838,
      "eval_wikibio_n_ngrams_match_2": 3.182,
      "eval_wikibio_n_ngrams_match_3": 1.108,
      "eval_wikibio_num_pred_words": 36.462,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 50.269579944848715,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3430982805601802,
      "eval_wikibio_runtime": 9.4774,
      "eval_wikibio_samples_per_second": 52.757,
      "eval_wikibio_steps_per_second": 0.106,
      "eval_wikibio_token_set_f1": 0.3104528276888094,
      "eval_wikibio_token_set_f1_sem": 0.005324313675163187,
      "eval_wikibio_token_set_precision": 0.3172004388108864,
      "eval_wikibio_token_set_recall": 0.32004895169764813,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 70625
    },
    {
      "epoch": 13.56,
      "eval_nq_accuracy": 0.5111875,
      "eval_nq_bleu_score": 10.94347444405283,
      "eval_nq_bleu_score_sem": 0.45145234848873483,
      "eval_nq_emb_cos_sim": 0.8182129263877869,
      "eval_nq_emb_cos_sim_sem": 0.007668962846790335,
      "eval_nq_emb_top1_equal": 0.2734375,
      "eval_nq_emb_top1_equal_sem": 0.03955156411760461,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.3012492656707764,
      "eval_nq_n_ngrams_match_1": 22.3,
      "eval_nq_n_ngrams_match_2": 8.006,
      "eval_nq_n_ngrams_match_3": 3.596,
      "eval_nq_num_pred_words": 49.174,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.986650644968993,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4297293570062266,
      "eval_nq_runtime": 10.5276,
      "eval_nq_samples_per_second": 47.494,
      "eval_nq_steps_per_second": 0.095,
      "eval_nq_token_set_f1": 0.44614669768141174,
      "eval_nq_token_set_f1_sem": 0.004936122819480593,
      "eval_nq_token_set_precision": 0.40120142831249606,
      "eval_nq_token_set_recall": 0.5127012666681858,
      "eval_nq_true_num_tokens": 64.0,
      "step": 70625
    },
    {
      "epoch": 13.56,
      "learning_rate": 0.001,
      "loss": 2.6639,
      "step": 70632
    },
    {
      "epoch": 13.56,
      "learning_rate": 0.001,
      "loss": 2.6641,
      "step": 70644
    },
    {
      "epoch": 13.57,
      "learning_rate": 0.001,
      "loss": 2.6658,
      "step": 70656
    },
    {
      "epoch": 13.57,
      "learning_rate": 0.001,
      "loss": 2.6754,
      "step": 70668
    },
    {
      "epoch": 13.57,
      "learning_rate": 0.001,
      "loss": 2.6554,
      "step": 70680
    },
    {
      "epoch": 13.57,
      "learning_rate": 0.001,
      "loss": 2.6688,
      "step": 70692
    },
    {
      "epoch": 13.58,
      "learning_rate": 0.001,
      "loss": 2.6808,
      "step": 70704
    },
    {
      "epoch": 13.58,
      "learning_rate": 0.001,
      "loss": 2.6637,
      "step": 70716
    },
    {
      "epoch": 13.58,
      "learning_rate": 0.001,
      "loss": 2.6631,
      "step": 70728
    },
    {
      "epoch": 13.58,
      "learning_rate": 0.001,
      "loss": 2.6617,
      "step": 70740
    },
    {
      "epoch": 13.59,
      "learning_rate": 0.001,
      "loss": 2.6536,
      "step": 70752
    },
    {
      "epoch": 13.59,
      "learning_rate": 0.001,
      "loss": 2.6718,
      "step": 70764
    },
    {
      "epoch": 13.59,
      "learning_rate": 0.001,
      "loss": 2.6616,
      "step": 70776
    },
    {
      "epoch": 13.59,
      "learning_rate": 0.001,
      "loss": 2.6629,
      "step": 70788
    },
    {
      "epoch": 13.59,
      "learning_rate": 0.001,
      "loss": 2.6657,
      "step": 70800
    },
    {
      "epoch": 13.6,
      "learning_rate": 0.001,
      "loss": 2.6713,
      "step": 70812
    },
    {
      "epoch": 13.6,
      "learning_rate": 0.001,
      "loss": 2.6799,
      "step": 70824
    },
    {
      "epoch": 13.6,
      "learning_rate": 0.001,
      "loss": 2.6655,
      "step": 70836
    },
    {
      "epoch": 13.6,
      "learning_rate": 0.001,
      "loss": 2.6658,
      "step": 70848
    },
    {
      "epoch": 13.61,
      "learning_rate": 0.001,
      "loss": 2.6802,
      "step": 70860
    },
    {
      "epoch": 13.61,
      "learning_rate": 0.001,
      "loss": 2.6671,
      "step": 70872
    },
    {
      "epoch": 13.61,
      "learning_rate": 0.001,
      "loss": 2.6602,
      "step": 70884
    },
    {
      "epoch": 13.61,
      "learning_rate": 0.001,
      "loss": 2.6693,
      "step": 70896
    },
    {
      "epoch": 13.62,
      "learning_rate": 0.001,
      "loss": 2.6638,
      "step": 70908
    },
    {
      "epoch": 13.62,
      "learning_rate": 0.001,
      "loss": 2.6678,
      "step": 70920
    },
    {
      "epoch": 13.62,
      "learning_rate": 0.001,
      "loss": 2.6677,
      "step": 70932
    },
    {
      "epoch": 13.62,
      "learning_rate": 0.001,
      "loss": 2.6596,
      "step": 70944
    },
    {
      "epoch": 13.62,
      "learning_rate": 0.001,
      "loss": 2.6573,
      "step": 70956
    },
    {
      "epoch": 13.63,
      "learning_rate": 0.001,
      "loss": 2.6532,
      "step": 70968
    },
    {
      "epoch": 13.63,
      "learning_rate": 0.001,
      "loss": 2.6716,
      "step": 70980
    },
    {
      "epoch": 13.63,
      "learning_rate": 0.001,
      "loss": 2.6692,
      "step": 70992
    },
    {
      "epoch": 13.63,
      "learning_rate": 0.001,
      "loss": 2.6733,
      "step": 71004
    },
    {
      "epoch": 13.64,
      "learning_rate": 0.001,
      "loss": 2.6586,
      "step": 71016
    },
    {
      "epoch": 13.64,
      "learning_rate": 0.001,
      "loss": 2.6654,
      "step": 71028
    },
    {
      "epoch": 13.64,
      "learning_rate": 0.001,
      "loss": 2.6688,
      "step": 71040
    },
    {
      "epoch": 13.64,
      "learning_rate": 0.001,
      "loss": 2.6533,
      "step": 71052
    },
    {
      "epoch": 13.65,
      "learning_rate": 0.001,
      "loss": 2.6543,
      "step": 71064
    },
    {
      "epoch": 13.65,
      "learning_rate": 0.001,
      "loss": 2.6695,
      "step": 71076
    },
    {
      "epoch": 13.65,
      "learning_rate": 0.001,
      "loss": 2.672,
      "step": 71088
    },
    {
      "epoch": 13.65,
      "learning_rate": 0.001,
      "loss": 2.6645,
      "step": 71100
    },
    {
      "epoch": 13.65,
      "learning_rate": 0.001,
      "loss": 2.6653,
      "step": 71112
    },
    {
      "epoch": 13.66,
      "learning_rate": 0.001,
      "loss": 2.6723,
      "step": 71124
    },
    {
      "epoch": 13.66,
      "learning_rate": 0.001,
      "loss": 2.6645,
      "step": 71136
    },
    {
      "epoch": 13.66,
      "learning_rate": 0.001,
      "loss": 2.6656,
      "step": 71148
    },
    {
      "epoch": 13.66,
      "learning_rate": 0.001,
      "loss": 2.6629,
      "step": 71160
    },
    {
      "epoch": 13.67,
      "learning_rate": 0.001,
      "loss": 2.6736,
      "step": 71172
    },
    {
      "epoch": 13.67,
      "learning_rate": 0.001,
      "loss": 2.6761,
      "step": 71184
    },
    {
      "epoch": 13.67,
      "learning_rate": 0.001,
      "loss": 2.6577,
      "step": 71196
    },
    {
      "epoch": 13.67,
      "learning_rate": 0.001,
      "loss": 2.6663,
      "step": 71208
    },
    {
      "epoch": 13.68,
      "learning_rate": 0.001,
      "loss": 2.6711,
      "step": 71220
    },
    {
      "epoch": 13.68,
      "learning_rate": 0.001,
      "loss": 2.6666,
      "step": 71232
    },
    {
      "epoch": 13.68,
      "learning_rate": 0.001,
      "loss": 2.6693,
      "step": 71244
    },
    {
      "epoch": 13.68,
      "eval_ag_news_accuracy": 0.31021875,
      "eval_ag_news_bleu_score": 4.667875158309431,
      "eval_ag_news_bleu_score_sem": 0.16041649619612317,
      "eval_ag_news_emb_cos_sim": 0.7917337417602539,
      "eval_ag_news_emb_cos_sim_sem": 0.007676240422148053,
      "eval_ag_news_emb_top1_equal": 0.21875,
      "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.6827380657196045,
      "eval_ag_news_n_ngrams_match_1": 13.356,
      "eval_ag_news_n_ngrams_match_2": 2.856,
      "eval_ag_news_n_ngrams_match_3": 0.838,
      "eval_ag_news_num_pred_words": 46.732,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 39.75509725540556,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.32941069343876306,
      "eval_ag_news_runtime": 14.7825,
      "eval_ag_news_samples_per_second": 33.824,
      "eval_ag_news_steps_per_second": 0.068,
      "eval_ag_news_token_set_f1": 0.3359234164658063,
      "eval_ag_news_token_set_f1_sem": 0.0044045344092549345,
      "eval_ag_news_token_set_precision": 0.31729274051631484,
      "eval_ag_news_token_set_recall": 0.37187330534113067,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 71250
    },
    {
      "epoch": 13.68,
      "eval_anthropic_toxic_prompts_accuracy": 0.1105625,
      "eval_anthropic_toxic_prompts_bleu_score": 2.952286324413179,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11558982129429374,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6516097784042358,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009899142920864546,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.140625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030847557647994725,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.357330560684204,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.848,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.768,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.652,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.346,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 28.71244236456615,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20258695510547492,
      "eval_anthropic_toxic_prompts_runtime": 9.569,
      "eval_anthropic_toxic_prompts_samples_per_second": 52.252,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.105,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.34489874424172334,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006526369755584896,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4115063487217594,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3264698538390632,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 71250
    },
    {
      "epoch": 13.68,
      "eval_arxiv_accuracy": 0.3369375,
      "eval_arxiv_bleu_score": 4.021699182832508,
      "eval_arxiv_bleu_score_sem": 0.11346164989068308,
      "eval_arxiv_emb_cos_sim": 0.7389757633209229,
      "eval_arxiv_emb_cos_sim_sem": 0.007385155847085595,
      "eval_arxiv_emb_top1_equal": 0.265625,
      "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.5371644496917725,
      "eval_arxiv_n_ngrams_match_1": 14.578,
      "eval_arxiv_n_ngrams_match_2": 2.77,
      "eval_arxiv_n_ngrams_match_3": 0.564,
      "eval_arxiv_num_pred_words": 41.632,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 34.36932493961764,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3420320790702561,
      "eval_arxiv_runtime": 9.6675,
      "eval_arxiv_samples_per_second": 51.72,
      "eval_arxiv_steps_per_second": 0.103,
      "eval_arxiv_token_set_f1": 0.3372291777879464,
      "eval_arxiv_token_set_f1_sem": 0.004324223442877177,
      "eval_arxiv_token_set_precision": 0.28780419173800104,
      "eval_arxiv_token_set_recall": 0.42224308944809263,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 71250
    },
    {
      "epoch": 13.68,
      "eval_python_code_alpaca_accuracy": 0.15490625,
      "eval_python_code_alpaca_bleu_score": 4.105479368780234,
      "eval_python_code_alpaca_bleu_score_sem": 0.1397634669415215,
      "eval_python_code_alpaca_emb_cos_sim": 0.7371370792388916,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009321790365271016,
      "eval_python_code_alpaca_emb_top1_equal": 0.078125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.023813825516515504,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.011915445327759,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.184,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.566,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.778,
      "eval_python_code_alpaca_num_pred_words": 43.342,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 20.326296570946823,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.30698513107405345,
      "eval_python_code_alpaca_runtime": 19.6582,
      "eval_python_code_alpaca_samples_per_second": 25.435,
      "eval_python_code_alpaca_steps_per_second": 0.051,
      "eval_python_code_alpaca_token_set_f1": 0.4524428336990582,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005853690275783923,
      "eval_python_code_alpaca_token_set_precision": 0.4975520347473566,
      "eval_python_code_alpaca_token_set_recall": 0.4353982240933636,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 71250
    },
    {
      "epoch": 13.68,
      "eval_wikibio_accuracy": 0.3101875,
      "eval_wikibio_bleu_score": 5.552077654959087,
      "eval_wikibio_bleu_score_sem": 0.20290481390862938,
      "eval_wikibio_emb_cos_sim": 0.7196710109710693,
      "eval_wikibio_emb_cos_sim_sem": 0.010501207359120126,
      "eval_wikibio_emb_top1_equal": 0.1640625,
      "eval_wikibio_emb_top1_equal_sem": 0.03286167651298939,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.8840525150299072,
      "eval_wikibio_n_ngrams_match_1": 9.65,
      "eval_wikibio_n_ngrams_match_2": 3.116,
      "eval_wikibio_n_ngrams_match_3": 1.114,
      "eval_wikibio_num_pred_words": 35.86,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 48.620853100904924,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3353852515763316,
      "eval_wikibio_runtime": 10.0876,
      "eval_wikibio_samples_per_second": 49.566,
      "eval_wikibio_steps_per_second": 0.099,
      "eval_wikibio_token_set_f1": 0.30591112482158145,
      "eval_wikibio_token_set_f1_sem": 0.005615498855016447,
      "eval_wikibio_token_set_precision": 0.3130418628646202,
      "eval_wikibio_token_set_recall": 0.3176680076579202,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 71250
    },
    {
      "epoch": 13.68,
      "eval_nq_accuracy": 0.5136875,
      "eval_nq_bleu_score": 11.090034028053548,
      "eval_nq_bleu_score_sem": 0.4577365293142572,
      "eval_nq_emb_cos_sim": 0.827858567237854,
      "eval_nq_emb_cos_sim_sem": 0.007347543077512162,
      "eval_nq_emb_top1_equal": 0.2578125,
      "eval_nq_emb_top1_equal_sem": 0.038815656435002115,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.293893575668335,
      "eval_nq_n_ngrams_match_1": 22.402,
      "eval_nq_n_ngrams_match_2": 8.046,
      "eval_nq_n_ngrams_match_3": 3.664,
      "eval_nq_num_pred_words": 49.216,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.913461447185798,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.43262336671948687,
      "eval_nq_runtime": 11.1904,
      "eval_nq_samples_per_second": 44.681,
      "eval_nq_steps_per_second": 0.089,
      "eval_nq_token_set_f1": 0.4496634724741236,
      "eval_nq_token_set_f1_sem": 0.004967090223294023,
      "eval_nq_token_set_precision": 0.4048731003537667,
      "eval_nq_token_set_recall": 0.5146779370454185,
      "eval_nq_true_num_tokens": 64.0,
      "step": 71250
    },
    {
      "epoch": 13.68,
      "learning_rate": 0.001,
      "loss": 2.6632,
      "step": 71256
    },
    {
      "epoch": 13.68,
      "learning_rate": 0.001,
      "loss": 2.6554,
      "step": 71268
    },
    {
      "epoch": 13.69,
      "learning_rate": 0.001,
      "loss": 2.6589,
      "step": 71280
    },
    {
      "epoch": 13.69,
      "learning_rate": 0.001,
      "loss": 2.6609,
      "step": 71292
    },
    {
      "epoch": 13.69,
      "learning_rate": 0.001,
      "loss": 2.6629,
      "step": 71304
    },
    {
      "epoch": 13.69,
      "learning_rate": 0.001,
      "loss": 2.6707,
      "step": 71316
    },
    {
      "epoch": 13.7,
      "learning_rate": 0.001,
      "loss": 2.6655,
      "step": 71328
    },
    {
      "epoch": 13.7,
      "learning_rate": 0.001,
      "loss": 2.6645,
      "step": 71340
    },
    {
      "epoch": 13.7,
      "learning_rate": 0.001,
      "loss": 2.6617,
      "step": 71352
    },
    {
      "epoch": 13.7,
      "learning_rate": 0.001,
      "loss": 2.6654,
      "step": 71364
    },
    {
      "epoch": 13.71,
      "learning_rate": 0.001,
      "loss": 2.6634,
      "step": 71376
    },
    {
      "epoch": 13.71,
      "learning_rate": 0.001,
      "loss": 2.6649,
      "step": 71388
    },
    {
      "epoch": 13.71,
      "learning_rate": 0.001,
      "loss": 2.6628,
      "step": 71400
    },
    {
      "epoch": 13.71,
      "learning_rate": 0.001,
      "loss": 2.6589,
      "step": 71412
    },
    {
      "epoch": 13.71,
      "learning_rate": 0.001,
      "loss": 2.6611,
      "step": 71424
    },
    {
      "epoch": 13.72,
      "learning_rate": 0.001,
      "loss": 2.6718,
      "step": 71436
    },
    {
      "epoch": 13.72,
      "learning_rate": 0.001,
      "loss": 2.665,
      "step": 71448
    },
    {
      "epoch": 13.72,
      "learning_rate": 0.001,
      "loss": 2.6679,
      "step": 71460
    },
    {
      "epoch": 13.72,
      "learning_rate": 0.001,
      "loss": 2.6719,
      "step": 71472
    },
    {
      "epoch": 13.73,
      "learning_rate": 0.001,
      "loss": 2.6593,
      "step": 71484
    },
    {
      "epoch": 13.73,
      "learning_rate": 0.001,
      "loss": 2.6693,
      "step": 71496
    },
    {
      "epoch": 13.73,
      "learning_rate": 0.001,
      "loss": 2.6518,
      "step": 71508
    },
    {
      "epoch": 13.73,
      "learning_rate": 0.001,
      "loss": 2.6698,
      "step": 71520
    },
    {
      "epoch": 13.74,
      "learning_rate": 0.001,
      "loss": 2.6589,
      "step": 71532
    },
    {
      "epoch": 13.74,
      "learning_rate": 0.001,
      "loss": 2.6649,
      "step": 71544
    },
    {
      "epoch": 13.74,
      "learning_rate": 0.001,
      "loss": 2.6646,
      "step": 71556
    },
    {
      "epoch": 13.74,
      "learning_rate": 0.001,
      "loss": 2.665,
      "step": 71568
    },
    {
      "epoch": 13.74,
      "learning_rate": 0.001,
      "loss": 2.6629,
      "step": 71580
    },
    {
      "epoch": 13.75,
      "learning_rate": 0.001,
      "loss": 2.6687,
      "step": 71592
    },
    {
      "epoch": 13.75,
      "learning_rate": 0.001,
      "loss": 2.6657,
      "step": 71604
    },
    {
      "epoch": 13.75,
      "learning_rate": 0.001,
      "loss": 2.6561,
      "step": 71616
    },
    {
      "epoch": 13.75,
      "learning_rate": 0.001,
      "loss": 2.661,
      "step": 71628
    },
    {
      "epoch": 13.76,
      "learning_rate": 0.001,
      "loss": 2.6707,
      "step": 71640
    },
    {
      "epoch": 13.76,
      "learning_rate": 0.001,
      "loss": 2.661,
      "step": 71652
    },
    {
      "epoch": 13.76,
      "learning_rate": 0.001,
      "loss": 2.6625,
      "step": 71664
    },
    {
      "epoch": 13.76,
      "learning_rate": 0.001,
      "loss": 2.6744,
      "step": 71676
    },
    {
      "epoch": 13.76,
      "learning_rate": 0.001,
      "loss": 2.6708,
      "step": 71688
    },
    {
      "epoch": 13.77,
      "learning_rate": 0.001,
      "loss": 2.658,
      "step": 71700
    },
    {
      "epoch": 13.77,
      "learning_rate": 0.001,
      "loss": 2.6655,
      "step": 71712
    },
    {
      "epoch": 13.77,
      "learning_rate": 0.001,
      "loss": 2.6591,
      "step": 71724
    },
    {
      "epoch": 13.77,
      "learning_rate": 0.001,
      "loss": 2.6566,
      "step": 71736
    },
    {
      "epoch": 13.78,
      "learning_rate": 0.001,
      "loss": 2.6615,
      "step": 71748
    },
    {
      "epoch": 13.78,
      "learning_rate": 0.001,
      "loss": 2.6641,
      "step": 71760
    },
    {
      "epoch": 13.78,
      "learning_rate": 0.001,
      "loss": 2.6701,
      "step": 71772
    },
    {
      "epoch": 13.78,
      "learning_rate": 0.001,
      "loss": 2.6658,
      "step": 71784
    },
    {
      "epoch": 13.79,
      "learning_rate": 0.001,
      "loss": 2.6607,
      "step": 71796
    },
    {
      "epoch": 13.79,
      "learning_rate": 0.001,
      "loss": 2.6566,
      "step": 71808
    },
    {
      "epoch": 13.79,
      "learning_rate": 0.001,
      "loss": 2.6662,
      "step": 71820
    },
    {
      "epoch": 13.79,
      "learning_rate": 0.001,
      "loss": 2.6689,
      "step": 71832
    },
    {
      "epoch": 13.79,
      "learning_rate": 0.001,
      "loss": 2.6707,
      "step": 71844
    },
    {
      "epoch": 13.8,
      "learning_rate": 0.001,
      "loss": 2.6592,
      "step": 71856
    },
    {
      "epoch": 13.8,
      "learning_rate": 0.001,
      "loss": 2.6658,
      "step": 71868
    },
    {
      "epoch": 13.8,
      "eval_ag_news_accuracy": 0.31121875,
      "eval_ag_news_bleu_score": 4.560318739593933,
      "eval_ag_news_bleu_score_sem": 0.1493677755464744,
      "eval_ag_news_emb_cos_sim": 0.7990758419036865,
      "eval_ag_news_emb_cos_sim_sem": 0.00687573610875753,
      "eval_ag_news_emb_top1_equal": 0.25,
      "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.6722798347473145,
      "eval_ag_news_n_ngrams_match_1": 13.344,
      "eval_ag_news_n_ngrams_match_2": 2.886,
      "eval_ag_news_n_ngrams_match_3": 0.828,
      "eval_ag_news_num_pred_words": 46.784,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 39.34149580551345,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3299312068651786,
      "eval_ag_news_runtime": 15.38,
      "eval_ag_news_samples_per_second": 32.51,
      "eval_ag_news_steps_per_second": 0.065,
      "eval_ag_news_token_set_f1": 0.3333838736347365,
      "eval_ag_news_token_set_f1_sem": 0.004457908956553333,
      "eval_ag_news_token_set_precision": 0.3173950521645402,
      "eval_ag_news_token_set_recall": 0.3653645208821789,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 71875
    },
    {
      "epoch": 13.8,
      "eval_anthropic_toxic_prompts_accuracy": 0.11028125,
      "eval_anthropic_toxic_prompts_bleu_score": 2.904053292432784,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11569520381222666,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6541334390640259,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009347251671583151,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.3467636108398438,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.962,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.742,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.618,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.158,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 28.410636816541246,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20639918789805434,
      "eval_anthropic_toxic_prompts_runtime": 10.2305,
      "eval_anthropic_toxic_prompts_samples_per_second": 48.873,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.098,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.34726239836305783,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006441835381410861,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4216924323209074,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3250254926472682,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 71875
    },
    {
      "epoch": 13.8,
      "eval_arxiv_accuracy": 0.33675,
      "eval_arxiv_bleu_score": 3.9815023022627165,
      "eval_arxiv_bleu_score_sem": 0.10948450822139191,
      "eval_arxiv_emb_cos_sim": 0.7407854795455933,
      "eval_arxiv_emb_cos_sim_sem": 0.008157875509139817,
      "eval_arxiv_emb_top1_equal": 0.2265625,
      "eval_arxiv_emb_top1_equal_sem": 0.03714537682851538,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.5466015338897705,
      "eval_arxiv_n_ngrams_match_1": 14.204,
      "eval_arxiv_n_ngrams_match_2": 2.732,
      "eval_arxiv_n_ngrams_match_3": 0.566,
      "eval_arxiv_num_pred_words": 39.626,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 34.69520641984392,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.34001852498102725,
      "eval_arxiv_runtime": 9.8907,
      "eval_arxiv_samples_per_second": 50.552,
      "eval_arxiv_steps_per_second": 0.101,
      "eval_arxiv_token_set_f1": 0.33070976303227806,
      "eval_arxiv_token_set_f1_sem": 0.004240608477417651,
      "eval_arxiv_token_set_precision": 0.28113501368089044,
      "eval_arxiv_token_set_recall": 0.4223324535662771,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 71875
    },
    {
      "epoch": 13.8,
      "eval_python_code_alpaca_accuracy": 0.15390625,
      "eval_python_code_alpaca_bleu_score": 4.2379437579218004,
      "eval_python_code_alpaca_bleu_score_sem": 0.14047488021794002,
      "eval_python_code_alpaca_emb_cos_sim": 0.7371129393577576,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009346693225840383,
      "eval_python_code_alpaca_emb_top1_equal": 0.1328125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.0274159908294678,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.39,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.604,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.828,
      "eval_python_code_alpaca_num_pred_words": 43.392,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 20.643819789875995,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3134217692089786,
      "eval_python_code_alpaca_runtime": 14.3324,
      "eval_python_code_alpaca_samples_per_second": 34.886,
      "eval_python_code_alpaca_steps_per_second": 0.07,
      "eval_python_code_alpaca_token_set_f1": 0.4600845392528296,
      "eval_python_code_alpaca_token_set_f1_sem": 0.00555175677990806,
      "eval_python_code_alpaca_token_set_precision": 0.5115891628810105,
      "eval_python_code_alpaca_token_set_recall": 0.4406083843291174,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 71875
    },
    {
      "epoch": 13.8,
      "eval_wikibio_accuracy": 0.31090625,
      "eval_wikibio_bleu_score": 5.59419985816136,
      "eval_wikibio_bleu_score_sem": 0.21470473991171027,
      "eval_wikibio_emb_cos_sim": 0.7257965803146362,
      "eval_wikibio_emb_cos_sim_sem": 0.009295922078452947,
      "eval_wikibio_emb_top1_equal": 0.140625,
      "eval_wikibio_emb_top1_equal_sem": 0.030847557647994725,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.932488441467285,
      "eval_wikibio_n_ngrams_match_1": 10.064,
      "eval_wikibio_n_ngrams_match_2": 3.238,
      "eval_wikibio_n_ngrams_match_3": 1.134,
      "eval_wikibio_num_pred_words": 36.936,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 51.033814450909844,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.344047218691074,
      "eval_wikibio_runtime": 10.9745,
      "eval_wikibio_samples_per_second": 45.56,
      "eval_wikibio_steps_per_second": 0.091,
      "eval_wikibio_token_set_f1": 0.3124186337809242,
      "eval_wikibio_token_set_f1_sem": 0.005443322417982122,
      "eval_wikibio_token_set_precision": 0.3231864854188542,
      "eval_wikibio_token_set_recall": 0.3182054275784626,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 71875
    },
    {
      "epoch": 13.8,
      "eval_nq_accuracy": 0.5125,
      "eval_nq_bleu_score": 10.758493401659862,
      "eval_nq_bleu_score_sem": 0.45410807397169894,
      "eval_nq_emb_cos_sim": 0.8201106786727905,
      "eval_nq_emb_cos_sim_sem": 0.00801139611277373,
      "eval_nq_emb_top1_equal": 0.3203125,
      "eval_nq_emb_top1_equal_sem": 0.041403754790620424,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.293391227722168,
      "eval_nq_n_ngrams_match_1": 22.434,
      "eval_nq_n_ngrams_match_2": 7.92,
      "eval_nq_n_ngrams_match_3": 3.536,
      "eval_nq_num_pred_words": 49.216,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.908482690827114,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.43085284279987485,
      "eval_nq_runtime": 10.6388,
      "eval_nq_samples_per_second": 46.998,
      "eval_nq_steps_per_second": 0.094,
      "eval_nq_token_set_f1": 0.4481553596754238,
      "eval_nq_token_set_f1_sem": 0.004928203830851493,
      "eval_nq_token_set_precision": 0.40346722419867526,
      "eval_nq_token_set_recall": 0.5136582525628065,
      "eval_nq_true_num_tokens": 64.0,
      "step": 71875
    },
    {
      "epoch": 13.8,
      "learning_rate": 0.001,
      "loss": 2.6571,
      "step": 71880
    },
    {
      "epoch": 13.8,
      "learning_rate": 0.001,
      "loss": 2.6649,
      "step": 71892
    },
    {
      "epoch": 13.81,
      "learning_rate": 0.001,
      "loss": 2.6646,
      "step": 71904
    },
    {
      "epoch": 13.81,
      "learning_rate": 0.001,
      "loss": 2.661,
      "step": 71916
    },
    {
      "epoch": 13.81,
      "learning_rate": 0.001,
      "loss": 2.6683,
      "step": 71928
    },
    {
      "epoch": 13.81,
      "learning_rate": 0.001,
      "loss": 2.6695,
      "step": 71940
    },
    {
      "epoch": 13.82,
      "learning_rate": 0.001,
      "loss": 2.6591,
      "step": 71952
    },
    {
      "epoch": 13.82,
      "learning_rate": 0.001,
      "loss": 2.6668,
      "step": 71964
    },
    {
      "epoch": 13.82,
      "learning_rate": 0.001,
      "loss": 2.6592,
      "step": 71976
    },
    {
      "epoch": 13.82,
      "learning_rate": 0.001,
      "loss": 2.6617,
      "step": 71988
    },
    {
      "epoch": 13.82,
      "learning_rate": 0.001,
      "loss": 2.6662,
      "step": 72000
    },
    {
      "epoch": 13.83,
      "learning_rate": 0.001,
      "loss": 2.6636,
      "step": 72012
    },
    {
      "epoch": 13.83,
      "learning_rate": 0.001,
      "loss": 2.6616,
      "step": 72024
    },
    {
      "epoch": 13.83,
      "learning_rate": 0.001,
      "loss": 2.6698,
      "step": 72036
    },
    {
      "epoch": 13.83,
      "learning_rate": 0.001,
      "loss": 2.6635,
      "step": 72048
    },
    {
      "epoch": 13.84,
      "learning_rate": 0.001,
      "loss": 2.6637,
      "step": 72060
    },
    {
      "epoch": 13.84,
      "learning_rate": 0.001,
      "loss": 2.6611,
      "step": 72072
    },
    {
      "epoch": 13.84,
      "learning_rate": 0.001,
      "loss": 2.6575,
      "step": 72084
    },
    {
      "epoch": 13.84,
      "learning_rate": 0.001,
      "loss": 2.668,
      "step": 72096
    },
    {
      "epoch": 13.85,
      "learning_rate": 0.001,
      "loss": 2.6594,
      "step": 72108
    },
    {
      "epoch": 13.85,
      "learning_rate": 0.001,
      "loss": 2.6617,
      "step": 72120
    },
    {
      "epoch": 13.85,
      "learning_rate": 0.001,
      "loss": 2.6596,
      "step": 72132
    },
    {
      "epoch": 13.85,
      "learning_rate": 0.001,
      "loss": 2.6554,
      "step": 72144
    },
    {
      "epoch": 13.85,
      "learning_rate": 0.001,
      "loss": 2.6572,
      "step": 72156
    },
    {
      "epoch": 13.86,
      "learning_rate": 0.001,
      "loss": 2.6648,
      "step": 72168
    },
    {
      "epoch": 13.86,
      "learning_rate": 0.001,
      "loss": 2.6662,
      "step": 72180
    },
    {
      "epoch": 13.86,
      "learning_rate": 0.001,
      "loss": 2.6582,
      "step": 72192
    },
    {
      "epoch": 13.86,
      "learning_rate": 0.001,
      "loss": 2.6643,
      "step": 72204
    },
    {
      "epoch": 13.87,
      "learning_rate": 0.001,
      "loss": 2.6635,
      "step": 72216
    },
    {
      "epoch": 13.87,
      "learning_rate": 0.001,
      "loss": 2.6714,
      "step": 72228
    },
    {
      "epoch": 13.87,
      "learning_rate": 0.001,
      "loss": 2.6551,
      "step": 72240
    },
    {
      "epoch": 13.87,
      "learning_rate": 0.001,
      "loss": 2.6657,
      "step": 72252
    },
    {
      "epoch": 13.88,
      "learning_rate": 0.001,
      "loss": 2.6672,
      "step": 72264
    },
    {
      "epoch": 13.88,
      "learning_rate": 0.001,
      "loss": 2.6635,
      "step": 72276
    },
    {
      "epoch": 13.88,
      "learning_rate": 0.001,
      "loss": 2.6541,
      "step": 72288
    },
    {
      "epoch": 13.88,
      "learning_rate": 0.001,
      "loss": 2.6669,
      "step": 72300
    },
    {
      "epoch": 13.88,
      "learning_rate": 0.001,
      "loss": 2.6763,
      "step": 72312
    },
    {
      "epoch": 13.89,
      "learning_rate": 0.001,
      "loss": 2.6697,
      "step": 72324
    },
    {
      "epoch": 13.89,
      "learning_rate": 0.001,
      "loss": 2.6648,
      "step": 72336
    },
    {
      "epoch": 13.89,
      "learning_rate": 0.001,
      "loss": 2.6653,
      "step": 72348
    },
    {
      "epoch": 13.89,
      "learning_rate": 0.001,
      "loss": 2.6721,
      "step": 72360
    },
    {
      "epoch": 13.9,
      "learning_rate": 0.001,
      "loss": 2.6648,
      "step": 72372
    },
    {
      "epoch": 13.9,
      "learning_rate": 0.001,
      "loss": 2.6573,
      "step": 72384
    },
    {
      "epoch": 13.9,
      "learning_rate": 0.001,
      "loss": 2.6602,
      "step": 72396
    },
    {
      "epoch": 13.9,
      "learning_rate": 0.001,
      "loss": 2.6713,
      "step": 72408
    },
    {
      "epoch": 13.91,
      "learning_rate": 0.001,
      "loss": 2.6683,
      "step": 72420
    },
    {
      "epoch": 13.91,
      "learning_rate": 0.001,
      "loss": 2.6581,
      "step": 72432
    },
    {
      "epoch": 13.91,
      "learning_rate": 0.001,
      "loss": 2.6629,
      "step": 72444
    },
    {
      "epoch": 13.91,
      "learning_rate": 0.001,
      "loss": 2.6677,
      "step": 72456
    },
    {
      "epoch": 13.91,
      "learning_rate": 0.001,
      "loss": 2.665,
      "step": 72468
    },
    {
      "epoch": 13.92,
      "learning_rate": 0.001,
      "loss": 2.6552,
      "step": 72480
    },
    {
      "epoch": 13.92,
      "learning_rate": 0.001,
      "loss": 2.6689,
      "step": 72492
    },
    {
      "epoch": 13.92,
      "eval_ag_news_accuracy": 0.31259375,
      "eval_ag_news_bleu_score": 4.667010394604709,
      "eval_ag_news_bleu_score_sem": 0.15220312413165804,
      "eval_ag_news_emb_cos_sim": 0.7891812324523926,
      "eval_ag_news_emb_cos_sim_sem": 0.008268829063478271,
      "eval_ag_news_emb_top1_equal": 0.265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.6626858711242676,
      "eval_ag_news_n_ngrams_match_1": 13.458,
      "eval_ag_news_n_ngrams_match_2": 2.954,
      "eval_ag_news_n_ngrams_match_3": 0.844,
      "eval_ag_news_num_pred_words": 46.432,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 38.96585972657138,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3306616044189356,
      "eval_ag_news_runtime": 9.7923,
      "eval_ag_news_samples_per_second": 51.06,
      "eval_ag_news_steps_per_second": 0.102,
      "eval_ag_news_token_set_f1": 0.3407866619983471,
      "eval_ag_news_token_set_f1_sem": 0.00461370432169028,
      "eval_ag_news_token_set_precision": 0.31932943071810443,
      "eval_ag_news_token_set_recall": 0.38152919819743547,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 72500
    },
    {
      "epoch": 13.92,
      "eval_anthropic_toxic_prompts_accuracy": 0.11,
      "eval_anthropic_toxic_prompts_bleu_score": 2.8838790061942934,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11380088685718076,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6470285058021545,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.01016822947518235,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.360210418701172,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.838,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.712,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.604,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.356,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 28.79524930077552,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20008081828547014,
      "eval_anthropic_toxic_prompts_runtime": 10.0899,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.555,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.099,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3515441764011612,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006638689362519888,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.41299420285061667,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.33879309911752076,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 72500
    },
    {
      "epoch": 13.92,
      "eval_arxiv_accuracy": 0.33575,
      "eval_arxiv_bleu_score": 4.099351527217666,
      "eval_arxiv_bleu_score_sem": 0.12123417381083582,
      "eval_arxiv_emb_cos_sim": 0.7331365346908569,
      "eval_arxiv_emb_cos_sim_sem": 0.009152979724639234,
      "eval_arxiv_emb_top1_equal": 0.2109375,
      "eval_arxiv_emb_top1_equal_sem": 0.03620184850179216,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.5255320072174072,
      "eval_arxiv_n_ngrams_match_1": 14.052,
      "eval_arxiv_n_ngrams_match_2": 2.842,
      "eval_arxiv_n_ngrams_match_3": 0.64,
      "eval_arxiv_num_pred_words": 39.06,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 33.971842074718595,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3363836710550086,
      "eval_arxiv_runtime": 10.0541,
      "eval_arxiv_samples_per_second": 49.731,
      "eval_arxiv_steps_per_second": 0.099,
      "eval_arxiv_token_set_f1": 0.3356608243933526,
      "eval_arxiv_token_set_f1_sem": 0.004551175312760204,
      "eval_arxiv_token_set_precision": 0.2799417438080741,
      "eval_arxiv_token_set_recall": 0.4449900152168845,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 72500
    },
    {
      "epoch": 13.92,
      "eval_python_code_alpaca_accuracy": 0.15359375,
      "eval_python_code_alpaca_bleu_score": 3.921870232405925,
      "eval_python_code_alpaca_bleu_score_sem": 0.11048887972491647,
      "eval_python_code_alpaca_emb_cos_sim": 0.732799768447876,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008781415083800715,
      "eval_python_code_alpaca_emb_top1_equal": 0.1015625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.026804565886848545,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.0323173999786377,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.198,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.572,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.734,
      "eval_python_code_alpaca_num_pred_words": 44.008,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 20.745251974320777,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3035625584639082,
      "eval_python_code_alpaca_runtime": 9.4867,
      "eval_python_code_alpaca_samples_per_second": 52.706,
      "eval_python_code_alpaca_steps_per_second": 0.105,
      "eval_python_code_alpaca_token_set_f1": 0.4625909206655764,
      "eval_python_code_alpaca_token_set_f1_sem": 0.00577159057307132,
      "eval_python_code_alpaca_token_set_precision": 0.4957626226513615,
      "eval_python_code_alpaca_token_set_recall": 0.45858372168510947,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 72500
    },
    {
      "epoch": 13.92,
      "eval_wikibio_accuracy": 0.3130625,
      "eval_wikibio_bleu_score": 5.338480536302942,
      "eval_wikibio_bleu_score_sem": 0.19376407946131832,
      "eval_wikibio_emb_cos_sim": 0.7155885696411133,
      "eval_wikibio_emb_cos_sim_sem": 0.012235243293602696,
      "eval_wikibio_emb_top1_equal": 0.1640625,
      "eval_wikibio_emb_top1_equal_sem": 0.03286167651298939,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.8659629821777344,
      "eval_wikibio_n_ngrams_match_1": 9.434,
      "eval_wikibio_n_ngrams_match_2": 2.992,
      "eval_wikibio_n_ngrams_match_3": 1.048,
      "eval_wikibio_num_pred_words": 34.994,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 47.7492319594119,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.32572344329734937,
      "eval_wikibio_runtime": 9.384,
      "eval_wikibio_samples_per_second": 53.282,
      "eval_wikibio_steps_per_second": 0.107,
      "eval_wikibio_token_set_f1": 0.29990304429961057,
      "eval_wikibio_token_set_f1_sem": 0.0057470633308822966,
      "eval_wikibio_token_set_precision": 0.30419409834289696,
      "eval_wikibio_token_set_recall": 0.31386573507298776,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 72500
    },
    {
      "epoch": 13.92,
      "eval_nq_accuracy": 0.51159375,
      "eval_nq_bleu_score": 10.927177283354741,
      "eval_nq_bleu_score_sem": 0.46982529307470106,
      "eval_nq_emb_cos_sim": 0.8228777647018433,
      "eval_nq_emb_cos_sim_sem": 0.007915942061975158,
      "eval_nq_emb_top1_equal": 0.3046875,
      "eval_nq_emb_top1_equal_sem": 0.04084279867618665,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.292212963104248,
      "eval_nq_n_ngrams_match_1": 22.286,
      "eval_nq_n_ngrams_match_2": 7.988,
      "eval_nq_n_ngrams_match_3": 3.628,
      "eval_nq_num_pred_words": 48.67,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.89681475156513,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4276151414616264,
      "eval_nq_runtime": 9.9899,
      "eval_nq_samples_per_second": 50.051,
      "eval_nq_steps_per_second": 0.1,
      "eval_nq_token_set_f1": 0.4488316320842184,
      "eval_nq_token_set_f1_sem": 0.005098180030044119,
      "eval_nq_token_set_precision": 0.4011092669667956,
      "eval_nq_token_set_recall": 0.5215948169892849,
      "eval_nq_true_num_tokens": 64.0,
      "step": 72500
    },
    {
      "epoch": 13.92,
      "learning_rate": 0.001,
      "loss": 2.6696,
      "step": 72504
    },
    {
      "epoch": 13.92,
      "learning_rate": 0.001,
      "loss": 2.6658,
      "step": 72516
    },
    {
      "epoch": 13.93,
      "learning_rate": 0.001,
      "loss": 2.662,
      "step": 72528
    },
    {
      "epoch": 13.93,
      "learning_rate": 0.001,
      "loss": 2.6652,
      "step": 72540
    },
    {
      "epoch": 13.93,
      "learning_rate": 0.001,
      "loss": 2.6648,
      "step": 72552
    },
    {
      "epoch": 13.93,
      "learning_rate": 0.001,
      "loss": 2.6589,
      "step": 72564
    },
    {
      "epoch": 13.94,
      "learning_rate": 0.001,
      "loss": 2.6652,
      "step": 72576
    },
    {
      "epoch": 13.94,
      "learning_rate": 0.001,
      "loss": 2.6624,
      "step": 72588
    },
    {
      "epoch": 13.94,
      "learning_rate": 0.001,
      "loss": 2.6622,
      "step": 72600
    },
    {
      "epoch": 13.94,
      "learning_rate": 0.001,
      "loss": 2.6658,
      "step": 72612
    },
    {
      "epoch": 13.94,
      "learning_rate": 0.001,
      "loss": 2.6706,
      "step": 72624
    },
    {
      "epoch": 13.95,
      "learning_rate": 0.001,
      "loss": 2.6565,
      "step": 72636
    },
    {
      "epoch": 13.95,
      "learning_rate": 0.001,
      "loss": 2.6598,
      "step": 72648
    },
    {
      "epoch": 13.95,
      "learning_rate": 0.001,
      "loss": 2.6585,
      "step": 72660
    },
    {
      "epoch": 13.95,
      "learning_rate": 0.001,
      "loss": 2.66,
      "step": 72672
    },
    {
      "epoch": 13.96,
      "learning_rate": 0.001,
      "loss": 2.6645,
      "step": 72684
    },
    {
      "epoch": 13.96,
      "learning_rate": 0.001,
      "loss": 2.6662,
      "step": 72696
    },
    {
      "epoch": 13.96,
      "learning_rate": 0.001,
      "loss": 2.6528,
      "step": 72708
    },
    {
      "epoch": 13.96,
      "learning_rate": 0.001,
      "loss": 2.6655,
      "step": 72720
    },
    {
      "epoch": 13.97,
      "learning_rate": 0.001,
      "loss": 2.6643,
      "step": 72732
    },
    {
      "epoch": 13.97,
      "learning_rate": 0.001,
      "loss": 2.6659,
      "step": 72744
    },
    {
      "epoch": 13.97,
      "learning_rate": 0.001,
      "loss": 2.6681,
      "step": 72756
    },
    {
      "epoch": 13.97,
      "learning_rate": 0.001,
      "loss": 2.6662,
      "step": 72768
    },
    {
      "epoch": 13.97,
      "learning_rate": 0.001,
      "loss": 2.6681,
      "step": 72780
    },
    {
      "epoch": 13.98,
      "learning_rate": 0.001,
      "loss": 2.6612,
      "step": 72792
    },
    {
      "epoch": 13.98,
      "learning_rate": 0.001,
      "loss": 2.6695,
      "step": 72804
    },
    {
      "epoch": 13.98,
      "learning_rate": 0.001,
      "loss": 2.6705,
      "step": 72816
    },
    {
      "epoch": 13.98,
      "learning_rate": 0.001,
      "loss": 2.6704,
      "step": 72828
    },
    {
      "epoch": 13.99,
      "learning_rate": 0.001,
      "loss": 2.6697,
      "step": 72840
    },
    {
      "epoch": 13.99,
      "learning_rate": 0.001,
      "loss": 2.6692,
      "step": 72852
    },
    {
      "epoch": 13.99,
      "learning_rate": 0.001,
      "loss": 2.6619,
      "step": 72864
    },
    {
      "epoch": 13.99,
      "learning_rate": 0.001,
      "loss": 2.667,
      "step": 72876
    },
    {
      "epoch": 14.0,
      "learning_rate": 0.001,
      "loss": 2.6617,
      "step": 72888
    },
    {
      "epoch": 14.0,
      "learning_rate": 0.001,
      "loss": 2.6606,
      "step": 72900
    },
    {
      "epoch": 14.0,
      "learning_rate": 0.001,
      "loss": 2.6555,
      "step": 72912
    },
    {
      "epoch": 14.0,
      "learning_rate": 0.001,
      "loss": 2.6343,
      "step": 72924
    },
    {
      "epoch": 14.0,
      "learning_rate": 0.001,
      "loss": 2.6561,
      "step": 72936
    },
    {
      "epoch": 14.01,
      "learning_rate": 0.001,
      "loss": 2.6485,
      "step": 72948
    },
    {
      "epoch": 14.01,
      "learning_rate": 0.001,
      "loss": 2.6378,
      "step": 72960
    },
    {
      "epoch": 14.01,
      "learning_rate": 0.001,
      "loss": 2.6379,
      "step": 72972
    },
    {
      "epoch": 14.01,
      "learning_rate": 0.001,
      "loss": 2.6512,
      "step": 72984
    },
    {
      "epoch": 14.02,
      "learning_rate": 0.001,
      "loss": 2.6459,
      "step": 72996
    },
    {
      "epoch": 14.02,
      "learning_rate": 0.001,
      "loss": 2.6531,
      "step": 73008
    },
    {
      "epoch": 14.02,
      "learning_rate": 0.001,
      "loss": 2.648,
      "step": 73020
    },
    {
      "epoch": 14.02,
      "learning_rate": 0.001,
      "loss": 2.6472,
      "step": 73032
    },
    {
      "epoch": 14.03,
      "learning_rate": 0.001,
      "loss": 2.6505,
      "step": 73044
    },
    {
      "epoch": 14.03,
      "learning_rate": 0.001,
      "loss": 2.6435,
      "step": 73056
    },
    {
      "epoch": 14.03,
      "learning_rate": 0.001,
      "loss": 2.6491,
      "step": 73068
    },
    {
      "epoch": 14.03,
      "learning_rate": 0.001,
      "loss": 2.6401,
      "step": 73080
    },
    {
      "epoch": 14.03,
      "learning_rate": 0.001,
      "loss": 2.6467,
      "step": 73092
    },
    {
      "epoch": 14.04,
      "learning_rate": 0.001,
      "loss": 2.6447,
      "step": 73104
    },
    {
      "epoch": 14.04,
      "learning_rate": 0.001,
      "loss": 2.6413,
      "step": 73116
    },
    {
      "epoch": 14.04,
      "eval_ag_news_accuracy": 0.3124375,
      "eval_ag_news_bleu_score": 4.599793773144123,
      "eval_ag_news_bleu_score_sem": 0.15307301718055874,
      "eval_ag_news_emb_cos_sim": 0.7911202907562256,
      "eval_ag_news_emb_cos_sim_sem": 0.007782889071037822,
      "eval_ag_news_emb_top1_equal": 0.2421875,
      "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.6850435733795166,
      "eval_ag_news_n_ngrams_match_1": 13.49,
      "eval_ag_news_n_ngrams_match_2": 2.848,
      "eval_ag_news_n_ngrams_match_3": 0.844,
      "eval_ag_news_num_pred_words": 46.564,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 39.8468586743301,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3319840505372907,
      "eval_ag_news_runtime": 9.844,
      "eval_ag_news_samples_per_second": 50.793,
      "eval_ag_news_steps_per_second": 0.102,
      "eval_ag_news_token_set_f1": 0.33785119592784113,
      "eval_ag_news_token_set_f1_sem": 0.004544684325292295,
      "eval_ag_news_token_set_precision": 0.3181789723740158,
      "eval_ag_news_token_set_recall": 0.3769574205503869,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 73125
    },
    {
      "epoch": 14.04,
      "eval_anthropic_toxic_prompts_accuracy": 0.109875,
      "eval_anthropic_toxic_prompts_bleu_score": 2.895404776187029,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10993063822861639,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6511983871459961,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009415453833552382,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.3460946083068848,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.896,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.734,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.622,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.082,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 28.3916363849251,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20427428036189293,
      "eval_anthropic_toxic_prompts_runtime": 9.2877,
      "eval_anthropic_toxic_prompts_samples_per_second": 53.835,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.108,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3517100808410542,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0065988359986545045,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4195940844696324,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.33207423984635404,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 73125
    },
    {
      "epoch": 14.04,
      "eval_arxiv_accuracy": 0.33490625,
      "eval_arxiv_bleu_score": 4.042857169101391,
      "eval_arxiv_bleu_score_sem": 0.11644170214522537,
      "eval_arxiv_emb_cos_sim": 0.7429251074790955,
      "eval_arxiv_emb_cos_sim_sem": 0.008031861041903801,
      "eval_arxiv_emb_top1_equal": 0.25,
      "eval_arxiv_emb_top1_equal_sem": 0.03842366440207048,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.5403828620910645,
      "eval_arxiv_n_ngrams_match_1": 14.486,
      "eval_arxiv_n_ngrams_match_2": 2.698,
      "eval_arxiv_n_ngrams_match_3": 0.562,
      "eval_arxiv_num_pred_words": 40.288,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 34.480117794072534,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.34497499666583126,
      "eval_arxiv_runtime": 9.8455,
      "eval_arxiv_samples_per_second": 50.785,
      "eval_arxiv_steps_per_second": 0.102,
      "eval_arxiv_token_set_f1": 0.3378610630935953,
      "eval_arxiv_token_set_f1_sem": 0.004186340128903121,
      "eval_arxiv_token_set_precision": 0.28564761725596466,
      "eval_arxiv_token_set_recall": 0.4320979633186641,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 73125
    },
    {
      "epoch": 14.04,
      "eval_python_code_alpaca_accuracy": 0.154875,
      "eval_python_code_alpaca_bleu_score": 4.1278632661531445,
      "eval_python_code_alpaca_bleu_score_sem": 0.1331676602417114,
      "eval_python_code_alpaca_emb_cos_sim": 0.7308018803596497,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008006857424024712,
      "eval_python_code_alpaca_emb_top1_equal": 0.140625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.0201494693756104,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.274,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.6,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.832,
      "eval_python_code_alpaca_num_pred_words": 45.222,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 20.494354733676772,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3044311901750288,
      "eval_python_code_alpaca_runtime": 9.3689,
      "eval_python_code_alpaca_samples_per_second": 53.368,
      "eval_python_code_alpaca_steps_per_second": 0.107,
      "eval_python_code_alpaca_token_set_f1": 0.46478355465055876,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005810942233106832,
      "eval_python_code_alpaca_token_set_precision": 0.5065965148749533,
      "eval_python_code_alpaca_token_set_recall": 0.45013731659628475,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 73125
    },
    {
      "epoch": 14.04,
      "eval_wikibio_accuracy": 0.316,
      "eval_wikibio_bleu_score": 5.489130036910917,
      "eval_wikibio_bleu_score_sem": 0.20050337634348733,
      "eval_wikibio_emb_cos_sim": 0.728935718536377,
      "eval_wikibio_emb_cos_sim_sem": 0.010047894954327683,
      "eval_wikibio_emb_top1_equal": 0.140625,
      "eval_wikibio_emb_top1_equal_sem": 0.030847557647994725,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.8851306438446045,
      "eval_wikibio_n_ngrams_match_1": 9.47,
      "eval_wikibio_n_ngrams_match_2": 3.056,
      "eval_wikibio_n_ngrams_match_3": 1.082,
      "eval_wikibio_num_pred_words": 34.9,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 48.67330091129572,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.33099278113828823,
      "eval_wikibio_runtime": 9.8961,
      "eval_wikibio_samples_per_second": 50.525,
      "eval_wikibio_steps_per_second": 0.101,
      "eval_wikibio_token_set_f1": 0.30370348038269523,
      "eval_wikibio_token_set_f1_sem": 0.005659399365345912,
      "eval_wikibio_token_set_precision": 0.30651272926812007,
      "eval_wikibio_token_set_recall": 0.320457283020339,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 73125
    },
    {
      "epoch": 14.04,
      "eval_nq_accuracy": 0.514875,
      "eval_nq_bleu_score": 11.251045201256828,
      "eval_nq_bleu_score_sem": 0.46399444516687893,
      "eval_nq_emb_cos_sim": 0.8228020668029785,
      "eval_nq_emb_cos_sim_sem": 0.007771887426487462,
      "eval_nq_emb_top1_equal": 0.28125,
      "eval_nq_emb_top1_equal_sem": 0.039896367485272234,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.2896971702575684,
      "eval_nq_n_ngrams_match_1": 22.628,
      "eval_nq_n_ngrams_match_2": 8.254,
      "eval_nq_n_ngrams_match_3": 3.748,
      "eval_nq_num_pred_words": 49.3,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.871947709087753,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.43529691620612854,
      "eval_nq_runtime": 10.1349,
      "eval_nq_samples_per_second": 49.335,
      "eval_nq_steps_per_second": 0.099,
      "eval_nq_token_set_f1": 0.45410653552424035,
      "eval_nq_token_set_f1_sem": 0.004955940786184263,
      "eval_nq_token_set_precision": 0.4089740665226169,
      "eval_nq_token_set_recall": 0.5213966947850811,
      "eval_nq_true_num_tokens": 64.0,
      "step": 73125
    },
    {
      "epoch": 14.04,
      "learning_rate": 0.001,
      "loss": 2.6454,
      "step": 73128
    },
    {
      "epoch": 14.04,
      "learning_rate": 0.001,
      "loss": 2.644,
      "step": 73140
    },
    {
      "epoch": 14.05,
      "learning_rate": 0.001,
      "loss": 2.6349,
      "step": 73152
    },
    {
      "epoch": 14.05,
      "learning_rate": 0.001,
      "loss": 2.6381,
      "step": 73164
    },
    {
      "epoch": 14.05,
      "learning_rate": 0.001,
      "loss": 2.6373,
      "step": 73176
    },
    {
      "epoch": 14.05,
      "learning_rate": 0.001,
      "loss": 2.6482,
      "step": 73188
    },
    {
      "epoch": 14.06,
      "learning_rate": 0.001,
      "loss": 2.6491,
      "step": 73200
    },
    {
      "epoch": 14.06,
      "learning_rate": 0.001,
      "loss": 2.6405,
      "step": 73212
    },
    {
      "epoch": 14.06,
      "learning_rate": 0.001,
      "loss": 2.6478,
      "step": 73224
    },
    {
      "epoch": 14.06,
      "learning_rate": 0.001,
      "loss": 2.6319,
      "step": 73236
    },
    {
      "epoch": 14.06,
      "learning_rate": 0.001,
      "loss": 2.6365,
      "step": 73248
    },
    {
      "epoch": 14.07,
      "learning_rate": 0.001,
      "loss": 2.6523,
      "step": 73260
    },
    {
      "epoch": 14.07,
      "learning_rate": 0.001,
      "loss": 2.6623,
      "step": 73272
    },
    {
      "epoch": 14.07,
      "learning_rate": 0.001,
      "loss": 2.6553,
      "step": 73284
    },
    {
      "epoch": 14.07,
      "learning_rate": 0.001,
      "loss": 2.6463,
      "step": 73296
    },
    {
      "epoch": 14.08,
      "learning_rate": 0.001,
      "loss": 2.6472,
      "step": 73308
    },
    {
      "epoch": 14.08,
      "learning_rate": 0.001,
      "loss": 2.6399,
      "step": 73320
    },
    {
      "epoch": 14.08,
      "learning_rate": 0.001,
      "loss": 2.6453,
      "step": 73332
    },
    {
      "epoch": 14.08,
      "learning_rate": 0.001,
      "loss": 2.6448,
      "step": 73344
    },
    {
      "epoch": 14.09,
      "learning_rate": 0.001,
      "loss": 2.6409,
      "step": 73356
    },
    {
      "epoch": 14.09,
      "learning_rate": 0.001,
      "loss": 2.6531,
      "step": 73368
    },
    {
      "epoch": 14.09,
      "learning_rate": 0.001,
      "loss": 2.6466,
      "step": 73380
    },
    {
      "epoch": 14.09,
      "learning_rate": 0.001,
      "loss": 2.644,
      "step": 73392
    },
    {
      "epoch": 14.09,
      "learning_rate": 0.001,
      "loss": 2.6444,
      "step": 73404
    },
    {
      "epoch": 14.1,
      "learning_rate": 0.001,
      "loss": 2.6515,
      "step": 73416
    },
    {
      "epoch": 14.1,
      "learning_rate": 0.001,
      "loss": 2.6436,
      "step": 73428
    },
    {
      "epoch": 14.1,
      "learning_rate": 0.001,
      "loss": 2.6534,
      "step": 73440
    },
    {
      "epoch": 14.1,
      "learning_rate": 0.001,
      "loss": 2.6497,
      "step": 73452
    },
    {
      "epoch": 14.11,
      "learning_rate": 0.001,
      "loss": 2.6486,
      "step": 73464
    },
    {
      "epoch": 14.11,
      "learning_rate": 0.001,
      "loss": 2.6612,
      "step": 73476
    },
    {
      "epoch": 14.11,
      "learning_rate": 0.001,
      "loss": 2.6561,
      "step": 73488
    },
    {
      "epoch": 14.11,
      "learning_rate": 0.001,
      "loss": 2.6508,
      "step": 73500
    },
    {
      "epoch": 14.12,
      "learning_rate": 0.001,
      "loss": 2.6494,
      "step": 73512
    },
    {
      "epoch": 14.12,
      "learning_rate": 0.001,
      "loss": 2.6489,
      "step": 73524
    },
    {
      "epoch": 14.12,
      "learning_rate": 0.001,
      "loss": 2.6492,
      "step": 73536
    },
    {
      "epoch": 14.12,
      "learning_rate": 0.001,
      "loss": 2.6522,
      "step": 73548
    },
    {
      "epoch": 14.12,
      "learning_rate": 0.001,
      "loss": 2.6432,
      "step": 73560
    },
    {
      "epoch": 14.13,
      "learning_rate": 0.001,
      "loss": 2.6493,
      "step": 73572
    },
    {
      "epoch": 14.13,
      "learning_rate": 0.001,
      "loss": 2.6495,
      "step": 73584
    },
    {
      "epoch": 14.13,
      "learning_rate": 0.001,
      "loss": 2.6495,
      "step": 73596
    },
    {
      "epoch": 14.13,
      "learning_rate": 0.001,
      "loss": 2.6398,
      "step": 73608
    },
    {
      "epoch": 14.14,
      "learning_rate": 0.001,
      "loss": 2.6435,
      "step": 73620
    },
    {
      "epoch": 14.14,
      "learning_rate": 0.001,
      "loss": 2.6461,
      "step": 73632
    },
    {
      "epoch": 14.14,
      "learning_rate": 0.001,
      "loss": 2.6514,
      "step": 73644
    },
    {
      "epoch": 14.14,
      "learning_rate": 0.001,
      "loss": 2.6547,
      "step": 73656
    },
    {
      "epoch": 14.15,
      "learning_rate": 0.001,
      "loss": 2.6494,
      "step": 73668
    },
    {
      "epoch": 14.15,
      "learning_rate": 0.001,
      "loss": 2.6405,
      "step": 73680
    },
    {
      "epoch": 14.15,
      "learning_rate": 0.001,
      "loss": 2.6492,
      "step": 73692
    },
    {
      "epoch": 14.15,
      "learning_rate": 0.001,
      "loss": 2.6511,
      "step": 73704
    },
    {
      "epoch": 14.15,
      "learning_rate": 0.001,
      "loss": 2.6544,
      "step": 73716
    },
    {
      "epoch": 14.16,
      "learning_rate": 0.001,
      "loss": 2.6484,
      "step": 73728
    },
    {
      "epoch": 14.16,
      "learning_rate": 0.001,
      "loss": 2.6379,
      "step": 73740
    },
    {
      "epoch": 14.16,
      "eval_ag_news_accuracy": 0.30990625,
      "eval_ag_news_bleu_score": 4.6911453936087115,
      "eval_ag_news_bleu_score_sem": 0.15697956399831808,
      "eval_ag_news_emb_cos_sim": 0.7940667867660522,
      "eval_ag_news_emb_cos_sim_sem": 0.0075017768354597465,
      "eval_ag_news_emb_top1_equal": 0.2421875,
      "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.6646106243133545,
      "eval_ag_news_n_ngrams_match_1": 13.356,
      "eval_ag_news_n_ngrams_match_2": 2.988,
      "eval_ag_news_n_ngrams_match_3": 0.882,
      "eval_ag_news_num_pred_words": 46.23,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 39.04093161359618,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.33079080594767524,
      "eval_ag_news_runtime": 9.9854,
      "eval_ag_news_samples_per_second": 50.073,
      "eval_ag_news_steps_per_second": 0.1,
      "eval_ag_news_token_set_f1": 0.33544118111473303,
      "eval_ag_news_token_set_f1_sem": 0.004651303418480213,
      "eval_ag_news_token_set_precision": 0.31543177944159256,
      "eval_ag_news_token_set_recall": 0.374220577683943,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 73750
    },
    {
      "epoch": 14.16,
      "eval_anthropic_toxic_prompts_accuracy": 0.1095625,
      "eval_anthropic_toxic_prompts_bleu_score": 2.786226119952974,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10713424351407781,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6525640487670898,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009288877627851055,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1484375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.031548465007086954,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.347583532333374,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.878,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.724,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.588,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.094,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 28.43394086074982,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20356452372114942,
      "eval_anthropic_toxic_prompts_runtime": 9.884,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.587,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.101,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3416159236421704,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006345102161436105,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4151898144934839,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3178102705218873,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 73750
    },
    {
      "epoch": 14.16,
      "eval_arxiv_accuracy": 0.3361875,
      "eval_arxiv_bleu_score": 3.9824530512943435,
      "eval_arxiv_bleu_score_sem": 0.11369980858364914,
      "eval_arxiv_emb_cos_sim": 0.733562707901001,
      "eval_arxiv_emb_cos_sim_sem": 0.00844667625184859,
      "eval_arxiv_emb_top1_equal": 0.25,
      "eval_arxiv_emb_top1_equal_sem": 0.03842366440207048,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.5285377502441406,
      "eval_arxiv_n_ngrams_match_1": 14.238,
      "eval_arxiv_n_ngrams_match_2": 2.716,
      "eval_arxiv_n_ngrams_match_3": 0.56,
      "eval_arxiv_num_pred_words": 39.872,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 34.074106315161664,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.339855338181802,
      "eval_arxiv_runtime": 9.6456,
      "eval_arxiv_samples_per_second": 51.837,
      "eval_arxiv_steps_per_second": 0.104,
      "eval_arxiv_token_set_f1": 0.3334672824648522,
      "eval_arxiv_token_set_f1_sem": 0.004236619444607306,
      "eval_arxiv_token_set_precision": 0.2808371818029373,
      "eval_arxiv_token_set_recall": 0.4358752554472092,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 73750
    },
    {
      "epoch": 14.16,
      "eval_python_code_alpaca_accuracy": 0.15496875,
      "eval_python_code_alpaca_bleu_score": 4.209871652279953,
      "eval_python_code_alpaca_bleu_score_sem": 0.13866307576436476,
      "eval_python_code_alpaca_emb_cos_sim": 0.7154151797294617,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.01036311123434066,
      "eval_python_code_alpaca_emb_top1_equal": 0.1171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.0079410076141357,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.222,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.588,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.858,
      "eval_python_code_alpaca_num_pred_words": 44.128,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 20.245671297471503,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3022217892621271,
      "eval_python_code_alpaca_runtime": 9.4478,
      "eval_python_code_alpaca_samples_per_second": 52.922,
      "eval_python_code_alpaca_steps_per_second": 0.106,
      "eval_python_code_alpaca_token_set_f1": 0.45909255201429167,
      "eval_python_code_alpaca_token_set_f1_sem": 0.00601253214442383,
      "eval_python_code_alpaca_token_set_precision": 0.4983887793272989,
      "eval_python_code_alpaca_token_set_recall": 0.45090550869662416,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 73750
    },
    {
      "epoch": 14.16,
      "eval_wikibio_accuracy": 0.31446875,
      "eval_wikibio_bleu_score": 5.580133044479681,
      "eval_wikibio_bleu_score_sem": 0.20506788017215485,
      "eval_wikibio_emb_cos_sim": 0.7301121950149536,
      "eval_wikibio_emb_cos_sim_sem": 0.009828932198145969,
      "eval_wikibio_emb_top1_equal": 0.125,
      "eval_wikibio_emb_top1_equal_sem": 0.02934655822437397,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.8464243412017822,
      "eval_wikibio_n_ngrams_match_1": 9.692,
      "eval_wikibio_n_ngrams_match_2": 3.22,
      "eval_wikibio_n_ngrams_match_3": 1.144,
      "eval_wikibio_num_pred_words": 35.86,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 46.825332124891936,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3390310997186834,
      "eval_wikibio_runtime": 11.1169,
      "eval_wikibio_samples_per_second": 44.976,
      "eval_wikibio_steps_per_second": 0.09,
      "eval_wikibio_token_set_f1": 0.30983704129249356,
      "eval_wikibio_token_set_f1_sem": 0.005564948964924509,
      "eval_wikibio_token_set_precision": 0.3160615956479311,
      "eval_wikibio_token_set_recall": 0.3203211879569017,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 73750
    },
    {
      "epoch": 14.16,
      "eval_nq_accuracy": 0.513875,
      "eval_nq_bleu_score": 11.313150061634024,
      "eval_nq_bleu_score_sem": 0.47206159574148193,
      "eval_nq_emb_cos_sim": 0.8237274885177612,
      "eval_nq_emb_cos_sim_sem": 0.007182782485604474,
      "eval_nq_emb_top1_equal": 0.2890625,
      "eval_nq_emb_top1_equal_sem": 0.04022626667363519,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.2871272563934326,
      "eval_nq_n_ngrams_match_1": 22.482,
      "eval_nq_n_ngrams_match_2": 8.192,
      "eval_nq_n_ngrams_match_3": 3.75,
      "eval_nq_num_pred_words": 48.948,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.846610225324573,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4339535366874501,
      "eval_nq_runtime": 10.3605,
      "eval_nq_samples_per_second": 48.26,
      "eval_nq_steps_per_second": 0.097,
      "eval_nq_token_set_f1": 0.45193722470489195,
      "eval_nq_token_set_f1_sem": 0.004997454913791524,
      "eval_nq_token_set_precision": 0.40655292709597685,
      "eval_nq_token_set_recall": 0.5175811503915181,
      "eval_nq_true_num_tokens": 64.0,
      "step": 73750
    },
    {
      "epoch": 14.16,
      "learning_rate": 0.001,
      "loss": 2.655,
      "step": 73752
    },
    {
      "epoch": 14.16,
      "learning_rate": 0.001,
      "loss": 2.6435,
      "step": 73764
    },
    {
      "epoch": 14.17,
      "learning_rate": 0.001,
      "loss": 2.6485,
      "step": 73776
    },
    {
      "epoch": 14.17,
      "learning_rate": 0.001,
      "loss": 2.6461,
      "step": 73788
    },
    {
      "epoch": 14.17,
      "learning_rate": 0.001,
      "loss": 2.6503,
      "step": 73800
    },
    {
      "epoch": 14.17,
      "learning_rate": 0.001,
      "loss": 2.6491,
      "step": 73812
    },
    {
      "epoch": 14.18,
      "learning_rate": 0.001,
      "loss": 2.6544,
      "step": 73824
    },
    {
      "epoch": 14.18,
      "learning_rate": 0.001,
      "loss": 2.6498,
      "step": 73836
    },
    {
      "epoch": 14.18,
      "learning_rate": 0.001,
      "loss": 2.6506,
      "step": 73848
    },
    {
      "epoch": 14.18,
      "learning_rate": 0.001,
      "loss": 2.6552,
      "step": 73860
    },
    {
      "epoch": 14.18,
      "learning_rate": 0.001,
      "loss": 2.6498,
      "step": 73872
    },
    {
      "epoch": 14.19,
      "learning_rate": 0.001,
      "loss": 2.647,
      "step": 73884
    },
    {
      "epoch": 14.19,
      "learning_rate": 0.001,
      "loss": 2.6378,
      "step": 73896
    },
    {
      "epoch": 14.19,
      "learning_rate": 0.001,
      "loss": 2.6565,
      "step": 73908
    },
    {
      "epoch": 14.19,
      "learning_rate": 0.001,
      "loss": 2.6538,
      "step": 73920
    },
    {
      "epoch": 14.2,
      "learning_rate": 0.001,
      "loss": 2.6415,
      "step": 73932
    },
    {
      "epoch": 14.2,
      "learning_rate": 0.001,
      "loss": 2.6504,
      "step": 73944
    },
    {
      "epoch": 14.2,
      "learning_rate": 0.001,
      "loss": 2.6469,
      "step": 73956
    },
    {
      "epoch": 14.2,
      "learning_rate": 0.001,
      "loss": 2.6485,
      "step": 73968
    },
    {
      "epoch": 14.21,
      "learning_rate": 0.001,
      "loss": 2.6498,
      "step": 73980
    },
    {
      "epoch": 14.21,
      "learning_rate": 0.001,
      "loss": 2.6472,
      "step": 73992
    },
    {
      "epoch": 14.21,
      "learning_rate": 0.001,
      "loss": 2.6483,
      "step": 74004
    },
    {
      "epoch": 14.21,
      "learning_rate": 0.001,
      "loss": 2.6606,
      "step": 74016
    },
    {
      "epoch": 14.21,
      "learning_rate": 0.001,
      "loss": 2.6504,
      "step": 74028
    },
    {
      "epoch": 14.22,
      "learning_rate": 0.001,
      "loss": 2.6593,
      "step": 74040
    },
    {
      "epoch": 14.22,
      "learning_rate": 0.001,
      "loss": 2.6479,
      "step": 74052
    },
    {
      "epoch": 14.22,
      "learning_rate": 0.001,
      "loss": 2.6537,
      "step": 74064
    },
    {
      "epoch": 14.22,
      "learning_rate": 0.001,
      "loss": 2.6494,
      "step": 74076
    },
    {
      "epoch": 14.23,
      "learning_rate": 0.001,
      "loss": 2.6513,
      "step": 74088
    },
    {
      "epoch": 14.23,
      "learning_rate": 0.001,
      "loss": 2.6569,
      "step": 74100
    },
    {
      "epoch": 14.23,
      "learning_rate": 0.001,
      "loss": 2.6588,
      "step": 74112
    },
    {
      "epoch": 14.23,
      "learning_rate": 0.001,
      "loss": 2.6554,
      "step": 74124
    },
    {
      "epoch": 14.24,
      "learning_rate": 0.001,
      "loss": 2.6525,
      "step": 74136
    },
    {
      "epoch": 14.24,
      "learning_rate": 0.001,
      "loss": 2.6467,
      "step": 74148
    },
    {
      "epoch": 14.24,
      "learning_rate": 0.001,
      "loss": 2.6548,
      "step": 74160
    },
    {
      "epoch": 14.24,
      "learning_rate": 0.001,
      "loss": 2.6615,
      "step": 74172
    },
    {
      "epoch": 14.24,
      "learning_rate": 0.001,
      "loss": 2.6567,
      "step": 74184
    },
    {
      "epoch": 14.25,
      "learning_rate": 0.001,
      "loss": 2.6486,
      "step": 74196
    },
    {
      "epoch": 14.25,
      "learning_rate": 0.001,
      "loss": 2.655,
      "step": 74208
    },
    {
      "epoch": 14.25,
      "learning_rate": 0.001,
      "loss": 2.6449,
      "step": 74220
    },
    {
      "epoch": 14.25,
      "learning_rate": 0.001,
      "loss": 2.6497,
      "step": 74232
    },
    {
      "epoch": 14.26,
      "learning_rate": 0.001,
      "loss": 2.6564,
      "step": 74244
    },
    {
      "epoch": 14.26,
      "learning_rate": 0.001,
      "loss": 2.6513,
      "step": 74256
    },
    {
      "epoch": 14.26,
      "learning_rate": 0.001,
      "loss": 2.6639,
      "step": 74268
    },
    {
      "epoch": 14.26,
      "learning_rate": 0.001,
      "loss": 2.6534,
      "step": 74280
    },
    {
      "epoch": 14.26,
      "learning_rate": 0.001,
      "loss": 2.666,
      "step": 74292
    },
    {
      "epoch": 14.27,
      "learning_rate": 0.001,
      "loss": 2.6485,
      "step": 74304
    },
    {
      "epoch": 14.27,
      "learning_rate": 0.001,
      "loss": 2.6577,
      "step": 74316
    },
    {
      "epoch": 14.27,
      "learning_rate": 0.001,
      "loss": 2.6499,
      "step": 74328
    },
    {
      "epoch": 14.27,
      "learning_rate": 0.001,
      "loss": 2.6481,
      "step": 74340
    },
    {
      "epoch": 14.28,
      "learning_rate": 0.001,
      "loss": 2.6517,
      "step": 74352
    },
    {
      "epoch": 14.28,
      "learning_rate": 0.001,
      "loss": 2.6618,
      "step": 74364
    },
    {
      "epoch": 14.28,
      "eval_ag_news_accuracy": 0.31196875,
      "eval_ag_news_bleu_score": 4.530719373629623,
      "eval_ag_news_bleu_score_sem": 0.14707608957693488,
      "eval_ag_news_emb_cos_sim": 0.7935448288917542,
      "eval_ag_news_emb_cos_sim_sem": 0.007292474268665631,
      "eval_ag_news_emb_top1_equal": 0.234375,
      "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.6723897457122803,
      "eval_ag_news_n_ngrams_match_1": 13.556,
      "eval_ag_news_n_ngrams_match_2": 2.924,
      "eval_ag_news_n_ngrams_match_3": 0.788,
      "eval_ag_news_num_pred_words": 46.566,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 39.34582010492024,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3349469480461629,
      "eval_ag_news_runtime": 11.1017,
      "eval_ag_news_samples_per_second": 45.038,
      "eval_ag_news_steps_per_second": 0.09,
      "eval_ag_news_token_set_f1": 0.33913312328003714,
      "eval_ag_news_token_set_f1_sem": 0.004317692030863565,
      "eval_ag_news_token_set_precision": 0.32194428388324775,
      "eval_ag_news_token_set_recall": 0.3729593396042471,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 74375
    },
    {
      "epoch": 14.28,
      "eval_anthropic_toxic_prompts_accuracy": 0.10971875,
      "eval_anthropic_toxic_prompts_bleu_score": 2.7945020810196173,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10685465770247689,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.641710638999939,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009864716189620733,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.363442897796631,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.718,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.654,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.598,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.404,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 28.88847994406159,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.19770273251289106,
      "eval_anthropic_toxic_prompts_runtime": 9.3924,
      "eval_anthropic_toxic_prompts_samples_per_second": 53.235,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.106,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3434382938004426,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006437626654076042,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.40657300781026523,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3271426314507424,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 74375
    },
    {
      "epoch": 14.28,
      "eval_arxiv_accuracy": 0.3364375,
      "eval_arxiv_bleu_score": 3.928561341510662,
      "eval_arxiv_bleu_score_sem": 0.11104355755983975,
      "eval_arxiv_emb_cos_sim": 0.7331550717353821,
      "eval_arxiv_emb_cos_sim_sem": 0.007875648357709727,
      "eval_arxiv_emb_top1_equal": 0.28125,
      "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.515850305557251,
      "eval_arxiv_n_ngrams_match_1": 14.17,
      "eval_arxiv_n_ngrams_match_2": 2.666,
      "eval_arxiv_n_ngrams_match_3": 0.538,
      "eval_arxiv_num_pred_words": 39.366,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 33.64452389018019,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.340580079726118,
      "eval_arxiv_runtime": 10.2528,
      "eval_arxiv_samples_per_second": 48.767,
      "eval_arxiv_steps_per_second": 0.098,
      "eval_arxiv_token_set_f1": 0.33346639356391156,
      "eval_arxiv_token_set_f1_sem": 0.004207045965642826,
      "eval_arxiv_token_set_precision": 0.28174143839893606,
      "eval_arxiv_token_set_recall": 0.430434005989786,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 74375
    },
    {
      "epoch": 14.28,
      "eval_python_code_alpaca_accuracy": 0.15378125,
      "eval_python_code_alpaca_bleu_score": 3.862998746008432,
      "eval_python_code_alpaca_bleu_score_sem": 0.11164045906700341,
      "eval_python_code_alpaca_emb_cos_sim": 0.7203464508056641,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009563238465133291,
      "eval_python_code_alpaca_emb_top1_equal": 0.0859375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02487009666300537,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.013864278793335,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.006,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.354,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.724,
      "eval_python_code_alpaca_num_pred_words": 43.384,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 20.365947762170272,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.29992380892071974,
      "eval_python_code_alpaca_runtime": 9.8246,
      "eval_python_code_alpaca_samples_per_second": 50.893,
      "eval_python_code_alpaca_steps_per_second": 0.102,
      "eval_python_code_alpaca_token_set_f1": 0.4454810625851117,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005939004477089126,
      "eval_python_code_alpaca_token_set_precision": 0.48828864850600895,
      "eval_python_code_alpaca_token_set_recall": 0.43521256158964344,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 74375
    },
    {
      "epoch": 14.28,
      "eval_wikibio_accuracy": 0.31159375,
      "eval_wikibio_bleu_score": 5.5747303718725,
      "eval_wikibio_bleu_score_sem": 0.19877028004601707,
      "eval_wikibio_emb_cos_sim": 0.7202527523040771,
      "eval_wikibio_emb_cos_sim_sem": 0.01039528574336717,
      "eval_wikibio_emb_top1_equal": 0.203125,
      "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.859248399734497,
      "eval_wikibio_n_ngrams_match_1": 9.854,
      "eval_wikibio_n_ngrams_match_2": 3.206,
      "eval_wikibio_n_ngrams_match_3": 1.134,
      "eval_wikibio_num_pred_words": 36.246,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 47.42968980146304,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3406756703952334,
      "eval_wikibio_runtime": 10.1792,
      "eval_wikibio_samples_per_second": 49.12,
      "eval_wikibio_steps_per_second": 0.098,
      "eval_wikibio_token_set_f1": 0.3100505634113041,
      "eval_wikibio_token_set_f1_sem": 0.005343169258096035,
      "eval_wikibio_token_set_precision": 0.31721747727931304,
      "eval_wikibio_token_set_recall": 0.3221580480073975,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 74375
    },
    {
      "epoch": 14.28,
      "eval_nq_accuracy": 0.5143125,
      "eval_nq_bleu_score": 11.116941807837486,
      "eval_nq_bleu_score_sem": 0.4759200581539458,
      "eval_nq_emb_cos_sim": 0.8214974403381348,
      "eval_nq_emb_cos_sim_sem": 0.007312549997991484,
      "eval_nq_emb_top1_equal": 0.2578125,
      "eval_nq_emb_top1_equal_sem": 0.038815656435002115,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.288007974624634,
      "eval_nq_n_ngrams_match_1": 22.362,
      "eval_nq_n_ngrams_match_2": 7.946,
      "eval_nq_n_ngrams_match_3": 3.656,
      "eval_nq_num_pred_words": 48.956,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.855286134420407,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4301810822709363,
      "eval_nq_runtime": 10.264,
      "eval_nq_samples_per_second": 48.714,
      "eval_nq_steps_per_second": 0.097,
      "eval_nq_token_set_f1": 0.44629435692179015,
      "eval_nq_token_set_f1_sem": 0.00490550261991377,
      "eval_nq_token_set_precision": 0.40100556596328285,
      "eval_nq_token_set_recall": 0.5104525715389672,
      "eval_nq_true_num_tokens": 64.0,
      "step": 74375
    },
    {
      "epoch": 14.28,
      "learning_rate": 0.001,
      "loss": 2.6553,
      "step": 74376
    },
    {
      "epoch": 14.28,
      "learning_rate": 0.001,
      "loss": 2.6513,
      "step": 74388
    },
    {
      "epoch": 14.29,
      "learning_rate": 0.001,
      "loss": 2.6519,
      "step": 74400
    },
    {
      "epoch": 14.29,
      "learning_rate": 0.001,
      "loss": 2.6685,
      "step": 74412
    },
    {
      "epoch": 14.29,
      "learning_rate": 0.001,
      "loss": 2.6645,
      "step": 74424
    },
    {
      "epoch": 14.29,
      "learning_rate": 0.001,
      "loss": 2.6562,
      "step": 74436
    },
    {
      "epoch": 14.29,
      "learning_rate": 0.001,
      "loss": 2.6578,
      "step": 74448
    },
    {
      "epoch": 14.3,
      "learning_rate": 0.001,
      "loss": 2.6617,
      "step": 74460
    },
    {
      "epoch": 14.3,
      "learning_rate": 0.001,
      "loss": 2.6571,
      "step": 74472
    },
    {
      "epoch": 14.3,
      "learning_rate": 0.001,
      "loss": 2.6557,
      "step": 74484
    },
    {
      "epoch": 14.3,
      "learning_rate": 0.001,
      "loss": 2.655,
      "step": 74496
    },
    {
      "epoch": 14.31,
      "learning_rate": 0.001,
      "loss": 2.6491,
      "step": 74508
    },
    {
      "epoch": 14.31,
      "learning_rate": 0.001,
      "loss": 2.6406,
      "step": 74520
    },
    {
      "epoch": 14.31,
      "learning_rate": 0.001,
      "loss": 2.6484,
      "step": 74532
    },
    {
      "epoch": 14.31,
      "learning_rate": 0.001,
      "loss": 2.6479,
      "step": 74544
    },
    {
      "epoch": 14.32,
      "learning_rate": 0.001,
      "loss": 2.6501,
      "step": 74556
    },
    {
      "epoch": 14.32,
      "learning_rate": 0.001,
      "loss": 2.6554,
      "step": 74568
    },
    {
      "epoch": 14.32,
      "learning_rate": 0.001,
      "loss": 2.6607,
      "step": 74580
    },
    {
      "epoch": 14.32,
      "learning_rate": 0.001,
      "loss": 2.6487,
      "step": 74592
    },
    {
      "epoch": 14.32,
      "learning_rate": 0.001,
      "loss": 2.6559,
      "step": 74604
    },
    {
      "epoch": 14.33,
      "learning_rate": 0.001,
      "loss": 2.646,
      "step": 74616
    },
    {
      "epoch": 14.33,
      "learning_rate": 0.001,
      "loss": 2.6439,
      "step": 74628
    },
    {
      "epoch": 14.33,
      "learning_rate": 0.001,
      "loss": 2.6575,
      "step": 74640
    },
    {
      "epoch": 14.33,
      "learning_rate": 0.001,
      "loss": 2.6472,
      "step": 74652
    },
    {
      "epoch": 14.34,
      "learning_rate": 0.001,
      "loss": 2.6592,
      "step": 74664
    },
    {
      "epoch": 14.34,
      "learning_rate": 0.001,
      "loss": 2.6565,
      "step": 74676
    },
    {
      "epoch": 14.34,
      "learning_rate": 0.001,
      "loss": 2.6503,
      "step": 74688
    },
    {
      "epoch": 14.34,
      "learning_rate": 0.001,
      "loss": 2.6435,
      "step": 74700
    },
    {
      "epoch": 14.35,
      "learning_rate": 0.001,
      "loss": 2.6544,
      "step": 74712
    },
    {
      "epoch": 14.35,
      "learning_rate": 0.001,
      "loss": 2.6493,
      "step": 74724
    },
    {
      "epoch": 14.35,
      "learning_rate": 0.001,
      "loss": 2.6558,
      "step": 74736
    },
    {
      "epoch": 14.35,
      "learning_rate": 0.001,
      "loss": 2.6565,
      "step": 74748
    },
    {
      "epoch": 14.35,
      "learning_rate": 0.001,
      "loss": 2.6503,
      "step": 74760
    },
    {
      "epoch": 14.36,
      "learning_rate": 0.001,
      "loss": 2.6514,
      "step": 74772
    },
    {
      "epoch": 14.36,
      "learning_rate": 0.001,
      "loss": 2.6498,
      "step": 74784
    },
    {
      "epoch": 14.36,
      "learning_rate": 0.001,
      "loss": 2.6507,
      "step": 74796
    },
    {
      "epoch": 14.36,
      "learning_rate": 0.001,
      "loss": 2.6498,
      "step": 74808
    },
    {
      "epoch": 14.37,
      "learning_rate": 0.001,
      "loss": 2.651,
      "step": 74820
    },
    {
      "epoch": 14.37,
      "learning_rate": 0.001,
      "loss": 2.6448,
      "step": 74832
    },
    {
      "epoch": 14.37,
      "learning_rate": 0.001,
      "loss": 2.6486,
      "step": 74844
    },
    {
      "epoch": 14.37,
      "learning_rate": 0.001,
      "loss": 2.6538,
      "step": 74856
    },
    {
      "epoch": 14.38,
      "learning_rate": 0.001,
      "loss": 2.652,
      "step": 74868
    },
    {
      "epoch": 14.38,
      "learning_rate": 0.001,
      "loss": 2.6424,
      "step": 74880
    },
    {
      "epoch": 14.38,
      "learning_rate": 0.001,
      "loss": 2.6539,
      "step": 74892
    },
    {
      "epoch": 14.38,
      "learning_rate": 0.001,
      "loss": 2.6476,
      "step": 74904
    },
    {
      "epoch": 14.38,
      "learning_rate": 0.001,
      "loss": 2.6554,
      "step": 74916
    },
    {
      "epoch": 14.39,
      "learning_rate": 0.001,
      "loss": 2.654,
      "step": 74928
    },
    {
      "epoch": 14.39,
      "learning_rate": 0.001,
      "loss": 2.6646,
      "step": 74940
    },
    {
      "epoch": 14.39,
      "learning_rate": 0.001,
      "loss": 2.6498,
      "step": 74952
    },
    {
      "epoch": 14.39,
      "learning_rate": 0.001,
      "loss": 2.6518,
      "step": 74964
    },
    {
      "epoch": 14.4,
      "learning_rate": 0.001,
      "loss": 2.6518,
      "step": 74976
    },
    {
      "epoch": 14.4,
      "learning_rate": 0.001,
      "loss": 2.6567,
      "step": 74988
    },
    {
      "epoch": 14.4,
      "learning_rate": 0.001,
      "loss": 2.6476,
      "step": 75000
    },
    {
      "epoch": 14.4,
      "eval_ag_news_accuracy": 0.31125,
      "eval_ag_news_bleu_score": 4.825265164506628,
      "eval_ag_news_bleu_score_sem": 0.16061676852998474,
      "eval_ag_news_emb_cos_sim": 0.7942475080490112,
      "eval_ag_news_emb_cos_sim_sem": 0.006755775641183398,
      "eval_ag_news_emb_top1_equal": 0.2265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03714537682851538,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.6629862785339355,
      "eval_ag_news_n_ngrams_match_1": 13.442,
      "eval_ag_news_n_ngrams_match_2": 2.968,
      "eval_ag_news_n_ngrams_match_3": 0.896,
      "eval_ag_news_num_pred_words": 46.132,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 38.97756711796284,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3333763081254419,
      "eval_ag_news_runtime": 9.853,
      "eval_ag_news_samples_per_second": 50.746,
      "eval_ag_news_steps_per_second": 0.101,
      "eval_ag_news_token_set_f1": 0.33795608771709623,
      "eval_ag_news_token_set_f1_sem": 0.004539972483984398,
      "eval_ag_news_token_set_precision": 0.3174717992862559,
      "eval_ag_news_token_set_recall": 0.379608859965652,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 75000
    },
    {
      "epoch": 14.4,
      "eval_anthropic_toxic_prompts_accuracy": 0.1095625,
      "eval_anthropic_toxic_prompts_bleu_score": 2.9508382467623506,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11186990624048283,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6582716107368469,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00898989183578182,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1328125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030114394778901498,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.3321025371551514,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.912,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.788,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.636,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.416,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 27.997144901012536,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20377580039567783,
      "eval_anthropic_toxic_prompts_runtime": 9.4893,
      "eval_anthropic_toxic_prompts_samples_per_second": 52.691,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.105,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3458427859817732,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006436895383963648,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.41559077611914735,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3253764235997341,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 75000
    },
    {
      "epoch": 14.4,
      "eval_arxiv_accuracy": 0.3355,
      "eval_arxiv_bleu_score": 4.128656624074922,
      "eval_arxiv_bleu_score_sem": 0.1183949221643065,
      "eval_arxiv_emb_cos_sim": 0.738603413105011,
      "eval_arxiv_emb_cos_sim_sem": 0.008967593518741527,
      "eval_arxiv_emb_top1_equal": 0.1953125,
      "eval_arxiv_emb_top1_equal_sem": 0.035178457165496856,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.5226566791534424,
      "eval_arxiv_n_ngrams_match_1": 14.306,
      "eval_arxiv_n_ngrams_match_2": 2.76,
      "eval_arxiv_n_ngrams_match_3": 0.622,
      "eval_arxiv_num_pred_words": 40.336,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 33.87430218061502,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3392346518234537,
      "eval_arxiv_runtime": 10.1196,
      "eval_arxiv_samples_per_second": 49.409,
      "eval_arxiv_steps_per_second": 0.099,
      "eval_arxiv_token_set_f1": 0.3360491641440138,
      "eval_arxiv_token_set_f1_sem": 0.004441608901171986,
      "eval_arxiv_token_set_precision": 0.2838872252414057,
      "eval_arxiv_token_set_recall": 0.43104836963579973,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 75000
    },
    {
      "epoch": 14.4,
      "eval_python_code_alpaca_accuracy": 0.15525,
      "eval_python_code_alpaca_bleu_score": 4.1750089981728955,
      "eval_python_code_alpaca_bleu_score_sem": 0.14244011545234558,
      "eval_python_code_alpaca_emb_cos_sim": 0.7288846969604492,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010393718407438171,
      "eval_python_code_alpaca_emb_top1_equal": 0.1015625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.026804565886848545,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.0037569999694824,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.272,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.68,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.85,
      "eval_python_code_alpaca_num_pred_words": 44.27,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 20.161140216652083,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.30784098512657615,
      "eval_python_code_alpaca_runtime": 9.7008,
      "eval_python_code_alpaca_samples_per_second": 51.542,
      "eval_python_code_alpaca_steps_per_second": 0.103,
      "eval_python_code_alpaca_token_set_f1": 0.45906546035014595,
      "eval_python_code_alpaca_token_set_f1_sem": 0.006185535015961747,
      "eval_python_code_alpaca_token_set_precision": 0.5026287946028317,
      "eval_python_code_alpaca_token_set_recall": 0.4469467322166043,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 75000
    },
    {
      "epoch": 14.4,
      "eval_wikibio_accuracy": 0.31290625,
      "eval_wikibio_bleu_score": 5.505307064833404,
      "eval_wikibio_bleu_score_sem": 0.20573228474271227,
      "eval_wikibio_emb_cos_sim": 0.7147561311721802,
      "eval_wikibio_emb_cos_sim_sem": 0.010479368706146405,
      "eval_wikibio_emb_top1_equal": 0.140625,
      "eval_wikibio_emb_top1_equal_sem": 0.030847557647994725,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.854083776473999,
      "eval_wikibio_n_ngrams_match_1": 9.314,
      "eval_wikibio_n_ngrams_match_2": 3.078,
      "eval_wikibio_n_ngrams_match_3": 1.118,
      "eval_wikibio_num_pred_words": 35.33,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 47.18536478867824,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.32920250931498296,
      "eval_wikibio_runtime": 10.5288,
      "eval_wikibio_samples_per_second": 47.489,
      "eval_wikibio_steps_per_second": 0.095,
      "eval_wikibio_token_set_f1": 0.2972337502815764,
      "eval_wikibio_token_set_f1_sem": 0.005929955659531237,
      "eval_wikibio_token_set_precision": 0.3024276408245034,
      "eval_wikibio_token_set_recall": 0.3102296234577406,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 75000
    },
    {
      "epoch": 14.4,
      "eval_nq_accuracy": 0.51515625,
      "eval_nq_bleu_score": 11.306658310330809,
      "eval_nq_bleu_score_sem": 0.47323131156860576,
      "eval_nq_emb_cos_sim": 0.8234788179397583,
      "eval_nq_emb_cos_sim_sem": 0.007213109777286094,
      "eval_nq_emb_top1_equal": 0.3203125,
      "eval_nq_emb_top1_equal_sem": 0.041403754790620424,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.284891128540039,
      "eval_nq_n_ngrams_match_1": 22.39,
      "eval_nq_n_ngrams_match_2": 8.12,
      "eval_nq_n_ngrams_match_3": 3.754,
      "eval_nq_num_pred_words": 48.95,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.82461654544283,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.431665225931508,
      "eval_nq_runtime": 10.9854,
      "eval_nq_samples_per_second": 45.515,
      "eval_nq_steps_per_second": 0.091,
      "eval_nq_token_set_f1": 0.4482254340269814,
      "eval_nq_token_set_f1_sem": 0.004960347638546244,
      "eval_nq_token_set_precision": 0.40238438579843505,
      "eval_nq_token_set_recall": 0.5151764854879013,
      "eval_nq_true_num_tokens": 64.0,
      "step": 75000
    },
    {
      "epoch": 14.4,
      "learning_rate": 0.001,
      "loss": 2.6451,
      "step": 75012
    },
    {
      "epoch": 14.41,
      "learning_rate": 0.001,
      "loss": 2.6433,
      "step": 75024
    },
    {
      "epoch": 14.41,
      "learning_rate": 0.001,
      "loss": 2.6523,
      "step": 75036
    },
    {
      "epoch": 14.41,
      "learning_rate": 0.001,
      "loss": 2.6496,
      "step": 75048
    },
    {
      "epoch": 14.41,
      "learning_rate": 0.001,
      "loss": 2.6474,
      "step": 75060
    },
    {
      "epoch": 14.41,
      "learning_rate": 0.001,
      "loss": 2.643,
      "step": 75072
    },
    {
      "epoch": 14.42,
      "learning_rate": 0.001,
      "loss": 2.6496,
      "step": 75084
    },
    {
      "epoch": 14.42,
      "learning_rate": 0.001,
      "loss": 2.6391,
      "step": 75096
    },
    {
      "epoch": 14.42,
      "learning_rate": 0.001,
      "loss": 2.6561,
      "step": 75108
    },
    {
      "epoch": 14.42,
      "learning_rate": 0.001,
      "loss": 2.6518,
      "step": 75120
    },
    {
      "epoch": 14.43,
      "learning_rate": 0.001,
      "loss": 2.6439,
      "step": 75132
    },
    {
      "epoch": 14.43,
      "learning_rate": 0.001,
      "loss": 2.6536,
      "step": 75144
    },
    {
      "epoch": 14.43,
      "learning_rate": 0.001,
      "loss": 2.6558,
      "step": 75156
    },
    {
      "epoch": 14.43,
      "learning_rate": 0.001,
      "loss": 2.6393,
      "step": 75168
    },
    {
      "epoch": 14.44,
      "learning_rate": 0.001,
      "loss": 2.6587,
      "step": 75180
    },
    {
      "epoch": 14.44,
      "learning_rate": 0.001,
      "loss": 2.6543,
      "step": 75192
    },
    {
      "epoch": 14.44,
      "learning_rate": 0.001,
      "loss": 2.6554,
      "step": 75204
    },
    {
      "epoch": 14.44,
      "learning_rate": 0.001,
      "loss": 2.6574,
      "step": 75216
    },
    {
      "epoch": 14.44,
      "learning_rate": 0.001,
      "loss": 2.6456,
      "step": 75228
    },
    {
      "epoch": 14.45,
      "learning_rate": 0.001,
      "loss": 2.6516,
      "step": 75240
    },
    {
      "epoch": 14.45,
      "learning_rate": 0.001,
      "loss": 2.6543,
      "step": 75252
    },
    {
      "epoch": 14.45,
      "learning_rate": 0.001,
      "loss": 2.6507,
      "step": 75264
    },
    {
      "epoch": 14.45,
      "learning_rate": 0.001,
      "loss": 2.6376,
      "step": 75276
    },
    {
      "epoch": 14.46,
      "learning_rate": 0.001,
      "loss": 2.6534,
      "step": 75288
    },
    {
      "epoch": 14.46,
      "learning_rate": 0.001,
      "loss": 2.6486,
      "step": 75300
    },
    {
      "epoch": 14.46,
      "learning_rate": 0.001,
      "loss": 2.6589,
      "step": 75312
    },
    {
      "epoch": 14.46,
      "learning_rate": 0.001,
      "loss": 2.6443,
      "step": 75324
    },
    {
      "epoch": 14.47,
      "learning_rate": 0.001,
      "loss": 2.6524,
      "step": 75336
    },
    {
      "epoch": 14.47,
      "learning_rate": 0.001,
      "loss": 2.6668,
      "step": 75348
    },
    {
      "epoch": 14.47,
      "learning_rate": 0.001,
      "loss": 2.6528,
      "step": 75360
    },
    {
      "epoch": 14.47,
      "learning_rate": 0.001,
      "loss": 2.6545,
      "step": 75372
    },
    {
      "epoch": 14.47,
      "learning_rate": 0.001,
      "loss": 2.6486,
      "step": 75384
    },
    {
      "epoch": 14.48,
      "learning_rate": 0.001,
      "loss": 2.6557,
      "step": 75396
    },
    {
      "epoch": 14.48,
      "learning_rate": 0.001,
      "loss": 2.6491,
      "step": 75408
    },
    {
      "epoch": 14.48,
      "learning_rate": 0.001,
      "loss": 2.6587,
      "step": 75420
    },
    {
      "epoch": 14.48,
      "learning_rate": 0.001,
      "loss": 2.6442,
      "step": 75432
    },
    {
      "epoch": 14.49,
      "learning_rate": 0.001,
      "loss": 2.6505,
      "step": 75444
    },
    {
      "epoch": 14.49,
      "learning_rate": 0.001,
      "loss": 2.6577,
      "step": 75456
    },
    {
      "epoch": 14.49,
      "learning_rate": 0.001,
      "loss": 2.651,
      "step": 75468
    },
    {
      "epoch": 14.49,
      "learning_rate": 0.001,
      "loss": 2.6634,
      "step": 75480
    },
    {
      "epoch": 14.5,
      "learning_rate": 0.001,
      "loss": 2.6424,
      "step": 75492
    },
    {
      "epoch": 14.5,
      "learning_rate": 0.001,
      "loss": 2.6481,
      "step": 75504
    },
    {
      "epoch": 14.5,
      "learning_rate": 0.001,
      "loss": 2.6439,
      "step": 75516
    },
    {
      "epoch": 14.5,
      "learning_rate": 0.001,
      "loss": 2.6479,
      "step": 75528
    },
    {
      "epoch": 14.5,
      "learning_rate": 0.001,
      "loss": 2.6532,
      "step": 75540
    },
    {
      "epoch": 14.51,
      "learning_rate": 0.001,
      "loss": 2.6561,
      "step": 75552
    },
    {
      "epoch": 14.51,
      "learning_rate": 0.001,
      "loss": 2.6614,
      "step": 75564
    },
    {
      "epoch": 14.51,
      "learning_rate": 0.001,
      "loss": 2.6525,
      "step": 75576
    },
    {
      "epoch": 14.51,
      "learning_rate": 0.001,
      "loss": 2.6416,
      "step": 75588
    },
    {
      "epoch": 14.52,
      "learning_rate": 0.001,
      "loss": 2.6405,
      "step": 75600
    },
    {
      "epoch": 14.52,
      "learning_rate": 0.001,
      "loss": 2.6492,
      "step": 75612
    },
    {
      "epoch": 14.52,
      "learning_rate": 0.001,
      "loss": 2.6672,
      "step": 75624
    },
    {
      "epoch": 14.52,
      "eval_ag_news_accuracy": 0.310375,
      "eval_ag_news_bleu_score": 4.655412802755985,
      "eval_ag_news_bleu_score_sem": 0.155004284280549,
      "eval_ag_news_emb_cos_sim": 0.7930936217308044,
      "eval_ag_news_emb_cos_sim_sem": 0.007596103458060652,
      "eval_ag_news_emb_top1_equal": 0.21875,
      "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.6820600032806396,
      "eval_ag_news_n_ngrams_match_1": 13.642,
      "eval_ag_news_n_ngrams_match_2": 2.982,
      "eval_ag_news_n_ngrams_match_3": 0.884,
      "eval_ag_news_num_pred_words": 47.028,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 39.72814995420812,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3343935770510794,
      "eval_ag_news_runtime": 10.6119,
      "eval_ag_news_samples_per_second": 47.117,
      "eval_ag_news_steps_per_second": 0.094,
      "eval_ag_news_token_set_f1": 0.3404060039707562,
      "eval_ag_news_token_set_f1_sem": 0.004622360860322712,
      "eval_ag_news_token_set_precision": 0.32387348810661365,
      "eval_ag_news_token_set_recall": 0.3747796600616977,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 75625
    },
    {
      "epoch": 14.52,
      "eval_anthropic_toxic_prompts_accuracy": 0.1103125,
      "eval_anthropic_toxic_prompts_bleu_score": 2.876129619045212,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10880801230260725,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6423047780990601,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.01027917380989769,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.360487699508667,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.832,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.736,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.632,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.348,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 28.803234777812257,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20049765813978743,
      "eval_anthropic_toxic_prompts_runtime": 9.4263,
      "eval_anthropic_toxic_prompts_samples_per_second": 53.043,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.106,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3526566551452242,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006480346989934327,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4134845768753208,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.33684238966629404,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 75625
    },
    {
      "epoch": 14.52,
      "eval_arxiv_accuracy": 0.3369375,
      "eval_arxiv_bleu_score": 4.119498721662499,
      "eval_arxiv_bleu_score_sem": 0.11446355754821129,
      "eval_arxiv_emb_cos_sim": 0.7383588552474976,
      "eval_arxiv_emb_cos_sim_sem": 0.008242254158403135,
      "eval_arxiv_emb_top1_equal": 0.28125,
      "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.522167444229126,
      "eval_arxiv_n_ngrams_match_1": 14.394,
      "eval_arxiv_n_ngrams_match_2": 2.772,
      "eval_arxiv_n_ngrams_match_3": 0.606,
      "eval_arxiv_num_pred_words": 40.548,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 33.85773374221124,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.33948853232039194,
      "eval_arxiv_runtime": 10.1505,
      "eval_arxiv_samples_per_second": 49.259,
      "eval_arxiv_steps_per_second": 0.099,
      "eval_arxiv_token_set_f1": 0.3376089251969639,
      "eval_arxiv_token_set_f1_sem": 0.004237650715090648,
      "eval_arxiv_token_set_precision": 0.28646494354979996,
      "eval_arxiv_token_set_recall": 0.4263498231370397,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 75625
    },
    {
      "epoch": 14.52,
      "eval_python_code_alpaca_accuracy": 0.1566875,
      "eval_python_code_alpaca_bleu_score": 4.22506531640581,
      "eval_python_code_alpaca_bleu_score_sem": 0.13964023687934293,
      "eval_python_code_alpaca_emb_cos_sim": 0.7337641716003418,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009433625710705657,
      "eval_python_code_alpaca_emb_top1_equal": 0.1015625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.026804565886848545,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.9880495071411133,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.49,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.686,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.856,
      "eval_python_code_alpaca_num_pred_words": 44.048,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 19.846933412057098,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.31379163429124846,
      "eval_python_code_alpaca_runtime": 9.9596,
      "eval_python_code_alpaca_samples_per_second": 50.203,
      "eval_python_code_alpaca_steps_per_second": 0.1,
      "eval_python_code_alpaca_token_set_f1": 0.4676294939803047,
      "eval_python_code_alpaca_token_set_f1_sem": 0.00580676499840515,
      "eval_python_code_alpaca_token_set_precision": 0.5199176042826318,
      "eval_python_code_alpaca_token_set_recall": 0.45222107007232876,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 75625
    },
    {
      "epoch": 14.52,
      "eval_wikibio_accuracy": 0.31221875,
      "eval_wikibio_bleu_score": 5.477373305705958,
      "eval_wikibio_bleu_score_sem": 0.19590572277088825,
      "eval_wikibio_emb_cos_sim": 0.7205518484115601,
      "eval_wikibio_emb_cos_sim_sem": 0.01125361553291464,
      "eval_wikibio_emb_top1_equal": 0.2265625,
      "eval_wikibio_emb_top1_equal_sem": 0.03714537682851538,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.8638317584991455,
      "eval_wikibio_n_ngrams_match_1": 9.856,
      "eval_wikibio_n_ngrams_match_2": 3.176,
      "eval_wikibio_n_ngrams_match_3": 1.13,
      "eval_wikibio_num_pred_words": 36.638,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 47.64757602986537,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3394769741172859,
      "eval_wikibio_runtime": 10.2539,
      "eval_wikibio_samples_per_second": 48.762,
      "eval_wikibio_steps_per_second": 0.098,
      "eval_wikibio_token_set_f1": 0.30748428427846725,
      "eval_wikibio_token_set_f1_sem": 0.005599875663073089,
      "eval_wikibio_token_set_precision": 0.3162653082603447,
      "eval_wikibio_token_set_recall": 0.315662627814797,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 75625
    },
    {
      "epoch": 14.52,
      "eval_nq_accuracy": 0.51325,
      "eval_nq_bleu_score": 11.027304310545896,
      "eval_nq_bleu_score_sem": 0.46266101652935365,
      "eval_nq_emb_cos_sim": 0.820936918258667,
      "eval_nq_emb_cos_sim_sem": 0.00768282915995042,
      "eval_nq_emb_top1_equal": 0.2578125,
      "eval_nq_emb_top1_equal_sem": 0.038815656435002115,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.2858147621154785,
      "eval_nq_n_ngrams_match_1": 22.58,
      "eval_nq_n_ngrams_match_2": 8.078,
      "eval_nq_n_ngrams_match_3": 3.654,
      "eval_nq_num_pred_words": 49.356,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.833695083125722,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4340510414319413,
      "eval_nq_runtime": 11.4133,
      "eval_nq_samples_per_second": 43.808,
      "eval_nq_steps_per_second": 0.088,
      "eval_nq_token_set_f1": 0.44971215918807794,
      "eval_nq_token_set_f1_sem": 0.005032339931197213,
      "eval_nq_token_set_precision": 0.4082384831646633,
      "eval_nq_token_set_recall": 0.5088412461961356,
      "eval_nq_true_num_tokens": 64.0,
      "step": 75625
    },
    {
      "epoch": 14.52,
      "learning_rate": 0.001,
      "loss": 2.6491,
      "step": 75636
    },
    {
      "epoch": 14.53,
      "learning_rate": 0.001,
      "loss": 2.6519,
      "step": 75648
    },
    {
      "epoch": 14.53,
      "learning_rate": 0.001,
      "loss": 2.6525,
      "step": 75660
    },
    {
      "epoch": 14.53,
      "learning_rate": 0.001,
      "loss": 2.6472,
      "step": 75672
    },
    {
      "epoch": 14.53,
      "learning_rate": 0.001,
      "loss": 2.6535,
      "step": 75684
    },
    {
      "epoch": 14.53,
      "learning_rate": 0.001,
      "loss": 2.6565,
      "step": 75696
    },
    {
      "epoch": 14.54,
      "learning_rate": 0.001,
      "loss": 2.6594,
      "step": 75708
    },
    {
      "epoch": 14.54,
      "learning_rate": 0.001,
      "loss": 2.6529,
      "step": 75720
    },
    {
      "epoch": 14.54,
      "learning_rate": 0.001,
      "loss": 2.652,
      "step": 75732
    },
    {
      "epoch": 14.54,
      "learning_rate": 0.001,
      "loss": 2.652,
      "step": 75744
    },
    {
      "epoch": 14.55,
      "learning_rate": 0.001,
      "loss": 2.6535,
      "step": 75756
    },
    {
      "epoch": 14.55,
      "learning_rate": 0.001,
      "loss": 2.6563,
      "step": 75768
    },
    {
      "epoch": 14.55,
      "learning_rate": 0.001,
      "loss": 2.6553,
      "step": 75780
    },
    {
      "epoch": 14.55,
      "learning_rate": 0.001,
      "loss": 2.6571,
      "step": 75792
    },
    {
      "epoch": 14.56,
      "learning_rate": 0.001,
      "loss": 2.6592,
      "step": 75804
    },
    {
      "epoch": 14.56,
      "learning_rate": 0.001,
      "loss": 2.644,
      "step": 75816
    },
    {
      "epoch": 14.56,
      "learning_rate": 0.001,
      "loss": 2.6573,
      "step": 75828
    },
    {
      "epoch": 14.56,
      "learning_rate": 0.001,
      "loss": 2.6635,
      "step": 75840
    },
    {
      "epoch": 14.56,
      "learning_rate": 0.001,
      "loss": 2.6497,
      "step": 75852
    },
    {
      "epoch": 14.57,
      "learning_rate": 0.001,
      "loss": 2.6554,
      "step": 75864
    },
    {
      "epoch": 14.57,
      "learning_rate": 0.001,
      "loss": 2.6461,
      "step": 75876
    },
    {
      "epoch": 14.57,
      "learning_rate": 0.001,
      "loss": 2.653,
      "step": 75888
    },
    {
      "epoch": 14.57,
      "learning_rate": 0.001,
      "loss": 2.6535,
      "step": 75900
    },
    {
      "epoch": 14.58,
      "learning_rate": 0.001,
      "loss": 2.6564,
      "step": 75912
    },
    {
      "epoch": 14.58,
      "learning_rate": 0.001,
      "loss": 2.6555,
      "step": 75924
    },
    {
      "epoch": 14.58,
      "learning_rate": 0.001,
      "loss": 2.6497,
      "step": 75936
    },
    {
      "epoch": 14.58,
      "learning_rate": 0.001,
      "loss": 2.6697,
      "step": 75948
    },
    {
      "epoch": 14.59,
      "learning_rate": 0.001,
      "loss": 2.6526,
      "step": 75960
    },
    {
      "epoch": 14.59,
      "learning_rate": 0.001,
      "loss": 2.6437,
      "step": 75972
    },
    {
      "epoch": 14.59,
      "learning_rate": 0.001,
      "loss": 2.648,
      "step": 75984
    },
    {
      "epoch": 14.59,
      "learning_rate": 0.001,
      "loss": 2.6512,
      "step": 75996
    },
    {
      "epoch": 14.59,
      "learning_rate": 0.001,
      "loss": 2.6476,
      "step": 76008
    },
    {
      "epoch": 14.6,
      "learning_rate": 0.001,
      "loss": 2.6448,
      "step": 76020
    },
    {
      "epoch": 14.6,
      "learning_rate": 0.001,
      "loss": 2.6508,
      "step": 76032
    },
    {
      "epoch": 14.6,
      "learning_rate": 0.001,
      "loss": 2.644,
      "step": 76044
    },
    {
      "epoch": 14.6,
      "learning_rate": 0.001,
      "loss": 2.6455,
      "step": 76056
    },
    {
      "epoch": 14.61,
      "learning_rate": 0.001,
      "loss": 2.6504,
      "step": 76068
    },
    {
      "epoch": 14.61,
      "learning_rate": 0.001,
      "loss": 2.6489,
      "step": 76080
    },
    {
      "epoch": 14.61,
      "learning_rate": 0.001,
      "loss": 2.6468,
      "step": 76092
    },
    {
      "epoch": 14.61,
      "learning_rate": 0.001,
      "loss": 2.655,
      "step": 76104
    },
    {
      "epoch": 14.62,
      "learning_rate": 0.001,
      "loss": 2.6479,
      "step": 76116
    },
    {
      "epoch": 14.62,
      "learning_rate": 0.001,
      "loss": 2.6597,
      "step": 76128
    },
    {
      "epoch": 14.62,
      "learning_rate": 0.001,
      "loss": 2.6626,
      "step": 76140
    },
    {
      "epoch": 14.62,
      "learning_rate": 0.001,
      "loss": 2.642,
      "step": 76152
    },
    {
      "epoch": 14.62,
      "learning_rate": 0.001,
      "loss": 2.6433,
      "step": 76164
    },
    {
      "epoch": 14.63,
      "learning_rate": 0.001,
      "loss": 2.6513,
      "step": 76176
    },
    {
      "epoch": 14.63,
      "learning_rate": 0.001,
      "loss": 2.6539,
      "step": 76188
    },
    {
      "epoch": 14.63,
      "learning_rate": 0.001,
      "loss": 2.6606,
      "step": 76200
    },
    {
      "epoch": 14.63,
      "learning_rate": 0.001,
      "loss": 2.6458,
      "step": 76212
    },
    {
      "epoch": 14.64,
      "learning_rate": 0.001,
      "loss": 2.653,
      "step": 76224
    },
    {
      "epoch": 14.64,
      "learning_rate": 0.001,
      "loss": 2.6528,
      "step": 76236
    },
    {
      "epoch": 14.64,
      "learning_rate": 0.001,
      "loss": 2.6506,
      "step": 76248
    },
    {
      "epoch": 14.64,
      "eval_ag_news_accuracy": 0.31259375,
      "eval_ag_news_bleu_score": 4.651526411582217,
      "eval_ag_news_bleu_score_sem": 0.14377521332499338,
      "eval_ag_news_emb_cos_sim": 0.7970424294471741,
      "eval_ag_news_emb_cos_sim_sem": 0.0075556563129248605,
      "eval_ag_news_emb_top1_equal": 0.1953125,
      "eval_ag_news_emb_top1_equal_sem": 0.035178457165496856,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.6626620292663574,
      "eval_ag_news_n_ngrams_match_1": 13.636,
      "eval_ag_news_n_ngrams_match_2": 2.872,
      "eval_ag_news_n_ngrams_match_3": 0.808,
      "eval_ag_news_num_pred_words": 46.364,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 38.964930719155106,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.33827477090693736,
      "eval_ag_news_runtime": 9.8257,
      "eval_ag_news_samples_per_second": 50.887,
      "eval_ag_news_steps_per_second": 0.102,
      "eval_ag_news_token_set_f1": 0.342507602151011,
      "eval_ag_news_token_set_f1_sem": 0.0043908813057337,
      "eval_ag_news_token_set_precision": 0.32307866257631845,
      "eval_ag_news_token_set_recall": 0.38145538280935165,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 76250
    },
    {
      "epoch": 14.64,
      "eval_anthropic_toxic_prompts_accuracy": 0.11034375,
      "eval_anthropic_toxic_prompts_bleu_score": 2.921761347309161,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10688136883654988,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6543354988098145,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008875401238890994,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.3178293704986572,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.932,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.804,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.66,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.764,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 27.600375301156884,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2025388716957945,
      "eval_anthropic_toxic_prompts_runtime": 10.0384,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.809,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.1,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.34983621316398883,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00643097524341174,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4199480957294115,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3266485123791543,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 76250
    },
    {
      "epoch": 14.64,
      "eval_arxiv_accuracy": 0.337625,
      "eval_arxiv_bleu_score": 3.9350191136695796,
      "eval_arxiv_bleu_score_sem": 0.10507292675712303,
      "eval_arxiv_emb_cos_sim": 0.7389769554138184,
      "eval_arxiv_emb_cos_sim_sem": 0.007970572280179373,
      "eval_arxiv_emb_top1_equal": 0.234375,
      "eval_arxiv_emb_top1_equal_sem": 0.03758909358128201,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.518223762512207,
      "eval_arxiv_n_ngrams_match_1": 14.144,
      "eval_arxiv_n_ngrams_match_2": 2.62,
      "eval_arxiv_n_ngrams_match_3": 0.546,
      "eval_arxiv_num_pred_words": 39.208,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 33.724472559234606,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.338280011727951,
      "eval_arxiv_runtime": 10.148,
      "eval_arxiv_samples_per_second": 49.271,
      "eval_arxiv_steps_per_second": 0.099,
      "eval_arxiv_token_set_f1": 0.3326526064936905,
      "eval_arxiv_token_set_f1_sem": 0.004472971109645054,
      "eval_arxiv_token_set_precision": 0.2807206300186895,
      "eval_arxiv_token_set_recall": 0.4294484005680578,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 76250
    },
    {
      "epoch": 14.64,
      "eval_python_code_alpaca_accuracy": 0.15478125,
      "eval_python_code_alpaca_bleu_score": 4.2807724645856595,
      "eval_python_code_alpaca_bleu_score_sem": 0.1378699381922166,
      "eval_python_code_alpaca_emb_cos_sim": 0.7384339570999146,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009733584173013182,
      "eval_python_code_alpaca_emb_top1_equal": 0.125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.979473114013672,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.456,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.67,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.838,
      "eval_python_code_alpaca_num_pred_words": 43.502,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 19.677446142346987,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.31477142636234423,
      "eval_python_code_alpaca_runtime": 10.2217,
      "eval_python_code_alpaca_samples_per_second": 48.916,
      "eval_python_code_alpaca_steps_per_second": 0.098,
      "eval_python_code_alpaca_token_set_f1": 0.466077417367089,
      "eval_python_code_alpaca_token_set_f1_sem": 0.00570613133320064,
      "eval_python_code_alpaca_token_set_precision": 0.5140095977577134,
      "eval_python_code_alpaca_token_set_recall": 0.45111069881096394,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 76250
    },
    {
      "epoch": 14.64,
      "eval_wikibio_accuracy": 0.31628125,
      "eval_wikibio_bleu_score": 5.472883986001374,
      "eval_wikibio_bleu_score_sem": 0.20090337891561805,
      "eval_wikibio_emb_cos_sim": 0.7212482690811157,
      "eval_wikibio_emb_cos_sim_sem": 0.011007649817727602,
      "eval_wikibio_emb_top1_equal": 0.171875,
      "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.87813663482666,
      "eval_wikibio_n_ngrams_match_1": 9.37,
      "eval_wikibio_n_ngrams_match_2": 3.092,
      "eval_wikibio_n_ngrams_match_3": 1.13,
      "eval_wikibio_num_pred_words": 35.434,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 48.33406709082238,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3255289787089194,
      "eval_wikibio_runtime": 10.2364,
      "eval_wikibio_samples_per_second": 48.845,
      "eval_wikibio_steps_per_second": 0.098,
      "eval_wikibio_token_set_f1": 0.29590539714172015,
      "eval_wikibio_token_set_f1_sem": 0.005926313063926945,
      "eval_wikibio_token_set_precision": 0.30286045743856127,
      "eval_wikibio_token_set_recall": 0.3065459299318423,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 76250
    },
    {
      "epoch": 14.64,
      "eval_nq_accuracy": 0.515625,
      "eval_nq_bleu_score": 10.963256287688997,
      "eval_nq_bleu_score_sem": 0.46914593933232945,
      "eval_nq_emb_cos_sim": 0.8221389651298523,
      "eval_nq_emb_cos_sim_sem": 0.006978323513666571,
      "eval_nq_emb_top1_equal": 0.21875,
      "eval_nq_emb_top1_equal_sem": 0.03668319712192295,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.278108596801758,
      "eval_nq_n_ngrams_match_1": 22.382,
      "eval_nq_n_ngrams_match_2": 8.03,
      "eval_nq_n_ngrams_match_3": 3.596,
      "eval_nq_num_pred_words": 48.858,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.758206241491852,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.43195307926493953,
      "eval_nq_runtime": 10.4376,
      "eval_nq_samples_per_second": 47.904,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.44679654179619294,
      "eval_nq_token_set_f1_sem": 0.004979462209745802,
      "eval_nq_token_set_precision": 0.4019680391733164,
      "eval_nq_token_set_recall": 0.5128056506818839,
      "eval_nq_true_num_tokens": 64.0,
      "step": 76250
    },
    {
      "epoch": 14.64,
      "learning_rate": 0.001,
      "loss": 2.6436,
      "step": 76260
    },
    {
      "epoch": 14.65,
      "learning_rate": 0.001,
      "loss": 2.6477,
      "step": 76272
    },
    {
      "epoch": 14.65,
      "learning_rate": 0.001,
      "loss": 2.6502,
      "step": 76284
    },
    {
      "epoch": 14.65,
      "learning_rate": 0.001,
      "loss": 2.6533,
      "step": 76296
    },
    {
      "epoch": 14.65,
      "learning_rate": 0.001,
      "loss": 2.6428,
      "step": 76308
    },
    {
      "epoch": 14.65,
      "learning_rate": 0.001,
      "loss": 2.6639,
      "step": 76320
    },
    {
      "epoch": 14.66,
      "learning_rate": 0.001,
      "loss": 2.6489,
      "step": 76332
    },
    {
      "epoch": 14.66,
      "learning_rate": 0.001,
      "loss": 2.6531,
      "step": 76344
    },
    {
      "epoch": 14.66,
      "learning_rate": 0.001,
      "loss": 2.6476,
      "step": 76356
    },
    {
      "epoch": 14.66,
      "learning_rate": 0.001,
      "loss": 2.6548,
      "step": 76368
    },
    {
      "epoch": 14.67,
      "learning_rate": 0.001,
      "loss": 2.6586,
      "step": 76380
    },
    {
      "epoch": 14.67,
      "learning_rate": 0.001,
      "loss": 2.6544,
      "step": 76392
    },
    {
      "epoch": 14.67,
      "learning_rate": 0.001,
      "loss": 2.6522,
      "step": 76404
    },
    {
      "epoch": 14.67,
      "learning_rate": 0.001,
      "loss": 2.6468,
      "step": 76416
    },
    {
      "epoch": 14.68,
      "learning_rate": 0.001,
      "loss": 2.6518,
      "step": 76428
    },
    {
      "epoch": 14.68,
      "learning_rate": 0.001,
      "loss": 2.6496,
      "step": 76440
    },
    {
      "epoch": 14.68,
      "learning_rate": 0.001,
      "loss": 2.6549,
      "step": 76452
    },
    {
      "epoch": 14.68,
      "learning_rate": 0.001,
      "loss": 2.6486,
      "step": 76464
    },
    {
      "epoch": 14.68,
      "learning_rate": 0.001,
      "loss": 2.651,
      "step": 76476
    },
    {
      "epoch": 14.69,
      "learning_rate": 0.001,
      "loss": 2.6531,
      "step": 76488
    },
    {
      "epoch": 14.69,
      "learning_rate": 0.001,
      "loss": 2.652,
      "step": 76500
    },
    {
      "epoch": 14.69,
      "learning_rate": 0.001,
      "loss": 2.6619,
      "step": 76512
    },
    {
      "epoch": 14.69,
      "learning_rate": 0.001,
      "loss": 2.6581,
      "step": 76524
    },
    {
      "epoch": 14.7,
      "learning_rate": 0.001,
      "loss": 2.6459,
      "step": 76536
    },
    {
      "epoch": 14.7,
      "learning_rate": 0.001,
      "loss": 2.6552,
      "step": 76548
    },
    {
      "epoch": 14.7,
      "learning_rate": 0.001,
      "loss": 2.6349,
      "step": 76560
    },
    {
      "epoch": 14.7,
      "learning_rate": 0.001,
      "loss": 2.6465,
      "step": 76572
    },
    {
      "epoch": 14.71,
      "learning_rate": 0.001,
      "loss": 2.6597,
      "step": 76584
    },
    {
      "epoch": 14.71,
      "learning_rate": 0.001,
      "loss": 2.6516,
      "step": 76596
    },
    {
      "epoch": 14.71,
      "learning_rate": 0.001,
      "loss": 2.6463,
      "step": 76608
    },
    {
      "epoch": 14.71,
      "learning_rate": 0.001,
      "loss": 2.6557,
      "step": 76620
    },
    {
      "epoch": 14.71,
      "learning_rate": 0.001,
      "loss": 2.6498,
      "step": 76632
    },
    {
      "epoch": 14.72,
      "learning_rate": 0.001,
      "loss": 2.6528,
      "step": 76644
    },
    {
      "epoch": 14.72,
      "learning_rate": 0.001,
      "loss": 2.6518,
      "step": 76656
    },
    {
      "epoch": 14.72,
      "learning_rate": 0.001,
      "loss": 2.651,
      "step": 76668
    },
    {
      "epoch": 14.72,
      "learning_rate": 0.001,
      "loss": 2.6452,
      "step": 76680
    },
    {
      "epoch": 14.73,
      "learning_rate": 0.001,
      "loss": 2.6497,
      "step": 76692
    },
    {
      "epoch": 14.73,
      "learning_rate": 0.001,
      "loss": 2.6517,
      "step": 76704
    },
    {
      "epoch": 14.73,
      "learning_rate": 0.001,
      "loss": 2.6475,
      "step": 76716
    },
    {
      "epoch": 14.73,
      "learning_rate": 0.001,
      "loss": 2.656,
      "step": 76728
    },
    {
      "epoch": 14.74,
      "learning_rate": 0.001,
      "loss": 2.6577,
      "step": 76740
    },
    {
      "epoch": 14.74,
      "learning_rate": 0.001,
      "loss": 2.6475,
      "step": 76752
    },
    {
      "epoch": 14.74,
      "learning_rate": 0.001,
      "loss": 2.6487,
      "step": 76764
    },
    {
      "epoch": 14.74,
      "learning_rate": 0.001,
      "loss": 2.6519,
      "step": 76776
    },
    {
      "epoch": 14.74,
      "learning_rate": 0.001,
      "loss": 2.6435,
      "step": 76788
    },
    {
      "epoch": 14.75,
      "learning_rate": 0.001,
      "loss": 2.6495,
      "step": 76800
    },
    {
      "epoch": 14.75,
      "learning_rate": 0.001,
      "loss": 2.6614,
      "step": 76812
    },
    {
      "epoch": 14.75,
      "learning_rate": 0.001,
      "loss": 2.6481,
      "step": 76824
    },
    {
      "epoch": 14.75,
      "learning_rate": 0.001,
      "loss": 2.6414,
      "step": 76836
    },
    {
      "epoch": 14.76,
      "learning_rate": 0.001,
      "loss": 2.6538,
      "step": 76848
    },
    {
      "epoch": 14.76,
      "learning_rate": 0.001,
      "loss": 2.6534,
      "step": 76860
    },
    {
      "epoch": 14.76,
      "learning_rate": 0.001,
      "loss": 2.6436,
      "step": 76872
    },
    {
      "epoch": 14.76,
      "eval_ag_news_accuracy": 0.31259375,
      "eval_ag_news_bleu_score": 4.414810398139664,
      "eval_ag_news_bleu_score_sem": 0.14185250923534423,
      "eval_ag_news_emb_cos_sim": 0.7913942337036133,
      "eval_ag_news_emb_cos_sim_sem": 0.00782191705275891,
      "eval_ag_news_emb_top1_equal": 0.1953125,
      "eval_ag_news_emb_top1_equal_sem": 0.035178457165496856,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.6608340740203857,
      "eval_ag_news_n_ngrams_match_1": 13.346,
      "eval_ag_news_n_ngrams_match_2": 2.844,
      "eval_ag_news_n_ngrams_match_3": 0.776,
      "eval_ag_news_num_pred_words": 46.44,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 38.89376962909697,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3278349454089131,
      "eval_ag_news_runtime": 10.3588,
      "eval_ag_news_samples_per_second": 48.268,
      "eval_ag_news_steps_per_second": 0.097,
      "eval_ag_news_token_set_f1": 0.3350232407183741,
      "eval_ag_news_token_set_f1_sem": 0.004567995271353797,
      "eval_ag_news_token_set_precision": 0.3177480062232345,
      "eval_ag_news_token_set_recall": 0.37137679807932966,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 76875
    },
    {
      "epoch": 14.76,
      "eval_anthropic_toxic_prompts_accuracy": 0.1105,
      "eval_anthropic_toxic_prompts_bleu_score": 2.940088966186127,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1148997731793882,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6606615781784058,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009694190040545655,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.3106913566589355,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.99,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.762,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.648,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.676,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 27.40406490695189,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 62.96875,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20596362120051115,
      "eval_anthropic_toxic_prompts_runtime": 9.8168,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.933,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.102,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35098327837978505,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006311093764356088,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.42332946711302244,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3259385185758552,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 76875
    },
    {
      "epoch": 14.76,
      "eval_arxiv_accuracy": 0.33603125,
      "eval_arxiv_bleu_score": 4.206553556357853,
      "eval_arxiv_bleu_score_sem": 0.12502509090726233,
      "eval_arxiv_emb_cos_sim": 0.7460159659385681,
      "eval_arxiv_emb_cos_sim_sem": 0.006932129250609615,
      "eval_arxiv_emb_top1_equal": 0.25,
      "eval_arxiv_emb_top1_equal_sem": 0.03842366440207048,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.51404070854187,
      "eval_arxiv_n_ngrams_match_1": 14.506,
      "eval_arxiv_n_ngrams_match_2": 2.766,
      "eval_arxiv_n_ngrams_match_3": 0.644,
      "eval_arxiv_num_pred_words": 41.598,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 33.583695913825984,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.34226806225136647,
      "eval_arxiv_runtime": 10.3506,
      "eval_arxiv_samples_per_second": 48.306,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.33652455369117645,
      "eval_arxiv_token_set_f1_sem": 0.004275165082746081,
      "eval_arxiv_token_set_precision": 0.2867741291553252,
      "eval_arxiv_token_set_recall": 0.42542004999299515,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 76875
    },
    {
      "epoch": 14.76,
      "eval_python_code_alpaca_accuracy": 0.15621875,
      "eval_python_code_alpaca_bleu_score": 4.233694138185224,
      "eval_python_code_alpaca_bleu_score_sem": 0.1312488718986595,
      "eval_python_code_alpaca_emb_cos_sim": 0.7357479929924011,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008838668284780225,
      "eval_python_code_alpaca_emb_top1_equal": 0.09375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.025864720141013958,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.9835448265075684,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.292,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.654,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.844,
      "eval_python_code_alpaca_num_pred_words": 43.502,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 19.75773038190511,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.30978098382559377,
      "eval_python_code_alpaca_runtime": 10.0546,
      "eval_python_code_alpaca_samples_per_second": 49.729,
      "eval_python_code_alpaca_steps_per_second": 0.099,
      "eval_python_code_alpaca_token_set_f1": 0.45611797939762494,
      "eval_python_code_alpaca_token_set_f1_sem": 0.00574584353012143,
      "eval_python_code_alpaca_token_set_precision": 0.5062434901685975,
      "eval_python_code_alpaca_token_set_recall": 0.4386257049856014,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 76875
    },
    {
      "epoch": 14.76,
      "eval_wikibio_accuracy": 0.311625,
      "eval_wikibio_bleu_score": 5.285030508385922,
      "eval_wikibio_bleu_score_sem": 0.18725741610015192,
      "eval_wikibio_emb_cos_sim": 0.735741138458252,
      "eval_wikibio_emb_cos_sim_sem": 0.008572901453327163,
      "eval_wikibio_emb_top1_equal": 0.171875,
      "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.8608815670013428,
      "eval_wikibio_n_ngrams_match_1": 9.558,
      "eval_wikibio_n_ngrams_match_2": 3.09,
      "eval_wikibio_n_ngrams_match_3": 1.058,
      "eval_wikibio_num_pred_words": 35.896,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 47.507213705843746,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.33430718083541094,
      "eval_wikibio_runtime": 10.4937,
      "eval_wikibio_samples_per_second": 47.648,
      "eval_wikibio_steps_per_second": 0.095,
      "eval_wikibio_token_set_f1": 0.30519041115826034,
      "eval_wikibio_token_set_f1_sem": 0.0055618153528707,
      "eval_wikibio_token_set_precision": 0.3107292305233083,
      "eval_wikibio_token_set_recall": 0.3172764445780841,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 76875
    },
    {
      "epoch": 14.76,
      "eval_nq_accuracy": 0.5150625,
      "eval_nq_bleu_score": 10.942025608957334,
      "eval_nq_bleu_score_sem": 0.4837637827437225,
      "eval_nq_emb_cos_sim": 0.8182423710823059,
      "eval_nq_emb_cos_sim_sem": 0.007722426124126405,
      "eval_nq_emb_top1_equal": 0.3125,
      "eval_nq_emb_top1_equal_sem": 0.041130074229814934,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.2796990871429443,
      "eval_nq_n_ngrams_match_1": 22.468,
      "eval_nq_n_ngrams_match_2": 7.99,
      "eval_nq_n_ngrams_match_3": 3.58,
      "eval_nq_num_pred_words": 48.976,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.773738923282064,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.43323899516804787,
      "eval_nq_runtime": 10.9154,
      "eval_nq_samples_per_second": 45.807,
      "eval_nq_steps_per_second": 0.092,
      "eval_nq_token_set_f1": 0.45092714167921305,
      "eval_nq_token_set_f1_sem": 0.004983012150980966,
      "eval_nq_token_set_precision": 0.40656812132890785,
      "eval_nq_token_set_recall": 0.5169567217580451,
      "eval_nq_true_num_tokens": 64.0,
      "step": 76875
    },
    {
      "epoch": 14.76,
      "learning_rate": 0.001,
      "loss": 2.6433,
      "step": 76884
    },
    {
      "epoch": 14.76,
      "learning_rate": 0.001,
      "loss": 2.6611,
      "step": 76896
    },
    {
      "epoch": 14.77,
      "learning_rate": 0.001,
      "loss": 2.65,
      "step": 76908
    },
    {
      "epoch": 14.77,
      "learning_rate": 0.001,
      "loss": 2.6356,
      "step": 76920
    },
    {
      "epoch": 14.77,
      "learning_rate": 0.001,
      "loss": 2.6575,
      "step": 76932
    },
    {
      "epoch": 14.77,
      "learning_rate": 0.001,
      "loss": 2.6613,
      "step": 76944
    },
    {
      "epoch": 14.78,
      "learning_rate": 0.001,
      "loss": 2.6567,
      "step": 76956
    },
    {
      "epoch": 14.78,
      "learning_rate": 0.001,
      "loss": 2.6579,
      "step": 76968
    },
    {
      "epoch": 14.78,
      "learning_rate": 0.001,
      "loss": 2.6578,
      "step": 76980
    },
    {
      "epoch": 14.78,
      "learning_rate": 0.001,
      "loss": 2.647,
      "step": 76992
    },
    {
      "epoch": 14.79,
      "learning_rate": 0.001,
      "loss": 2.6568,
      "step": 77004
    },
    {
      "epoch": 14.79,
      "learning_rate": 0.001,
      "loss": 2.6512,
      "step": 77016
    },
    {
      "epoch": 14.79,
      "learning_rate": 0.001,
      "loss": 2.6578,
      "step": 77028
    },
    {
      "epoch": 14.79,
      "learning_rate": 0.001,
      "loss": 2.651,
      "step": 77040
    },
    {
      "epoch": 14.79,
      "learning_rate": 0.001,
      "loss": 2.6508,
      "step": 77052
    },
    {
      "epoch": 14.8,
      "learning_rate": 0.001,
      "loss": 2.6486,
      "step": 77064
    },
    {
      "epoch": 14.8,
      "learning_rate": 0.001,
      "loss": 2.6487,
      "step": 77076
    },
    {
      "epoch": 14.8,
      "learning_rate": 0.001,
      "loss": 2.6531,
      "step": 77088
    },
    {
      "epoch": 14.8,
      "learning_rate": 0.001,
      "loss": 2.647,
      "step": 77100
    },
    {
      "epoch": 14.81,
      "learning_rate": 0.001,
      "loss": 2.6501,
      "step": 77112
    },
    {
      "epoch": 14.81,
      "learning_rate": 0.001,
      "loss": 2.6495,
      "step": 77124
    },
    {
      "epoch": 14.81,
      "learning_rate": 0.001,
      "loss": 2.6586,
      "step": 77136
    },
    {
      "epoch": 14.81,
      "learning_rate": 0.001,
      "loss": 2.6559,
      "step": 77148
    },
    {
      "epoch": 14.82,
      "learning_rate": 0.001,
      "loss": 2.6559,
      "step": 77160
    },
    {
      "epoch": 14.82,
      "learning_rate": 0.001,
      "loss": 2.6496,
      "step": 77172
    },
    {
      "epoch": 14.82,
      "learning_rate": 0.001,
      "loss": 2.6541,
      "step": 77184
    },
    {
      "epoch": 14.82,
      "learning_rate": 0.001,
      "loss": 2.6519,
      "step": 77196
    },
    {
      "epoch": 14.82,
      "learning_rate": 0.001,
      "loss": 2.6363,
      "step": 77208
    },
    {
      "epoch": 14.83,
      "learning_rate": 0.001,
      "loss": 2.65,
      "step": 77220
    },
    {
      "epoch": 14.83,
      "learning_rate": 0.001,
      "loss": 2.656,
      "step": 77232
    },
    {
      "epoch": 14.83,
      "learning_rate": 0.001,
      "loss": 2.6464,
      "step": 77244
    },
    {
      "epoch": 14.83,
      "learning_rate": 0.001,
      "loss": 2.643,
      "step": 77256
    },
    {
      "epoch": 14.84,
      "learning_rate": 0.001,
      "loss": 2.6504,
      "step": 77268
    },
    {
      "epoch": 14.84,
      "learning_rate": 0.001,
      "loss": 2.6555,
      "step": 77280
    },
    {
      "epoch": 14.84,
      "learning_rate": 0.001,
      "loss": 2.6524,
      "step": 77292
    },
    {
      "epoch": 14.84,
      "learning_rate": 0.001,
      "loss": 2.6602,
      "step": 77304
    },
    {
      "epoch": 14.85,
      "learning_rate": 0.001,
      "loss": 2.6592,
      "step": 77316
    },
    {
      "epoch": 14.85,
      "learning_rate": 0.001,
      "loss": 2.649,
      "step": 77328
    },
    {
      "epoch": 14.85,
      "learning_rate": 0.001,
      "loss": 2.663,
      "step": 77340
    },
    {
      "epoch": 14.85,
      "learning_rate": 0.001,
      "loss": 2.6507,
      "step": 77352
    },
    {
      "epoch": 14.85,
      "learning_rate": 0.001,
      "loss": 2.6583,
      "step": 77364
    },
    {
      "epoch": 14.86,
      "learning_rate": 0.001,
      "loss": 2.6597,
      "step": 77376
    },
    {
      "epoch": 14.86,
      "learning_rate": 0.001,
      "loss": 2.6546,
      "step": 77388
    },
    {
      "epoch": 14.86,
      "learning_rate": 0.001,
      "loss": 2.6498,
      "step": 77400
    },
    {
      "epoch": 14.86,
      "learning_rate": 0.001,
      "loss": 2.643,
      "step": 77412
    },
    {
      "epoch": 14.87,
      "learning_rate": 0.001,
      "loss": 2.648,
      "step": 77424
    },
    {
      "epoch": 14.87,
      "learning_rate": 0.001,
      "loss": 2.6445,
      "step": 77436
    },
    {
      "epoch": 14.87,
      "learning_rate": 0.001,
      "loss": 2.6494,
      "step": 77448
    },
    {
      "epoch": 14.87,
      "learning_rate": 0.001,
      "loss": 2.6477,
      "step": 77460
    },
    {
      "epoch": 14.88,
      "learning_rate": 0.001,
      "loss": 2.6593,
      "step": 77472
    },
    {
      "epoch": 14.88,
      "learning_rate": 0.001,
      "loss": 2.6575,
      "step": 77484
    },
    {
      "epoch": 14.88,
      "learning_rate": 0.001,
      "loss": 2.6423,
      "step": 77496
    },
    {
      "epoch": 14.88,
      "eval_ag_news_accuracy": 0.31459375,
      "eval_ag_news_bleu_score": 4.631032270171614,
      "eval_ag_news_bleu_score_sem": 0.15999853452808432,
      "eval_ag_news_emb_cos_sim": 0.7956146001815796,
      "eval_ag_news_emb_cos_sim_sem": 0.0075687771537019085,
      "eval_ag_news_emb_top1_equal": 0.203125,
      "eval_ag_news_emb_top1_equal_sem": 0.03570055125142555,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.6569697856903076,
      "eval_ag_news_n_ngrams_match_1": 13.458,
      "eval_ag_news_n_ngrams_match_2": 2.888,
      "eval_ag_news_n_ngrams_match_3": 0.836,
      "eval_ag_news_num_pred_words": 46.48,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 38.743762910280346,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3311104250094676,
      "eval_ag_news_runtime": 10.4358,
      "eval_ag_news_samples_per_second": 47.912,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.33834260989411025,
      "eval_ag_news_token_set_f1_sem": 0.00452232620321744,
      "eval_ag_news_token_set_precision": 0.32190911245664305,
      "eval_ag_news_token_set_recall": 0.37446189049247963,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 77500
    },
    {
      "epoch": 14.88,
      "eval_anthropic_toxic_prompts_accuracy": 0.10840625,
      "eval_anthropic_toxic_prompts_bleu_score": 2.925343675211889,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10653985519582482,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6549015045166016,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009578090619421826,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.340261697769165,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.076,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.834,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.626,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.494,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 28.22651255430314,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2062731814551379,
      "eval_anthropic_toxic_prompts_runtime": 9.8997,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.507,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.101,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35468858502280093,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006479771315227887,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4253265250122434,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.33424321970252674,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 77500
    },
    {
      "epoch": 14.88,
      "eval_arxiv_accuracy": 0.33821875,
      "eval_arxiv_bleu_score": 4.08397045862728,
      "eval_arxiv_bleu_score_sem": 0.11767461607181948,
      "eval_arxiv_emb_cos_sim": 0.7391788363456726,
      "eval_arxiv_emb_cos_sim_sem": 0.008188753344030112,
      "eval_arxiv_emb_top1_equal": 0.265625,
      "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.5042643547058105,
      "eval_arxiv_n_ngrams_match_1": 14.242,
      "eval_arxiv_n_ngrams_match_2": 2.708,
      "eval_arxiv_n_ngrams_match_3": 0.616,
      "eval_arxiv_num_pred_words": 39.75,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 33.25696951816805,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.33851789306031654,
      "eval_arxiv_runtime": 10.2365,
      "eval_arxiv_samples_per_second": 48.845,
      "eval_arxiv_steps_per_second": 0.098,
      "eval_arxiv_token_set_f1": 0.33304753308185286,
      "eval_arxiv_token_set_f1_sem": 0.004526957743319787,
      "eval_arxiv_token_set_precision": 0.28434613363972866,
      "eval_arxiv_token_set_recall": 0.42762750730347915,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 77500
    },
    {
      "epoch": 14.88,
      "eval_python_code_alpaca_accuracy": 0.1551875,
      "eval_python_code_alpaca_bleu_score": 4.339701917200884,
      "eval_python_code_alpaca_bleu_score_sem": 0.13338780938360195,
      "eval_python_code_alpaca_emb_cos_sim": 0.7565209865570068,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007302152897299453,
      "eval_python_code_alpaca_emb_top1_equal": 0.1171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.0038363933563232,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.646,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.77,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.886,
      "eval_python_code_alpaca_num_pred_words": 44.646,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 20.162740941399093,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.31889391162809305,
      "eval_python_code_alpaca_runtime": 10.5768,
      "eval_python_code_alpaca_samples_per_second": 47.273,
      "eval_python_code_alpaca_steps_per_second": 0.095,
      "eval_python_code_alpaca_token_set_f1": 0.465792894667353,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005529029250145019,
      "eval_python_code_alpaca_token_set_precision": 0.5262871313726274,
      "eval_python_code_alpaca_token_set_recall": 0.43755778015183144,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 77500
    },
    {
      "epoch": 14.88,
      "eval_wikibio_accuracy": 0.31709375,
      "eval_wikibio_bleu_score": 5.456056450368803,
      "eval_wikibio_bleu_score_sem": 0.1970489952827261,
      "eval_wikibio_emb_cos_sim": 0.7289547920227051,
      "eval_wikibio_emb_cos_sim_sem": 0.009284352768536706,
      "eval_wikibio_emb_top1_equal": 0.1875,
      "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.841438055038452,
      "eval_wikibio_n_ngrams_match_1": 9.774,
      "eval_wikibio_n_ngrams_match_2": 3.138,
      "eval_wikibio_n_ngrams_match_3": 1.09,
      "eval_wikibio_num_pred_words": 36.05,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 46.592428763186305,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3349919510965307,
      "eval_wikibio_runtime": 11.1729,
      "eval_wikibio_samples_per_second": 44.751,
      "eval_wikibio_steps_per_second": 0.09,
      "eval_wikibio_token_set_f1": 0.3072632426149612,
      "eval_wikibio_token_set_f1_sem": 0.005599271608403875,
      "eval_wikibio_token_set_precision": 0.31601679972051183,
      "eval_wikibio_token_set_recall": 0.3179809555417617,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 77500
    },
    {
      "epoch": 14.88,
      "eval_nq_accuracy": 0.51453125,
      "eval_nq_bleu_score": 11.130354698405046,
      "eval_nq_bleu_score_sem": 0.46762484990152287,
      "eval_nq_emb_cos_sim": 0.817885160446167,
      "eval_nq_emb_cos_sim_sem": 0.007382511132341537,
      "eval_nq_emb_top1_equal": 0.28125,
      "eval_nq_emb_top1_equal_sem": 0.039896367485272234,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.277179479598999,
      "eval_nq_n_ngrams_match_1": 22.494,
      "eval_nq_n_ngrams_match_2": 8.062,
      "eval_nq_n_ngrams_match_3": 3.696,
      "eval_nq_num_pred_words": 49.206,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.749143934829249,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.43142035015214286,
      "eval_nq_runtime": 11.216,
      "eval_nq_samples_per_second": 44.579,
      "eval_nq_steps_per_second": 0.089,
      "eval_nq_token_set_f1": 0.4491553393750031,
      "eval_nq_token_set_f1_sem": 0.005120027827010909,
      "eval_nq_token_set_precision": 0.40571971347760305,
      "eval_nq_token_set_recall": 0.5125443471047076,
      "eval_nq_true_num_tokens": 64.0,
      "step": 77500
    },
    {
      "epoch": 14.88,
      "learning_rate": 0.001,
      "loss": 2.6544,
      "step": 77508
    },
    {
      "epoch": 14.88,
      "learning_rate": 0.001,
      "loss": 2.6474,
      "step": 77520
    },
    {
      "epoch": 14.89,
      "learning_rate": 0.001,
      "loss": 2.6489,
      "step": 77532
    },
    {
      "epoch": 14.89,
      "learning_rate": 0.001,
      "loss": 2.6515,
      "step": 77544
    },
    {
      "epoch": 14.89,
      "learning_rate": 0.001,
      "loss": 2.6572,
      "step": 77556
    },
    {
      "epoch": 14.89,
      "learning_rate": 0.001,
      "loss": 2.6424,
      "step": 77568
    },
    {
      "epoch": 14.9,
      "learning_rate": 0.001,
      "loss": 2.6501,
      "step": 77580
    },
    {
      "epoch": 14.9,
      "learning_rate": 0.001,
      "loss": 2.6449,
      "step": 77592
    },
    {
      "epoch": 14.9,
      "learning_rate": 0.001,
      "loss": 2.6565,
      "step": 77604
    },
    {
      "epoch": 14.9,
      "learning_rate": 0.001,
      "loss": 2.6485,
      "step": 77616
    },
    {
      "epoch": 14.91,
      "learning_rate": 0.001,
      "loss": 2.6585,
      "step": 77628
    },
    {
      "epoch": 14.91,
      "learning_rate": 0.001,
      "loss": 2.6471,
      "step": 77640
    },
    {
      "epoch": 14.91,
      "learning_rate": 0.001,
      "loss": 2.6495,
      "step": 77652
    },
    {
      "epoch": 14.91,
      "learning_rate": 0.001,
      "loss": 2.6482,
      "step": 77664
    },
    {
      "epoch": 14.91,
      "learning_rate": 0.001,
      "loss": 2.654,
      "step": 77676
    },
    {
      "epoch": 14.92,
      "learning_rate": 0.001,
      "loss": 2.6467,
      "step": 77688
    },
    {
      "epoch": 14.92,
      "learning_rate": 0.001,
      "loss": 2.6516,
      "step": 77700
    },
    {
      "epoch": 14.92,
      "learning_rate": 0.001,
      "loss": 2.6523,
      "step": 77712
    },
    {
      "epoch": 14.92,
      "learning_rate": 0.001,
      "loss": 2.6526,
      "step": 77724
    },
    {
      "epoch": 14.93,
      "learning_rate": 0.001,
      "loss": 2.6512,
      "step": 77736
    },
    {
      "epoch": 14.93,
      "learning_rate": 0.001,
      "loss": 2.6571,
      "step": 77748
    },
    {
      "epoch": 14.93,
      "learning_rate": 0.001,
      "loss": 2.6638,
      "step": 77760
    },
    {
      "epoch": 14.93,
      "learning_rate": 0.001,
      "loss": 2.6594,
      "step": 77772
    },
    {
      "epoch": 14.94,
      "learning_rate": 0.001,
      "loss": 2.6554,
      "step": 77784
    },
    {
      "epoch": 14.94,
      "learning_rate": 0.001,
      "loss": 2.6492,
      "step": 77796
    },
    {
      "epoch": 14.94,
      "learning_rate": 0.001,
      "loss": 2.6453,
      "step": 77808
    },
    {
      "epoch": 14.94,
      "learning_rate": 0.001,
      "loss": 2.6546,
      "step": 77820
    },
    {
      "epoch": 14.94,
      "learning_rate": 0.001,
      "loss": 2.6391,
      "step": 77832
    },
    {
      "epoch": 14.95,
      "learning_rate": 0.001,
      "loss": 2.6442,
      "step": 77844
    },
    {
      "epoch": 14.95,
      "learning_rate": 0.001,
      "loss": 2.6521,
      "step": 77856
    },
    {
      "epoch": 14.95,
      "learning_rate": 0.001,
      "loss": 2.6554,
      "step": 77868
    },
    {
      "epoch": 14.95,
      "learning_rate": 0.001,
      "loss": 2.6605,
      "step": 77880
    },
    {
      "epoch": 14.96,
      "learning_rate": 0.001,
      "loss": 2.6485,
      "step": 77892
    },
    {
      "epoch": 14.96,
      "learning_rate": 0.001,
      "loss": 2.6481,
      "step": 77904
    },
    {
      "epoch": 14.96,
      "learning_rate": 0.001,
      "loss": 2.6526,
      "step": 77916
    },
    {
      "epoch": 14.96,
      "learning_rate": 0.001,
      "loss": 2.6386,
      "step": 77928
    },
    {
      "epoch": 14.97,
      "learning_rate": 0.001,
      "loss": 2.6473,
      "step": 77940
    },
    {
      "epoch": 14.97,
      "learning_rate": 0.001,
      "loss": 2.6386,
      "step": 77952
    },
    {
      "epoch": 14.97,
      "learning_rate": 0.001,
      "loss": 2.6387,
      "step": 77964
    },
    {
      "epoch": 14.97,
      "learning_rate": 0.001,
      "loss": 2.6499,
      "step": 77976
    },
    {
      "epoch": 14.97,
      "learning_rate": 0.001,
      "loss": 2.6601,
      "step": 77988
    },
    {
      "epoch": 14.98,
      "learning_rate": 0.001,
      "loss": 2.6469,
      "step": 78000
    },
    {
      "epoch": 14.98,
      "learning_rate": 0.001,
      "loss": 2.6496,
      "step": 78012
    },
    {
      "epoch": 14.98,
      "learning_rate": 0.001,
      "loss": 2.6581,
      "step": 78024
    },
    {
      "epoch": 14.98,
      "learning_rate": 0.001,
      "loss": 2.6506,
      "step": 78036
    },
    {
      "epoch": 14.99,
      "learning_rate": 0.001,
      "loss": 2.6489,
      "step": 78048
    },
    {
      "epoch": 14.99,
      "learning_rate": 0.001,
      "loss": 2.6492,
      "step": 78060
    },
    {
      "epoch": 14.99,
      "learning_rate": 0.001,
      "loss": 2.6506,
      "step": 78072
    },
    {
      "epoch": 14.99,
      "learning_rate": 0.001,
      "loss": 2.6435,
      "step": 78084
    },
    {
      "epoch": 15.0,
      "learning_rate": 0.001,
      "loss": 2.6378,
      "step": 78096
    },
    {
      "epoch": 15.0,
      "learning_rate": 0.001,
      "loss": 2.6553,
      "step": 78108
    },
    {
      "epoch": 15.0,
      "learning_rate": 0.001,
      "loss": 2.6509,
      "step": 78120
    },
    {
      "epoch": 15.0,
      "eval_ag_news_accuracy": 0.31146875,
      "eval_ag_news_bleu_score": 4.55687907341995,
      "eval_ag_news_bleu_score_sem": 0.14168826307002097,
      "eval_ag_news_emb_cos_sim": 0.8001970052719116,
      "eval_ag_news_emb_cos_sim_sem": 0.006763832933238567,
      "eval_ag_news_emb_top1_equal": 0.234375,
      "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.6746983528137207,
      "eval_ag_news_n_ngrams_match_1": 13.494,
      "eval_ag_news_n_ngrams_match_2": 2.87,
      "eval_ag_news_n_ngrams_match_3": 0.83,
      "eval_ag_news_num_pred_words": 46.804,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 39.43675907541347,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.331305410227578,
      "eval_ag_news_runtime": 10.2311,
      "eval_ag_news_samples_per_second": 48.87,
      "eval_ag_news_steps_per_second": 0.098,
      "eval_ag_news_token_set_f1": 0.3361638944316805,
      "eval_ag_news_token_set_f1_sem": 0.004494118201335518,
      "eval_ag_news_token_set_precision": 0.3196571701029243,
      "eval_ag_news_token_set_recall": 0.3692811452088858,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 78125
    },
    {
      "epoch": 15.0,
      "eval_anthropic_toxic_prompts_accuracy": 0.11009375,
      "eval_anthropic_toxic_prompts_bleu_score": 2.991854800804824,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11648455777747162,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6514008641242981,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009158809819168405,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.3317744731903076,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.946,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.796,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.644,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.222,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 27.987961553097133,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20275988453763516,
      "eval_anthropic_toxic_prompts_runtime": 11.1928,
      "eval_anthropic_toxic_prompts_samples_per_second": 44.672,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.089,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.34848142850950203,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0064726623770099,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4191481559501271,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3288319094647738,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 78125
    },
    {
      "epoch": 15.0,
      "eval_arxiv_accuracy": 0.33540625,
      "eval_arxiv_bleu_score": 4.093313340945823,
      "eval_arxiv_bleu_score_sem": 0.11741439357679818,
      "eval_arxiv_emb_cos_sim": 0.7404592037200928,
      "eval_arxiv_emb_cos_sim_sem": 0.007134437416184446,
      "eval_arxiv_emb_top1_equal": 0.2890625,
      "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.5302891731262207,
      "eval_arxiv_n_ngrams_match_1": 14.54,
      "eval_arxiv_n_ngrams_match_2": 2.79,
      "eval_arxiv_n_ngrams_match_3": 0.58,
      "eval_arxiv_num_pred_words": 40.86,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 34.13383677602791,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3422752678386841,
      "eval_arxiv_runtime": 14.9195,
      "eval_arxiv_samples_per_second": 33.513,
      "eval_arxiv_steps_per_second": 0.067,
      "eval_arxiv_token_set_f1": 0.3377657060348677,
      "eval_arxiv_token_set_f1_sem": 0.004207855857039247,
      "eval_arxiv_token_set_precision": 0.2888183116966359,
      "eval_arxiv_token_set_recall": 0.42247830133496517,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 78125
    },
    {
      "epoch": 15.0,
      "eval_python_code_alpaca_accuracy": 0.1550625,
      "eval_python_code_alpaca_bleu_score": 4.390590995116049,
      "eval_python_code_alpaca_bleu_score_sem": 0.14043490806783718,
      "eval_python_code_alpaca_emb_cos_sim": 0.7426967024803162,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010412652220587182,
      "eval_python_code_alpaca_emb_top1_equal": 0.1171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.9705452919006348,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.584,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.8,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.93,
      "eval_python_code_alpaca_num_pred_words": 43.734,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 19.50255128033689,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.31723250056380503,
      "eval_python_code_alpaca_runtime": 9.7869,
      "eval_python_code_alpaca_samples_per_second": 51.089,
      "eval_python_code_alpaca_steps_per_second": 0.102,
      "eval_python_code_alpaca_token_set_f1": 0.46034348301184796,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005977222950853551,
      "eval_python_code_alpaca_token_set_precision": 0.5192194511962053,
      "eval_python_code_alpaca_token_set_recall": 0.4391917466066157,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 78125
    },
    {
      "epoch": 15.0,
      "eval_wikibio_accuracy": 0.31534375,
      "eval_wikibio_bleu_score": 5.430132314342204,
      "eval_wikibio_bleu_score_sem": 0.18900363820296573,
      "eval_wikibio_emb_cos_sim": 0.7391431927680969,
      "eval_wikibio_emb_cos_sim_sem": 0.010009163976179694,
      "eval_wikibio_emb_top1_equal": 0.2109375,
      "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.8963725566864014,
      "eval_wikibio_n_ngrams_match_1": 9.66,
      "eval_wikibio_n_ngrams_match_2": 3.18,
      "eval_wikibio_n_ngrams_match_3": 1.086,
      "eval_wikibio_num_pred_words": 36.132,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 49.22356915643153,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3387682831770781,
      "eval_wikibio_runtime": 10.0595,
      "eval_wikibio_samples_per_second": 49.704,
      "eval_wikibio_steps_per_second": 0.099,
      "eval_wikibio_token_set_f1": 0.3051529292868866,
      "eval_wikibio_token_set_f1_sem": 0.005453279865197709,
      "eval_wikibio_token_set_precision": 0.3148738837335179,
      "eval_wikibio_token_set_recall": 0.3126155424927755,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 78125
    },
    {
      "epoch": 15.0,
      "eval_nq_accuracy": 0.5146875,
      "eval_nq_bleu_score": 11.36397842627679,
      "eval_nq_bleu_score_sem": 0.48049478981311194,
      "eval_nq_emb_cos_sim": 0.8131399154663086,
      "eval_nq_emb_cos_sim_sem": 0.007624440628239148,
      "eval_nq_emb_top1_equal": 0.2265625,
      "eval_nq_emb_top1_equal_sem": 0.03714537682851538,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.277000665664673,
      "eval_nq_n_ngrams_match_1": 22.468,
      "eval_nq_n_ngrams_match_2": 8.19,
      "eval_nq_n_ngrams_match_3": 3.816,
      "eval_nq_num_pred_words": 49.034,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.747400807898286,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.43164996757113144,
      "eval_nq_runtime": 11.625,
      "eval_nq_samples_per_second": 43.011,
      "eval_nq_steps_per_second": 0.086,
      "eval_nq_token_set_f1": 0.4488524102037199,
      "eval_nq_token_set_f1_sem": 0.005137918477550579,
      "eval_nq_token_set_precision": 0.4047128036308987,
      "eval_nq_token_set_recall": 0.5131301020274545,
      "eval_nq_true_num_tokens": 64.0,
      "step": 78125
    },
    {
      "epoch": 15.0,
      "learning_rate": 0.001,
      "loss": 2.63,
      "step": 78132
    },
    {
      "epoch": 15.0,
      "learning_rate": 0.001,
      "loss": 2.6363,
      "step": 78144
    },
    {
      "epoch": 15.01,
      "learning_rate": 0.001,
      "loss": 2.625,
      "step": 78156
    },
    {
      "epoch": 15.01,
      "learning_rate": 0.001,
      "loss": 2.6282,
      "step": 78168
    },
    {
      "epoch": 15.01,
      "learning_rate": 0.001,
      "loss": 2.6378,
      "step": 78180
    },
    {
      "epoch": 15.01,
      "learning_rate": 0.001,
      "loss": 2.6229,
      "step": 78192
    },
    {
      "epoch": 15.02,
      "learning_rate": 0.001,
      "loss": 2.6198,
      "step": 78204
    },
    {
      "epoch": 15.02,
      "learning_rate": 0.001,
      "loss": 2.632,
      "step": 78216
    },
    {
      "epoch": 15.02,
      "learning_rate": 0.001,
      "loss": 2.6313,
      "step": 78228
    },
    {
      "epoch": 15.02,
      "learning_rate": 0.001,
      "loss": 2.6262,
      "step": 78240
    },
    {
      "epoch": 15.03,
      "learning_rate": 0.001,
      "loss": 2.6321,
      "step": 78252
    },
    {
      "epoch": 15.03,
      "learning_rate": 0.001,
      "loss": 2.6267,
      "step": 78264
    },
    {
      "epoch": 15.03,
      "learning_rate": 0.001,
      "loss": 2.639,
      "step": 78276
    },
    {
      "epoch": 15.03,
      "learning_rate": 0.001,
      "loss": 2.631,
      "step": 78288
    },
    {
      "epoch": 15.03,
      "learning_rate": 0.001,
      "loss": 2.6309,
      "step": 78300
    },
    {
      "epoch": 15.04,
      "learning_rate": 0.001,
      "loss": 2.6395,
      "step": 78312
    },
    {
      "epoch": 15.04,
      "learning_rate": 0.001,
      "loss": 2.6282,
      "step": 78324
    },
    {
      "epoch": 15.04,
      "learning_rate": 0.001,
      "loss": 2.6414,
      "step": 78336
    },
    {
      "epoch": 15.04,
      "learning_rate": 0.001,
      "loss": 2.6299,
      "step": 78348
    },
    {
      "epoch": 15.05,
      "learning_rate": 0.001,
      "loss": 2.6312,
      "step": 78360
    },
    {
      "epoch": 15.05,
      "learning_rate": 0.001,
      "loss": 2.6258,
      "step": 78372
    },
    {
      "epoch": 15.05,
      "learning_rate": 0.001,
      "loss": 2.6253,
      "step": 78384
    },
    {
      "epoch": 15.05,
      "learning_rate": 0.001,
      "loss": 2.6282,
      "step": 78396
    },
    {
      "epoch": 15.06,
      "learning_rate": 0.001,
      "loss": 2.6212,
      "step": 78408
    },
    {
      "epoch": 15.06,
      "learning_rate": 0.001,
      "loss": 2.6282,
      "step": 78420
    },
    {
      "epoch": 15.06,
      "learning_rate": 0.001,
      "loss": 2.6318,
      "step": 78432
    },
    {
      "epoch": 15.06,
      "learning_rate": 0.001,
      "loss": 2.6316,
      "step": 78444
    },
    {
      "epoch": 15.06,
      "learning_rate": 0.001,
      "loss": 2.6324,
      "step": 78456
    },
    {
      "epoch": 15.07,
      "learning_rate": 0.001,
      "loss": 2.6249,
      "step": 78468
    },
    {
      "epoch": 15.07,
      "learning_rate": 0.001,
      "loss": 2.6359,
      "step": 78480
    },
    {
      "epoch": 15.07,
      "learning_rate": 0.001,
      "loss": 2.6339,
      "step": 78492
    },
    {
      "epoch": 15.07,
      "learning_rate": 0.001,
      "loss": 2.6299,
      "step": 78504
    },
    {
      "epoch": 15.08,
      "learning_rate": 0.001,
      "loss": 2.6251,
      "step": 78516
    },
    {
      "epoch": 15.08,
      "learning_rate": 0.001,
      "loss": 2.6223,
      "step": 78528
    },
    {
      "epoch": 15.08,
      "learning_rate": 0.001,
      "loss": 2.6345,
      "step": 78540
    },
    {
      "epoch": 15.08,
      "learning_rate": 0.001,
      "loss": 2.6406,
      "step": 78552
    },
    {
      "epoch": 15.09,
      "learning_rate": 0.001,
      "loss": 2.6281,
      "step": 78564
    },
    {
      "epoch": 15.09,
      "learning_rate": 0.001,
      "loss": 2.6356,
      "step": 78576
    },
    {
      "epoch": 15.09,
      "learning_rate": 0.001,
      "loss": 2.6409,
      "step": 78588
    },
    {
      "epoch": 15.09,
      "learning_rate": 0.001,
      "loss": 2.6249,
      "step": 78600
    },
    {
      "epoch": 15.09,
      "learning_rate": 0.001,
      "loss": 2.6245,
      "step": 78612
    },
    {
      "epoch": 15.1,
      "learning_rate": 0.001,
      "loss": 2.6323,
      "step": 78624
    },
    {
      "epoch": 15.1,
      "learning_rate": 0.001,
      "loss": 2.6278,
      "step": 78636
    },
    {
      "epoch": 15.1,
      "learning_rate": 0.001,
      "loss": 2.6244,
      "step": 78648
    },
    {
      "epoch": 15.1,
      "learning_rate": 0.001,
      "loss": 2.6307,
      "step": 78660
    },
    {
      "epoch": 15.11,
      "learning_rate": 0.001,
      "loss": 2.6419,
      "step": 78672
    },
    {
      "epoch": 15.11,
      "learning_rate": 0.001,
      "loss": 2.6298,
      "step": 78684
    },
    {
      "epoch": 15.11,
      "learning_rate": 0.001,
      "loss": 2.6327,
      "step": 78696
    },
    {
      "epoch": 15.11,
      "learning_rate": 0.001,
      "loss": 2.6361,
      "step": 78708
    },
    {
      "epoch": 15.12,
      "learning_rate": 0.001,
      "loss": 2.6317,
      "step": 78720
    },
    {
      "epoch": 15.12,
      "learning_rate": 0.001,
      "loss": 2.6443,
      "step": 78732
    },
    {
      "epoch": 15.12,
      "learning_rate": 0.001,
      "loss": 2.6327,
      "step": 78744
    },
    {
      "epoch": 15.12,
      "eval_ag_news_accuracy": 0.31434375,
      "eval_ag_news_bleu_score": 4.709331151638334,
      "eval_ag_news_bleu_score_sem": 0.161182568042613,
      "eval_ag_news_emb_cos_sim": 0.7881615161895752,
      "eval_ag_news_emb_cos_sim_sem": 0.008591133258030665,
      "eval_ag_news_emb_top1_equal": 0.171875,
      "eval_ag_news_emb_top1_equal_sem": 0.03347745514062371,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.655778169631958,
      "eval_ag_news_n_ngrams_match_1": 13.542,
      "eval_ag_news_n_ngrams_match_2": 2.956,
      "eval_ag_news_n_ngrams_match_3": 0.866,
      "eval_ag_news_num_pred_words": 46.338,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 38.69762271639328,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.33467655672931806,
      "eval_ag_news_runtime": 12.0354,
      "eval_ag_news_samples_per_second": 41.544,
      "eval_ag_news_steps_per_second": 0.083,
      "eval_ag_news_token_set_f1": 0.340935298374678,
      "eval_ag_news_token_set_f1_sem": 0.004550301569924958,
      "eval_ag_news_token_set_precision": 0.32078544261833436,
      "eval_ag_news_token_set_recall": 0.3810620891958097,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 78750
    },
    {
      "epoch": 15.12,
      "eval_anthropic_toxic_prompts_accuracy": 0.111,
      "eval_anthropic_toxic_prompts_bleu_score": 2.9513907540760345,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11072114351295238,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6517083644866943,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009353276036742178,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.3121449947357178,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.92,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.726,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.616,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.33,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 27.44392946646036,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20610230554944842,
      "eval_anthropic_toxic_prompts_runtime": 13.1437,
      "eval_anthropic_toxic_prompts_samples_per_second": 38.041,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.076,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3464527101543247,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006493648995109544,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.41888436420096403,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32420068256751244,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 78750
    },
    {
      "epoch": 15.12,
      "eval_arxiv_accuracy": 0.3376875,
      "eval_arxiv_bleu_score": 4.014688682625429,
      "eval_arxiv_bleu_score_sem": 0.11804915764060579,
      "eval_arxiv_emb_cos_sim": 0.7362596988677979,
      "eval_arxiv_emb_cos_sim_sem": 0.0077642384320272435,
      "eval_arxiv_emb_top1_equal": 0.234375,
      "eval_arxiv_emb_top1_equal_sem": 0.03758909358128201,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.5092623233795166,
      "eval_arxiv_n_ngrams_match_1": 14.126,
      "eval_arxiv_n_ngrams_match_2": 2.63,
      "eval_arxiv_n_ngrams_match_3": 0.602,
      "eval_arxiv_num_pred_words": 39.79,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 33.42360287728601,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.338672533833623,
      "eval_arxiv_runtime": 10.5877,
      "eval_arxiv_samples_per_second": 47.225,
      "eval_arxiv_steps_per_second": 0.094,
      "eval_arxiv_token_set_f1": 0.3349007315828461,
      "eval_arxiv_token_set_f1_sem": 0.004199454838761151,
      "eval_arxiv_token_set_precision": 0.28074879187714413,
      "eval_arxiv_token_set_recall": 0.43604499348643977,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 78750
    },
    {
      "epoch": 15.12,
      "eval_python_code_alpaca_accuracy": 0.1543125,
      "eval_python_code_alpaca_bleu_score": 4.124095142196487,
      "eval_python_code_alpaca_bleu_score_sem": 0.1319102465316636,
      "eval_python_code_alpaca_emb_cos_sim": 0.7358843088150024,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009029650143362261,
      "eval_python_code_alpaca_emb_top1_equal": 0.1171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.0163207054138184,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.438,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.604,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.78,
      "eval_python_code_alpaca_num_pred_words": 43.612,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 20.41603671313439,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3199745161997377,
      "eval_python_code_alpaca_runtime": 11.583,
      "eval_python_code_alpaca_samples_per_second": 43.167,
      "eval_python_code_alpaca_steps_per_second": 0.086,
      "eval_python_code_alpaca_token_set_f1": 0.46347061144057566,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005509433724409282,
      "eval_python_code_alpaca_token_set_precision": 0.5158004890273531,
      "eval_python_code_alpaca_token_set_recall": 0.4419562188026992,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 78750
    },
    {
      "epoch": 15.12,
      "eval_wikibio_accuracy": 0.31309375,
      "eval_wikibio_bleu_score": 5.374763453331607,
      "eval_wikibio_bleu_score_sem": 0.19453951623300408,
      "eval_wikibio_emb_cos_sim": 0.7150180339813232,
      "eval_wikibio_emb_cos_sim_sem": 0.011016393971704596,
      "eval_wikibio_emb_top1_equal": 0.15625,
      "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.891688585281372,
      "eval_wikibio_n_ngrams_match_1": 9.558,
      "eval_wikibio_n_ngrams_match_2": 3.062,
      "eval_wikibio_n_ngrams_match_3": 1.074,
      "eval_wikibio_num_pred_words": 35.806,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 48.99354649638047,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3336723205016684,
      "eval_wikibio_runtime": 11.9069,
      "eval_wikibio_samples_per_second": 41.993,
      "eval_wikibio_steps_per_second": 0.084,
      "eval_wikibio_token_set_f1": 0.30434849914422285,
      "eval_wikibio_token_set_f1_sem": 0.005529010601084642,
      "eval_wikibio_token_set_precision": 0.308564379616472,
      "eval_wikibio_token_set_recall": 0.32131592914417434,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 78750
    },
    {
      "epoch": 15.12,
      "eval_nq_accuracy": 0.51525,
      "eval_nq_bleu_score": 10.945980265156468,
      "eval_nq_bleu_score_sem": 0.4624122544281547,
      "eval_nq_emb_cos_sim": 0.8205318450927734,
      "eval_nq_emb_cos_sim_sem": 0.007989117552068674,
      "eval_nq_emb_top1_equal": 0.25,
      "eval_nq_emb_top1_equal_sem": 0.03842366440207048,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.271064519882202,
      "eval_nq_n_ngrams_match_1": 22.214,
      "eval_nq_n_ngrams_match_2": 7.958,
      "eval_nq_n_ngrams_match_3": 3.604,
      "eval_nq_num_pred_words": 48.75,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.689710214994799,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4305066219500824,
      "eval_nq_runtime": 11.9682,
      "eval_nq_samples_per_second": 41.777,
      "eval_nq_steps_per_second": 0.084,
      "eval_nq_token_set_f1": 0.44740259154456813,
      "eval_nq_token_set_f1_sem": 0.0049342745586682946,
      "eval_nq_token_set_precision": 0.40180395838782174,
      "eval_nq_token_set_recall": 0.5153171579555549,
      "eval_nq_true_num_tokens": 64.0,
      "step": 78750
    },
    {
      "epoch": 15.12,
      "learning_rate": 0.001,
      "loss": 2.6273,
      "step": 78756
    },
    {
      "epoch": 15.12,
      "learning_rate": 0.001,
      "loss": 2.636,
      "step": 78768
    },
    {
      "epoch": 15.13,
      "learning_rate": 0.001,
      "loss": 2.6366,
      "step": 78780
    },
    {
      "epoch": 15.13,
      "learning_rate": 0.001,
      "loss": 2.6433,
      "step": 78792
    },
    {
      "epoch": 15.13,
      "learning_rate": 0.001,
      "loss": 2.6316,
      "step": 78804
    },
    {
      "epoch": 15.13,
      "learning_rate": 0.001,
      "loss": 2.6306,
      "step": 78816
    },
    {
      "epoch": 15.14,
      "learning_rate": 0.001,
      "loss": 2.6322,
      "step": 78828
    },
    {
      "epoch": 15.14,
      "learning_rate": 0.001,
      "loss": 2.6302,
      "step": 78840
    },
    {
      "epoch": 15.14,
      "learning_rate": 0.001,
      "loss": 2.6379,
      "step": 78852
    },
    {
      "epoch": 15.14,
      "learning_rate": 0.001,
      "loss": 2.6326,
      "step": 78864
    },
    {
      "epoch": 15.15,
      "learning_rate": 0.001,
      "loss": 2.6292,
      "step": 78876
    },
    {
      "epoch": 15.15,
      "learning_rate": 0.001,
      "loss": 2.6304,
      "step": 78888
    },
    {
      "epoch": 15.15,
      "learning_rate": 0.001,
      "loss": 2.6343,
      "step": 78900
    },
    {
      "epoch": 15.15,
      "learning_rate": 0.001,
      "loss": 2.6395,
      "step": 78912
    },
    {
      "epoch": 15.15,
      "learning_rate": 0.001,
      "loss": 2.6331,
      "step": 78924
    },
    {
      "epoch": 15.16,
      "learning_rate": 0.001,
      "loss": 2.6351,
      "step": 78936
    },
    {
      "epoch": 15.16,
      "learning_rate": 0.001,
      "loss": 2.635,
      "step": 78948
    },
    {
      "epoch": 15.16,
      "learning_rate": 0.001,
      "loss": 2.6383,
      "step": 78960
    },
    {
      "epoch": 15.16,
      "learning_rate": 0.001,
      "loss": 2.635,
      "step": 78972
    },
    {
      "epoch": 15.17,
      "learning_rate": 0.001,
      "loss": 2.6341,
      "step": 78984
    },
    {
      "epoch": 15.17,
      "learning_rate": 0.001,
      "loss": 2.6361,
      "step": 78996
    },
    {
      "epoch": 15.17,
      "learning_rate": 0.001,
      "loss": 2.6228,
      "step": 79008
    },
    {
      "epoch": 15.17,
      "learning_rate": 0.001,
      "loss": 2.6412,
      "step": 79020
    },
    {
      "epoch": 15.18,
      "learning_rate": 0.001,
      "loss": 2.6222,
      "step": 79032
    },
    {
      "epoch": 15.18,
      "learning_rate": 0.001,
      "loss": 2.6325,
      "step": 79044
    },
    {
      "epoch": 15.18,
      "learning_rate": 0.001,
      "loss": 2.6277,
      "step": 79056
    },
    {
      "epoch": 15.18,
      "learning_rate": 0.001,
      "loss": 2.6319,
      "step": 79068
    },
    {
      "epoch": 15.18,
      "learning_rate": 0.001,
      "loss": 2.6206,
      "step": 79080
    },
    {
      "epoch": 15.19,
      "learning_rate": 0.001,
      "loss": 2.6344,
      "step": 79092
    },
    {
      "epoch": 15.19,
      "learning_rate": 0.001,
      "loss": 2.6401,
      "step": 79104
    },
    {
      "epoch": 15.19,
      "learning_rate": 0.001,
      "loss": 2.635,
      "step": 79116
    },
    {
      "epoch": 15.19,
      "learning_rate": 0.001,
      "loss": 2.6352,
      "step": 79128
    },
    {
      "epoch": 15.2,
      "learning_rate": 0.001,
      "loss": 2.6317,
      "step": 79140
    },
    {
      "epoch": 15.2,
      "learning_rate": 0.001,
      "loss": 2.6378,
      "step": 79152
    },
    {
      "epoch": 15.2,
      "learning_rate": 0.001,
      "loss": 2.6406,
      "step": 79164
    },
    {
      "epoch": 15.2,
      "learning_rate": 0.001,
      "loss": 2.6368,
      "step": 79176
    },
    {
      "epoch": 15.21,
      "learning_rate": 0.001,
      "loss": 2.6281,
      "step": 79188
    },
    {
      "epoch": 15.21,
      "learning_rate": 0.001,
      "loss": 2.6268,
      "step": 79200
    },
    {
      "epoch": 15.21,
      "learning_rate": 0.001,
      "loss": 2.6496,
      "step": 79212
    },
    {
      "epoch": 15.21,
      "learning_rate": 0.001,
      "loss": 2.6362,
      "step": 79224
    },
    {
      "epoch": 15.21,
      "learning_rate": 0.001,
      "loss": 2.6423,
      "step": 79236
    },
    {
      "epoch": 15.22,
      "learning_rate": 0.001,
      "loss": 2.6391,
      "step": 79248
    },
    {
      "epoch": 15.22,
      "learning_rate": 0.001,
      "loss": 2.6337,
      "step": 79260
    },
    {
      "epoch": 15.22,
      "learning_rate": 0.001,
      "loss": 2.6401,
      "step": 79272
    },
    {
      "epoch": 15.22,
      "learning_rate": 0.001,
      "loss": 2.6316,
      "step": 79284
    },
    {
      "epoch": 15.23,
      "learning_rate": 0.001,
      "loss": 2.6469,
      "step": 79296
    },
    {
      "epoch": 15.23,
      "learning_rate": 0.001,
      "loss": 2.6277,
      "step": 79308
    },
    {
      "epoch": 15.23,
      "learning_rate": 0.001,
      "loss": 2.6353,
      "step": 79320
    },
    {
      "epoch": 15.23,
      "learning_rate": 0.001,
      "loss": 2.6423,
      "step": 79332
    },
    {
      "epoch": 15.24,
      "learning_rate": 0.001,
      "loss": 2.635,
      "step": 79344
    },
    {
      "epoch": 15.24,
      "learning_rate": 0.001,
      "loss": 2.6333,
      "step": 79356
    },
    {
      "epoch": 15.24,
      "learning_rate": 0.001,
      "loss": 2.6291,
      "step": 79368
    },
    {
      "epoch": 15.24,
      "eval_ag_news_accuracy": 0.31440625,
      "eval_ag_news_bleu_score": 4.601826717340538,
      "eval_ag_news_bleu_score_sem": 0.14277041737178586,
      "eval_ag_news_emb_cos_sim": 0.7933106422424316,
      "eval_ag_news_emb_cos_sim_sem": 0.007978230494262705,
      "eval_ag_news_emb_top1_equal": 0.171875,
      "eval_ag_news_emb_top1_equal_sem": 0.03347745514062371,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.6685566902160645,
      "eval_ag_news_n_ngrams_match_1": 13.466,
      "eval_ag_news_n_ngrams_match_2": 2.908,
      "eval_ag_news_n_ngrams_match_3": 0.824,
      "eval_ag_news_num_pred_words": 46.846,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 39.195294064545095,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.33121353065097914,
      "eval_ag_news_runtime": 11.5225,
      "eval_ag_news_samples_per_second": 43.393,
      "eval_ag_news_steps_per_second": 0.087,
      "eval_ag_news_token_set_f1": 0.33602591897211426,
      "eval_ag_news_token_set_f1_sem": 0.004406017253693179,
      "eval_ag_news_token_set_precision": 0.3194280006993723,
      "eval_ag_news_token_set_recall": 0.36841965570753565,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 79375
    },
    {
      "epoch": 15.24,
      "eval_anthropic_toxic_prompts_accuracy": 0.11059375,
      "eval_anthropic_toxic_prompts_bleu_score": 3.0523709849639213,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11754473206210375,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6514561176300049,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009804986861291208,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.3238117694854736,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.06,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.822,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.678,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.48,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 27.765986641027126,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20768753960850322,
      "eval_anthropic_toxic_prompts_runtime": 10.6024,
      "eval_anthropic_toxic_prompts_samples_per_second": 47.159,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.094,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.34962818427770315,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0065926318618090875,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.42740865047362764,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3224157869496725,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 79375
    },
    {
      "epoch": 15.24,
      "eval_arxiv_accuracy": 0.3366875,
      "eval_arxiv_bleu_score": 4.08733678741479,
      "eval_arxiv_bleu_score_sem": 0.1179969326961493,
      "eval_arxiv_emb_cos_sim": 0.7434097528457642,
      "eval_arxiv_emb_cos_sim_sem": 0.008391037143458147,
      "eval_arxiv_emb_top1_equal": 0.28125,
      "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.522575616836548,
      "eval_arxiv_n_ngrams_match_1": 14.44,
      "eval_arxiv_n_ngrams_match_2": 2.752,
      "eval_arxiv_n_ngrams_match_3": 0.624,
      "eval_arxiv_num_pred_words": 40.772,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 33.87155636248976,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.34348354748856513,
      "eval_arxiv_runtime": 10.197,
      "eval_arxiv_samples_per_second": 49.034,
      "eval_arxiv_steps_per_second": 0.098,
      "eval_arxiv_token_set_f1": 0.33744360938819495,
      "eval_arxiv_token_set_f1_sem": 0.004397577131159572,
      "eval_arxiv_token_set_precision": 0.2867417233004457,
      "eval_arxiv_token_set_recall": 0.43558523529975657,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 79375
    },
    {
      "epoch": 15.24,
      "eval_python_code_alpaca_accuracy": 0.15625,
      "eval_python_code_alpaca_bleu_score": 4.382676426037081,
      "eval_python_code_alpaca_bleu_score_sem": 0.13415464242537486,
      "eval_python_code_alpaca_emb_cos_sim": 0.7466802597045898,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008349086541210632,
      "eval_python_code_alpaca_emb_top1_equal": 0.1171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.9901957511901855,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.736,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.78,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.874,
      "eval_python_code_alpaca_num_pred_words": 44.23,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 19.889575518600008,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.32238036846327545,
      "eval_python_code_alpaca_runtime": 10.6375,
      "eval_python_code_alpaca_samples_per_second": 47.004,
      "eval_python_code_alpaca_steps_per_second": 0.094,
      "eval_python_code_alpaca_token_set_f1": 0.467201210929703,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005347815623137397,
      "eval_python_code_alpaca_token_set_precision": 0.531586194251142,
      "eval_python_code_alpaca_token_set_recall": 0.43667802521573623,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 79375
    },
    {
      "epoch": 15.24,
      "eval_wikibio_accuracy": 0.31775,
      "eval_wikibio_bleu_score": 5.685628954184466,
      "eval_wikibio_bleu_score_sem": 0.1990435046321838,
      "eval_wikibio_emb_cos_sim": 0.7299209833145142,
      "eval_wikibio_emb_cos_sim_sem": 0.009531472253701729,
      "eval_wikibio_emb_top1_equal": 0.1875,
      "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.853386163711548,
      "eval_wikibio_n_ngrams_match_1": 9.868,
      "eval_wikibio_n_ngrams_match_2": 3.306,
      "eval_wikibio_n_ngrams_match_3": 1.198,
      "eval_wikibio_num_pred_words": 36.71,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 47.15245915503024,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.33907118076742054,
      "eval_wikibio_runtime": 11.1945,
      "eval_wikibio_samples_per_second": 44.665,
      "eval_wikibio_steps_per_second": 0.089,
      "eval_wikibio_token_set_f1": 0.3108423995817786,
      "eval_wikibio_token_set_f1_sem": 0.00566353643505371,
      "eval_wikibio_token_set_precision": 0.3189239347665758,
      "eval_wikibio_token_set_recall": 0.31872158424909264,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 79375
    },
    {
      "epoch": 15.24,
      "eval_nq_accuracy": 0.51615625,
      "eval_nq_bleu_score": 11.358283890189858,
      "eval_nq_bleu_score_sem": 0.4797350033308438,
      "eval_nq_emb_cos_sim": 0.8144046664237976,
      "eval_nq_emb_cos_sim_sem": 0.008683270220142527,
      "eval_nq_emb_top1_equal": 0.2421875,
      "eval_nq_emb_top1_equal_sem": 0.038014990119662626,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.268126964569092,
      "eval_nq_n_ngrams_match_1": 22.7,
      "eval_nq_n_ngrams_match_2": 8.308,
      "eval_nq_n_ngrams_match_3": 3.832,
      "eval_nq_num_pred_words": 49.374,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.661287921738067,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4353169928740548,
      "eval_nq_runtime": 10.8621,
      "eval_nq_samples_per_second": 46.032,
      "eval_nq_steps_per_second": 0.092,
      "eval_nq_token_set_f1": 0.4516246166839993,
      "eval_nq_token_set_f1_sem": 0.005137952527401246,
      "eval_nq_token_set_precision": 0.4084688851511734,
      "eval_nq_token_set_recall": 0.5148402668331971,
      "eval_nq_true_num_tokens": 64.0,
      "step": 79375
    },
    {
      "epoch": 15.24,
      "learning_rate": 0.001,
      "loss": 2.6231,
      "step": 79380
    },
    {
      "epoch": 15.24,
      "learning_rate": 0.001,
      "loss": 2.6344,
      "step": 79392
    },
    {
      "epoch": 15.25,
      "learning_rate": 0.001,
      "loss": 2.6367,
      "step": 79404
    },
    {
      "epoch": 15.25,
      "learning_rate": 0.001,
      "loss": 2.6358,
      "step": 79416
    },
    {
      "epoch": 15.25,
      "learning_rate": 0.001,
      "loss": 2.642,
      "step": 79428
    },
    {
      "epoch": 15.25,
      "learning_rate": 0.001,
      "loss": 2.6387,
      "step": 79440
    },
    {
      "epoch": 15.26,
      "learning_rate": 0.001,
      "loss": 2.6316,
      "step": 79452
    },
    {
      "epoch": 15.26,
      "learning_rate": 0.001,
      "loss": 2.6316,
      "step": 79464
    },
    {
      "epoch": 15.26,
      "learning_rate": 0.001,
      "loss": 2.638,
      "step": 79476
    },
    {
      "epoch": 15.26,
      "learning_rate": 0.001,
      "loss": 2.6454,
      "step": 79488
    },
    {
      "epoch": 15.26,
      "learning_rate": 0.001,
      "loss": 2.6308,
      "step": 79500
    },
    {
      "epoch": 15.27,
      "learning_rate": 0.001,
      "loss": 2.6296,
      "step": 79512
    },
    {
      "epoch": 15.27,
      "learning_rate": 0.001,
      "loss": 2.6416,
      "step": 79524
    },
    {
      "epoch": 15.27,
      "learning_rate": 0.001,
      "loss": 2.6402,
      "step": 79536
    },
    {
      "epoch": 15.27,
      "learning_rate": 0.001,
      "loss": 2.6433,
      "step": 79548
    },
    {
      "epoch": 15.28,
      "learning_rate": 0.001,
      "loss": 2.6407,
      "step": 79560
    },
    {
      "epoch": 15.28,
      "learning_rate": 0.001,
      "loss": 2.6405,
      "step": 79572
    },
    {
      "epoch": 15.28,
      "learning_rate": 0.001,
      "loss": 2.6403,
      "step": 79584
    },
    {
      "epoch": 15.28,
      "learning_rate": 0.001,
      "loss": 2.6353,
      "step": 79596
    },
    {
      "epoch": 15.29,
      "learning_rate": 0.001,
      "loss": 2.6438,
      "step": 79608
    },
    {
      "epoch": 15.29,
      "learning_rate": 0.001,
      "loss": 2.6318,
      "step": 79620
    },
    {
      "epoch": 15.29,
      "learning_rate": 0.001,
      "loss": 2.6454,
      "step": 79632
    },
    {
      "epoch": 15.29,
      "learning_rate": 0.001,
      "loss": 2.6475,
      "step": 79644
    },
    {
      "epoch": 15.29,
      "learning_rate": 0.001,
      "loss": 2.6392,
      "step": 79656
    },
    {
      "epoch": 15.3,
      "learning_rate": 0.001,
      "loss": 2.6461,
      "step": 79668
    },
    {
      "epoch": 15.3,
      "learning_rate": 0.001,
      "loss": 2.6252,
      "step": 79680
    },
    {
      "epoch": 15.3,
      "learning_rate": 0.001,
      "loss": 2.6353,
      "step": 79692
    },
    {
      "epoch": 15.3,
      "learning_rate": 0.001,
      "loss": 2.6367,
      "step": 79704
    },
    {
      "epoch": 15.31,
      "learning_rate": 0.001,
      "loss": 2.639,
      "step": 79716
    },
    {
      "epoch": 15.31,
      "learning_rate": 0.001,
      "loss": 2.6436,
      "step": 79728
    },
    {
      "epoch": 15.31,
      "learning_rate": 0.001,
      "loss": 2.6317,
      "step": 79740
    },
    {
      "epoch": 15.31,
      "learning_rate": 0.001,
      "loss": 2.632,
      "step": 79752
    },
    {
      "epoch": 15.32,
      "learning_rate": 0.001,
      "loss": 2.6428,
      "step": 79764
    },
    {
      "epoch": 15.32,
      "learning_rate": 0.001,
      "loss": 2.6454,
      "step": 79776
    },
    {
      "epoch": 15.32,
      "learning_rate": 0.001,
      "loss": 2.6387,
      "step": 79788
    },
    {
      "epoch": 15.32,
      "learning_rate": 0.001,
      "loss": 2.6404,
      "step": 79800
    },
    {
      "epoch": 15.32,
      "learning_rate": 0.001,
      "loss": 2.653,
      "step": 79812
    },
    {
      "epoch": 15.33,
      "learning_rate": 0.001,
      "loss": 2.6435,
      "step": 79824
    },
    {
      "epoch": 15.33,
      "learning_rate": 0.001,
      "loss": 2.6379,
      "step": 79836
    },
    {
      "epoch": 15.33,
      "learning_rate": 0.001,
      "loss": 2.6424,
      "step": 79848
    },
    {
      "epoch": 15.33,
      "learning_rate": 0.001,
      "loss": 2.6396,
      "step": 79860
    },
    {
      "epoch": 15.34,
      "learning_rate": 0.001,
      "loss": 2.6356,
      "step": 79872
    },
    {
      "epoch": 15.34,
      "learning_rate": 0.001,
      "loss": 2.6372,
      "step": 79884
    },
    {
      "epoch": 15.34,
      "learning_rate": 0.001,
      "loss": 2.6468,
      "step": 79896
    },
    {
      "epoch": 15.34,
      "learning_rate": 0.001,
      "loss": 2.6345,
      "step": 79908
    },
    {
      "epoch": 15.35,
      "learning_rate": 0.001,
      "loss": 2.6342,
      "step": 79920
    },
    {
      "epoch": 15.35,
      "learning_rate": 0.001,
      "loss": 2.6209,
      "step": 79932
    },
    {
      "epoch": 15.35,
      "learning_rate": 0.001,
      "loss": 2.6402,
      "step": 79944
    },
    {
      "epoch": 15.35,
      "learning_rate": 0.001,
      "loss": 2.6446,
      "step": 79956
    },
    {
      "epoch": 15.35,
      "learning_rate": 0.001,
      "loss": 2.64,
      "step": 79968
    },
    {
      "epoch": 15.36,
      "learning_rate": 0.001,
      "loss": 2.6435,
      "step": 79980
    },
    {
      "epoch": 15.36,
      "learning_rate": 0.001,
      "loss": 2.6394,
      "step": 79992
    },
    {
      "epoch": 15.36,
      "eval_ag_news_accuracy": 0.31578125,
      "eval_ag_news_bleu_score": 4.730361594997509,
      "eval_ag_news_bleu_score_sem": 0.15140771371019152,
      "eval_ag_news_emb_cos_sim": 0.788294792175293,
      "eval_ag_news_emb_cos_sim_sem": 0.008205163614624533,
      "eval_ag_news_emb_top1_equal": 0.203125,
      "eval_ag_news_emb_top1_equal_sem": 0.03570055125142555,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.6438395977020264,
      "eval_ag_news_n_ngrams_match_1": 13.59,
      "eval_ag_news_n_ngrams_match_2": 3.024,
      "eval_ag_news_n_ngrams_match_3": 0.874,
      "eval_ag_news_num_pred_words": 46.118,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 38.23837519851944,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3362795592336469,
      "eval_ag_news_runtime": 10.2423,
      "eval_ag_news_samples_per_second": 48.817,
      "eval_ag_news_steps_per_second": 0.098,
      "eval_ag_news_token_set_f1": 0.34467688354510934,
      "eval_ag_news_token_set_f1_sem": 0.00445724251939383,
      "eval_ag_news_token_set_precision": 0.32438450030726224,
      "eval_ag_news_token_set_recall": 0.39059500748455456,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 80000
    },
    {
      "epoch": 15.36,
      "eval_anthropic_toxic_prompts_accuracy": 0.11084375,
      "eval_anthropic_toxic_prompts_bleu_score": 2.9742435628419184,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10952665715027296,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6537163257598877,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009910595668402872,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.3290483951568604,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.07,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.838,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.628,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.888,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 27.911768087758325,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20861706374940747,
      "eval_anthropic_toxic_prompts_runtime": 9.4752,
      "eval_anthropic_toxic_prompts_samples_per_second": 52.769,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.106,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3496101650782922,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00622234355091113,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4268718953068824,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32234484236400307,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 80000
    },
    {
      "epoch": 15.36,
      "eval_arxiv_accuracy": 0.335375,
      "eval_arxiv_bleu_score": 4.0897295226619725,
      "eval_arxiv_bleu_score_sem": 0.1183274460994473,
      "eval_arxiv_emb_cos_sim": 0.7445273399353027,
      "eval_arxiv_emb_cos_sim_sem": 0.008040803417777699,
      "eval_arxiv_emb_top1_equal": 0.265625,
      "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.506622552871704,
      "eval_arxiv_n_ngrams_match_1": 14.416,
      "eval_arxiv_n_ngrams_match_2": 2.752,
      "eval_arxiv_n_ngrams_match_3": 0.6,
      "eval_arxiv_num_pred_words": 40.114,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 33.335488588064585,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.34368532984699696,
      "eval_arxiv_runtime": 11.5191,
      "eval_arxiv_samples_per_second": 43.406,
      "eval_arxiv_steps_per_second": 0.087,
      "eval_arxiv_token_set_f1": 0.3377013275927523,
      "eval_arxiv_token_set_f1_sem": 0.004391542781335758,
      "eval_arxiv_token_set_precision": 0.28551767564777564,
      "eval_arxiv_token_set_recall": 0.4328621113439869,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 80000
    },
    {
      "epoch": 15.36,
      "eval_python_code_alpaca_accuracy": 0.15671875,
      "eval_python_code_alpaca_bleu_score": 4.158878141786196,
      "eval_python_code_alpaca_bleu_score_sem": 0.12894559423333093,
      "eval_python_code_alpaca_emb_cos_sim": 0.7361272573471069,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008738172417228539,
      "eval_python_code_alpaca_emb_top1_equal": 0.140625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.9702227115631104,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.498,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.684,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.82,
      "eval_python_code_alpaca_num_pred_words": 43.538,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 19.49626115535216,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3163294556758225,
      "eval_python_code_alpaca_runtime": 10.0366,
      "eval_python_code_alpaca_samples_per_second": 49.818,
      "eval_python_code_alpaca_steps_per_second": 0.1,
      "eval_python_code_alpaca_token_set_f1": 0.461687627779369,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0059686049865851345,
      "eval_python_code_alpaca_token_set_precision": 0.5167242553149951,
      "eval_python_code_alpaca_token_set_recall": 0.43865666310553725,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 80000
    },
    {
      "epoch": 15.36,
      "eval_wikibio_accuracy": 0.31803125,
      "eval_wikibio_bleu_score": 5.205853509318029,
      "eval_wikibio_bleu_score_sem": 0.1989527554720283,
      "eval_wikibio_emb_cos_sim": 0.7154920101165771,
      "eval_wikibio_emb_cos_sim_sem": 0.010590209649373992,
      "eval_wikibio_emb_top1_equal": 0.1484375,
      "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.805198907852173,
      "eval_wikibio_n_ngrams_match_1": 9.166,
      "eval_wikibio_n_ngrams_match_2": 2.96,
      "eval_wikibio_n_ngrams_match_3": 1.016,
      "eval_wikibio_num_pred_words": 34.906,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 44.93418698679945,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3219322838921782,
      "eval_wikibio_runtime": 10.5113,
      "eval_wikibio_samples_per_second": 47.568,
      "eval_wikibio_steps_per_second": 0.095,
      "eval_wikibio_token_set_f1": 0.29404700280737817,
      "eval_wikibio_token_set_f1_sem": 0.006054734298112563,
      "eval_wikibio_token_set_precision": 0.2975466241182733,
      "eval_wikibio_token_set_recall": 0.31098053412762106,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 80000
    },
    {
      "epoch": 15.36,
      "eval_nq_accuracy": 0.5138125,
      "eval_nq_bleu_score": 11.350753812900662,
      "eval_nq_bleu_score_sem": 0.4824661409085098,
      "eval_nq_emb_cos_sim": 0.8283723592758179,
      "eval_nq_emb_cos_sim_sem": 0.006872088430727814,
      "eval_nq_emb_top1_equal": 0.234375,
      "eval_nq_emb_top1_equal_sem": 0.03758909358128201,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.2686431407928467,
      "eval_nq_n_ngrams_match_1": 22.62,
      "eval_nq_n_ngrams_match_2": 8.246,
      "eval_nq_n_ngrams_match_3": 3.792,
      "eval_nq_num_pred_words": 49.064,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.666276136142201,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4328425392537225,
      "eval_nq_runtime": 10.1429,
      "eval_nq_samples_per_second": 49.295,
      "eval_nq_steps_per_second": 0.099,
      "eval_nq_token_set_f1": 0.4511458695487511,
      "eval_nq_token_set_f1_sem": 0.005015187650887773,
      "eval_nq_token_set_precision": 0.4072391617969803,
      "eval_nq_token_set_recall": 0.5148671078914516,
      "eval_nq_true_num_tokens": 64.0,
      "step": 80000
    },
    {
      "epoch": 15.36,
      "learning_rate": 0.001,
      "loss": 2.6269,
      "step": 80004
    },
    {
      "epoch": 15.36,
      "learning_rate": 0.001,
      "loss": 2.6347,
      "step": 80016
    },
    {
      "epoch": 15.37,
      "learning_rate": 0.001,
      "loss": 2.629,
      "step": 80028
    },
    {
      "epoch": 15.37,
      "learning_rate": 0.001,
      "loss": 2.636,
      "step": 80040
    },
    {
      "epoch": 15.37,
      "learning_rate": 0.001,
      "loss": 2.6358,
      "step": 80052
    },
    {
      "epoch": 15.37,
      "learning_rate": 0.001,
      "loss": 2.6285,
      "step": 80064
    },
    {
      "epoch": 15.38,
      "learning_rate": 0.001,
      "loss": 2.6373,
      "step": 80076
    },
    {
      "epoch": 15.38,
      "learning_rate": 0.001,
      "loss": 2.6223,
      "step": 80088
    },
    {
      "epoch": 15.38,
      "learning_rate": 0.001,
      "loss": 2.6424,
      "step": 80100
    },
    {
      "epoch": 15.38,
      "learning_rate": 0.001,
      "loss": 2.6378,
      "step": 80112
    },
    {
      "epoch": 15.38,
      "learning_rate": 0.001,
      "loss": 2.6302,
      "step": 80124
    },
    {
      "epoch": 15.39,
      "learning_rate": 0.001,
      "loss": 2.6405,
      "step": 80136
    },
    {
      "epoch": 15.39,
      "learning_rate": 0.001,
      "loss": 2.636,
      "step": 80148
    },
    {
      "epoch": 15.39,
      "learning_rate": 0.001,
      "loss": 2.6346,
      "step": 80160
    },
    {
      "epoch": 15.39,
      "learning_rate": 0.001,
      "loss": 2.64,
      "step": 80172
    },
    {
      "epoch": 15.4,
      "learning_rate": 0.001,
      "loss": 2.6383,
      "step": 80184
    },
    {
      "epoch": 15.4,
      "learning_rate": 0.001,
      "loss": 2.6308,
      "step": 80196
    },
    {
      "epoch": 15.4,
      "learning_rate": 0.001,
      "loss": 2.6304,
      "step": 80208
    },
    {
      "epoch": 15.4,
      "learning_rate": 0.001,
      "loss": 2.626,
      "step": 80220
    },
    {
      "epoch": 15.41,
      "learning_rate": 0.001,
      "loss": 2.6309,
      "step": 80232
    },
    {
      "epoch": 15.41,
      "learning_rate": 0.001,
      "loss": 2.6377,
      "step": 80244
    },
    {
      "epoch": 15.41,
      "learning_rate": 0.001,
      "loss": 2.6537,
      "step": 80256
    },
    {
      "epoch": 15.41,
      "learning_rate": 0.001,
      "loss": 2.64,
      "step": 80268
    },
    {
      "epoch": 15.41,
      "learning_rate": 0.001,
      "loss": 2.6394,
      "step": 80280
    },
    {
      "epoch": 15.42,
      "learning_rate": 0.001,
      "loss": 2.6405,
      "step": 80292
    },
    {
      "epoch": 15.42,
      "learning_rate": 0.001,
      "loss": 2.6351,
      "step": 80304
    },
    {
      "epoch": 15.42,
      "learning_rate": 0.001,
      "loss": 2.6307,
      "step": 80316
    },
    {
      "epoch": 15.42,
      "learning_rate": 0.001,
      "loss": 2.6339,
      "step": 80328
    },
    {
      "epoch": 15.43,
      "learning_rate": 0.001,
      "loss": 2.6377,
      "step": 80340
    },
    {
      "epoch": 15.43,
      "learning_rate": 0.001,
      "loss": 2.6278,
      "step": 80352
    },
    {
      "epoch": 15.43,
      "learning_rate": 0.001,
      "loss": 2.6371,
      "step": 80364
    },
    {
      "epoch": 15.43,
      "learning_rate": 0.001,
      "loss": 2.6378,
      "step": 80376
    },
    {
      "epoch": 15.44,
      "learning_rate": 0.001,
      "loss": 2.6406,
      "step": 80388
    },
    {
      "epoch": 15.44,
      "learning_rate": 0.001,
      "loss": 2.6456,
      "step": 80400
    },
    {
      "epoch": 15.44,
      "learning_rate": 0.001,
      "loss": 2.6442,
      "step": 80412
    },
    {
      "epoch": 15.44,
      "learning_rate": 0.001,
      "loss": 2.6338,
      "step": 80424
    },
    {
      "epoch": 15.44,
      "learning_rate": 0.001,
      "loss": 2.635,
      "step": 80436
    },
    {
      "epoch": 15.45,
      "learning_rate": 0.001,
      "loss": 2.6362,
      "step": 80448
    },
    {
      "epoch": 15.45,
      "learning_rate": 0.001,
      "loss": 2.6418,
      "step": 80460
    },
    {
      "epoch": 15.45,
      "learning_rate": 0.001,
      "loss": 2.6343,
      "step": 80472
    },
    {
      "epoch": 15.45,
      "learning_rate": 0.001,
      "loss": 2.6375,
      "step": 80484
    },
    {
      "epoch": 15.46,
      "learning_rate": 0.001,
      "loss": 2.6333,
      "step": 80496
    },
    {
      "epoch": 15.46,
      "learning_rate": 0.001,
      "loss": 2.6332,
      "step": 80508
    },
    {
      "epoch": 15.46,
      "learning_rate": 0.001,
      "loss": 2.6374,
      "step": 80520
    },
    {
      "epoch": 15.46,
      "learning_rate": 0.001,
      "loss": 2.6383,
      "step": 80532
    },
    {
      "epoch": 15.47,
      "learning_rate": 0.001,
      "loss": 2.6341,
      "step": 80544
    },
    {
      "epoch": 15.47,
      "learning_rate": 0.001,
      "loss": 2.625,
      "step": 80556
    },
    {
      "epoch": 15.47,
      "learning_rate": 0.001,
      "loss": 2.6371,
      "step": 80568
    },
    {
      "epoch": 15.47,
      "learning_rate": 0.001,
      "loss": 2.6335,
      "step": 80580
    },
    {
      "epoch": 15.47,
      "learning_rate": 0.001,
      "loss": 2.6313,
      "step": 80592
    },
    {
      "epoch": 15.48,
      "learning_rate": 0.001,
      "loss": 2.636,
      "step": 80604
    },
    {
      "epoch": 15.48,
      "learning_rate": 0.001,
      "loss": 2.6421,
      "step": 80616
    },
    {
      "epoch": 15.48,
      "eval_ag_news_accuracy": 0.31328125,
      "eval_ag_news_bleu_score": 4.902969187138628,
      "eval_ag_news_bleu_score_sem": 0.15685461126798547,
      "eval_ag_news_emb_cos_sim": 0.7938907146453857,
      "eval_ag_news_emb_cos_sim_sem": 0.00737748577920112,
      "eval_ag_news_emb_top1_equal": 0.2109375,
      "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.663857936859131,
      "eval_ag_news_n_ngrams_match_1": 13.77,
      "eval_ag_news_n_ngrams_match_2": 3.082,
      "eval_ag_news_n_ngrams_match_3": 0.93,
      "eval_ag_news_num_pred_words": 46.89,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 39.011557050488804,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3369456785052954,
      "eval_ag_news_runtime": 10.1508,
      "eval_ag_news_samples_per_second": 49.257,
      "eval_ag_news_steps_per_second": 0.099,
      "eval_ag_news_token_set_f1": 0.34591004934855424,
      "eval_ag_news_token_set_f1_sem": 0.004406920066754583,
      "eval_ag_news_token_set_precision": 0.32818773207092217,
      "eval_ag_news_token_set_recall": 0.3836104996699221,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 80625
    },
    {
      "epoch": 15.48,
      "eval_anthropic_toxic_prompts_accuracy": 0.11034375,
      "eval_anthropic_toxic_prompts_bleu_score": 2.8101509861178857,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10888008627699274,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6572726964950562,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009766325688861066,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.3258824348449707,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.82,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.714,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.62,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.966,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 27.82354027433018,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.19854912477762166,
      "eval_anthropic_toxic_prompts_runtime": 9.699,
      "eval_anthropic_toxic_prompts_samples_per_second": 51.551,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.103,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3447942813783264,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00669296404310681,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.40981891673460225,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32834953575860026,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 80625
    },
    {
      "epoch": 15.48,
      "eval_arxiv_accuracy": 0.3374375,
      "eval_arxiv_bleu_score": 4.125040820004931,
      "eval_arxiv_bleu_score_sem": 0.11326277337378322,
      "eval_arxiv_emb_cos_sim": 0.7457396388053894,
      "eval_arxiv_emb_cos_sim_sem": 0.008271936076466932,
      "eval_arxiv_emb_top1_equal": 0.2578125,
      "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.5074424743652344,
      "eval_arxiv_n_ngrams_match_1": 14.722,
      "eval_arxiv_n_ngrams_match_2": 2.82,
      "eval_arxiv_n_ngrams_match_3": 0.576,
      "eval_arxiv_num_pred_words": 41.108,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 33.36283227996376,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3480919874396554,
      "eval_arxiv_runtime": 10.007,
      "eval_arxiv_samples_per_second": 49.965,
      "eval_arxiv_steps_per_second": 0.1,
      "eval_arxiv_token_set_f1": 0.34262515759688517,
      "eval_arxiv_token_set_f1_sem": 0.0040497237004410575,
      "eval_arxiv_token_set_precision": 0.29292742441436825,
      "eval_arxiv_token_set_recall": 0.42921025127510737,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 80625
    },
    {
      "epoch": 15.48,
      "eval_python_code_alpaca_accuracy": 0.1569375,
      "eval_python_code_alpaca_bleu_score": 4.3937920422944865,
      "eval_python_code_alpaca_bleu_score_sem": 0.13308568352125713,
      "eval_python_code_alpaca_emb_cos_sim": 0.7495800256729126,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009260718264689714,
      "eval_python_code_alpaca_emb_top1_equal": 0.09375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.025864720141013958,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.962172746658325,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.832,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.786,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.864,
      "eval_python_code_alpaca_num_pred_words": 43.714,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 19.33994694335842,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.327535308174034,
      "eval_python_code_alpaca_runtime": 10.3932,
      "eval_python_code_alpaca_samples_per_second": 48.108,
      "eval_python_code_alpaca_steps_per_second": 0.096,
      "eval_python_code_alpaca_token_set_f1": 0.46835692039661836,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005518144154934098,
      "eval_python_code_alpaca_token_set_precision": 0.5356498709782705,
      "eval_python_code_alpaca_token_set_recall": 0.4362219847384163,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 80625
    },
    {
      "epoch": 15.48,
      "eval_wikibio_accuracy": 0.3163125,
      "eval_wikibio_bleu_score": 5.4346361908823315,
      "eval_wikibio_bleu_score_sem": 0.1947358151126523,
      "eval_wikibio_emb_cos_sim": 0.7088469862937927,
      "eval_wikibio_emb_cos_sim_sem": 0.011166227993794348,
      "eval_wikibio_emb_top1_equal": 0.1484375,
      "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.848999500274658,
      "eval_wikibio_n_ngrams_match_1": 9.372,
      "eval_wikibio_n_ngrams_match_2": 3.048,
      "eval_wikibio_n_ngrams_match_3": 1.04,
      "eval_wikibio_num_pred_words": 34.668,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 46.94607019690226,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3337735410525501,
      "eval_wikibio_runtime": 12.0628,
      "eval_wikibio_samples_per_second": 41.45,
      "eval_wikibio_steps_per_second": 0.083,
      "eval_wikibio_token_set_f1": 0.3009399254029897,
      "eval_wikibio_token_set_f1_sem": 0.005763500426829387,
      "eval_wikibio_token_set_precision": 0.3039482143380533,
      "eval_wikibio_token_set_recall": 0.3146186770050787,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 80625
    },
    {
      "epoch": 15.48,
      "eval_nq_accuracy": 0.5168125,
      "eval_nq_bleu_score": 11.266202740249126,
      "eval_nq_bleu_score_sem": 0.4661878370345242,
      "eval_nq_emb_cos_sim": 0.8264041543006897,
      "eval_nq_emb_cos_sim_sem": 0.006618851067083631,
      "eval_nq_emb_top1_equal": 0.265625,
      "eval_nq_emb_top1_equal_sem": 0.03919146934646163,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.2660751342773438,
      "eval_nq_n_ngrams_match_1": 22.614,
      "eval_nq_n_ngrams_match_2": 8.156,
      "eval_nq_n_ngrams_match_3": 3.74,
      "eval_nq_num_pred_words": 49.33,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.641484921668216,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.43435719091213265,
      "eval_nq_runtime": 12.3246,
      "eval_nq_samples_per_second": 40.569,
      "eval_nq_steps_per_second": 0.081,
      "eval_nq_token_set_f1": 0.4520957625381011,
      "eval_nq_token_set_f1_sem": 0.004988544875926743,
      "eval_nq_token_set_precision": 0.40853170003964134,
      "eval_nq_token_set_recall": 0.5148430985261564,
      "eval_nq_true_num_tokens": 64.0,
      "step": 80625
    },
    {
      "epoch": 15.48,
      "learning_rate": 0.001,
      "loss": 2.6349,
      "step": 80628
    },
    {
      "epoch": 15.48,
      "learning_rate": 0.001,
      "loss": 2.6355,
      "step": 80640
    },
    {
      "epoch": 15.49,
      "learning_rate": 0.001,
      "loss": 2.6444,
      "step": 80652
    },
    {
      "epoch": 15.49,
      "learning_rate": 0.001,
      "loss": 2.6255,
      "step": 80664
    },
    {
      "epoch": 15.49,
      "learning_rate": 0.001,
      "loss": 2.6242,
      "step": 80676
    },
    {
      "epoch": 15.49,
      "learning_rate": 0.001,
      "loss": 2.6362,
      "step": 80688
    },
    {
      "epoch": 15.5,
      "learning_rate": 0.001,
      "loss": 2.6422,
      "step": 80700
    },
    {
      "epoch": 15.5,
      "learning_rate": 0.001,
      "loss": 2.6354,
      "step": 80712
    },
    {
      "epoch": 15.5,
      "learning_rate": 0.001,
      "loss": 2.6306,
      "step": 80724
    },
    {
      "epoch": 15.5,
      "learning_rate": 0.001,
      "loss": 2.6432,
      "step": 80736
    },
    {
      "epoch": 15.5,
      "learning_rate": 0.001,
      "loss": 2.6368,
      "step": 80748
    },
    {
      "epoch": 15.51,
      "learning_rate": 0.001,
      "loss": 2.6361,
      "step": 80760
    },
    {
      "epoch": 15.51,
      "learning_rate": 0.001,
      "loss": 2.633,
      "step": 80772
    },
    {
      "epoch": 15.51,
      "learning_rate": 0.001,
      "loss": 2.6415,
      "step": 80784
    },
    {
      "epoch": 15.51,
      "learning_rate": 0.001,
      "loss": 2.6335,
      "step": 80796
    },
    {
      "epoch": 15.52,
      "learning_rate": 0.001,
      "loss": 2.6495,
      "step": 80808
    },
    {
      "epoch": 15.52,
      "learning_rate": 0.001,
      "loss": 2.6389,
      "step": 80820
    },
    {
      "epoch": 15.52,
      "learning_rate": 0.001,
      "loss": 2.6389,
      "step": 80832
    },
    {
      "epoch": 15.52,
      "learning_rate": 0.001,
      "loss": 2.6399,
      "step": 80844
    },
    {
      "epoch": 15.53,
      "learning_rate": 0.001,
      "loss": 2.6432,
      "step": 80856
    },
    {
      "epoch": 15.53,
      "learning_rate": 0.001,
      "loss": 2.6453,
      "step": 80868
    },
    {
      "epoch": 15.53,
      "learning_rate": 0.001,
      "loss": 2.6367,
      "step": 80880
    },
    {
      "epoch": 15.53,
      "learning_rate": 0.001,
      "loss": 2.6475,
      "step": 80892
    },
    {
      "epoch": 15.53,
      "learning_rate": 0.001,
      "loss": 2.6512,
      "step": 80904
    },
    {
      "epoch": 15.54,
      "learning_rate": 0.001,
      "loss": 2.6439,
      "step": 80916
    },
    {
      "epoch": 15.54,
      "learning_rate": 0.001,
      "loss": 2.6323,
      "step": 80928
    },
    {
      "epoch": 15.54,
      "learning_rate": 0.001,
      "loss": 2.636,
      "step": 80940
    },
    {
      "epoch": 15.54,
      "learning_rate": 0.001,
      "loss": 2.6278,
      "step": 80952
    },
    {
      "epoch": 15.55,
      "learning_rate": 0.001,
      "loss": 2.6308,
      "step": 80964
    },
    {
      "epoch": 15.55,
      "learning_rate": 0.001,
      "loss": 2.6411,
      "step": 80976
    },
    {
      "epoch": 15.55,
      "learning_rate": 0.001,
      "loss": 2.6276,
      "step": 80988
    },
    {
      "epoch": 15.55,
      "learning_rate": 0.001,
      "loss": 2.6407,
      "step": 81000
    },
    {
      "epoch": 15.56,
      "learning_rate": 0.001,
      "loss": 2.6379,
      "step": 81012
    },
    {
      "epoch": 15.56,
      "learning_rate": 0.001,
      "loss": 2.6356,
      "step": 81024
    },
    {
      "epoch": 15.56,
      "learning_rate": 0.001,
      "loss": 2.6261,
      "step": 81036
    },
    {
      "epoch": 15.56,
      "learning_rate": 0.001,
      "loss": 2.6304,
      "step": 81048
    },
    {
      "epoch": 15.56,
      "learning_rate": 0.001,
      "loss": 2.6357,
      "step": 81060
    },
    {
      "epoch": 15.57,
      "learning_rate": 0.001,
      "loss": 2.6299,
      "step": 81072
    },
    {
      "epoch": 15.57,
      "learning_rate": 0.001,
      "loss": 2.6393,
      "step": 81084
    },
    {
      "epoch": 15.57,
      "learning_rate": 0.001,
      "loss": 2.6377,
      "step": 81096
    },
    {
      "epoch": 15.57,
      "learning_rate": 0.001,
      "loss": 2.6428,
      "step": 81108
    },
    {
      "epoch": 15.58,
      "learning_rate": 0.001,
      "loss": 2.6464,
      "step": 81120
    },
    {
      "epoch": 15.58,
      "learning_rate": 0.001,
      "loss": 2.6274,
      "step": 81132
    },
    {
      "epoch": 15.58,
      "learning_rate": 0.001,
      "loss": 2.6354,
      "step": 81144
    },
    {
      "epoch": 15.58,
      "learning_rate": 0.001,
      "loss": 2.6391,
      "step": 81156
    },
    {
      "epoch": 15.59,
      "learning_rate": 0.001,
      "loss": 2.6426,
      "step": 81168
    },
    {
      "epoch": 15.59,
      "learning_rate": 0.001,
      "loss": 2.6315,
      "step": 81180
    },
    {
      "epoch": 15.59,
      "learning_rate": 0.001,
      "loss": 2.6404,
      "step": 81192
    },
    {
      "epoch": 15.59,
      "learning_rate": 0.001,
      "loss": 2.6285,
      "step": 81204
    },
    {
      "epoch": 15.59,
      "learning_rate": 0.001,
      "loss": 2.6309,
      "step": 81216
    },
    {
      "epoch": 15.6,
      "learning_rate": 0.001,
      "loss": 2.6412,
      "step": 81228
    },
    {
      "epoch": 15.6,
      "learning_rate": 0.001,
      "loss": 2.635,
      "step": 81240
    },
    {
      "epoch": 15.6,
      "eval_ag_news_accuracy": 0.316375,
      "eval_ag_news_bleu_score": 4.682395421855241,
      "eval_ag_news_bleu_score_sem": 0.15159199569377962,
      "eval_ag_news_emb_cos_sim": 0.7924454808235168,
      "eval_ag_news_emb_cos_sim_sem": 0.007152564512311178,
      "eval_ag_news_emb_top1_equal": 0.1640625,
      "eval_ag_news_emb_top1_equal_sem": 0.03286167651298939,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.641594171524048,
      "eval_ag_news_n_ngrams_match_1": 13.5,
      "eval_ag_news_n_ngrams_match_2": 3.002,
      "eval_ag_news_n_ngrams_match_3": 0.874,
      "eval_ag_news_num_pred_words": 46.048,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 38.152610075506715,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.33551377766739854,
      "eval_ag_news_runtime": 10.4734,
      "eval_ag_news_samples_per_second": 47.74,
      "eval_ag_news_steps_per_second": 0.095,
      "eval_ag_news_token_set_f1": 0.3415970667810647,
      "eval_ag_news_token_set_f1_sem": 0.004477849714244134,
      "eval_ag_news_token_set_precision": 0.3209268164374048,
      "eval_ag_news_token_set_recall": 0.3854853977883826,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 81250
    },
    {
      "epoch": 15.6,
      "eval_anthropic_toxic_prompts_accuracy": 0.11021875,
      "eval_anthropic_toxic_prompts_bleu_score": 2.8650664247207613,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1065648789611312,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.660201907157898,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009367109422676862,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.3250439167022705,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.88,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.726,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.618,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.208,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 27.800219509824505,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20117776760874384,
      "eval_anthropic_toxic_prompts_runtime": 10.1348,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.335,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.099,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.34381236840181684,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006386860215746904,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4158466991993418,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32037843397116683,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 81250
    },
    {
      "epoch": 15.6,
      "eval_arxiv_accuracy": 0.3354375,
      "eval_arxiv_bleu_score": 4.0443330103526085,
      "eval_arxiv_bleu_score_sem": 0.11662538402071894,
      "eval_arxiv_emb_cos_sim": 0.7389492988586426,
      "eval_arxiv_emb_cos_sim_sem": 0.007959178143102257,
      "eval_arxiv_emb_top1_equal": 0.1953125,
      "eval_arxiv_emb_top1_equal_sem": 0.035178457165496856,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4950528144836426,
      "eval_arxiv_n_ngrams_match_1": 14.508,
      "eval_arxiv_n_ngrams_match_2": 2.728,
      "eval_arxiv_n_ngrams_match_3": 0.592,
      "eval_arxiv_num_pred_words": 40.412,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 32.9520282514048,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3462333636337764,
      "eval_arxiv_runtime": 12.2979,
      "eval_arxiv_samples_per_second": 40.657,
      "eval_arxiv_steps_per_second": 0.081,
      "eval_arxiv_token_set_f1": 0.3420483466516191,
      "eval_arxiv_token_set_f1_sem": 0.004247311797294354,
      "eval_arxiv_token_set_precision": 0.28931396658696074,
      "eval_arxiv_token_set_recall": 0.43745266977638797,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 81250
    },
    {
      "epoch": 15.6,
      "eval_python_code_alpaca_accuracy": 0.15525,
      "eval_python_code_alpaca_bleu_score": 4.223859914649429,
      "eval_python_code_alpaca_bleu_score_sem": 0.13389798990119844,
      "eval_python_code_alpaca_emb_cos_sim": 0.7515406012535095,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008981141096359746,
      "eval_python_code_alpaca_emb_top1_equal": 0.09375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.025864720141013958,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.963074207305908,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.464,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.738,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.874,
      "eval_python_code_alpaca_num_pred_words": 44.388,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 19.3573890049391,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.31377960286240353,
      "eval_python_code_alpaca_runtime": 10.0826,
      "eval_python_code_alpaca_samples_per_second": 49.59,
      "eval_python_code_alpaca_steps_per_second": 0.099,
      "eval_python_code_alpaca_token_set_f1": 0.46755875263882934,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005557293216973375,
      "eval_python_code_alpaca_token_set_precision": 0.5156998155802238,
      "eval_python_code_alpaca_token_set_recall": 0.45172114970371036,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 81250
    },
    {
      "epoch": 15.6,
      "eval_wikibio_accuracy": 0.311,
      "eval_wikibio_bleu_score": 5.732317034573434,
      "eval_wikibio_bleu_score_sem": 0.20042552946159797,
      "eval_wikibio_emb_cos_sim": 0.7321215867996216,
      "eval_wikibio_emb_cos_sim_sem": 0.011186965560350724,
      "eval_wikibio_emb_top1_equal": 0.1328125,
      "eval_wikibio_emb_top1_equal_sem": 0.030114394778901498,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.8367044925689697,
      "eval_wikibio_n_ngrams_match_1": 10.26,
      "eval_wikibio_n_ngrams_match_2": 3.368,
      "eval_wikibio_n_ngrams_match_3": 1.182,
      "eval_wikibio_num_pred_words": 36.982,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 46.37240175765687,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3524071211803058,
      "eval_wikibio_runtime": 10.025,
      "eval_wikibio_samples_per_second": 49.875,
      "eval_wikibio_steps_per_second": 0.1,
      "eval_wikibio_token_set_f1": 0.3201287172492095,
      "eval_wikibio_token_set_f1_sem": 0.005216591469242263,
      "eval_wikibio_token_set_precision": 0.3297787345605933,
      "eval_wikibio_token_set_recall": 0.323932980307254,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 81250
    },
    {
      "epoch": 15.6,
      "eval_nq_accuracy": 0.51759375,
      "eval_nq_bleu_score": 11.190894608645293,
      "eval_nq_bleu_score_sem": 0.48407423216222073,
      "eval_nq_emb_cos_sim": 0.8212544322013855,
      "eval_nq_emb_cos_sim_sem": 0.007672706672318217,
      "eval_nq_emb_top1_equal": 0.25,
      "eval_nq_emb_top1_equal_sem": 0.03842366440207048,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.2649385929107666,
      "eval_nq_n_ngrams_match_1": 22.648,
      "eval_nq_n_ngrams_match_2": 8.188,
      "eval_nq_n_ngrams_match_3": 3.718,
      "eval_nq_num_pred_words": 48.948,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.630533199940784,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4364079609747853,
      "eval_nq_runtime": 11.217,
      "eval_nq_samples_per_second": 44.575,
      "eval_nq_steps_per_second": 0.089,
      "eval_nq_token_set_f1": 0.4521435887521291,
      "eval_nq_token_set_f1_sem": 0.0052243279546164275,
      "eval_nq_token_set_precision": 0.4067902147521592,
      "eval_nq_token_set_recall": 0.5180802961290288,
      "eval_nq_true_num_tokens": 64.0,
      "step": 81250
    },
    {
      "epoch": 15.6,
      "learning_rate": 0.001,
      "loss": 2.6447,
      "step": 81252
    },
    {
      "epoch": 15.6,
      "learning_rate": 0.001,
      "loss": 2.6386,
      "step": 81264
    },
    {
      "epoch": 15.61,
      "learning_rate": 0.001,
      "loss": 2.6318,
      "step": 81276
    },
    {
      "epoch": 15.61,
      "learning_rate": 0.001,
      "loss": 2.6308,
      "step": 81288
    },
    {
      "epoch": 15.61,
      "learning_rate": 0.001,
      "loss": 2.6345,
      "step": 81300
    },
    {
      "epoch": 15.61,
      "learning_rate": 0.001,
      "loss": 2.646,
      "step": 81312
    },
    {
      "epoch": 15.62,
      "learning_rate": 0.001,
      "loss": 2.6499,
      "step": 81324
    },
    {
      "epoch": 15.62,
      "learning_rate": 0.001,
      "loss": 2.6416,
      "step": 81336
    },
    {
      "epoch": 15.62,
      "learning_rate": 0.001,
      "loss": 2.6386,
      "step": 81348
    },
    {
      "epoch": 15.62,
      "learning_rate": 0.001,
      "loss": 2.6357,
      "step": 81360
    },
    {
      "epoch": 15.62,
      "learning_rate": 0.001,
      "loss": 2.6372,
      "step": 81372
    },
    {
      "epoch": 15.63,
      "learning_rate": 0.001,
      "loss": 2.626,
      "step": 81384
    },
    {
      "epoch": 15.63,
      "learning_rate": 0.001,
      "loss": 2.6302,
      "step": 81396
    },
    {
      "epoch": 15.63,
      "learning_rate": 0.001,
      "loss": 2.6263,
      "step": 81408
    },
    {
      "epoch": 15.63,
      "learning_rate": 0.001,
      "loss": 2.6375,
      "step": 81420
    },
    {
      "epoch": 15.64,
      "learning_rate": 0.001,
      "loss": 2.6435,
      "step": 81432
    },
    {
      "epoch": 15.64,
      "learning_rate": 0.001,
      "loss": 2.6398,
      "step": 81444
    },
    {
      "epoch": 15.64,
      "learning_rate": 0.001,
      "loss": 2.6348,
      "step": 81456
    },
    {
      "epoch": 15.64,
      "learning_rate": 0.001,
      "loss": 2.632,
      "step": 81468
    },
    {
      "epoch": 15.65,
      "learning_rate": 0.001,
      "loss": 2.6208,
      "step": 81480
    },
    {
      "epoch": 15.65,
      "learning_rate": 0.001,
      "loss": 2.6339,
      "step": 81492
    },
    {
      "epoch": 15.65,
      "learning_rate": 0.001,
      "loss": 2.6355,
      "step": 81504
    },
    {
      "epoch": 15.65,
      "learning_rate": 0.001,
      "loss": 2.6314,
      "step": 81516
    },
    {
      "epoch": 15.65,
      "learning_rate": 0.001,
      "loss": 2.6367,
      "step": 81528
    },
    {
      "epoch": 15.66,
      "learning_rate": 0.001,
      "loss": 2.6374,
      "step": 81540
    },
    {
      "epoch": 15.66,
      "learning_rate": 0.001,
      "loss": 2.6424,
      "step": 81552
    },
    {
      "epoch": 15.66,
      "learning_rate": 0.001,
      "loss": 2.6342,
      "step": 81564
    },
    {
      "epoch": 15.66,
      "learning_rate": 0.001,
      "loss": 2.6328,
      "step": 81576
    },
    {
      "epoch": 15.67,
      "learning_rate": 0.001,
      "loss": 2.6446,
      "step": 81588
    },
    {
      "epoch": 15.67,
      "learning_rate": 0.001,
      "loss": 2.6302,
      "step": 81600
    },
    {
      "epoch": 15.67,
      "learning_rate": 0.001,
      "loss": 2.6265,
      "step": 81612
    },
    {
      "epoch": 15.67,
      "learning_rate": 0.001,
      "loss": 2.6405,
      "step": 81624
    },
    {
      "epoch": 15.68,
      "learning_rate": 0.001,
      "loss": 2.6379,
      "step": 81636
    },
    {
      "epoch": 15.68,
      "learning_rate": 0.001,
      "loss": 2.631,
      "step": 81648
    },
    {
      "epoch": 15.68,
      "learning_rate": 0.001,
      "loss": 2.639,
      "step": 81660
    },
    {
      "epoch": 15.68,
      "learning_rate": 0.001,
      "loss": 2.6453,
      "step": 81672
    },
    {
      "epoch": 15.68,
      "learning_rate": 0.001,
      "loss": 2.6283,
      "step": 81684
    },
    {
      "epoch": 15.69,
      "learning_rate": 0.001,
      "loss": 2.646,
      "step": 81696
    },
    {
      "epoch": 15.69,
      "learning_rate": 0.001,
      "loss": 2.63,
      "step": 81708
    },
    {
      "epoch": 15.69,
      "learning_rate": 0.001,
      "loss": 2.6409,
      "step": 81720
    },
    {
      "epoch": 15.69,
      "learning_rate": 0.001,
      "loss": 2.6306,
      "step": 81732
    },
    {
      "epoch": 15.7,
      "learning_rate": 0.001,
      "loss": 2.643,
      "step": 81744
    },
    {
      "epoch": 15.7,
      "learning_rate": 0.001,
      "loss": 2.6389,
      "step": 81756
    },
    {
      "epoch": 15.7,
      "learning_rate": 0.001,
      "loss": 2.6476,
      "step": 81768
    },
    {
      "epoch": 15.7,
      "learning_rate": 0.001,
      "loss": 2.6407,
      "step": 81780
    },
    {
      "epoch": 15.71,
      "learning_rate": 0.001,
      "loss": 2.6416,
      "step": 81792
    },
    {
      "epoch": 15.71,
      "learning_rate": 0.001,
      "loss": 2.6351,
      "step": 81804
    },
    {
      "epoch": 15.71,
      "learning_rate": 0.001,
      "loss": 2.6385,
      "step": 81816
    },
    {
      "epoch": 15.71,
      "learning_rate": 0.001,
      "loss": 2.644,
      "step": 81828
    },
    {
      "epoch": 15.71,
      "learning_rate": 0.001,
      "loss": 2.639,
      "step": 81840
    },
    {
      "epoch": 15.72,
      "learning_rate": 0.001,
      "loss": 2.6398,
      "step": 81852
    },
    {
      "epoch": 15.72,
      "learning_rate": 0.001,
      "loss": 2.6313,
      "step": 81864
    },
    {
      "epoch": 15.72,
      "eval_ag_news_accuracy": 0.31365625,
      "eval_ag_news_bleu_score": 4.653096769290414,
      "eval_ag_news_bleu_score_sem": 0.14484041879097112,
      "eval_ag_news_emb_cos_sim": 0.7947143316268921,
      "eval_ag_news_emb_cos_sim_sem": 0.007573163718669581,
      "eval_ag_news_emb_top1_equal": 0.2265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03714537682851538,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.661522150039673,
      "eval_ag_news_n_ngrams_match_1": 13.618,
      "eval_ag_news_n_ngrams_match_2": 2.932,
      "eval_ag_news_n_ngrams_match_3": 0.854,
      "eval_ag_news_num_pred_words": 46.554,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 38.920540708491075,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.33406986695124685,
      "eval_ag_news_runtime": 10.1353,
      "eval_ag_news_samples_per_second": 49.332,
      "eval_ag_news_steps_per_second": 0.099,
      "eval_ag_news_token_set_f1": 0.34113471006866314,
      "eval_ag_news_token_set_f1_sem": 0.004315589502120115,
      "eval_ag_news_token_set_precision": 0.32230251196354864,
      "eval_ag_news_token_set_recall": 0.3760528980351533,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 81875
    },
    {
      "epoch": 15.72,
      "eval_anthropic_toxic_prompts_accuracy": 0.113,
      "eval_anthropic_toxic_prompts_bleu_score": 3.074410466826119,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12159790861728094,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6609877347946167,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009216894761861497,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2926738262176514,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.07,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.858,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.68,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.384,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.914732848546176,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20801436501360485,
      "eval_anthropic_toxic_prompts_runtime": 9.6643,
      "eval_anthropic_toxic_prompts_samples_per_second": 51.737,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.103,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.34938095029612537,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006275991095644129,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4296266110461715,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3205905337601249,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 81875
    },
    {
      "epoch": 15.72,
      "eval_arxiv_accuracy": 0.33609375,
      "eval_arxiv_bleu_score": 4.218588055979579,
      "eval_arxiv_bleu_score_sem": 0.1167802980186407,
      "eval_arxiv_emb_cos_sim": 0.7482851147651672,
      "eval_arxiv_emb_cos_sim_sem": 0.008023437599102195,
      "eval_arxiv_emb_top1_equal": 0.2578125,
      "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.5122647285461426,
      "eval_arxiv_n_ngrams_match_1": 14.836,
      "eval_arxiv_n_ngrams_match_2": 2.89,
      "eval_arxiv_n_ngrams_match_3": 0.642,
      "eval_arxiv_num_pred_words": 41.748,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 33.524104873611144,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3491676657044462,
      "eval_arxiv_runtime": 10.0887,
      "eval_arxiv_samples_per_second": 49.56,
      "eval_arxiv_steps_per_second": 0.099,
      "eval_arxiv_token_set_f1": 0.34631324822890003,
      "eval_arxiv_token_set_f1_sem": 0.004176669327474083,
      "eval_arxiv_token_set_precision": 0.29527977866938615,
      "eval_arxiv_token_set_recall": 0.4368670721152247,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 81875
    },
    {
      "epoch": 15.72,
      "eval_python_code_alpaca_accuracy": 0.15596875,
      "eval_python_code_alpaca_bleu_score": 4.455513912659973,
      "eval_python_code_alpaca_bleu_score_sem": 0.14139354445013547,
      "eval_python_code_alpaca_emb_cos_sim": 0.7462294101715088,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007730929250813478,
      "eval_python_code_alpaca_emb_top1_equal": 0.125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.989055871963501,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.472,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.806,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.922,
      "eval_python_code_alpaca_num_pred_words": 44.164,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 19.866916721238418,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3165625792828408,
      "eval_python_code_alpaca_runtime": 10.1224,
      "eval_python_code_alpaca_samples_per_second": 49.396,
      "eval_python_code_alpaca_steps_per_second": 0.099,
      "eval_python_code_alpaca_token_set_f1": 0.4589637613160641,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005411980773500886,
      "eval_python_code_alpaca_token_set_precision": 0.5180586119644336,
      "eval_python_code_alpaca_token_set_recall": 0.43098785321148464,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 81875
    },
    {
      "epoch": 15.72,
      "eval_wikibio_accuracy": 0.31315625,
      "eval_wikibio_bleu_score": 5.584470858264593,
      "eval_wikibio_bleu_score_sem": 0.1974678380751109,
      "eval_wikibio_emb_cos_sim": 0.725496768951416,
      "eval_wikibio_emb_cos_sim_sem": 0.009804602929843054,
      "eval_wikibio_emb_top1_equal": 0.203125,
      "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.8564724922180176,
      "eval_wikibio_n_ngrams_match_1": 9.866,
      "eval_wikibio_n_ngrams_match_2": 3.182,
      "eval_wikibio_n_ngrams_match_3": 1.088,
      "eval_wikibio_num_pred_words": 36.088,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 47.29821193865967,
      "eval_wikibio_pred_num_tokens": 62.8671875,
      "eval_wikibio_rouge_score": 0.3451770807211191,
      "eval_wikibio_runtime": 9.6658,
      "eval_wikibio_samples_per_second": 51.729,
      "eval_wikibio_steps_per_second": 0.103,
      "eval_wikibio_token_set_f1": 0.3143199277366929,
      "eval_wikibio_token_set_f1_sem": 0.0051910819341498115,
      "eval_wikibio_token_set_precision": 0.3204884091735503,
      "eval_wikibio_token_set_recall": 0.3282853772180423,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 81875
    },
    {
      "epoch": 15.72,
      "eval_nq_accuracy": 0.51465625,
      "eval_nq_bleu_score": 11.303224318353855,
      "eval_nq_bleu_score_sem": 0.4823117457591126,
      "eval_nq_emb_cos_sim": 0.8220856785774231,
      "eval_nq_emb_cos_sim_sem": 0.007568303660200669,
      "eval_nq_emb_top1_equal": 0.2578125,
      "eval_nq_emb_top1_equal_sem": 0.038815656435002115,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.266073226928711,
      "eval_nq_n_ngrams_match_1": 22.558,
      "eval_nq_n_ngrams_match_2": 8.118,
      "eval_nq_n_ngrams_match_3": 3.754,
      "eval_nq_num_pred_words": 49.37,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.64146653201267,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.433758328343818,
      "eval_nq_runtime": 9.908,
      "eval_nq_samples_per_second": 50.464,
      "eval_nq_steps_per_second": 0.101,
      "eval_nq_token_set_f1": 0.44966478491130624,
      "eval_nq_token_set_f1_sem": 0.005027368720173926,
      "eval_nq_token_set_precision": 0.4063033738279913,
      "eval_nq_token_set_recall": 0.5125654540820727,
      "eval_nq_true_num_tokens": 64.0,
      "step": 81875
    },
    {
      "epoch": 15.72,
      "learning_rate": 0.001,
      "loss": 2.6361,
      "step": 81876
    },
    {
      "epoch": 15.72,
      "learning_rate": 0.001,
      "loss": 2.6425,
      "step": 81888
    },
    {
      "epoch": 15.73,
      "learning_rate": 0.001,
      "loss": 2.6328,
      "step": 81900
    },
    {
      "epoch": 15.73,
      "learning_rate": 0.001,
      "loss": 2.6434,
      "step": 81912
    },
    {
      "epoch": 15.73,
      "learning_rate": 0.001,
      "loss": 2.6471,
      "step": 81924
    },
    {
      "epoch": 15.73,
      "learning_rate": 0.001,
      "loss": 2.6456,
      "step": 81936
    },
    {
      "epoch": 15.74,
      "learning_rate": 0.001,
      "loss": 2.6359,
      "step": 81948
    },
    {
      "epoch": 15.74,
      "learning_rate": 0.001,
      "loss": 2.6537,
      "step": 81960
    },
    {
      "epoch": 15.74,
      "learning_rate": 0.001,
      "loss": 2.6341,
      "step": 81972
    },
    {
      "epoch": 15.74,
      "learning_rate": 0.001,
      "loss": 2.6323,
      "step": 81984
    },
    {
      "epoch": 15.74,
      "learning_rate": 0.001,
      "loss": 2.6446,
      "step": 81996
    },
    {
      "epoch": 15.75,
      "learning_rate": 0.001,
      "loss": 2.6406,
      "step": 82008
    },
    {
      "epoch": 15.75,
      "learning_rate": 0.001,
      "loss": 2.6368,
      "step": 82020
    },
    {
      "epoch": 15.75,
      "learning_rate": 0.001,
      "loss": 2.6343,
      "step": 82032
    },
    {
      "epoch": 15.75,
      "learning_rate": 0.001,
      "loss": 2.6317,
      "step": 82044
    },
    {
      "epoch": 15.76,
      "learning_rate": 0.001,
      "loss": 2.6499,
      "step": 82056
    },
    {
      "epoch": 15.76,
      "learning_rate": 0.001,
      "loss": 2.6372,
      "step": 82068
    },
    {
      "epoch": 15.76,
      "learning_rate": 0.001,
      "loss": 2.6332,
      "step": 82080
    },
    {
      "epoch": 15.76,
      "learning_rate": 0.001,
      "loss": 2.6371,
      "step": 82092
    },
    {
      "epoch": 15.76,
      "learning_rate": 0.001,
      "loss": 2.6379,
      "step": 82104
    },
    {
      "epoch": 15.77,
      "learning_rate": 0.001,
      "loss": 2.6407,
      "step": 82116
    },
    {
      "epoch": 15.77,
      "learning_rate": 0.001,
      "loss": 2.6285,
      "step": 82128
    },
    {
      "epoch": 15.77,
      "learning_rate": 0.001,
      "loss": 2.6395,
      "step": 82140
    },
    {
      "epoch": 15.77,
      "learning_rate": 0.001,
      "loss": 2.6399,
      "step": 82152
    },
    {
      "epoch": 15.78,
      "learning_rate": 0.001,
      "loss": 2.6362,
      "step": 82164
    },
    {
      "epoch": 15.78,
      "learning_rate": 0.001,
      "loss": 2.6344,
      "step": 82176
    },
    {
      "epoch": 15.78,
      "learning_rate": 0.001,
      "loss": 2.6352,
      "step": 82188
    },
    {
      "epoch": 15.78,
      "learning_rate": 0.001,
      "loss": 2.6345,
      "step": 82200
    },
    {
      "epoch": 15.79,
      "learning_rate": 0.001,
      "loss": 2.6321,
      "step": 82212
    },
    {
      "epoch": 15.79,
      "learning_rate": 0.001,
      "loss": 2.6396,
      "step": 82224
    },
    {
      "epoch": 15.79,
      "learning_rate": 0.001,
      "loss": 2.6361,
      "step": 82236
    },
    {
      "epoch": 15.79,
      "learning_rate": 0.001,
      "loss": 2.641,
      "step": 82248
    },
    {
      "epoch": 15.79,
      "learning_rate": 0.001,
      "loss": 2.6356,
      "step": 82260
    },
    {
      "epoch": 15.8,
      "learning_rate": 0.001,
      "loss": 2.6361,
      "step": 82272
    },
    {
      "epoch": 15.8,
      "learning_rate": 0.001,
      "loss": 2.6289,
      "step": 82284
    },
    {
      "epoch": 15.8,
      "learning_rate": 0.001,
      "loss": 2.635,
      "step": 82296
    },
    {
      "epoch": 15.8,
      "learning_rate": 0.001,
      "loss": 2.6331,
      "step": 82308
    },
    {
      "epoch": 15.81,
      "learning_rate": 0.001,
      "loss": 2.6376,
      "step": 82320
    },
    {
      "epoch": 15.81,
      "learning_rate": 0.001,
      "loss": 2.6374,
      "step": 82332
    },
    {
      "epoch": 15.81,
      "learning_rate": 0.001,
      "loss": 2.6436,
      "step": 82344
    },
    {
      "epoch": 15.81,
      "learning_rate": 0.001,
      "loss": 2.631,
      "step": 82356
    },
    {
      "epoch": 15.82,
      "learning_rate": 0.001,
      "loss": 2.6256,
      "step": 82368
    },
    {
      "epoch": 15.82,
      "learning_rate": 0.001,
      "loss": 2.6469,
      "step": 82380
    },
    {
      "epoch": 15.82,
      "learning_rate": 0.001,
      "loss": 2.6391,
      "step": 82392
    },
    {
      "epoch": 15.82,
      "learning_rate": 0.001,
      "loss": 2.6334,
      "step": 82404
    },
    {
      "epoch": 15.82,
      "learning_rate": 0.001,
      "loss": 2.6472,
      "step": 82416
    },
    {
      "epoch": 15.83,
      "learning_rate": 0.001,
      "loss": 2.6317,
      "step": 82428
    },
    {
      "epoch": 15.83,
      "learning_rate": 0.001,
      "loss": 2.6468,
      "step": 82440
    },
    {
      "epoch": 15.83,
      "learning_rate": 0.001,
      "loss": 2.633,
      "step": 82452
    },
    {
      "epoch": 15.83,
      "learning_rate": 0.001,
      "loss": 2.6291,
      "step": 82464
    },
    {
      "epoch": 15.84,
      "learning_rate": 0.001,
      "loss": 2.637,
      "step": 82476
    },
    {
      "epoch": 15.84,
      "learning_rate": 0.001,
      "loss": 2.6334,
      "step": 82488
    },
    {
      "epoch": 15.84,
      "learning_rate": 0.001,
      "loss": 2.6336,
      "step": 82500
    },
    {
      "epoch": 15.84,
      "eval_ag_news_accuracy": 0.3149375,
      "eval_ag_news_bleu_score": 4.743920818591882,
      "eval_ag_news_bleu_score_sem": 0.15399761909805726,
      "eval_ag_news_emb_cos_sim": 0.7969221472740173,
      "eval_ag_news_emb_cos_sim_sem": 0.007173364640595812,
      "eval_ag_news_emb_top1_equal": 0.1796875,
      "eval_ag_news_emb_top1_equal_sem": 0.034068008879424266,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.644028902053833,
      "eval_ag_news_n_ngrams_match_1": 13.57,
      "eval_ag_news_n_ngrams_match_2": 2.952,
      "eval_ag_news_n_ngrams_match_3": 0.89,
      "eval_ag_news_num_pred_words": 46.414,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 38.24561457455161,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3366953221162319,
      "eval_ag_news_runtime": 10.2061,
      "eval_ag_news_samples_per_second": 48.99,
      "eval_ag_news_steps_per_second": 0.098,
      "eval_ag_news_token_set_f1": 0.3407323618587906,
      "eval_ag_news_token_set_f1_sem": 0.004428794274326815,
      "eval_ag_news_token_set_precision": 0.3207270524832423,
      "eval_ag_news_token_set_recall": 0.3785358314661035,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 82500
    },
    {
      "epoch": 15.84,
      "eval_anthropic_toxic_prompts_accuracy": 0.11159375,
      "eval_anthropic_toxic_prompts_bleu_score": 2.8757697911840285,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11265081132312472,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.663439154624939,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009304339594353981,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1328125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030114394778901498,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2902793884277344,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.902,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.75,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.614,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.484,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.850364289117547,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20290831595159797,
      "eval_anthropic_toxic_prompts_runtime": 9.4882,
      "eval_anthropic_toxic_prompts_samples_per_second": 52.697,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.105,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.34820099850808256,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006828062490744303,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.416371584162717,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3252265012940169,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 82500
    },
    {
      "epoch": 15.84,
      "eval_arxiv_accuracy": 0.3378125,
      "eval_arxiv_bleu_score": 4.195023864975421,
      "eval_arxiv_bleu_score_sem": 0.12094020078246574,
      "eval_arxiv_emb_cos_sim": 0.7507302761077881,
      "eval_arxiv_emb_cos_sim_sem": 0.00832024558448353,
      "eval_arxiv_emb_top1_equal": 0.3125,
      "eval_arxiv_emb_top1_equal_sem": 0.041130074229814934,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.49530029296875,
      "eval_arxiv_n_ngrams_match_1": 14.68,
      "eval_arxiv_n_ngrams_match_2": 2.862,
      "eval_arxiv_n_ngrams_match_3": 0.632,
      "eval_arxiv_num_pred_words": 39.936,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 32.960184178604294,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.351676517500356,
      "eval_arxiv_runtime": 10.1285,
      "eval_arxiv_samples_per_second": 49.366,
      "eval_arxiv_steps_per_second": 0.099,
      "eval_arxiv_token_set_f1": 0.3470788505617729,
      "eval_arxiv_token_set_f1_sem": 0.0042230193260675754,
      "eval_arxiv_token_set_precision": 0.2937546752019646,
      "eval_arxiv_token_set_recall": 0.44480318129726143,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 82500
    },
    {
      "epoch": 15.84,
      "eval_python_code_alpaca_accuracy": 0.15590625,
      "eval_python_code_alpaca_bleu_score": 4.122700851138748,
      "eval_python_code_alpaca_bleu_score_sem": 0.1314244047950497,
      "eval_python_code_alpaca_emb_cos_sim": 0.7421751022338867,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007808900920558632,
      "eval_python_code_alpaca_emb_top1_equal": 0.1484375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.9635770320892334,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.418,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.658,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.844,
      "eval_python_code_alpaca_num_pred_words": 45.102,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 19.367124827372525,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.31032192257807634,
      "eval_python_code_alpaca_runtime": 9.6118,
      "eval_python_code_alpaca_samples_per_second": 52.02,
      "eval_python_code_alpaca_steps_per_second": 0.104,
      "eval_python_code_alpaca_token_set_f1": 0.46543015368581114,
      "eval_python_code_alpaca_token_set_f1_sem": 0.00552760558152969,
      "eval_python_code_alpaca_token_set_precision": 0.5124026161975384,
      "eval_python_code_alpaca_token_set_recall": 0.4454855803306646,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 82500
    },
    {
      "epoch": 15.84,
      "eval_wikibio_accuracy": 0.31265625,
      "eval_wikibio_bleu_score": 5.538278207704747,
      "eval_wikibio_bleu_score_sem": 0.19132065399044007,
      "eval_wikibio_emb_cos_sim": 0.7391377687454224,
      "eval_wikibio_emb_cos_sim_sem": 0.009374521999658715,
      "eval_wikibio_emb_top1_equal": 0.1953125,
      "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.8321797847747803,
      "eval_wikibio_n_ngrams_match_1": 9.972,
      "eval_wikibio_n_ngrams_match_2": 3.218,
      "eval_wikibio_n_ngrams_match_3": 1.118,
      "eval_wikibio_num_pred_words": 36.88,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 46.16305416549366,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.34619185338538433,
      "eval_wikibio_runtime": 9.8113,
      "eval_wikibio_samples_per_second": 50.962,
      "eval_wikibio_steps_per_second": 0.102,
      "eval_wikibio_token_set_f1": 0.3139462947599833,
      "eval_wikibio_token_set_f1_sem": 0.0053105548907498345,
      "eval_wikibio_token_set_precision": 0.3241977560214357,
      "eval_wikibio_token_set_recall": 0.31918734131446624,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 82500
    },
    {
      "epoch": 15.84,
      "eval_nq_accuracy": 0.51628125,
      "eval_nq_bleu_score": 11.318945229879374,
      "eval_nq_bleu_score_sem": 0.47357018239481374,
      "eval_nq_emb_cos_sim": 0.8236929774284363,
      "eval_nq_emb_cos_sim_sem": 0.007367660953686526,
      "eval_nq_emb_top1_equal": 0.2578125,
      "eval_nq_emb_top1_equal_sem": 0.038815656435002115,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.261533260345459,
      "eval_nq_n_ngrams_match_1": 22.708,
      "eval_nq_n_ngrams_match_2": 8.258,
      "eval_nq_n_ngrams_match_3": 3.744,
      "eval_nq_num_pred_words": 49.266,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.597793807511305,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4374386113314346,
      "eval_nq_runtime": 13.4722,
      "eval_nq_samples_per_second": 37.114,
      "eval_nq_steps_per_second": 0.074,
      "eval_nq_token_set_f1": 0.4528090485533119,
      "eval_nq_token_set_f1_sem": 0.004881551339401594,
      "eval_nq_token_set_precision": 0.4083421458933204,
      "eval_nq_token_set_recall": 0.5159063932117688,
      "eval_nq_true_num_tokens": 64.0,
      "step": 82500
    },
    {
      "epoch": 15.84,
      "learning_rate": 0.001,
      "loss": 2.6388,
      "step": 82512
    },
    {
      "epoch": 15.85,
      "learning_rate": 0.001,
      "loss": 2.6295,
      "step": 82524
    },
    {
      "epoch": 15.85,
      "learning_rate": 0.001,
      "loss": 2.6355,
      "step": 82536
    },
    {
      "epoch": 15.85,
      "learning_rate": 0.001,
      "loss": 2.6354,
      "step": 82548
    },
    {
      "epoch": 15.85,
      "learning_rate": 0.001,
      "loss": 2.6374,
      "step": 82560
    },
    {
      "epoch": 15.85,
      "learning_rate": 0.001,
      "loss": 2.6465,
      "step": 82572
    },
    {
      "epoch": 15.86,
      "learning_rate": 0.001,
      "loss": 2.6461,
      "step": 82584
    },
    {
      "epoch": 15.86,
      "learning_rate": 0.001,
      "loss": 2.6351,
      "step": 82596
    },
    {
      "epoch": 15.86,
      "learning_rate": 0.001,
      "loss": 2.6353,
      "step": 82608
    },
    {
      "epoch": 15.86,
      "learning_rate": 0.001,
      "loss": 2.6363,
      "step": 82620
    },
    {
      "epoch": 15.87,
      "learning_rate": 0.001,
      "loss": 2.6353,
      "step": 82632
    },
    {
      "epoch": 15.87,
      "learning_rate": 0.001,
      "loss": 2.6298,
      "step": 82644
    },
    {
      "epoch": 15.87,
      "learning_rate": 0.001,
      "loss": 2.6401,
      "step": 82656
    },
    {
      "epoch": 15.87,
      "learning_rate": 0.001,
      "loss": 2.6289,
      "step": 82668
    },
    {
      "epoch": 15.88,
      "learning_rate": 0.001,
      "loss": 2.6451,
      "step": 82680
    },
    {
      "epoch": 15.88,
      "learning_rate": 0.001,
      "loss": 2.6386,
      "step": 82692
    },
    {
      "epoch": 15.88,
      "learning_rate": 0.001,
      "loss": 2.6327,
      "step": 82704
    },
    {
      "epoch": 15.88,
      "learning_rate": 0.001,
      "loss": 2.6376,
      "step": 82716
    },
    {
      "epoch": 15.88,
      "learning_rate": 0.001,
      "loss": 2.6367,
      "step": 82728
    },
    {
      "epoch": 15.89,
      "learning_rate": 0.001,
      "loss": 2.6408,
      "step": 82740
    },
    {
      "epoch": 15.89,
      "learning_rate": 0.001,
      "loss": 2.64,
      "step": 82752
    },
    {
      "epoch": 15.89,
      "learning_rate": 0.001,
      "loss": 2.6289,
      "step": 82764
    },
    {
      "epoch": 15.89,
      "learning_rate": 0.001,
      "loss": 2.633,
      "step": 82776
    },
    {
      "epoch": 15.9,
      "learning_rate": 0.001,
      "loss": 2.6398,
      "step": 82788
    },
    {
      "epoch": 15.9,
      "learning_rate": 0.001,
      "loss": 2.6356,
      "step": 82800
    },
    {
      "epoch": 15.9,
      "learning_rate": 0.001,
      "loss": 2.6277,
      "step": 82812
    },
    {
      "epoch": 15.9,
      "learning_rate": 0.001,
      "loss": 2.6326,
      "step": 82824
    },
    {
      "epoch": 15.91,
      "learning_rate": 0.001,
      "loss": 2.6276,
      "step": 82836
    },
    {
      "epoch": 15.91,
      "learning_rate": 0.001,
      "loss": 2.6239,
      "step": 82848
    },
    {
      "epoch": 15.91,
      "learning_rate": 0.001,
      "loss": 2.6373,
      "step": 82860
    },
    {
      "epoch": 15.91,
      "learning_rate": 0.001,
      "loss": 2.6393,
      "step": 82872
    },
    {
      "epoch": 15.91,
      "learning_rate": 0.001,
      "loss": 2.6413,
      "step": 82884
    },
    {
      "epoch": 15.92,
      "learning_rate": 0.001,
      "loss": 2.6406,
      "step": 82896
    },
    {
      "epoch": 15.92,
      "learning_rate": 0.001,
      "loss": 2.6303,
      "step": 82908
    },
    {
      "epoch": 15.92,
      "learning_rate": 0.001,
      "loss": 2.646,
      "step": 82920
    },
    {
      "epoch": 15.92,
      "learning_rate": 0.001,
      "loss": 2.6312,
      "step": 82932
    },
    {
      "epoch": 15.93,
      "learning_rate": 0.001,
      "loss": 2.6293,
      "step": 82944
    },
    {
      "epoch": 15.93,
      "learning_rate": 0.001,
      "loss": 2.6231,
      "step": 82956
    },
    {
      "epoch": 15.93,
      "learning_rate": 0.001,
      "loss": 2.6377,
      "step": 82968
    },
    {
      "epoch": 15.93,
      "learning_rate": 0.001,
      "loss": 2.6292,
      "step": 82980
    },
    {
      "epoch": 15.94,
      "learning_rate": 0.001,
      "loss": 2.6319,
      "step": 82992
    },
    {
      "epoch": 15.94,
      "learning_rate": 0.001,
      "loss": 2.6313,
      "step": 83004
    },
    {
      "epoch": 15.94,
      "learning_rate": 0.001,
      "loss": 2.6283,
      "step": 83016
    },
    {
      "epoch": 15.94,
      "learning_rate": 0.001,
      "loss": 2.6445,
      "step": 83028
    },
    {
      "epoch": 15.94,
      "learning_rate": 0.001,
      "loss": 2.6474,
      "step": 83040
    },
    {
      "epoch": 15.95,
      "learning_rate": 0.001,
      "loss": 2.636,
      "step": 83052
    },
    {
      "epoch": 15.95,
      "learning_rate": 0.001,
      "loss": 2.6451,
      "step": 83064
    },
    {
      "epoch": 15.95,
      "learning_rate": 0.001,
      "loss": 2.6384,
      "step": 83076
    },
    {
      "epoch": 15.95,
      "learning_rate": 0.001,
      "loss": 2.636,
      "step": 83088
    },
    {
      "epoch": 15.96,
      "learning_rate": 0.001,
      "loss": 2.6367,
      "step": 83100
    },
    {
      "epoch": 15.96,
      "learning_rate": 0.001,
      "loss": 2.6332,
      "step": 83112
    },
    {
      "epoch": 15.96,
      "learning_rate": 0.001,
      "loss": 2.6495,
      "step": 83124
    },
    {
      "epoch": 15.96,
      "eval_ag_news_accuracy": 0.31571875,
      "eval_ag_news_bleu_score": 4.7158153481480785,
      "eval_ag_news_bleu_score_sem": 0.149493396097868,
      "eval_ag_news_emb_cos_sim": 0.8041837215423584,
      "eval_ag_news_emb_cos_sim_sem": 0.006463236341302807,
      "eval_ag_news_emb_top1_equal": 0.2578125,
      "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.6436686515808105,
      "eval_ag_news_n_ngrams_match_1": 13.762,
      "eval_ag_news_n_ngrams_match_2": 3.04,
      "eval_ag_news_n_ngrams_match_3": 0.84,
      "eval_ag_news_num_pred_words": 46.724,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 38.231839055277746,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.33859412448786486,
      "eval_ag_news_runtime": 10.5173,
      "eval_ag_news_samples_per_second": 47.541,
      "eval_ag_news_steps_per_second": 0.095,
      "eval_ag_news_token_set_f1": 0.34299660295670165,
      "eval_ag_news_token_set_f1_sem": 0.004379750457796472,
      "eval_ag_news_token_set_precision": 0.3262943497457247,
      "eval_ag_news_token_set_recall": 0.3780150029047542,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 83125
    },
    {
      "epoch": 15.96,
      "eval_anthropic_toxic_prompts_accuracy": 0.1111875,
      "eval_anthropic_toxic_prompts_bleu_score": 2.966256704995643,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11747048160457095,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6478981971740723,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00952050682909931,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2904324531555176,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.922,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.788,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.652,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.238,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.854474447370443,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20393821476296053,
      "eval_anthropic_toxic_prompts_runtime": 9.9979,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.01,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.1,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3500859084481026,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006656529371002729,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4190015808094765,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3309564905086757,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 83125
    },
    {
      "epoch": 15.96,
      "eval_arxiv_accuracy": 0.33925,
      "eval_arxiv_bleu_score": 4.23220891877265,
      "eval_arxiv_bleu_score_sem": 0.12432208954775212,
      "eval_arxiv_emb_cos_sim": 0.7382143139839172,
      "eval_arxiv_emb_cos_sim_sem": 0.008107729979034965,
      "eval_arxiv_emb_top1_equal": 0.1953125,
      "eval_arxiv_emb_top1_equal_sem": 0.035178457165496856,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.49281644821167,
      "eval_arxiv_n_ngrams_match_1": 14.534,
      "eval_arxiv_n_ngrams_match_2": 2.844,
      "eval_arxiv_n_ngrams_match_3": 0.65,
      "eval_arxiv_num_pred_words": 40.24,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 32.87841778748886,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.34569924447872247,
      "eval_arxiv_runtime": 10.3184,
      "eval_arxiv_samples_per_second": 48.457,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.3390566314840279,
      "eval_arxiv_token_set_f1_sem": 0.004331655636610428,
      "eval_arxiv_token_set_precision": 0.2888177199714239,
      "eval_arxiv_token_set_recall": 0.42727804965630967,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 83125
    },
    {
      "epoch": 15.96,
      "eval_python_code_alpaca_accuracy": 0.15775,
      "eval_python_code_alpaca_bleu_score": 4.216840807094817,
      "eval_python_code_alpaca_bleu_score_sem": 0.1336840732887577,
      "eval_python_code_alpaca_emb_cos_sim": 0.7436763644218445,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008376523481046717,
      "eval_python_code_alpaca_emb_top1_equal": 0.109375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.9375433921813965,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.246,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.626,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.816,
      "eval_python_code_alpaca_num_pred_words": 42.952,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.86943452742655,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.31167991216738633,
      "eval_python_code_alpaca_runtime": 10.7835,
      "eval_python_code_alpaca_samples_per_second": 46.367,
      "eval_python_code_alpaca_steps_per_second": 0.093,
      "eval_python_code_alpaca_token_set_f1": 0.45965523369747596,
      "eval_python_code_alpaca_token_set_f1_sem": 0.006060744782603551,
      "eval_python_code_alpaca_token_set_precision": 0.5054978829886636,
      "eval_python_code_alpaca_token_set_recall": 0.4481730812637992,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 83125
    },
    {
      "epoch": 15.96,
      "eval_wikibio_accuracy": 0.314875,
      "eval_wikibio_bleu_score": 5.605258287117928,
      "eval_wikibio_bleu_score_sem": 0.19961011748198315,
      "eval_wikibio_emb_cos_sim": 0.7294102311134338,
      "eval_wikibio_emb_cos_sim_sem": 0.01004817944555513,
      "eval_wikibio_emb_top1_equal": 0.171875,
      "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.8227322101593018,
      "eval_wikibio_n_ngrams_match_1": 10.124,
      "eval_wikibio_n_ngrams_match_2": 3.308,
      "eval_wikibio_n_ngrams_match_3": 1.138,
      "eval_wikibio_num_pred_words": 36.982,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 45.728978974335945,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3503548495126094,
      "eval_wikibio_runtime": 10.5536,
      "eval_wikibio_samples_per_second": 47.377,
      "eval_wikibio_steps_per_second": 0.095,
      "eval_wikibio_token_set_f1": 0.3173837592198084,
      "eval_wikibio_token_set_f1_sem": 0.005280946933688539,
      "eval_wikibio_token_set_precision": 0.3282269135592735,
      "eval_wikibio_token_set_recall": 0.3243064622440714,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 83125
    },
    {
      "epoch": 15.96,
      "eval_nq_accuracy": 0.51765625,
      "eval_nq_bleu_score": 11.121061925867597,
      "eval_nq_bleu_score_sem": 0.4568811427806432,
      "eval_nq_emb_cos_sim": 0.8262262344360352,
      "eval_nq_emb_cos_sim_sem": 0.007302851613022423,
      "eval_nq_emb_top1_equal": 0.3046875,
      "eval_nq_emb_top1_equal_sem": 0.04084279867618665,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.2610249519348145,
      "eval_nq_n_ngrams_match_1": 22.408,
      "eval_nq_n_ngrams_match_2": 8.124,
      "eval_nq_n_ngrams_match_3": 3.688,
      "eval_nq_num_pred_words": 48.944,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.592916407911952,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.433072069476134,
      "eval_nq_runtime": 10.0085,
      "eval_nq_samples_per_second": 49.958,
      "eval_nq_steps_per_second": 0.1,
      "eval_nq_token_set_f1": 0.45073766113574465,
      "eval_nq_token_set_f1_sem": 0.004862898292423386,
      "eval_nq_token_set_precision": 0.4054533992713064,
      "eval_nq_token_set_recall": 0.5165805774669422,
      "eval_nq_true_num_tokens": 64.0,
      "step": 83125
    },
    {
      "epoch": 15.96,
      "learning_rate": 0.001,
      "loss": 2.6291,
      "step": 83136
    },
    {
      "epoch": 15.97,
      "learning_rate": 0.001,
      "loss": 2.6414,
      "step": 83148
    },
    {
      "epoch": 15.97,
      "learning_rate": 0.001,
      "loss": 2.6486,
      "step": 83160
    },
    {
      "epoch": 15.97,
      "learning_rate": 0.001,
      "loss": 2.6524,
      "step": 83172
    },
    {
      "epoch": 15.97,
      "learning_rate": 0.001,
      "loss": 2.6396,
      "step": 83184
    },
    {
      "epoch": 15.97,
      "learning_rate": 0.001,
      "loss": 2.6256,
      "step": 83196
    },
    {
      "epoch": 15.98,
      "learning_rate": 0.001,
      "loss": 2.6472,
      "step": 83208
    },
    {
      "epoch": 15.98,
      "learning_rate": 0.001,
      "loss": 2.6433,
      "step": 83220
    },
    {
      "epoch": 15.98,
      "learning_rate": 0.001,
      "loss": 2.632,
      "step": 83232
    },
    {
      "epoch": 15.98,
      "learning_rate": 0.001,
      "loss": 2.6261,
      "step": 83244
    },
    {
      "epoch": 15.99,
      "learning_rate": 0.001,
      "loss": 2.6394,
      "step": 83256
    },
    {
      "epoch": 15.99,
      "learning_rate": 0.001,
      "loss": 2.6415,
      "step": 83268
    },
    {
      "epoch": 15.99,
      "learning_rate": 0.001,
      "loss": 2.6361,
      "step": 83280
    },
    {
      "epoch": 15.99,
      "learning_rate": 0.001,
      "loss": 2.6442,
      "step": 83292
    },
    {
      "epoch": 16.0,
      "learning_rate": 0.001,
      "loss": 2.6426,
      "step": 83304
    },
    {
      "epoch": 16.0,
      "learning_rate": 0.001,
      "loss": 2.6368,
      "step": 83316
    },
    {
      "epoch": 16.0,
      "learning_rate": 0.001,
      "loss": 2.6352,
      "step": 83328
    },
    {
      "epoch": 16.0,
      "learning_rate": 0.001,
      "loss": 2.6192,
      "step": 83340
    },
    {
      "epoch": 16.0,
      "learning_rate": 0.001,
      "loss": 2.6171,
      "step": 83352
    },
    {
      "epoch": 16.01,
      "learning_rate": 0.001,
      "loss": 2.6304,
      "step": 83364
    },
    {
      "epoch": 16.01,
      "learning_rate": 0.001,
      "loss": 2.6274,
      "step": 83376
    },
    {
      "epoch": 16.01,
      "learning_rate": 0.001,
      "loss": 2.6245,
      "step": 83388
    },
    {
      "epoch": 16.01,
      "learning_rate": 0.001,
      "loss": 2.621,
      "step": 83400
    },
    {
      "epoch": 16.02,
      "learning_rate": 0.001,
      "loss": 2.6192,
      "step": 83412
    },
    {
      "epoch": 16.02,
      "learning_rate": 0.001,
      "loss": 2.6137,
      "step": 83424
    },
    {
      "epoch": 16.02,
      "learning_rate": 0.001,
      "loss": 2.6221,
      "step": 83436
    },
    {
      "epoch": 16.02,
      "learning_rate": 0.001,
      "loss": 2.6213,
      "step": 83448
    },
    {
      "epoch": 16.03,
      "learning_rate": 0.001,
      "loss": 2.6155,
      "step": 83460
    },
    {
      "epoch": 16.03,
      "learning_rate": 0.001,
      "loss": 2.6218,
      "step": 83472
    },
    {
      "epoch": 16.03,
      "learning_rate": 0.001,
      "loss": 2.6184,
      "step": 83484
    },
    {
      "epoch": 16.03,
      "learning_rate": 0.001,
      "loss": 2.6187,
      "step": 83496
    },
    {
      "epoch": 16.03,
      "learning_rate": 0.001,
      "loss": 2.6256,
      "step": 83508
    },
    {
      "epoch": 16.04,
      "learning_rate": 0.001,
      "loss": 2.6112,
      "step": 83520
    },
    {
      "epoch": 16.04,
      "learning_rate": 0.001,
      "loss": 2.6113,
      "step": 83532
    },
    {
      "epoch": 16.04,
      "learning_rate": 0.001,
      "loss": 2.6272,
      "step": 83544
    },
    {
      "epoch": 16.04,
      "learning_rate": 0.001,
      "loss": 2.6249,
      "step": 83556
    },
    {
      "epoch": 16.05,
      "learning_rate": 0.001,
      "loss": 2.625,
      "step": 83568
    },
    {
      "epoch": 16.05,
      "learning_rate": 0.001,
      "loss": 2.6192,
      "step": 83580
    },
    {
      "epoch": 16.05,
      "learning_rate": 0.001,
      "loss": 2.6198,
      "step": 83592
    },
    {
      "epoch": 16.05,
      "learning_rate": 0.001,
      "loss": 2.6224,
      "step": 83604
    },
    {
      "epoch": 16.06,
      "learning_rate": 0.001,
      "loss": 2.6256,
      "step": 83616
    },
    {
      "epoch": 16.06,
      "learning_rate": 0.001,
      "loss": 2.6299,
      "step": 83628
    },
    {
      "epoch": 16.06,
      "learning_rate": 0.001,
      "loss": 2.619,
      "step": 83640
    },
    {
      "epoch": 16.06,
      "learning_rate": 0.001,
      "loss": 2.6157,
      "step": 83652
    },
    {
      "epoch": 16.06,
      "learning_rate": 0.001,
      "loss": 2.63,
      "step": 83664
    },
    {
      "epoch": 16.07,
      "learning_rate": 0.001,
      "loss": 2.6111,
      "step": 83676
    },
    {
      "epoch": 16.07,
      "learning_rate": 0.001,
      "loss": 2.6288,
      "step": 83688
    },
    {
      "epoch": 16.07,
      "learning_rate": 0.001,
      "loss": 2.6177,
      "step": 83700
    },
    {
      "epoch": 16.07,
      "learning_rate": 0.001,
      "loss": 2.6263,
      "step": 83712
    },
    {
      "epoch": 16.08,
      "learning_rate": 0.001,
      "loss": 2.6216,
      "step": 83724
    },
    {
      "epoch": 16.08,
      "learning_rate": 0.001,
      "loss": 2.6263,
      "step": 83736
    },
    {
      "epoch": 16.08,
      "learning_rate": 0.001,
      "loss": 2.628,
      "step": 83748
    },
    {
      "epoch": 16.08,
      "eval_ag_news_accuracy": 0.31571875,
      "eval_ag_news_bleu_score": 4.618034986198973,
      "eval_ag_news_bleu_score_sem": 0.14789886440354028,
      "eval_ag_news_emb_cos_sim": 0.7968583106994629,
      "eval_ag_news_emb_cos_sim_sem": 0.007380884527406819,
      "eval_ag_news_emb_top1_equal": 0.28125,
      "eval_ag_news_emb_top1_equal_sem": 0.039896367485272234,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.6520488262176514,
      "eval_ag_news_n_ngrams_match_1": 13.628,
      "eval_ag_news_n_ngrams_match_2": 2.94,
      "eval_ag_news_n_ngrams_match_3": 0.836,
      "eval_ag_news_num_pred_words": 46.318,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 38.55357476106492,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.33657850819526497,
      "eval_ag_news_runtime": 9.8453,
      "eval_ag_news_samples_per_second": 50.785,
      "eval_ag_news_steps_per_second": 0.102,
      "eval_ag_news_token_set_f1": 0.3380274685715532,
      "eval_ag_news_token_set_f1_sem": 0.004571799012330631,
      "eval_ag_news_token_set_precision": 0.3217310225997908,
      "eval_ag_news_token_set_recall": 0.3753033130487659,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 83750
    },
    {
      "epoch": 16.08,
      "eval_anthropic_toxic_prompts_accuracy": 0.1109375,
      "eval_anthropic_toxic_prompts_bleu_score": 3.0222862580351335,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11738101824093727,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6538487672805786,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010066713522187796,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.140625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030847557647994725,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.3295137882232666,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.938,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.828,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.686,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.164,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 27.92476105428126,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20294638649351732,
      "eval_anthropic_toxic_prompts_runtime": 10.8426,
      "eval_anthropic_toxic_prompts_samples_per_second": 46.115,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.092,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3466827605390762,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006285916213736284,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.41859248089187356,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32533021540303253,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 83750
    },
    {
      "epoch": 16.08,
      "eval_arxiv_accuracy": 0.34034375,
      "eval_arxiv_bleu_score": 4.127423373112979,
      "eval_arxiv_bleu_score_sem": 0.12185425671865842,
      "eval_arxiv_emb_cos_sim": 0.73603355884552,
      "eval_arxiv_emb_cos_sim_sem": 0.00845819748801575,
      "eval_arxiv_emb_top1_equal": 0.2578125,
      "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.5093226432800293,
      "eval_arxiv_n_ngrams_match_1": 14.444,
      "eval_arxiv_n_ngrams_match_2": 2.76,
      "eval_arxiv_n_ngrams_match_3": 0.616,
      "eval_arxiv_num_pred_words": 40.438,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 33.425619046493296,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.34379042548984545,
      "eval_arxiv_runtime": 10.0146,
      "eval_arxiv_samples_per_second": 49.927,
      "eval_arxiv_steps_per_second": 0.1,
      "eval_arxiv_token_set_f1": 0.3358893421196323,
      "eval_arxiv_token_set_f1_sem": 0.004352717057264788,
      "eval_arxiv_token_set_precision": 0.2869200031339711,
      "eval_arxiv_token_set_recall": 0.42342576457847847,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 83750
    },
    {
      "epoch": 16.08,
      "eval_python_code_alpaca_accuracy": 0.15371875,
      "eval_python_code_alpaca_bleu_score": 4.368329700521992,
      "eval_python_code_alpaca_bleu_score_sem": 0.133721985079977,
      "eval_python_code_alpaca_emb_cos_sim": 0.7390093803405762,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.00882011445181232,
      "eval_python_code_alpaca_emb_top1_equal": 0.1171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 3.0144386291503906,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.618,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.738,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.842,
      "eval_python_code_alpaca_num_pred_words": 42.89,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 20.377648311324897,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3275447197112485,
      "eval_python_code_alpaca_runtime": 9.4836,
      "eval_python_code_alpaca_samples_per_second": 52.722,
      "eval_python_code_alpaca_steps_per_second": 0.105,
      "eval_python_code_alpaca_token_set_f1": 0.46918880461179113,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005625904039514021,
      "eval_python_code_alpaca_token_set_precision": 0.5273688185119285,
      "eval_python_code_alpaca_token_set_recall": 0.44039669123479835,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 83750
    },
    {
      "epoch": 16.08,
      "eval_wikibio_accuracy": 0.31578125,
      "eval_wikibio_bleu_score": 5.698544085571491,
      "eval_wikibio_bleu_score_sem": 0.21452601186787376,
      "eval_wikibio_emb_cos_sim": 0.7213823795318604,
      "eval_wikibio_emb_cos_sim_sem": 0.010027102070032635,
      "eval_wikibio_emb_top1_equal": 0.2109375,
      "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.8448588848114014,
      "eval_wikibio_n_ngrams_match_1": 9.81,
      "eval_wikibio_n_ngrams_match_2": 3.258,
      "eval_wikibio_n_ngrams_match_3": 1.192,
      "eval_wikibio_num_pred_words": 36.308,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 46.75208645589393,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3413679034825258,
      "eval_wikibio_runtime": 9.9827,
      "eval_wikibio_samples_per_second": 50.086,
      "eval_wikibio_steps_per_second": 0.1,
      "eval_wikibio_token_set_f1": 0.3072155253130508,
      "eval_wikibio_token_set_f1_sem": 0.005834321851084432,
      "eval_wikibio_token_set_precision": 0.31666316004807177,
      "eval_wikibio_token_set_recall": 0.31865292330922224,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 83750
    },
    {
      "epoch": 16.08,
      "eval_nq_accuracy": 0.5160625,
      "eval_nq_bleu_score": 11.519020405608947,
      "eval_nq_bleu_score_sem": 0.4763464428342576,
      "eval_nq_emb_cos_sim": 0.8216896653175354,
      "eval_nq_emb_cos_sim_sem": 0.0074411275204525704,
      "eval_nq_emb_top1_equal": 0.25,
      "eval_nq_emb_top1_equal_sem": 0.03842366440207048,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.2601318359375,
      "eval_nq_n_ngrams_match_1": 22.814,
      "eval_nq_n_ngrams_match_2": 8.24,
      "eval_nq_n_ngrams_match_3": 3.81,
      "eval_nq_num_pred_words": 49.148,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.584352645592952,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4403451104988666,
      "eval_nq_runtime": 10.4114,
      "eval_nq_samples_per_second": 48.024,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.4559059226611072,
      "eval_nq_token_set_f1_sem": 0.0048010431161150755,
      "eval_nq_token_set_precision": 0.41266719159446524,
      "eval_nq_token_set_recall": 0.5184486411962422,
      "eval_nq_true_num_tokens": 64.0,
      "step": 83750
    },
    {
      "epoch": 16.08,
      "learning_rate": 0.001,
      "loss": 2.6377,
      "step": 83760
    },
    {
      "epoch": 16.09,
      "learning_rate": 0.001,
      "loss": 2.6327,
      "step": 83772
    },
    {
      "epoch": 16.09,
      "learning_rate": 0.001,
      "loss": 2.6349,
      "step": 83784
    },
    {
      "epoch": 16.09,
      "learning_rate": 0.001,
      "loss": 2.6263,
      "step": 83796
    },
    {
      "epoch": 16.09,
      "learning_rate": 0.001,
      "loss": 2.6257,
      "step": 83808
    },
    {
      "epoch": 16.09,
      "learning_rate": 0.001,
      "loss": 2.6296,
      "step": 83820
    },
    {
      "epoch": 16.1,
      "learning_rate": 0.001,
      "loss": 2.6226,
      "step": 83832
    },
    {
      "epoch": 16.1,
      "learning_rate": 0.001,
      "loss": 2.6152,
      "step": 83844
    },
    {
      "epoch": 16.1,
      "learning_rate": 0.001,
      "loss": 2.6121,
      "step": 83856
    },
    {
      "epoch": 16.1,
      "learning_rate": 0.001,
      "loss": 2.6223,
      "step": 83868
    },
    {
      "epoch": 16.11,
      "learning_rate": 0.001,
      "loss": 2.6311,
      "step": 83880
    },
    {
      "epoch": 16.11,
      "learning_rate": 0.001,
      "loss": 2.6207,
      "step": 83892
    },
    {
      "epoch": 16.11,
      "learning_rate": 0.001,
      "loss": 2.6284,
      "step": 83904
    },
    {
      "epoch": 16.11,
      "learning_rate": 0.001,
      "loss": 2.622,
      "step": 83916
    },
    {
      "epoch": 16.12,
      "learning_rate": 0.001,
      "loss": 2.6151,
      "step": 83928
    },
    {
      "epoch": 16.12,
      "learning_rate": 0.001,
      "loss": 2.6243,
      "step": 83940
    },
    {
      "epoch": 16.12,
      "learning_rate": 0.001,
      "loss": 2.6194,
      "step": 83952
    },
    {
      "epoch": 16.12,
      "learning_rate": 0.001,
      "loss": 2.6235,
      "step": 83964
    },
    {
      "epoch": 16.12,
      "learning_rate": 0.001,
      "loss": 2.6246,
      "step": 83976
    },
    {
      "epoch": 16.13,
      "learning_rate": 0.001,
      "loss": 2.6273,
      "step": 83988
    },
    {
      "epoch": 16.13,
      "learning_rate": 0.001,
      "loss": 2.6192,
      "step": 84000
    },
    {
      "epoch": 16.13,
      "learning_rate": 0.001,
      "loss": 2.6333,
      "step": 84012
    },
    {
      "epoch": 16.13,
      "learning_rate": 0.001,
      "loss": 2.6312,
      "step": 84024
    },
    {
      "epoch": 16.14,
      "learning_rate": 0.001,
      "loss": 2.6152,
      "step": 84036
    },
    {
      "epoch": 16.14,
      "learning_rate": 0.001,
      "loss": 2.6343,
      "step": 84048
    },
    {
      "epoch": 16.14,
      "learning_rate": 0.001,
      "loss": 2.6167,
      "step": 84060
    },
    {
      "epoch": 16.14,
      "learning_rate": 0.001,
      "loss": 2.6234,
      "step": 84072
    },
    {
      "epoch": 16.15,
      "learning_rate": 0.001,
      "loss": 2.6369,
      "step": 84084
    },
    {
      "epoch": 16.15,
      "learning_rate": 0.001,
      "loss": 2.6258,
      "step": 84096
    },
    {
      "epoch": 16.15,
      "learning_rate": 0.001,
      "loss": 2.6303,
      "step": 84108
    },
    {
      "epoch": 16.15,
      "learning_rate": 0.001,
      "loss": 2.6273,
      "step": 84120
    },
    {
      "epoch": 16.15,
      "learning_rate": 0.001,
      "loss": 2.6277,
      "step": 84132
    },
    {
      "epoch": 16.16,
      "learning_rate": 0.001,
      "loss": 2.6222,
      "step": 84144
    },
    {
      "epoch": 16.16,
      "learning_rate": 0.001,
      "loss": 2.6205,
      "step": 84156
    },
    {
      "epoch": 16.16,
      "learning_rate": 0.001,
      "loss": 2.6221,
      "step": 84168
    },
    {
      "epoch": 16.16,
      "learning_rate": 0.001,
      "loss": 2.6219,
      "step": 84180
    },
    {
      "epoch": 16.17,
      "learning_rate": 0.001,
      "loss": 2.6249,
      "step": 84192
    },
    {
      "epoch": 16.17,
      "learning_rate": 0.001,
      "loss": 2.6221,
      "step": 84204
    },
    {
      "epoch": 16.17,
      "learning_rate": 0.001,
      "loss": 2.6182,
      "step": 84216
    },
    {
      "epoch": 16.17,
      "learning_rate": 0.001,
      "loss": 2.6208,
      "step": 84228
    },
    {
      "epoch": 16.18,
      "learning_rate": 0.001,
      "loss": 2.6252,
      "step": 84240
    },
    {
      "epoch": 16.18,
      "learning_rate": 0.001,
      "loss": 2.6221,
      "step": 84252
    },
    {
      "epoch": 16.18,
      "learning_rate": 0.001,
      "loss": 2.6284,
      "step": 84264
    },
    {
      "epoch": 16.18,
      "learning_rate": 0.001,
      "loss": 2.6267,
      "step": 84276
    },
    {
      "epoch": 16.18,
      "learning_rate": 0.001,
      "loss": 2.6296,
      "step": 84288
    },
    {
      "epoch": 16.19,
      "learning_rate": 0.001,
      "loss": 2.6327,
      "step": 84300
    },
    {
      "epoch": 16.19,
      "learning_rate": 0.001,
      "loss": 2.6287,
      "step": 84312
    },
    {
      "epoch": 16.19,
      "learning_rate": 0.001,
      "loss": 2.6225,
      "step": 84324
    },
    {
      "epoch": 16.19,
      "learning_rate": 0.001,
      "loss": 2.6263,
      "step": 84336
    },
    {
      "epoch": 16.2,
      "learning_rate": 0.001,
      "loss": 2.6138,
      "step": 84348
    },
    {
      "epoch": 16.2,
      "learning_rate": 0.001,
      "loss": 2.6338,
      "step": 84360
    },
    {
      "epoch": 16.2,
      "learning_rate": 0.001,
      "loss": 2.6146,
      "step": 84372
    },
    {
      "epoch": 16.2,
      "eval_ag_news_accuracy": 0.3146875,
      "eval_ag_news_bleu_score": 4.88589691783047,
      "eval_ag_news_bleu_score_sem": 0.15371935382029328,
      "eval_ag_news_emb_cos_sim": 0.7930101156234741,
      "eval_ag_news_emb_cos_sim_sem": 0.008083512004753901,
      "eval_ag_news_emb_top1_equal": 0.2421875,
      "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.6391215324401855,
      "eval_ag_news_n_ngrams_match_1": 13.8,
      "eval_ag_news_n_ngrams_match_2": 3.044,
      "eval_ag_news_n_ngrams_match_3": 0.918,
      "eval_ag_news_num_pred_words": 46.578,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 38.058388976074355,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3414253069014549,
      "eval_ag_news_runtime": 10.3437,
      "eval_ag_news_samples_per_second": 48.339,
      "eval_ag_news_steps_per_second": 0.097,
      "eval_ag_news_token_set_f1": 0.34519393810923765,
      "eval_ag_news_token_set_f1_sem": 0.004459209421154889,
      "eval_ag_news_token_set_precision": 0.3295150500837755,
      "eval_ag_news_token_set_recall": 0.3798884725693181,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 84375
    },
    {
      "epoch": 16.2,
      "eval_anthropic_toxic_prompts_accuracy": 0.112625,
      "eval_anthropic_toxic_prompts_bleu_score": 2.9601658378255724,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12681353408248747,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6597671508789062,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010072174173247989,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1328125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030114394778901498,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.289334535598755,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.044,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.788,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.64,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.202,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.825006627975274,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20684666386602268,
      "eval_anthropic_toxic_prompts_runtime": 9.7033,
      "eval_anthropic_toxic_prompts_samples_per_second": 51.529,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.103,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.34441522601250485,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006292221899092376,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.42216671479720663,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3192011476426655,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 84375
    },
    {
      "epoch": 16.2,
      "eval_arxiv_accuracy": 0.3398125,
      "eval_arxiv_bleu_score": 4.211391263269069,
      "eval_arxiv_bleu_score_sem": 0.12270232596287158,
      "eval_arxiv_emb_cos_sim": 0.7509287595748901,
      "eval_arxiv_emb_cos_sim_sem": 0.0081844379018693,
      "eval_arxiv_emb_top1_equal": 0.234375,
      "eval_arxiv_emb_top1_equal_sem": 0.03758909358128201,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.479011297225952,
      "eval_arxiv_n_ngrams_match_1": 14.416,
      "eval_arxiv_n_ngrams_match_2": 2.816,
      "eval_arxiv_n_ngrams_match_3": 0.644,
      "eval_arxiv_num_pred_words": 39.982,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 32.427644918602795,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.34605244704182136,
      "eval_arxiv_runtime": 9.5925,
      "eval_arxiv_samples_per_second": 52.124,
      "eval_arxiv_steps_per_second": 0.104,
      "eval_arxiv_token_set_f1": 0.3380610236518875,
      "eval_arxiv_token_set_f1_sem": 0.0042917909115546245,
      "eval_arxiv_token_set_precision": 0.2876701786778184,
      "eval_arxiv_token_set_recall": 0.42752571493336267,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 84375
    },
    {
      "epoch": 16.2,
      "eval_python_code_alpaca_accuracy": 0.1561875,
      "eval_python_code_alpaca_bleu_score": 4.239576376436463,
      "eval_python_code_alpaca_bleu_score_sem": 0.14032350530114324,
      "eval_python_code_alpaca_emb_cos_sim": 0.724153995513916,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.011613196639341395,
      "eval_python_code_alpaca_emb_top1_equal": 0.109375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.981078863143921,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.276,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.666,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.864,
      "eval_python_code_alpaca_num_pred_words": 43.224,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 19.709068566420875,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.313791982881696,
      "eval_python_code_alpaca_runtime": 10.0186,
      "eval_python_code_alpaca_samples_per_second": 49.907,
      "eval_python_code_alpaca_steps_per_second": 0.1,
      "eval_python_code_alpaca_token_set_f1": 0.4527995957374217,
      "eval_python_code_alpaca_token_set_f1_sem": 0.006052336887982428,
      "eval_python_code_alpaca_token_set_precision": 0.5023335821693716,
      "eval_python_code_alpaca_token_set_recall": 0.43624477146604856,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 84375
    },
    {
      "epoch": 16.2,
      "eval_wikibio_accuracy": 0.31346875,
      "eval_wikibio_bleu_score": 5.629230960932355,
      "eval_wikibio_bleu_score_sem": 0.20059788976519283,
      "eval_wikibio_emb_cos_sim": 0.7350760698318481,
      "eval_wikibio_emb_cos_sim_sem": 0.01087538510456632,
      "eval_wikibio_emb_top1_equal": 0.171875,
      "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.836883783340454,
      "eval_wikibio_n_ngrams_match_1": 10.11,
      "eval_wikibio_n_ngrams_match_2": 3.348,
      "eval_wikibio_n_ngrams_match_3": 1.192,
      "eval_wikibio_num_pred_words": 37.386,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 46.38071664671274,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3462497533280867,
      "eval_wikibio_runtime": 9.5887,
      "eval_wikibio_samples_per_second": 52.145,
      "eval_wikibio_steps_per_second": 0.104,
      "eval_wikibio_token_set_f1": 0.3132445112394064,
      "eval_wikibio_token_set_f1_sem": 0.005398729135291608,
      "eval_wikibio_token_set_precision": 0.32755744273525395,
      "eval_wikibio_token_set_recall": 0.31590463957420123,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 84375
    },
    {
      "epoch": 16.2,
      "eval_nq_accuracy": 0.51740625,
      "eval_nq_bleu_score": 11.361840306640428,
      "eval_nq_bleu_score_sem": 0.46694134822007893,
      "eval_nq_emb_cos_sim": 0.8280055522918701,
      "eval_nq_emb_cos_sim_sem": 0.007018084455425043,
      "eval_nq_emb_top1_equal": 0.2265625,
      "eval_nq_emb_top1_equal_sem": 0.03714537682851538,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.257619619369507,
      "eval_nq_n_ngrams_match_1": 22.81,
      "eval_nq_n_ngrams_match_2": 8.182,
      "eval_nq_n_ngrams_match_3": 3.73,
      "eval_nq_num_pred_words": 49.188,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.560304895309123,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.440902518635086,
      "eval_nq_runtime": 9.9465,
      "eval_nq_samples_per_second": 50.269,
      "eval_nq_steps_per_second": 0.101,
      "eval_nq_token_set_f1": 0.4540385159517221,
      "eval_nq_token_set_f1_sem": 0.005017009922286769,
      "eval_nq_token_set_precision": 0.411597901664635,
      "eval_nq_token_set_recall": 0.5156963591591491,
      "eval_nq_true_num_tokens": 64.0,
      "step": 84375
    },
    {
      "epoch": 16.2,
      "learning_rate": 0.001,
      "loss": 2.6402,
      "step": 84384
    },
    {
      "epoch": 16.21,
      "learning_rate": 0.001,
      "loss": 2.6168,
      "step": 84396
    },
    {
      "epoch": 16.21,
      "learning_rate": 0.001,
      "loss": 2.6203,
      "step": 84408
    },
    {
      "epoch": 16.21,
      "learning_rate": 0.001,
      "loss": 2.6293,
      "step": 84420
    },
    {
      "epoch": 16.21,
      "learning_rate": 0.001,
      "loss": 2.6277,
      "step": 84432
    },
    {
      "epoch": 16.21,
      "learning_rate": 0.001,
      "loss": 2.6258,
      "step": 84444
    },
    {
      "epoch": 16.22,
      "learning_rate": 0.001,
      "loss": 2.6301,
      "step": 84456
    },
    {
      "epoch": 16.22,
      "learning_rate": 0.001,
      "loss": 2.6288,
      "step": 84468
    },
    {
      "epoch": 16.22,
      "learning_rate": 0.001,
      "loss": 2.6285,
      "step": 84480
    },
    {
      "epoch": 16.22,
      "learning_rate": 0.001,
      "loss": 2.6398,
      "step": 84492
    },
    {
      "epoch": 16.23,
      "learning_rate": 0.001,
      "loss": 2.6236,
      "step": 84504
    },
    {
      "epoch": 16.23,
      "learning_rate": 0.001,
      "loss": 2.6295,
      "step": 84516
    },
    {
      "epoch": 16.23,
      "learning_rate": 0.001,
      "loss": 2.6177,
      "step": 84528
    },
    {
      "epoch": 16.23,
      "learning_rate": 0.001,
      "loss": 2.6212,
      "step": 84540
    },
    {
      "epoch": 16.24,
      "learning_rate": 0.001,
      "loss": 2.6275,
      "step": 84552
    },
    {
      "epoch": 16.24,
      "learning_rate": 0.001,
      "loss": 2.6162,
      "step": 84564
    },
    {
      "epoch": 16.24,
      "learning_rate": 0.001,
      "loss": 2.6184,
      "step": 84576
    },
    {
      "epoch": 16.24,
      "learning_rate": 0.001,
      "loss": 2.6192,
      "step": 84588
    },
    {
      "epoch": 16.24,
      "learning_rate": 0.001,
      "loss": 2.6277,
      "step": 84600
    },
    {
      "epoch": 16.25,
      "learning_rate": 0.001,
      "loss": 2.6261,
      "step": 84612
    },
    {
      "epoch": 16.25,
      "learning_rate": 0.001,
      "loss": 2.6307,
      "step": 84624
    },
    {
      "epoch": 16.25,
      "learning_rate": 0.001,
      "loss": 2.6177,
      "step": 84636
    },
    {
      "epoch": 16.25,
      "learning_rate": 0.001,
      "loss": 2.6271,
      "step": 84648
    },
    {
      "epoch": 16.26,
      "learning_rate": 0.001,
      "loss": 2.6212,
      "step": 84660
    },
    {
      "epoch": 16.26,
      "learning_rate": 0.001,
      "loss": 2.6289,
      "step": 84672
    },
    {
      "epoch": 16.26,
      "learning_rate": 0.001,
      "loss": 2.6112,
      "step": 84684
    },
    {
      "epoch": 16.26,
      "learning_rate": 0.001,
      "loss": 2.6208,
      "step": 84696
    },
    {
      "epoch": 16.26,
      "learning_rate": 0.001,
      "loss": 2.6214,
      "step": 84708
    },
    {
      "epoch": 16.27,
      "learning_rate": 0.001,
      "loss": 2.6263,
      "step": 84720
    },
    {
      "epoch": 16.27,
      "learning_rate": 0.001,
      "loss": 2.6284,
      "step": 84732
    },
    {
      "epoch": 16.27,
      "learning_rate": 0.001,
      "loss": 2.6413,
      "step": 84744
    },
    {
      "epoch": 16.27,
      "learning_rate": 0.001,
      "loss": 2.6197,
      "step": 84756
    },
    {
      "epoch": 16.28,
      "learning_rate": 0.001,
      "loss": 2.6354,
      "step": 84768
    },
    {
      "epoch": 16.28,
      "learning_rate": 0.001,
      "loss": 2.6162,
      "step": 84780
    },
    {
      "epoch": 16.28,
      "learning_rate": 0.001,
      "loss": 2.6258,
      "step": 84792
    },
    {
      "epoch": 16.28,
      "learning_rate": 0.001,
      "loss": 2.621,
      "step": 84804
    },
    {
      "epoch": 16.29,
      "learning_rate": 0.001,
      "loss": 2.6229,
      "step": 84816
    },
    {
      "epoch": 16.29,
      "learning_rate": 0.001,
      "loss": 2.6196,
      "step": 84828
    },
    {
      "epoch": 16.29,
      "learning_rate": 0.001,
      "loss": 2.6161,
      "step": 84840
    },
    {
      "epoch": 16.29,
      "learning_rate": 0.001,
      "loss": 2.6205,
      "step": 84852
    },
    {
      "epoch": 16.29,
      "learning_rate": 0.001,
      "loss": 2.6281,
      "step": 84864
    },
    {
      "epoch": 16.3,
      "learning_rate": 0.001,
      "loss": 2.6156,
      "step": 84876
    },
    {
      "epoch": 16.3,
      "learning_rate": 0.001,
      "loss": 2.6242,
      "step": 84888
    },
    {
      "epoch": 16.3,
      "learning_rate": 0.001,
      "loss": 2.6272,
      "step": 84900
    },
    {
      "epoch": 16.3,
      "learning_rate": 0.001,
      "loss": 2.6186,
      "step": 84912
    },
    {
      "epoch": 16.31,
      "learning_rate": 0.001,
      "loss": 2.6193,
      "step": 84924
    },
    {
      "epoch": 16.31,
      "learning_rate": 0.001,
      "loss": 2.6317,
      "step": 84936
    },
    {
      "epoch": 16.31,
      "learning_rate": 0.001,
      "loss": 2.6212,
      "step": 84948
    },
    {
      "epoch": 16.31,
      "learning_rate": 0.001,
      "loss": 2.6306,
      "step": 84960
    },
    {
      "epoch": 16.32,
      "learning_rate": 0.001,
      "loss": 2.622,
      "step": 84972
    },
    {
      "epoch": 16.32,
      "learning_rate": 0.001,
      "loss": 2.6336,
      "step": 84984
    },
    {
      "epoch": 16.32,
      "learning_rate": 0.001,
      "loss": 2.6214,
      "step": 84996
    },
    {
      "epoch": 16.32,
      "eval_ag_news_accuracy": 0.3143125,
      "eval_ag_news_bleu_score": 4.727496476663176,
      "eval_ag_news_bleu_score_sem": 0.1562693439582564,
      "eval_ag_news_emb_cos_sim": 0.7889615297317505,
      "eval_ag_news_emb_cos_sim_sem": 0.00816652944251922,
      "eval_ag_news_emb_top1_equal": 0.25,
      "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.6564488410949707,
      "eval_ag_news_n_ngrams_match_1": 13.726,
      "eval_ag_news_n_ngrams_match_2": 2.906,
      "eval_ag_news_n_ngrams_match_3": 0.848,
      "eval_ag_news_num_pred_words": 46.784,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 38.723584812681004,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.33623640164923313,
      "eval_ag_news_runtime": 10.9815,
      "eval_ag_news_samples_per_second": 45.531,
      "eval_ag_news_steps_per_second": 0.091,
      "eval_ag_news_token_set_f1": 0.34343677388608007,
      "eval_ag_news_token_set_f1_sem": 0.004335082304686956,
      "eval_ag_news_token_set_precision": 0.3268466522484483,
      "eval_ag_news_token_set_recall": 0.37934872835441796,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 85000
    },
    {
      "epoch": 16.32,
      "eval_anthropic_toxic_prompts_accuracy": 0.1116875,
      "eval_anthropic_toxic_prompts_bleu_score": 2.9282192607489956,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11937367289925747,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6589334607124329,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009213499306378338,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.315546989440918,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.944,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.79,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.67,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.57,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 27.537452561532632,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 62.921875,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20273210609631653,
      "eval_anthropic_toxic_prompts_runtime": 9.7814,
      "eval_anthropic_toxic_prompts_samples_per_second": 51.117,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.102,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.343269996247756,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006497621599196589,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4162591130326654,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3248448605719658,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 85000
    },
    {
      "epoch": 16.32,
      "eval_arxiv_accuracy": 0.33671875,
      "eval_arxiv_bleu_score": 4.078923104170819,
      "eval_arxiv_bleu_score_sem": 0.11423218570266515,
      "eval_arxiv_emb_cos_sim": 0.7389238476753235,
      "eval_arxiv_emb_cos_sim_sem": 0.009505083058177845,
      "eval_arxiv_emb_top1_equal": 0.2578125,
      "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.5032198429107666,
      "eval_arxiv_n_ngrams_match_1": 14.394,
      "eval_arxiv_n_ngrams_match_2": 2.774,
      "eval_arxiv_n_ngrams_match_3": 0.618,
      "eval_arxiv_num_pred_words": 40.238,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 33.22225035668231,
      "eval_arxiv_pred_num_tokens": 62.8515625,
      "eval_arxiv_rouge_score": 0.33986931113436525,
      "eval_arxiv_runtime": 10.073,
      "eval_arxiv_samples_per_second": 49.638,
      "eval_arxiv_steps_per_second": 0.099,
      "eval_arxiv_token_set_f1": 0.33733828360133833,
      "eval_arxiv_token_set_f1_sem": 0.004321378906322271,
      "eval_arxiv_token_set_precision": 0.2860937611205259,
      "eval_arxiv_token_set_recall": 0.43232976402111967,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 85000
    },
    {
      "epoch": 16.32,
      "eval_python_code_alpaca_accuracy": 0.15565625,
      "eval_python_code_alpaca_bleu_score": 4.162090658429672,
      "eval_python_code_alpaca_bleu_score_sem": 0.12508577022293813,
      "eval_python_code_alpaca_emb_cos_sim": 0.7425490617752075,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010618933385178674,
      "eval_python_code_alpaca_emb_top1_equal": 0.1484375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.9794883728027344,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.532,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.646,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.85,
      "eval_python_code_alpaca_num_pred_words": 44.338,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 19.67774639863773,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3157292150017719,
      "eval_python_code_alpaca_runtime": 9.7105,
      "eval_python_code_alpaca_samples_per_second": 51.49,
      "eval_python_code_alpaca_steps_per_second": 0.103,
      "eval_python_code_alpaca_token_set_f1": 0.45761048278123767,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005685353377185493,
      "eval_python_code_alpaca_token_set_precision": 0.5177254218602015,
      "eval_python_code_alpaca_token_set_recall": 0.4312237556584592,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 85000
    },
    {
      "epoch": 16.32,
      "eval_wikibio_accuracy": 0.3140625,
      "eval_wikibio_bleu_score": 5.612636316287121,
      "eval_wikibio_bleu_score_sem": 0.20710063374958973,
      "eval_wikibio_emb_cos_sim": 0.725387692451477,
      "eval_wikibio_emb_cos_sim_sem": 0.01092995144395326,
      "eval_wikibio_emb_top1_equal": 0.1953125,
      "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.8017916679382324,
      "eval_wikibio_n_ngrams_match_1": 10.046,
      "eval_wikibio_n_ngrams_match_2": 3.198,
      "eval_wikibio_n_ngrams_match_3": 1.138,
      "eval_wikibio_num_pred_words": 36.964,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 44.781345962281726,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3462115290660953,
      "eval_wikibio_runtime": 9.9508,
      "eval_wikibio_samples_per_second": 50.247,
      "eval_wikibio_steps_per_second": 0.1,
      "eval_wikibio_token_set_f1": 0.315266367220626,
      "eval_wikibio_token_set_f1_sem": 0.005458211988072525,
      "eval_wikibio_token_set_precision": 0.32534941182735977,
      "eval_wikibio_token_set_recall": 0.32195284964155896,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 85000
    },
    {
      "epoch": 16.32,
      "eval_nq_accuracy": 0.51740625,
      "eval_nq_bleu_score": 11.435618014216091,
      "eval_nq_bleu_score_sem": 0.47053422152115154,
      "eval_nq_emb_cos_sim": 0.8246973752975464,
      "eval_nq_emb_cos_sim_sem": 0.0071987139943415915,
      "eval_nq_emb_top1_equal": 0.2890625,
      "eval_nq_emb_top1_equal_sem": 0.04022626667363519,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.2601561546325684,
      "eval_nq_n_ngrams_match_1": 22.584,
      "eval_nq_n_ngrams_match_2": 8.25,
      "eval_nq_n_ngrams_match_3": 3.824,
      "eval_nq_num_pred_words": 48.834,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.58458572737648,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4370232865482836,
      "eval_nq_runtime": 10.122,
      "eval_nq_samples_per_second": 49.398,
      "eval_nq_steps_per_second": 0.099,
      "eval_nq_token_set_f1": 0.45127677756435214,
      "eval_nq_token_set_f1_sem": 0.005047810596697375,
      "eval_nq_token_set_precision": 0.40730639601428537,
      "eval_nq_token_set_recall": 0.5170273041283616,
      "eval_nq_true_num_tokens": 64.0,
      "step": 85000
    },
    {
      "epoch": 16.32,
      "learning_rate": 0.001,
      "loss": 2.634,
      "step": 85008
    },
    {
      "epoch": 16.32,
      "learning_rate": 0.001,
      "loss": 2.6208,
      "step": 85020
    },
    {
      "epoch": 16.33,
      "learning_rate": 0.001,
      "loss": 2.6275,
      "step": 85032
    },
    {
      "epoch": 16.33,
      "learning_rate": 0.001,
      "loss": 2.6194,
      "step": 85044
    },
    {
      "epoch": 16.33,
      "learning_rate": 0.001,
      "loss": 2.633,
      "step": 85056
    },
    {
      "epoch": 16.33,
      "learning_rate": 0.001,
      "loss": 2.6338,
      "step": 85068
    },
    {
      "epoch": 16.34,
      "learning_rate": 0.001,
      "loss": 2.6284,
      "step": 85080
    },
    {
      "epoch": 16.34,
      "learning_rate": 0.001,
      "loss": 2.6245,
      "step": 85092
    },
    {
      "epoch": 16.34,
      "learning_rate": 0.001,
      "loss": 2.6322,
      "step": 85104
    },
    {
      "epoch": 16.34,
      "learning_rate": 0.001,
      "loss": 2.6245,
      "step": 85116
    },
    {
      "epoch": 16.35,
      "learning_rate": 0.001,
      "loss": 2.6284,
      "step": 85128
    },
    {
      "epoch": 16.35,
      "learning_rate": 0.001,
      "loss": 2.6262,
      "step": 85140
    },
    {
      "epoch": 16.35,
      "learning_rate": 0.001,
      "loss": 2.6295,
      "step": 85152
    },
    {
      "epoch": 16.35,
      "learning_rate": 0.001,
      "loss": 2.6296,
      "step": 85164
    },
    {
      "epoch": 16.35,
      "learning_rate": 0.001,
      "loss": 2.6367,
      "step": 85176
    },
    {
      "epoch": 16.36,
      "learning_rate": 0.001,
      "loss": 2.6347,
      "step": 85188
    },
    {
      "epoch": 16.36,
      "learning_rate": 0.001,
      "loss": 2.6218,
      "step": 85200
    },
    {
      "epoch": 16.36,
      "learning_rate": 0.001,
      "loss": 2.6212,
      "step": 85212
    },
    {
      "epoch": 16.36,
      "learning_rate": 0.001,
      "loss": 2.6307,
      "step": 85224
    },
    {
      "epoch": 16.37,
      "learning_rate": 0.001,
      "loss": 2.6275,
      "step": 85236
    },
    {
      "epoch": 16.37,
      "learning_rate": 0.001,
      "loss": 2.6322,
      "step": 85248
    },
    {
      "epoch": 16.37,
      "learning_rate": 0.001,
      "loss": 2.625,
      "step": 85260
    },
    {
      "epoch": 16.37,
      "learning_rate": 0.001,
      "loss": 2.6268,
      "step": 85272
    },
    {
      "epoch": 16.38,
      "learning_rate": 0.001,
      "loss": 2.6328,
      "step": 85284
    },
    {
      "epoch": 16.38,
      "learning_rate": 0.001,
      "loss": 2.6244,
      "step": 85296
    },
    {
      "epoch": 16.38,
      "learning_rate": 0.001,
      "loss": 2.6208,
      "step": 85308
    },
    {
      "epoch": 16.38,
      "learning_rate": 0.001,
      "loss": 2.6251,
      "step": 85320
    },
    {
      "epoch": 16.38,
      "learning_rate": 0.001,
      "loss": 2.6273,
      "step": 85332
    },
    {
      "epoch": 16.39,
      "learning_rate": 0.001,
      "loss": 2.6226,
      "step": 85344
    },
    {
      "epoch": 16.39,
      "learning_rate": 0.001,
      "loss": 2.631,
      "step": 85356
    },
    {
      "epoch": 16.39,
      "learning_rate": 0.001,
      "loss": 2.6276,
      "step": 85368
    },
    {
      "epoch": 16.39,
      "learning_rate": 0.001,
      "loss": 2.6216,
      "step": 85380
    },
    {
      "epoch": 16.4,
      "learning_rate": 0.001,
      "loss": 2.6235,
      "step": 85392
    },
    {
      "epoch": 16.4,
      "learning_rate": 0.001,
      "loss": 2.6208,
      "step": 85404
    },
    {
      "epoch": 16.4,
      "learning_rate": 0.001,
      "loss": 2.6154,
      "step": 85416
    },
    {
      "epoch": 16.4,
      "learning_rate": 0.001,
      "loss": 2.6171,
      "step": 85428
    },
    {
      "epoch": 16.41,
      "learning_rate": 0.001,
      "loss": 2.6265,
      "step": 85440
    },
    {
      "epoch": 16.41,
      "learning_rate": 0.001,
      "loss": 2.6313,
      "step": 85452
    },
    {
      "epoch": 16.41,
      "learning_rate": 0.001,
      "loss": 2.6252,
      "step": 85464
    },
    {
      "epoch": 16.41,
      "learning_rate": 0.001,
      "loss": 2.6331,
      "step": 85476
    },
    {
      "epoch": 16.41,
      "learning_rate": 0.001,
      "loss": 2.633,
      "step": 85488
    },
    {
      "epoch": 16.42,
      "learning_rate": 0.001,
      "loss": 2.6302,
      "step": 85500
    },
    {
      "epoch": 16.42,
      "learning_rate": 0.001,
      "loss": 2.632,
      "step": 85512
    },
    {
      "epoch": 16.42,
      "learning_rate": 0.001,
      "loss": 2.6256,
      "step": 85524
    },
    {
      "epoch": 16.42,
      "learning_rate": 0.001,
      "loss": 2.6249,
      "step": 85536
    },
    {
      "epoch": 16.43,
      "learning_rate": 0.001,
      "loss": 2.6212,
      "step": 85548
    },
    {
      "epoch": 16.43,
      "learning_rate": 0.001,
      "loss": 2.6228,
      "step": 85560
    },
    {
      "epoch": 16.43,
      "learning_rate": 0.001,
      "loss": 2.627,
      "step": 85572
    },
    {
      "epoch": 16.43,
      "learning_rate": 0.001,
      "loss": 2.6189,
      "step": 85584
    },
    {
      "epoch": 16.44,
      "learning_rate": 0.001,
      "loss": 2.6281,
      "step": 85596
    },
    {
      "epoch": 16.44,
      "learning_rate": 0.001,
      "loss": 2.6379,
      "step": 85608
    },
    {
      "epoch": 16.44,
      "learning_rate": 0.001,
      "loss": 2.618,
      "step": 85620
    },
    {
      "epoch": 16.44,
      "eval_ag_news_accuracy": 0.31503125,
      "eval_ag_news_bleu_score": 4.734232285240887,
      "eval_ag_news_bleu_score_sem": 0.14446832776285212,
      "eval_ag_news_emb_cos_sim": 0.8040810823440552,
      "eval_ag_news_emb_cos_sim_sem": 0.006647003844799893,
      "eval_ag_news_emb_top1_equal": 0.1875,
      "eval_ag_news_emb_top1_equal_sem": 0.034634623208270626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.637950897216797,
      "eval_ag_news_n_ngrams_match_1": 13.738,
      "eval_ag_news_n_ngrams_match_2": 3.036,
      "eval_ag_news_n_ngrams_match_3": 0.866,
      "eval_ag_news_num_pred_words": 47.152,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 38.01386255257826,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3392382166818193,
      "eval_ag_news_runtime": 11.6904,
      "eval_ag_news_samples_per_second": 42.77,
      "eval_ag_news_steps_per_second": 0.086,
      "eval_ag_news_token_set_f1": 0.3453922108777078,
      "eval_ag_news_token_set_f1_sem": 0.0044595911647469615,
      "eval_ag_news_token_set_precision": 0.32844362009404904,
      "eval_ag_news_token_set_recall": 0.3796238776180352,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 85625
    },
    {
      "epoch": 16.44,
      "eval_anthropic_toxic_prompts_accuracy": 0.11078125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.029252097289845,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11951039790080126,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6618566513061523,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.0091044278722437,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1328125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030114394778901498,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.3571712970733643,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.122,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.858,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.664,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.086,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 28.707869881443887,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21110080755282318,
      "eval_anthropic_toxic_prompts_runtime": 9.8345,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.842,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.102,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3503955953883387,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0063915426667671815,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.43577397594430767,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3220171069084458,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 85625
    },
    {
      "epoch": 16.44,
      "eval_arxiv_accuracy": 0.33709375,
      "eval_arxiv_bleu_score": 4.225227686712552,
      "eval_arxiv_bleu_score_sem": 0.12029798950622987,
      "eval_arxiv_emb_cos_sim": 0.7448480129241943,
      "eval_arxiv_emb_cos_sim_sem": 0.00807973261782259,
      "eval_arxiv_emb_top1_equal": 0.265625,
      "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.5028719902038574,
      "eval_arxiv_n_ngrams_match_1": 14.592,
      "eval_arxiv_n_ngrams_match_2": 2.874,
      "eval_arxiv_n_ngrams_match_3": 0.636,
      "eval_arxiv_num_pred_words": 40.39,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 33.210695916704246,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.34799392645174365,
      "eval_arxiv_runtime": 10.1383,
      "eval_arxiv_samples_per_second": 49.318,
      "eval_arxiv_steps_per_second": 0.099,
      "eval_arxiv_token_set_f1": 0.3404732212119082,
      "eval_arxiv_token_set_f1_sem": 0.004081932664778177,
      "eval_arxiv_token_set_precision": 0.28951478070597453,
      "eval_arxiv_token_set_recall": 0.43068390351571334,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 85625
    },
    {
      "epoch": 16.44,
      "eval_python_code_alpaca_accuracy": 0.158125,
      "eval_python_code_alpaca_bleu_score": 4.4898623454880875,
      "eval_python_code_alpaca_bleu_score_sem": 0.14630471170329362,
      "eval_python_code_alpaca_emb_cos_sim": 0.7492319345474243,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008818117086319608,
      "eval_python_code_alpaca_emb_top1_equal": 0.1328125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.9721035957336426,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.608,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.742,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.936,
      "eval_python_code_alpaca_num_pred_words": 43.226,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 19.532965872183325,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.32464325540349837,
      "eval_python_code_alpaca_runtime": 9.6706,
      "eval_python_code_alpaca_samples_per_second": 51.703,
      "eval_python_code_alpaca_steps_per_second": 0.103,
      "eval_python_code_alpaca_token_set_f1": 0.46409365894739485,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005555074158514538,
      "eval_python_code_alpaca_token_set_precision": 0.5250415209739485,
      "eval_python_code_alpaca_token_set_recall": 0.4338619172201921,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 85625
    },
    {
      "epoch": 16.44,
      "eval_wikibio_accuracy": 0.31275,
      "eval_wikibio_bleu_score": 5.802939841794243,
      "eval_wikibio_bleu_score_sem": 0.2075321053057482,
      "eval_wikibio_emb_cos_sim": 0.7380551099777222,
      "eval_wikibio_emb_cos_sim_sem": 0.009217947115985253,
      "eval_wikibio_emb_top1_equal": 0.1796875,
      "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.8536365032196045,
      "eval_wikibio_n_ngrams_match_1": 10.09,
      "eval_wikibio_n_ngrams_match_2": 3.348,
      "eval_wikibio_n_ngrams_match_3": 1.172,
      "eval_wikibio_num_pred_words": 36.682,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 47.164264756101296,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.35331046831363877,
      "eval_wikibio_runtime": 9.6557,
      "eval_wikibio_samples_per_second": 51.783,
      "eval_wikibio_steps_per_second": 0.104,
      "eval_wikibio_token_set_f1": 0.31607802876324065,
      "eval_wikibio_token_set_f1_sem": 0.005275383067759731,
      "eval_wikibio_token_set_precision": 0.32525107111719104,
      "eval_wikibio_token_set_recall": 0.324323498345772,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 85625
    },
    {
      "epoch": 16.44,
      "eval_nq_accuracy": 0.5179375,
      "eval_nq_bleu_score": 11.115929575777233,
      "eval_nq_bleu_score_sem": 0.46169152675620856,
      "eval_nq_emb_cos_sim": 0.8265016078948975,
      "eval_nq_emb_cos_sim_sem": 0.007319747889463735,
      "eval_nq_emb_top1_equal": 0.1953125,
      "eval_nq_emb_top1_equal_sem": 0.035178457165496856,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.2583084106445312,
      "eval_nq_n_ngrams_match_1": 22.734,
      "eval_nq_n_ngrams_match_2": 8.128,
      "eval_nq_n_ngrams_match_3": 3.644,
      "eval_nq_num_pred_words": 49.032,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.566892218292447,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4375737394517144,
      "eval_nq_runtime": 10.1345,
      "eval_nq_samples_per_second": 49.336,
      "eval_nq_steps_per_second": 0.099,
      "eval_nq_token_set_f1": 0.45498667808267784,
      "eval_nq_token_set_f1_sem": 0.005013999226700478,
      "eval_nq_token_set_precision": 0.4093649179282037,
      "eval_nq_token_set_recall": 0.5220906308707194,
      "eval_nq_true_num_tokens": 64.0,
      "step": 85625
    },
    {
      "epoch": 16.44,
      "learning_rate": 0.001,
      "loss": 2.6312,
      "step": 85632
    },
    {
      "epoch": 16.44,
      "learning_rate": 0.001,
      "loss": 2.624,
      "step": 85644
    },
    {
      "epoch": 16.45,
      "learning_rate": 0.001,
      "loss": 2.6263,
      "step": 85656
    },
    {
      "epoch": 16.45,
      "learning_rate": 0.001,
      "loss": 2.6249,
      "step": 85668
    },
    {
      "epoch": 16.45,
      "learning_rate": 0.001,
      "loss": 2.6199,
      "step": 85680
    },
    {
      "epoch": 16.45,
      "learning_rate": 0.001,
      "loss": 2.6379,
      "step": 85692
    },
    {
      "epoch": 16.46,
      "learning_rate": 0.001,
      "loss": 2.6259,
      "step": 85704
    },
    {
      "epoch": 16.46,
      "learning_rate": 0.001,
      "loss": 2.6299,
      "step": 85716
    },
    {
      "epoch": 16.46,
      "learning_rate": 0.001,
      "loss": 2.6356,
      "step": 85728
    },
    {
      "epoch": 16.46,
      "learning_rate": 0.001,
      "loss": 2.6372,
      "step": 85740
    },
    {
      "epoch": 16.47,
      "learning_rate": 0.001,
      "loss": 2.6292,
      "step": 85752
    },
    {
      "epoch": 16.47,
      "learning_rate": 0.001,
      "loss": 2.6256,
      "step": 85764
    },
    {
      "epoch": 16.47,
      "learning_rate": 0.001,
      "loss": 2.6373,
      "step": 85776
    },
    {
      "epoch": 16.47,
      "learning_rate": 0.001,
      "loss": 2.6225,
      "step": 85788
    },
    {
      "epoch": 16.47,
      "learning_rate": 0.001,
      "loss": 2.626,
      "step": 85800
    },
    {
      "epoch": 16.48,
      "learning_rate": 0.001,
      "loss": 2.6246,
      "step": 85812
    },
    {
      "epoch": 16.48,
      "learning_rate": 0.001,
      "loss": 2.6243,
      "step": 85824
    },
    {
      "epoch": 16.48,
      "learning_rate": 0.001,
      "loss": 2.6226,
      "step": 85836
    },
    {
      "epoch": 16.48,
      "learning_rate": 0.001,
      "loss": 2.6318,
      "step": 85848
    },
    {
      "epoch": 16.49,
      "learning_rate": 0.001,
      "loss": 2.6255,
      "step": 85860
    },
    {
      "epoch": 16.49,
      "learning_rate": 0.001,
      "loss": 2.6207,
      "step": 85872
    },
    {
      "epoch": 16.49,
      "learning_rate": 0.001,
      "loss": 2.6243,
      "step": 85884
    },
    {
      "epoch": 16.49,
      "learning_rate": 0.001,
      "loss": 2.6303,
      "step": 85896
    },
    {
      "epoch": 16.5,
      "learning_rate": 0.001,
      "loss": 2.6345,
      "step": 85908
    },
    {
      "epoch": 16.5,
      "learning_rate": 0.001,
      "loss": 2.6258,
      "step": 85920
    },
    {
      "epoch": 16.5,
      "learning_rate": 0.001,
      "loss": 2.6255,
      "step": 85932
    },
    {
      "epoch": 16.5,
      "learning_rate": 0.001,
      "loss": 2.6377,
      "step": 85944
    },
    {
      "epoch": 16.5,
      "learning_rate": 0.001,
      "loss": 2.6261,
      "step": 85956
    },
    {
      "epoch": 16.51,
      "learning_rate": 0.001,
      "loss": 2.6248,
      "step": 85968
    },
    {
      "epoch": 16.51,
      "learning_rate": 0.001,
      "loss": 2.6278,
      "step": 85980
    },
    {
      "epoch": 16.51,
      "learning_rate": 0.001,
      "loss": 2.6275,
      "step": 85992
    },
    {
      "epoch": 16.51,
      "learning_rate": 0.001,
      "loss": 2.6224,
      "step": 86004
    },
    {
      "epoch": 16.52,
      "learning_rate": 0.001,
      "loss": 2.6251,
      "step": 86016
    },
    {
      "epoch": 16.52,
      "learning_rate": 0.001,
      "loss": 2.6217,
      "step": 86028
    },
    {
      "epoch": 16.52,
      "learning_rate": 0.001,
      "loss": 2.6249,
      "step": 86040
    },
    {
      "epoch": 16.52,
      "learning_rate": 0.001,
      "loss": 2.6233,
      "step": 86052
    },
    {
      "epoch": 16.53,
      "learning_rate": 0.001,
      "loss": 2.6221,
      "step": 86064
    },
    {
      "epoch": 16.53,
      "learning_rate": 0.001,
      "loss": 2.6342,
      "step": 86076
    },
    {
      "epoch": 16.53,
      "learning_rate": 0.001,
      "loss": 2.6256,
      "step": 86088
    },
    {
      "epoch": 16.53,
      "learning_rate": 0.001,
      "loss": 2.6303,
      "step": 86100
    },
    {
      "epoch": 16.53,
      "learning_rate": 0.001,
      "loss": 2.6229,
      "step": 86112
    },
    {
      "epoch": 16.54,
      "learning_rate": 0.001,
      "loss": 2.6348,
      "step": 86124
    },
    {
      "epoch": 16.54,
      "learning_rate": 0.001,
      "loss": 2.6325,
      "step": 86136
    },
    {
      "epoch": 16.54,
      "learning_rate": 0.001,
      "loss": 2.6287,
      "step": 86148
    },
    {
      "epoch": 16.54,
      "learning_rate": 0.001,
      "loss": 2.6375,
      "step": 86160
    },
    {
      "epoch": 16.55,
      "learning_rate": 0.001,
      "loss": 2.6331,
      "step": 86172
    },
    {
      "epoch": 16.55,
      "learning_rate": 0.001,
      "loss": 2.6229,
      "step": 86184
    },
    {
      "epoch": 16.55,
      "learning_rate": 0.001,
      "loss": 2.6298,
      "step": 86196
    },
    {
      "epoch": 16.55,
      "learning_rate": 0.001,
      "loss": 2.6308,
      "step": 86208
    },
    {
      "epoch": 16.56,
      "learning_rate": 0.001,
      "loss": 2.6335,
      "step": 86220
    },
    {
      "epoch": 16.56,
      "learning_rate": 0.001,
      "loss": 2.6356,
      "step": 86232
    },
    {
      "epoch": 16.56,
      "learning_rate": 0.001,
      "loss": 2.6324,
      "step": 86244
    },
    {
      "epoch": 16.56,
      "eval_ag_news_accuracy": 0.31484375,
      "eval_ag_news_bleu_score": 4.677569717950757,
      "eval_ag_news_bleu_score_sem": 0.1536075207240273,
      "eval_ag_news_emb_cos_sim": 0.7919803261756897,
      "eval_ag_news_emb_cos_sim_sem": 0.0076983503954590644,
      "eval_ag_news_emb_top1_equal": 0.2734375,
      "eval_ag_news_emb_top1_equal_sem": 0.03955156411760461,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.6351852416992188,
      "eval_ag_news_n_ngrams_match_1": 13.454,
      "eval_ag_news_n_ngrams_match_2": 2.938,
      "eval_ag_news_n_ngrams_match_3": 0.862,
      "eval_ag_news_num_pred_words": 46.584,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 37.90887455110782,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3328643009688087,
      "eval_ag_news_runtime": 9.9686,
      "eval_ag_news_samples_per_second": 50.157,
      "eval_ag_news_steps_per_second": 0.1,
      "eval_ag_news_token_set_f1": 0.3380049465612954,
      "eval_ag_news_token_set_f1_sem": 0.004545375509986334,
      "eval_ag_news_token_set_precision": 0.31841268807992196,
      "eval_ag_news_token_set_recall": 0.3783377703612223,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 86250
    },
    {
      "epoch": 16.56,
      "eval_anthropic_toxic_prompts_accuracy": 0.11259375,
      "eval_anthropic_toxic_prompts_bleu_score": 2.9140743862987084,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11594501363240359,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6440126299858093,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010369487262266948,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2765464782714844,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.046,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.808,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.632,
      "eval_anthropic_toxic_prompts_num_pred_words": 48.168,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.48415099124652,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20559105113898218,
      "eval_anthropic_toxic_prompts_runtime": 9.805,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.994,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.102,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35503537504920624,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006506715944413094,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4241620850914107,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.33386737837819425,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 86250
    },
    {
      "epoch": 16.56,
      "eval_arxiv_accuracy": 0.3390625,
      "eval_arxiv_bleu_score": 4.153893751254837,
      "eval_arxiv_bleu_score_sem": 0.11692658314158827,
      "eval_arxiv_emb_cos_sim": 0.7477889060974121,
      "eval_arxiv_emb_cos_sim_sem": 0.007050107499214719,
      "eval_arxiv_emb_top1_equal": 0.265625,
      "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.504267692565918,
      "eval_arxiv_n_ngrams_match_1": 14.614,
      "eval_arxiv_n_ngrams_match_2": 2.784,
      "eval_arxiv_n_ngrams_match_3": 0.612,
      "eval_arxiv_num_pred_words": 41.108,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 33.257080525465156,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.34724834799599713,
      "eval_arxiv_runtime": 9.9529,
      "eval_arxiv_samples_per_second": 50.236,
      "eval_arxiv_steps_per_second": 0.1,
      "eval_arxiv_token_set_f1": 0.33965850333694886,
      "eval_arxiv_token_set_f1_sem": 0.004114648945668827,
      "eval_arxiv_token_set_precision": 0.29188368571785067,
      "eval_arxiv_token_set_recall": 0.4252304605018358,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 86250
    },
    {
      "epoch": 16.56,
      "eval_python_code_alpaca_accuracy": 0.155375,
      "eval_python_code_alpaca_bleu_score": 4.42037969556335,
      "eval_python_code_alpaca_bleu_score_sem": 0.14584883274688865,
      "eval_python_code_alpaca_emb_cos_sim": 0.7608622312545776,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.006207836976038917,
      "eval_python_code_alpaca_emb_top1_equal": 0.09375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.025864720141013958,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.976240396499634,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.654,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.838,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.914,
      "eval_python_code_alpaca_num_pred_words": 44.61,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 19.613937226072167,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.31759725655093096,
      "eval_python_code_alpaca_runtime": 9.9947,
      "eval_python_code_alpaca_samples_per_second": 50.027,
      "eval_python_code_alpaca_steps_per_second": 0.1,
      "eval_python_code_alpaca_token_set_f1": 0.4719123221850839,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005591315482465464,
      "eval_python_code_alpaca_token_set_precision": 0.5294576661254947,
      "eval_python_code_alpaca_token_set_recall": 0.4465949770512454,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 86250
    },
    {
      "epoch": 16.56,
      "eval_wikibio_accuracy": 0.3121875,
      "eval_wikibio_bleu_score": 5.557797813810281,
      "eval_wikibio_bleu_score_sem": 0.19400367573684665,
      "eval_wikibio_emb_cos_sim": 0.7214940190315247,
      "eval_wikibio_emb_cos_sim_sem": 0.01092002717821796,
      "eval_wikibio_emb_top1_equal": 0.1875,
      "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.840996026992798,
      "eval_wikibio_n_ngrams_match_1": 9.628,
      "eval_wikibio_n_ngrams_match_2": 3.176,
      "eval_wikibio_n_ngrams_match_3": 1.088,
      "eval_wikibio_num_pred_words": 35.774,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 46.57183815410644,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.34312089050957295,
      "eval_wikibio_runtime": 9.6657,
      "eval_wikibio_samples_per_second": 51.729,
      "eval_wikibio_steps_per_second": 0.103,
      "eval_wikibio_token_set_f1": 0.30667002405518456,
      "eval_wikibio_token_set_f1_sem": 0.005711237778105406,
      "eval_wikibio_token_set_precision": 0.313010097137424,
      "eval_wikibio_token_set_recall": 0.3178837283556537,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 86250
    },
    {
      "epoch": 16.56,
      "eval_nq_accuracy": 0.51871875,
      "eval_nq_bleu_score": 11.108348794910503,
      "eval_nq_bleu_score_sem": 0.4506995806699304,
      "eval_nq_emb_cos_sim": 0.8245291113853455,
      "eval_nq_emb_cos_sim_sem": 0.007480438017767582,
      "eval_nq_emb_top1_equal": 0.203125,
      "eval_nq_emb_top1_equal_sem": 0.03570055125142555,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.2540647983551025,
      "eval_nq_n_ngrams_match_1": 22.364,
      "eval_nq_n_ngrams_match_2": 8.07,
      "eval_nq_n_ngrams_match_3": 3.644,
      "eval_nq_num_pred_words": 49.282,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.526380056652824,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4303017423573058,
      "eval_nq_runtime": 10.1022,
      "eval_nq_samples_per_second": 49.494,
      "eval_nq_steps_per_second": 0.099,
      "eval_nq_token_set_f1": 0.4482803320685933,
      "eval_nq_token_set_f1_sem": 0.004701399922315706,
      "eval_nq_token_set_precision": 0.4042105412576047,
      "eval_nq_token_set_recall": 0.5114986170305418,
      "eval_nq_true_num_tokens": 64.0,
      "step": 86250
    },
    {
      "epoch": 16.56,
      "learning_rate": 0.001,
      "loss": 2.6258,
      "step": 86256
    },
    {
      "epoch": 16.56,
      "learning_rate": 0.001,
      "loss": 2.6309,
      "step": 86268
    },
    {
      "epoch": 16.57,
      "learning_rate": 0.001,
      "loss": 2.6232,
      "step": 86280
    },
    {
      "epoch": 16.57,
      "learning_rate": 0.001,
      "loss": 2.6133,
      "step": 86292
    },
    {
      "epoch": 16.57,
      "learning_rate": 0.001,
      "loss": 2.6218,
      "step": 86304
    },
    {
      "epoch": 16.57,
      "learning_rate": 0.001,
      "loss": 2.6275,
      "step": 86316
    },
    {
      "epoch": 16.58,
      "learning_rate": 0.001,
      "loss": 2.618,
      "step": 86328
    },
    {
      "epoch": 16.58,
      "learning_rate": 0.001,
      "loss": 2.6342,
      "step": 86340
    },
    {
      "epoch": 16.58,
      "learning_rate": 0.001,
      "loss": 2.6257,
      "step": 86352
    },
    {
      "epoch": 16.58,
      "learning_rate": 0.001,
      "loss": 2.6304,
      "step": 86364
    },
    {
      "epoch": 16.59,
      "learning_rate": 0.001,
      "loss": 2.6174,
      "step": 86376
    },
    {
      "epoch": 16.59,
      "learning_rate": 0.001,
      "loss": 2.619,
      "step": 86388
    },
    {
      "epoch": 16.59,
      "learning_rate": 0.001,
      "loss": 2.6274,
      "step": 86400
    },
    {
      "epoch": 16.59,
      "learning_rate": 0.001,
      "loss": 2.625,
      "step": 86412
    },
    {
      "epoch": 16.59,
      "learning_rate": 0.001,
      "loss": 2.6291,
      "step": 86424
    },
    {
      "epoch": 16.6,
      "learning_rate": 0.001,
      "loss": 2.6281,
      "step": 86436
    },
    {
      "epoch": 16.6,
      "learning_rate": 0.001,
      "loss": 2.6226,
      "step": 86448
    },
    {
      "epoch": 16.6,
      "learning_rate": 0.001,
      "loss": 2.6328,
      "step": 86460
    },
    {
      "epoch": 16.6,
      "learning_rate": 0.001,
      "loss": 2.6275,
      "step": 86472
    },
    {
      "epoch": 16.61,
      "learning_rate": 0.001,
      "loss": 2.6215,
      "step": 86484
    },
    {
      "epoch": 16.61,
      "learning_rate": 0.001,
      "loss": 2.6274,
      "step": 86496
    },
    {
      "epoch": 16.61,
      "learning_rate": 0.001,
      "loss": 2.6337,
      "step": 86508
    },
    {
      "epoch": 16.61,
      "learning_rate": 0.001,
      "loss": 2.6342,
      "step": 86520
    },
    {
      "epoch": 16.62,
      "learning_rate": 0.001,
      "loss": 2.6326,
      "step": 86532
    },
    {
      "epoch": 16.62,
      "learning_rate": 0.001,
      "loss": 2.6372,
      "step": 86544
    },
    {
      "epoch": 16.62,
      "learning_rate": 0.001,
      "loss": 2.6208,
      "step": 86556
    },
    {
      "epoch": 16.62,
      "learning_rate": 0.001,
      "loss": 2.6234,
      "step": 86568
    },
    {
      "epoch": 16.62,
      "learning_rate": 0.001,
      "loss": 2.6309,
      "step": 86580
    },
    {
      "epoch": 16.63,
      "learning_rate": 0.001,
      "loss": 2.6374,
      "step": 86592
    },
    {
      "epoch": 16.63,
      "learning_rate": 0.001,
      "loss": 2.6285,
      "step": 86604
    },
    {
      "epoch": 16.63,
      "learning_rate": 0.001,
      "loss": 2.6272,
      "step": 86616
    },
    {
      "epoch": 16.63,
      "learning_rate": 0.001,
      "loss": 2.6281,
      "step": 86628
    },
    {
      "epoch": 16.64,
      "learning_rate": 0.001,
      "loss": 2.6147,
      "step": 86640
    },
    {
      "epoch": 16.64,
      "learning_rate": 0.001,
      "loss": 2.6315,
      "step": 86652
    },
    {
      "epoch": 16.64,
      "learning_rate": 0.001,
      "loss": 2.6331,
      "step": 86664
    },
    {
      "epoch": 16.64,
      "learning_rate": 0.001,
      "loss": 2.6349,
      "step": 86676
    },
    {
      "epoch": 16.65,
      "learning_rate": 0.001,
      "loss": 2.6286,
      "step": 86688
    },
    {
      "epoch": 16.65,
      "learning_rate": 0.001,
      "loss": 2.6315,
      "step": 86700
    },
    {
      "epoch": 16.65,
      "learning_rate": 0.001,
      "loss": 2.6276,
      "step": 86712
    },
    {
      "epoch": 16.65,
      "learning_rate": 0.001,
      "loss": 2.635,
      "step": 86724
    },
    {
      "epoch": 16.65,
      "learning_rate": 0.001,
      "loss": 2.6239,
      "step": 86736
    },
    {
      "epoch": 16.66,
      "learning_rate": 0.001,
      "loss": 2.6185,
      "step": 86748
    },
    {
      "epoch": 16.66,
      "learning_rate": 0.001,
      "loss": 2.6298,
      "step": 86760
    },
    {
      "epoch": 16.66,
      "learning_rate": 0.001,
      "loss": 2.6291,
      "step": 86772
    },
    {
      "epoch": 16.66,
      "learning_rate": 0.001,
      "loss": 2.6365,
      "step": 86784
    },
    {
      "epoch": 16.67,
      "learning_rate": 0.001,
      "loss": 2.633,
      "step": 86796
    },
    {
      "epoch": 16.67,
      "learning_rate": 0.001,
      "loss": 2.6266,
      "step": 86808
    },
    {
      "epoch": 16.67,
      "learning_rate": 0.001,
      "loss": 2.6243,
      "step": 86820
    },
    {
      "epoch": 16.67,
      "learning_rate": 0.001,
      "loss": 2.6229,
      "step": 86832
    },
    {
      "epoch": 16.68,
      "learning_rate": 0.001,
      "loss": 2.6222,
      "step": 86844
    },
    {
      "epoch": 16.68,
      "learning_rate": 0.001,
      "loss": 2.6401,
      "step": 86856
    },
    {
      "epoch": 16.68,
      "learning_rate": 0.001,
      "loss": 2.6294,
      "step": 86868
    },
    {
      "epoch": 16.68,
      "eval_ag_news_accuracy": 0.315375,
      "eval_ag_news_bleu_score": 4.679309175483281,
      "eval_ag_news_bleu_score_sem": 0.153342466902705,
      "eval_ag_news_emb_cos_sim": 0.804725170135498,
      "eval_ag_news_emb_cos_sim_sem": 0.006796262298989668,
      "eval_ag_news_emb_top1_equal": 0.2421875,
      "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.6281321048736572,
      "eval_ag_news_n_ngrams_match_1": 13.722,
      "eval_ag_news_n_ngrams_match_2": 3.012,
      "eval_ag_news_n_ngrams_match_3": 0.868,
      "eval_ag_news_num_pred_words": 46.602,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 37.64243878049348,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3384316212282945,
      "eval_ag_news_runtime": 10.9929,
      "eval_ag_news_samples_per_second": 45.484,
      "eval_ag_news_steps_per_second": 0.091,
      "eval_ag_news_token_set_f1": 0.345484160230268,
      "eval_ag_news_token_set_f1_sem": 0.004385774029902639,
      "eval_ag_news_token_set_precision": 0.3278777292416484,
      "eval_ag_news_token_set_recall": 0.38073365646483104,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 86875
    },
    {
      "epoch": 16.68,
      "eval_anthropic_toxic_prompts_accuracy": 0.1115625,
      "eval_anthropic_toxic_prompts_bleu_score": 2.94961734013026,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11458936431158522,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6625691652297974,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010334130666177863,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.316422700881958,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.004,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.82,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.636,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.096,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 27.561577985711093,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20786075367716803,
      "eval_anthropic_toxic_prompts_runtime": 10.8339,
      "eval_anthropic_toxic_prompts_samples_per_second": 46.151,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.092,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3505138942601821,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006591600058821367,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.42305282286384216,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3263186822116573,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 86875
    },
    {
      "epoch": 16.68,
      "eval_arxiv_accuracy": 0.33928125,
      "eval_arxiv_bleu_score": 4.1213584226465265,
      "eval_arxiv_bleu_score_sem": 0.1243666975247188,
      "eval_arxiv_emb_cos_sim": 0.7476789951324463,
      "eval_arxiv_emb_cos_sim_sem": 0.007997047745033802,
      "eval_arxiv_emb_top1_equal": 0.2109375,
      "eval_arxiv_emb_top1_equal_sem": 0.03620184850179216,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.5029208660125732,
      "eval_arxiv_n_ngrams_match_1": 14.28,
      "eval_arxiv_n_ngrams_match_2": 2.77,
      "eval_arxiv_n_ngrams_match_3": 0.654,
      "eval_arxiv_num_pred_words": 39.87,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 33.212319155993434,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3419498009749126,
      "eval_arxiv_runtime": 10.3148,
      "eval_arxiv_samples_per_second": 48.474,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.3361871733138312,
      "eval_arxiv_token_set_f1_sem": 0.004443572811533335,
      "eval_arxiv_token_set_precision": 0.28552665801110716,
      "eval_arxiv_token_set_recall": 0.42890751481817746,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 86875
    },
    {
      "epoch": 16.68,
      "eval_python_code_alpaca_accuracy": 0.159,
      "eval_python_code_alpaca_bleu_score": 4.4949201955534575,
      "eval_python_code_alpaca_bleu_score_sem": 0.14911391424469905,
      "eval_python_code_alpaca_emb_cos_sim": 0.7516356706619263,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009768329639798859,
      "eval_python_code_alpaca_emb_top1_equal": 0.1015625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.026804565886848545,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.9453086853027344,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.868,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.9,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.958,
      "eval_python_code_alpaca_num_pred_words": 44.588,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 19.01653160436644,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3261765212791481,
      "eval_python_code_alpaca_runtime": 9.9321,
      "eval_python_code_alpaca_samples_per_second": 50.342,
      "eval_python_code_alpaca_steps_per_second": 0.101,
      "eval_python_code_alpaca_token_set_f1": 0.4750200440609405,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005573481937169099,
      "eval_python_code_alpaca_token_set_precision": 0.5392437295997247,
      "eval_python_code_alpaca_token_set_recall": 0.44507940543250574,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 86875
    },
    {
      "epoch": 16.68,
      "eval_wikibio_accuracy": 0.31346875,
      "eval_wikibio_bleu_score": 5.483483780610567,
      "eval_wikibio_bleu_score_sem": 0.20548743364077995,
      "eval_wikibio_emb_cos_sim": 0.7220487594604492,
      "eval_wikibio_emb_cos_sim_sem": 0.010978563882443528,
      "eval_wikibio_emb_top1_equal": 0.15625,
      "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.851616144180298,
      "eval_wikibio_n_ngrams_match_1": 9.57,
      "eval_wikibio_n_ngrams_match_2": 3.118,
      "eval_wikibio_n_ngrams_match_3": 1.096,
      "eval_wikibio_num_pred_words": 35.356,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 47.06907220141837,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.33593630782168704,
      "eval_wikibio_runtime": 11.225,
      "eval_wikibio_samples_per_second": 44.543,
      "eval_wikibio_steps_per_second": 0.089,
      "eval_wikibio_token_set_f1": 0.3060713827508873,
      "eval_wikibio_token_set_f1_sem": 0.005734146280167845,
      "eval_wikibio_token_set_precision": 0.3124944838666867,
      "eval_wikibio_token_set_recall": 0.31781332903744397,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 86875
    },
    {
      "epoch": 16.68,
      "eval_nq_accuracy": 0.5173125,
      "eval_nq_bleu_score": 11.09245589224382,
      "eval_nq_bleu_score_sem": 0.4527721311143663,
      "eval_nq_emb_cos_sim": 0.8283681869506836,
      "eval_nq_emb_cos_sim_sem": 0.006580984757874625,
      "eval_nq_emb_top1_equal": 0.3203125,
      "eval_nq_emb_top1_equal_sem": 0.041403754790620424,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.250608444213867,
      "eval_nq_n_ngrams_match_1": 22.568,
      "eval_nq_n_ngrams_match_2": 8.062,
      "eval_nq_n_ngrams_match_3": 3.616,
      "eval_nq_num_pred_words": 49.19,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.49351035088769,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4355600451312414,
      "eval_nq_runtime": 10.2445,
      "eval_nq_samples_per_second": 48.806,
      "eval_nq_steps_per_second": 0.098,
      "eval_nq_token_set_f1": 0.4528203612599476,
      "eval_nq_token_set_f1_sem": 0.004808790989413383,
      "eval_nq_token_set_precision": 0.40919570157161866,
      "eval_nq_token_set_recall": 0.5152685511416994,
      "eval_nq_true_num_tokens": 64.0,
      "step": 86875
    },
    {
      "epoch": 16.68,
      "learning_rate": 0.001,
      "loss": 2.6284,
      "step": 86880
    },
    {
      "epoch": 16.68,
      "learning_rate": 0.001,
      "loss": 2.6391,
      "step": 86892
    },
    {
      "epoch": 16.69,
      "learning_rate": 0.001,
      "loss": 2.6352,
      "step": 86904
    },
    {
      "epoch": 16.69,
      "learning_rate": 0.001,
      "loss": 2.6215,
      "step": 86916
    },
    {
      "epoch": 16.69,
      "learning_rate": 0.001,
      "loss": 2.6264,
      "step": 86928
    },
    {
      "epoch": 16.69,
      "learning_rate": 0.001,
      "loss": 2.6277,
      "step": 86940
    },
    {
      "epoch": 16.7,
      "learning_rate": 0.001,
      "loss": 2.6351,
      "step": 86952
    },
    {
      "epoch": 16.7,
      "learning_rate": 0.001,
      "loss": 2.6242,
      "step": 86964
    },
    {
      "epoch": 16.7,
      "learning_rate": 0.001,
      "loss": 2.6337,
      "step": 86976
    },
    {
      "epoch": 16.7,
      "learning_rate": 0.001,
      "loss": 2.6224,
      "step": 86988
    },
    {
      "epoch": 16.71,
      "learning_rate": 0.001,
      "loss": 2.6178,
      "step": 87000
    },
    {
      "epoch": 16.71,
      "learning_rate": 0.001,
      "loss": 2.6285,
      "step": 87012
    },
    {
      "epoch": 16.71,
      "learning_rate": 0.001,
      "loss": 2.6318,
      "step": 87024
    },
    {
      "epoch": 16.71,
      "learning_rate": 0.001,
      "loss": 2.6251,
      "step": 87036
    },
    {
      "epoch": 16.71,
      "learning_rate": 0.001,
      "loss": 2.623,
      "step": 87048
    },
    {
      "epoch": 16.72,
      "learning_rate": 0.001,
      "loss": 2.6302,
      "step": 87060
    },
    {
      "epoch": 16.72,
      "learning_rate": 0.001,
      "loss": 2.6285,
      "step": 87072
    },
    {
      "epoch": 16.72,
      "learning_rate": 0.001,
      "loss": 2.6324,
      "step": 87084
    },
    {
      "epoch": 16.72,
      "learning_rate": 0.001,
      "loss": 2.6366,
      "step": 87096
    },
    {
      "epoch": 16.73,
      "learning_rate": 0.001,
      "loss": 2.6255,
      "step": 87108
    },
    {
      "epoch": 16.73,
      "learning_rate": 0.001,
      "loss": 2.6307,
      "step": 87120
    },
    {
      "epoch": 16.73,
      "learning_rate": 0.001,
      "loss": 2.6341,
      "step": 87132
    },
    {
      "epoch": 16.73,
      "learning_rate": 0.001,
      "loss": 2.6228,
      "step": 87144
    },
    {
      "epoch": 16.74,
      "learning_rate": 0.001,
      "loss": 2.6267,
      "step": 87156
    },
    {
      "epoch": 16.74,
      "learning_rate": 0.001,
      "loss": 2.6245,
      "step": 87168
    },
    {
      "epoch": 16.74,
      "learning_rate": 0.001,
      "loss": 2.6248,
      "step": 87180
    },
    {
      "epoch": 16.74,
      "learning_rate": 0.001,
      "loss": 2.6277,
      "step": 87192
    },
    {
      "epoch": 16.74,
      "learning_rate": 0.001,
      "loss": 2.6327,
      "step": 87204
    },
    {
      "epoch": 16.75,
      "learning_rate": 0.001,
      "loss": 2.628,
      "step": 87216
    },
    {
      "epoch": 16.75,
      "learning_rate": 0.001,
      "loss": 2.6282,
      "step": 87228
    },
    {
      "epoch": 16.75,
      "learning_rate": 0.001,
      "loss": 2.6221,
      "step": 87240
    },
    {
      "epoch": 16.75,
      "learning_rate": 0.001,
      "loss": 2.6274,
      "step": 87252
    },
    {
      "epoch": 16.76,
      "learning_rate": 0.001,
      "loss": 2.6143,
      "step": 87264
    },
    {
      "epoch": 16.76,
      "learning_rate": 0.001,
      "loss": 2.6182,
      "step": 87276
    },
    {
      "epoch": 16.76,
      "learning_rate": 0.001,
      "loss": 2.6306,
      "step": 87288
    },
    {
      "epoch": 16.76,
      "learning_rate": 0.001,
      "loss": 2.6275,
      "step": 87300
    },
    {
      "epoch": 16.76,
      "learning_rate": 0.001,
      "loss": 2.6252,
      "step": 87312
    },
    {
      "epoch": 16.77,
      "learning_rate": 0.001,
      "loss": 2.6214,
      "step": 87324
    },
    {
      "epoch": 16.77,
      "learning_rate": 0.001,
      "loss": 2.6312,
      "step": 87336
    },
    {
      "epoch": 16.77,
      "learning_rate": 0.001,
      "loss": 2.6291,
      "step": 87348
    },
    {
      "epoch": 16.77,
      "learning_rate": 0.001,
      "loss": 2.6267,
      "step": 87360
    },
    {
      "epoch": 16.78,
      "learning_rate": 0.001,
      "loss": 2.6263,
      "step": 87372
    },
    {
      "epoch": 16.78,
      "learning_rate": 0.001,
      "loss": 2.6312,
      "step": 87384
    },
    {
      "epoch": 16.78,
      "learning_rate": 0.001,
      "loss": 2.6265,
      "step": 87396
    },
    {
      "epoch": 16.78,
      "learning_rate": 0.001,
      "loss": 2.6306,
      "step": 87408
    },
    {
      "epoch": 16.79,
      "learning_rate": 0.001,
      "loss": 2.6298,
      "step": 87420
    },
    {
      "epoch": 16.79,
      "learning_rate": 0.001,
      "loss": 2.6242,
      "step": 87432
    },
    {
      "epoch": 16.79,
      "learning_rate": 0.001,
      "loss": 2.6342,
      "step": 87444
    },
    {
      "epoch": 16.79,
      "learning_rate": 0.001,
      "loss": 2.6242,
      "step": 87456
    },
    {
      "epoch": 16.79,
      "learning_rate": 0.001,
      "loss": 2.6282,
      "step": 87468
    },
    {
      "epoch": 16.8,
      "learning_rate": 0.001,
      "loss": 2.6298,
      "step": 87480
    },
    {
      "epoch": 16.8,
      "learning_rate": 0.001,
      "loss": 2.6309,
      "step": 87492
    },
    {
      "epoch": 16.8,
      "eval_ag_news_accuracy": 0.315875,
      "eval_ag_news_bleu_score": 4.657084491866013,
      "eval_ag_news_bleu_score_sem": 0.14432158159014946,
      "eval_ag_news_emb_cos_sim": 0.7948165535926819,
      "eval_ag_news_emb_cos_sim_sem": 0.006874925440468204,
      "eval_ag_news_emb_top1_equal": 0.2265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03714537682851538,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.6305880546569824,
      "eval_ag_news_n_ngrams_match_1": 13.744,
      "eval_ag_news_n_ngrams_match_2": 2.902,
      "eval_ag_news_n_ngrams_match_3": 0.8,
      "eval_ag_news_num_pred_words": 46.338,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 37.735000336601885,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3421852298285454,
      "eval_ag_news_runtime": 11.5052,
      "eval_ag_news_samples_per_second": 43.459,
      "eval_ag_news_steps_per_second": 0.087,
      "eval_ag_news_token_set_f1": 0.3420956438696702,
      "eval_ag_news_token_set_f1_sem": 0.004355803102029173,
      "eval_ag_news_token_set_precision": 0.3268219834945953,
      "eval_ag_news_token_set_recall": 0.3716286075695179,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 87500
    },
    {
      "epoch": 16.8,
      "eval_anthropic_toxic_prompts_accuracy": 0.112125,
      "eval_anthropic_toxic_prompts_bleu_score": 2.931454337216971,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11134417270026975,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6524926424026489,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009885640782817376,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.284168004989624,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.072,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.814,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.632,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.672,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.68677181483051,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21189186707560131,
      "eval_anthropic_toxic_prompts_runtime": 10.2872,
      "eval_anthropic_toxic_prompts_samples_per_second": 48.604,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.097,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3447543818932813,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006533334158630998,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.42872770985861525,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3171399078838364,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 87500
    },
    {
      "epoch": 16.8,
      "eval_arxiv_accuracy": 0.33959375,
      "eval_arxiv_bleu_score": 4.250707398541021,
      "eval_arxiv_bleu_score_sem": 0.1153725770113524,
      "eval_arxiv_emb_cos_sim": 0.753612756729126,
      "eval_arxiv_emb_cos_sim_sem": 0.00784480476713385,
      "eval_arxiv_emb_top1_equal": 0.2578125,
      "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4876291751861572,
      "eval_arxiv_n_ngrams_match_1": 14.866,
      "eval_arxiv_n_ngrams_match_2": 2.934,
      "eval_arxiv_n_ngrams_match_3": 0.632,
      "eval_arxiv_num_pred_words": 40.53,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 32.70831003688664,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.35340056514613727,
      "eval_arxiv_runtime": 14.7361,
      "eval_arxiv_samples_per_second": 33.93,
      "eval_arxiv_steps_per_second": 0.068,
      "eval_arxiv_token_set_f1": 0.3462515136679858,
      "eval_arxiv_token_set_f1_sem": 0.004426609603788637,
      "eval_arxiv_token_set_precision": 0.298764043174962,
      "eval_arxiv_token_set_recall": 0.42918116469275047,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 87500
    },
    {
      "epoch": 16.8,
      "eval_python_code_alpaca_accuracy": 0.158,
      "eval_python_code_alpaca_bleu_score": 4.494220265961678,
      "eval_python_code_alpaca_bleu_score_sem": 0.14884272467127235,
      "eval_python_code_alpaca_emb_cos_sim": 0.7626599073410034,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008429878757083476,
      "eval_python_code_alpaca_emb_top1_equal": 0.1640625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.927032470703125,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.688,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.802,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.914,
      "eval_python_code_alpaca_num_pred_words": 43.178,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.672138082918018,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.32646084283444116,
      "eval_python_code_alpaca_runtime": 11.4607,
      "eval_python_code_alpaca_samples_per_second": 43.627,
      "eval_python_code_alpaca_steps_per_second": 0.087,
      "eval_python_code_alpaca_token_set_f1": 0.46112390962471544,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005661878858625047,
      "eval_python_code_alpaca_token_set_precision": 0.52582671037576,
      "eval_python_code_alpaca_token_set_recall": 0.43302542089499885,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 87500
    },
    {
      "epoch": 16.8,
      "eval_wikibio_accuracy": 0.31640625,
      "eval_wikibio_bleu_score": 5.752457003825269,
      "eval_wikibio_bleu_score_sem": 0.20977092202575026,
      "eval_wikibio_emb_cos_sim": 0.7287917733192444,
      "eval_wikibio_emb_cos_sim_sem": 0.01227176880619345,
      "eval_wikibio_emb_top1_equal": 0.1953125,
      "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.8216605186462402,
      "eval_wikibio_n_ngrams_match_1": 9.902,
      "eval_wikibio_n_ngrams_match_2": 3.216,
      "eval_wikibio_n_ngrams_match_3": 1.144,
      "eval_wikibio_num_pred_words": 35.52,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 45.67999786667487,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3464011932803648,
      "eval_wikibio_runtime": 11.3364,
      "eval_wikibio_samples_per_second": 44.106,
      "eval_wikibio_steps_per_second": 0.088,
      "eval_wikibio_token_set_f1": 0.31653711702892806,
      "eval_wikibio_token_set_f1_sem": 0.005471945999342703,
      "eval_wikibio_token_set_precision": 0.321491679501232,
      "eval_wikibio_token_set_recall": 0.32963470436920483,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 87500
    },
    {
      "epoch": 16.8,
      "eval_nq_accuracy": 0.51884375,
      "eval_nq_bleu_score": 11.16567350559356,
      "eval_nq_bleu_score_sem": 0.4698487845390746,
      "eval_nq_emb_cos_sim": 0.8192366361618042,
      "eval_nq_emb_cos_sim_sem": 0.007553513409095885,
      "eval_nq_emb_top1_equal": 0.3125,
      "eval_nq_emb_top1_equal_sem": 0.041130074229814934,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.254143476486206,
      "eval_nq_n_ngrams_match_1": 22.45,
      "eval_nq_n_ngrams_match_2": 8.03,
      "eval_nq_n_ngrams_match_3": 3.676,
      "eval_nq_num_pred_words": 48.618,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.527129603917967,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.43583982559655565,
      "eval_nq_runtime": 10.3907,
      "eval_nq_samples_per_second": 48.12,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.45002597107440806,
      "eval_nq_token_set_f1_sem": 0.004958619229636719,
      "eval_nq_token_set_precision": 0.4060904601311901,
      "eval_nq_token_set_recall": 0.5141355170620853,
      "eval_nq_true_num_tokens": 64.0,
      "step": 87500
    },
    {
      "epoch": 16.8,
      "learning_rate": 0.001,
      "loss": 2.628,
      "step": 87504
    },
    {
      "epoch": 16.8,
      "learning_rate": 0.001,
      "loss": 2.6255,
      "step": 87516
    },
    {
      "epoch": 16.81,
      "learning_rate": 0.001,
      "loss": 2.6279,
      "step": 87528
    },
    {
      "epoch": 16.81,
      "learning_rate": 0.001,
      "loss": 2.6366,
      "step": 87540
    },
    {
      "epoch": 16.81,
      "learning_rate": 0.001,
      "loss": 2.6243,
      "step": 87552
    },
    {
      "epoch": 16.81,
      "learning_rate": 0.001,
      "loss": 2.6164,
      "step": 87564
    },
    {
      "epoch": 16.82,
      "learning_rate": 0.001,
      "loss": 2.6303,
      "step": 87576
    },
    {
      "epoch": 16.82,
      "learning_rate": 0.001,
      "loss": 2.6373,
      "step": 87588
    },
    {
      "epoch": 16.82,
      "learning_rate": 0.001,
      "loss": 2.6401,
      "step": 87600
    },
    {
      "epoch": 16.82,
      "learning_rate": 0.001,
      "loss": 2.6297,
      "step": 87612
    },
    {
      "epoch": 16.82,
      "learning_rate": 0.001,
      "loss": 2.634,
      "step": 87624
    },
    {
      "epoch": 16.83,
      "learning_rate": 0.001,
      "loss": 2.6239,
      "step": 87636
    },
    {
      "epoch": 16.83,
      "learning_rate": 0.001,
      "loss": 2.6227,
      "step": 87648
    },
    {
      "epoch": 16.83,
      "learning_rate": 0.001,
      "loss": 2.6219,
      "step": 87660
    },
    {
      "epoch": 16.83,
      "learning_rate": 0.001,
      "loss": 2.6405,
      "step": 87672
    },
    {
      "epoch": 16.84,
      "learning_rate": 0.001,
      "loss": 2.6322,
      "step": 87684
    },
    {
      "epoch": 16.84,
      "learning_rate": 0.001,
      "loss": 2.6288,
      "step": 87696
    },
    {
      "epoch": 16.84,
      "learning_rate": 0.001,
      "loss": 2.6369,
      "step": 87708
    },
    {
      "epoch": 16.84,
      "learning_rate": 0.001,
      "loss": 2.6275,
      "step": 87720
    },
    {
      "epoch": 16.85,
      "learning_rate": 0.001,
      "loss": 2.6224,
      "step": 87732
    },
    {
      "epoch": 16.85,
      "learning_rate": 0.001,
      "loss": 2.6331,
      "step": 87744
    },
    {
      "epoch": 16.85,
      "learning_rate": 0.001,
      "loss": 2.6277,
      "step": 87756
    },
    {
      "epoch": 16.85,
      "learning_rate": 0.001,
      "loss": 2.6317,
      "step": 87768
    },
    {
      "epoch": 16.85,
      "learning_rate": 0.001,
      "loss": 2.6255,
      "step": 87780
    },
    {
      "epoch": 16.86,
      "learning_rate": 0.001,
      "loss": 2.6251,
      "step": 87792
    },
    {
      "epoch": 16.86,
      "learning_rate": 0.001,
      "loss": 2.62,
      "step": 87804
    },
    {
      "epoch": 16.86,
      "learning_rate": 0.001,
      "loss": 2.6306,
      "step": 87816
    },
    {
      "epoch": 16.86,
      "learning_rate": 0.001,
      "loss": 2.6291,
      "step": 87828
    },
    {
      "epoch": 16.87,
      "learning_rate": 0.001,
      "loss": 2.6276,
      "step": 87840
    },
    {
      "epoch": 16.87,
      "learning_rate": 0.001,
      "loss": 2.629,
      "step": 87852
    },
    {
      "epoch": 16.87,
      "learning_rate": 0.001,
      "loss": 2.6154,
      "step": 87864
    },
    {
      "epoch": 16.87,
      "learning_rate": 0.001,
      "loss": 2.6317,
      "step": 87876
    },
    {
      "epoch": 16.88,
      "learning_rate": 0.001,
      "loss": 2.6317,
      "step": 87888
    },
    {
      "epoch": 16.88,
      "learning_rate": 0.001,
      "loss": 2.6323,
      "step": 87900
    },
    {
      "epoch": 16.88,
      "learning_rate": 0.001,
      "loss": 2.6311,
      "step": 87912
    },
    {
      "epoch": 16.88,
      "learning_rate": 0.001,
      "loss": 2.6283,
      "step": 87924
    },
    {
      "epoch": 16.88,
      "learning_rate": 0.001,
      "loss": 2.6135,
      "step": 87936
    },
    {
      "epoch": 16.89,
      "learning_rate": 0.001,
      "loss": 2.6263,
      "step": 87948
    },
    {
      "epoch": 16.89,
      "learning_rate": 0.001,
      "loss": 2.6272,
      "step": 87960
    },
    {
      "epoch": 16.89,
      "learning_rate": 0.001,
      "loss": 2.6303,
      "step": 87972
    },
    {
      "epoch": 16.89,
      "learning_rate": 0.001,
      "loss": 2.6263,
      "step": 87984
    },
    {
      "epoch": 16.9,
      "learning_rate": 0.001,
      "loss": 2.6301,
      "step": 87996
    },
    {
      "epoch": 16.9,
      "learning_rate": 0.001,
      "loss": 2.631,
      "step": 88008
    },
    {
      "epoch": 16.9,
      "learning_rate": 0.001,
      "loss": 2.6372,
      "step": 88020
    },
    {
      "epoch": 16.9,
      "learning_rate": 0.001,
      "loss": 2.6262,
      "step": 88032
    },
    {
      "epoch": 16.91,
      "learning_rate": 0.001,
      "loss": 2.6334,
      "step": 88044
    },
    {
      "epoch": 16.91,
      "learning_rate": 0.001,
      "loss": 2.6346,
      "step": 88056
    },
    {
      "epoch": 16.91,
      "learning_rate": 0.001,
      "loss": 2.633,
      "step": 88068
    },
    {
      "epoch": 16.91,
      "learning_rate": 0.001,
      "loss": 2.6215,
      "step": 88080
    },
    {
      "epoch": 16.91,
      "learning_rate": 0.001,
      "loss": 2.6256,
      "step": 88092
    },
    {
      "epoch": 16.92,
      "learning_rate": 0.001,
      "loss": 2.6323,
      "step": 88104
    },
    {
      "epoch": 16.92,
      "learning_rate": 0.001,
      "loss": 2.6276,
      "step": 88116
    },
    {
      "epoch": 16.92,
      "eval_ag_news_accuracy": 0.315375,
      "eval_ag_news_bleu_score": 4.761302212620485,
      "eval_ag_news_bleu_score_sem": 0.14603135465010694,
      "eval_ag_news_emb_cos_sim": 0.7959756851196289,
      "eval_ag_news_emb_cos_sim_sem": 0.007182710045708595,
      "eval_ag_news_emb_top1_equal": 0.2421875,
      "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.621840238571167,
      "eval_ag_news_n_ngrams_match_1": 13.6,
      "eval_ag_news_n_ngrams_match_2": 2.924,
      "eval_ag_news_n_ngrams_match_3": 0.884,
      "eval_ag_news_num_pred_words": 46.356,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 37.40634111473802,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3368090266406085,
      "eval_ag_news_runtime": 15.577,
      "eval_ag_news_samples_per_second": 32.099,
      "eval_ag_news_steps_per_second": 0.064,
      "eval_ag_news_token_set_f1": 0.34362525760917834,
      "eval_ag_news_token_set_f1_sem": 0.004277298319728863,
      "eval_ag_news_token_set_precision": 0.3243572906241741,
      "eval_ag_news_token_set_recall": 0.3826852145542097,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 88125
    },
    {
      "epoch": 16.92,
      "eval_anthropic_toxic_prompts_accuracy": 0.1130625,
      "eval_anthropic_toxic_prompts_bleu_score": 2.967610211310427,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11480654972381421,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6533322334289551,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011938517626284858,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.305030584335327,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.978,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.774,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.662,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.214,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 27.249374980078517,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 62.859375,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2077508680093504,
      "eval_anthropic_toxic_prompts_runtime": 10.6444,
      "eval_anthropic_toxic_prompts_samples_per_second": 46.973,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.094,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3402503238126781,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006429198441423453,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.42224335670650015,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3110297696693973,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 88125
    },
    {
      "epoch": 16.92,
      "eval_arxiv_accuracy": 0.3396875,
      "eval_arxiv_bleu_score": 4.195958106552601,
      "eval_arxiv_bleu_score_sem": 0.12024509106008427,
      "eval_arxiv_emb_cos_sim": 0.7449824810028076,
      "eval_arxiv_emb_cos_sim_sem": 0.007879530477584286,
      "eval_arxiv_emb_top1_equal": 0.2890625,
      "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4940834045410156,
      "eval_arxiv_n_ngrams_match_1": 14.8,
      "eval_arxiv_n_ngrams_match_2": 2.834,
      "eval_arxiv_n_ngrams_match_3": 0.624,
      "eval_arxiv_num_pred_words": 40.88,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 32.920099706013254,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.35012523894011904,
      "eval_arxiv_runtime": 12.8273,
      "eval_arxiv_samples_per_second": 38.979,
      "eval_arxiv_steps_per_second": 0.078,
      "eval_arxiv_token_set_f1": 0.3471188638055089,
      "eval_arxiv_token_set_f1_sem": 0.0040905597524204095,
      "eval_arxiv_token_set_precision": 0.29688315243872465,
      "eval_arxiv_token_set_recall": 0.43750842002303675,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 88125
    },
    {
      "epoch": 16.92,
      "eval_python_code_alpaca_accuracy": 0.1600625,
      "eval_python_code_alpaca_bleu_score": 4.494884433419647,
      "eval_python_code_alpaca_bleu_score_sem": 0.14699369630166378,
      "eval_python_code_alpaca_emb_cos_sim": 0.7556917667388916,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008510427311488367,
      "eval_python_code_alpaca_emb_top1_equal": 0.15625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.9216811656951904,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.598,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.85,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.908,
      "eval_python_code_alpaca_num_pred_words": 43.852,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.572484652646338,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.32320164210912244,
      "eval_python_code_alpaca_runtime": 11.04,
      "eval_python_code_alpaca_samples_per_second": 45.29,
      "eval_python_code_alpaca_steps_per_second": 0.091,
      "eval_python_code_alpaca_token_set_f1": 0.4687875625658835,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005874725154574615,
      "eval_python_code_alpaca_token_set_precision": 0.5253247145040438,
      "eval_python_code_alpaca_token_set_recall": 0.4462019786189241,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 88125
    },
    {
      "epoch": 16.92,
      "eval_wikibio_accuracy": 0.31334375,
      "eval_wikibio_bleu_score": 5.572736817357021,
      "eval_wikibio_bleu_score_sem": 0.19615787657738634,
      "eval_wikibio_emb_cos_sim": 0.745276927947998,
      "eval_wikibio_emb_cos_sim_sem": 0.008959668594132462,
      "eval_wikibio_emb_top1_equal": 0.1875,
      "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.85088849067688,
      "eval_wikibio_n_ngrams_match_1": 9.876,
      "eval_wikibio_n_ngrams_match_2": 3.224,
      "eval_wikibio_n_ngrams_match_3": 1.174,
      "eval_wikibio_num_pred_words": 37.112,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 47.034834684163734,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3459333687339124,
      "eval_wikibio_runtime": 11.4967,
      "eval_wikibio_samples_per_second": 43.491,
      "eval_wikibio_steps_per_second": 0.087,
      "eval_wikibio_token_set_f1": 0.3111855269544557,
      "eval_wikibio_token_set_f1_sem": 0.005510770828876851,
      "eval_wikibio_token_set_precision": 0.3218343067096335,
      "eval_wikibio_token_set_recall": 0.31837329079144966,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 88125
    },
    {
      "epoch": 16.92,
      "eval_nq_accuracy": 0.5205,
      "eval_nq_bleu_score": 11.412114587487824,
      "eval_nq_bleu_score_sem": 0.47907245164006523,
      "eval_nq_emb_cos_sim": 0.8303395509719849,
      "eval_nq_emb_cos_sim_sem": 0.006778473694740039,
      "eval_nq_emb_top1_equal": 0.296875,
      "eval_nq_emb_top1_equal_sem": 0.04054163310179599,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.252239465713501,
      "eval_nq_n_ngrams_match_1": 22.816,
      "eval_nq_n_ngrams_match_2": 8.274,
      "eval_nq_n_ngrams_match_3": 3.752,
      "eval_nq_num_pred_words": 49.164,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.509007104710902,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4398628779804204,
      "eval_nq_runtime": 11.2926,
      "eval_nq_samples_per_second": 44.277,
      "eval_nq_steps_per_second": 0.089,
      "eval_nq_token_set_f1": 0.45623160835884286,
      "eval_nq_token_set_f1_sem": 0.00501310246656571,
      "eval_nq_token_set_precision": 0.41365505697784927,
      "eval_nq_token_set_recall": 0.5164852625746719,
      "eval_nq_true_num_tokens": 64.0,
      "step": 88125
    },
    {
      "epoch": 16.92,
      "learning_rate": 0.001,
      "loss": 2.6295,
      "step": 88128
    },
    {
      "epoch": 16.92,
      "learning_rate": 0.001,
      "loss": 2.6264,
      "step": 88140
    },
    {
      "epoch": 16.93,
      "learning_rate": 0.001,
      "loss": 2.6211,
      "step": 88152
    },
    {
      "epoch": 16.93,
      "learning_rate": 0.001,
      "loss": 2.6302,
      "step": 88164
    },
    {
      "epoch": 16.93,
      "learning_rate": 0.001,
      "loss": 2.6378,
      "step": 88176
    },
    {
      "epoch": 16.93,
      "learning_rate": 0.001,
      "loss": 2.6219,
      "step": 88188
    },
    {
      "epoch": 16.94,
      "learning_rate": 0.001,
      "loss": 2.6288,
      "step": 88200
    },
    {
      "epoch": 16.94,
      "learning_rate": 0.001,
      "loss": 2.6334,
      "step": 88212
    },
    {
      "epoch": 16.94,
      "learning_rate": 0.001,
      "loss": 2.6375,
      "step": 88224
    },
    {
      "epoch": 16.94,
      "learning_rate": 0.001,
      "loss": 2.6304,
      "step": 88236
    },
    {
      "epoch": 16.94,
      "learning_rate": 0.001,
      "loss": 2.6238,
      "step": 88248
    },
    {
      "epoch": 16.95,
      "learning_rate": 0.001,
      "loss": 2.6209,
      "step": 88260
    },
    {
      "epoch": 16.95,
      "learning_rate": 0.001,
      "loss": 2.6316,
      "step": 88272
    },
    {
      "epoch": 16.95,
      "learning_rate": 0.001,
      "loss": 2.6296,
      "step": 88284
    },
    {
      "epoch": 16.95,
      "learning_rate": 0.001,
      "loss": 2.6196,
      "step": 88296
    },
    {
      "epoch": 16.96,
      "learning_rate": 0.001,
      "loss": 2.6317,
      "step": 88308
    },
    {
      "epoch": 16.96,
      "learning_rate": 0.001,
      "loss": 2.6374,
      "step": 88320
    },
    {
      "epoch": 16.96,
      "learning_rate": 0.001,
      "loss": 2.6376,
      "step": 88332
    },
    {
      "epoch": 16.96,
      "learning_rate": 0.001,
      "loss": 2.6301,
      "step": 88344
    },
    {
      "epoch": 16.97,
      "learning_rate": 0.001,
      "loss": 2.6365,
      "step": 88356
    },
    {
      "epoch": 16.97,
      "learning_rate": 0.001,
      "loss": 2.6311,
      "step": 88368
    },
    {
      "epoch": 16.97,
      "learning_rate": 0.001,
      "loss": 2.6262,
      "step": 88380
    },
    {
      "epoch": 16.97,
      "learning_rate": 0.001,
      "loss": 2.6271,
      "step": 88392
    },
    {
      "epoch": 16.97,
      "learning_rate": 0.001,
      "loss": 2.6346,
      "step": 88404
    },
    {
      "epoch": 16.98,
      "learning_rate": 0.001,
      "loss": 2.6248,
      "step": 88416
    },
    {
      "epoch": 16.98,
      "learning_rate": 0.001,
      "loss": 2.6256,
      "step": 88428
    },
    {
      "epoch": 16.98,
      "learning_rate": 0.001,
      "loss": 2.63,
      "step": 88440
    },
    {
      "epoch": 16.98,
      "learning_rate": 0.001,
      "loss": 2.6346,
      "step": 88452
    },
    {
      "epoch": 16.99,
      "learning_rate": 0.001,
      "loss": 2.6271,
      "step": 88464
    },
    {
      "epoch": 16.99,
      "learning_rate": 0.001,
      "loss": 2.6324,
      "step": 88476
    },
    {
      "epoch": 16.99,
      "learning_rate": 0.001,
      "loss": 2.6363,
      "step": 88488
    },
    {
      "epoch": 16.99,
      "learning_rate": 0.001,
      "loss": 2.6304,
      "step": 88500
    },
    {
      "epoch": 17.0,
      "learning_rate": 0.001,
      "loss": 2.6291,
      "step": 88512
    },
    {
      "epoch": 17.0,
      "learning_rate": 0.001,
      "loss": 2.6306,
      "step": 88524
    },
    {
      "epoch": 17.0,
      "learning_rate": 0.001,
      "loss": 2.6415,
      "step": 88536
    },
    {
      "epoch": 17.0,
      "learning_rate": 0.001,
      "loss": 2.602,
      "step": 88548
    },
    {
      "epoch": 17.0,
      "learning_rate": 0.001,
      "loss": 2.6149,
      "step": 88560
    },
    {
      "epoch": 17.01,
      "learning_rate": 0.001,
      "loss": 2.6113,
      "step": 88572
    },
    {
      "epoch": 17.01,
      "learning_rate": 0.001,
      "loss": 2.6142,
      "step": 88584
    },
    {
      "epoch": 17.01,
      "learning_rate": 0.001,
      "loss": 2.6126,
      "step": 88596
    },
    {
      "epoch": 17.01,
      "learning_rate": 0.001,
      "loss": 2.6136,
      "step": 88608
    },
    {
      "epoch": 17.02,
      "learning_rate": 0.001,
      "loss": 2.6139,
      "step": 88620
    },
    {
      "epoch": 17.02,
      "learning_rate": 0.001,
      "loss": 2.6216,
      "step": 88632
    },
    {
      "epoch": 17.02,
      "learning_rate": 0.001,
      "loss": 2.6034,
      "step": 88644
    },
    {
      "epoch": 17.02,
      "learning_rate": 0.001,
      "loss": 2.6078,
      "step": 88656
    },
    {
      "epoch": 17.03,
      "learning_rate": 0.001,
      "loss": 2.6058,
      "step": 88668
    },
    {
      "epoch": 17.03,
      "learning_rate": 0.001,
      "loss": 2.6126,
      "step": 88680
    },
    {
      "epoch": 17.03,
      "learning_rate": 0.001,
      "loss": 2.6021,
      "step": 88692
    },
    {
      "epoch": 17.03,
      "learning_rate": 0.001,
      "loss": 2.6197,
      "step": 88704
    },
    {
      "epoch": 17.03,
      "learning_rate": 0.001,
      "loss": 2.6012,
      "step": 88716
    },
    {
      "epoch": 17.04,
      "learning_rate": 0.001,
      "loss": 2.5997,
      "step": 88728
    },
    {
      "epoch": 17.04,
      "learning_rate": 0.001,
      "loss": 2.6143,
      "step": 88740
    },
    {
      "epoch": 17.04,
      "eval_ag_news_accuracy": 0.3156875,
      "eval_ag_news_bleu_score": 4.6675147192186595,
      "eval_ag_news_bleu_score_sem": 0.1491365633011962,
      "eval_ag_news_emb_cos_sim": 0.8023483753204346,
      "eval_ag_news_emb_cos_sim_sem": 0.007222164105726348,
      "eval_ag_news_emb_top1_equal": 0.25,
      "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.63279390335083,
      "eval_ag_news_n_ngrams_match_1": 13.742,
      "eval_ag_news_n_ngrams_match_2": 2.93,
      "eval_ag_news_n_ngrams_match_3": 0.828,
      "eval_ag_news_num_pred_words": 46.644,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 37.81832991023377,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.33985919669849074,
      "eval_ag_news_runtime": 11.646,
      "eval_ag_news_samples_per_second": 42.933,
      "eval_ag_news_steps_per_second": 0.086,
      "eval_ag_news_token_set_f1": 0.3448987879333023,
      "eval_ag_news_token_set_f1_sem": 0.004097661930618755,
      "eval_ag_news_token_set_precision": 0.32733849255868835,
      "eval_ag_news_token_set_recall": 0.3814584780406689,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 88750
    },
    {
      "epoch": 17.04,
      "eval_anthropic_toxic_prompts_accuracy": 0.11065625,
      "eval_anthropic_toxic_prompts_bleu_score": 2.987299517984093,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11847478846882847,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6587800979614258,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009545800865106417,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.337388753890991,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.004,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.78,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.656,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.826,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 28.145535744470337,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 62.875,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20851663610810683,
      "eval_anthropic_toxic_prompts_runtime": 9.6327,
      "eval_anthropic_toxic_prompts_samples_per_second": 51.907,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.104,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3444497014907716,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006509039386899776,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.42286864078268754,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.31930339771790134,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 88750
    },
    {
      "epoch": 17.04,
      "eval_arxiv_accuracy": 0.34165625,
      "eval_arxiv_bleu_score": 4.162906050002206,
      "eval_arxiv_bleu_score_sem": 0.1149602719791407,
      "eval_arxiv_emb_cos_sim": 0.7473907470703125,
      "eval_arxiv_emb_cos_sim_sem": 0.007669048457576373,
      "eval_arxiv_emb_top1_equal": 0.265625,
      "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.48599910736084,
      "eval_arxiv_n_ngrams_match_1": 14.626,
      "eval_arxiv_n_ngrams_match_2": 2.796,
      "eval_arxiv_n_ngrams_match_3": 0.618,
      "eval_arxiv_num_pred_words": 40.098,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 32.655036704443724,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.34754595692208085,
      "eval_arxiv_runtime": 10.1581,
      "eval_arxiv_samples_per_second": 49.222,
      "eval_arxiv_steps_per_second": 0.098,
      "eval_arxiv_token_set_f1": 0.3436310725683357,
      "eval_arxiv_token_set_f1_sem": 0.004235325662792161,
      "eval_arxiv_token_set_precision": 0.29311578404507777,
      "eval_arxiv_token_set_recall": 0.4329247457494549,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 88750
    },
    {
      "epoch": 17.04,
      "eval_python_code_alpaca_accuracy": 0.15725,
      "eval_python_code_alpaca_bleu_score": 4.575335067226012,
      "eval_python_code_alpaca_bleu_score_sem": 0.15995974271436172,
      "eval_python_code_alpaca_emb_cos_sim": 0.7575218677520752,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007072569135292994,
      "eval_python_code_alpaca_emb_top1_equal": 0.140625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.940490484237671,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.76,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.866,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.948,
      "eval_python_code_alpaca_num_pred_words": 44.686,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.92512651242331,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.32253498688181537,
      "eval_python_code_alpaca_runtime": 9.3878,
      "eval_python_code_alpaca_samples_per_second": 53.261,
      "eval_python_code_alpaca_steps_per_second": 0.107,
      "eval_python_code_alpaca_token_set_f1": 0.46720290052314467,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005382000646208587,
      "eval_python_code_alpaca_token_set_precision": 0.5306423808027152,
      "eval_python_code_alpaca_token_set_recall": 0.43688671655481814,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 88750
    },
    {
      "epoch": 17.04,
      "eval_wikibio_accuracy": 0.3148125,
      "eval_wikibio_bleu_score": 5.638334766672394,
      "eval_wikibio_bleu_score_sem": 0.20056598930431221,
      "eval_wikibio_emb_cos_sim": 0.7304085493087769,
      "eval_wikibio_emb_cos_sim_sem": 0.009793156767749807,
      "eval_wikibio_emb_top1_equal": 0.1875,
      "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.82354474067688,
      "eval_wikibio_n_ngrams_match_1": 9.906,
      "eval_wikibio_n_ngrams_match_2": 3.216,
      "eval_wikibio_n_ngrams_match_3": 1.162,
      "eval_wikibio_num_pred_words": 36.65,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 45.766150264649106,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.34350377350783534,
      "eval_wikibio_runtime": 9.4401,
      "eval_wikibio_samples_per_second": 52.965,
      "eval_wikibio_steps_per_second": 0.106,
      "eval_wikibio_token_set_f1": 0.31244307355739775,
      "eval_wikibio_token_set_f1_sem": 0.005371737001507651,
      "eval_wikibio_token_set_precision": 0.321914726382962,
      "eval_wikibio_token_set_recall": 0.3210492601335306,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 88750
    },
    {
      "epoch": 17.04,
      "eval_nq_accuracy": 0.51915625,
      "eval_nq_bleu_score": 11.011399486763613,
      "eval_nq_bleu_score_sem": 0.4680170479662032,
      "eval_nq_emb_cos_sim": 0.823344349861145,
      "eval_nq_emb_cos_sim_sem": 0.007228062688884349,
      "eval_nq_emb_top1_equal": 0.28125,
      "eval_nq_emb_top1_equal_sem": 0.039896367485272234,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.2518482208251953,
      "eval_nq_n_ngrams_match_1": 22.434,
      "eval_nq_n_ngrams_match_2": 7.982,
      "eval_nq_n_ngrams_match_3": 3.602,
      "eval_nq_num_pred_words": 48.856,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.505287481977458,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.43349728674212645,
      "eval_nq_runtime": 10.1848,
      "eval_nq_samples_per_second": 49.093,
      "eval_nq_steps_per_second": 0.098,
      "eval_nq_token_set_f1": 0.4489598693999021,
      "eval_nq_token_set_f1_sem": 0.004888694817836205,
      "eval_nq_token_set_precision": 0.40569028540608576,
      "eval_nq_token_set_recall": 0.5106004254677634,
      "eval_nq_true_num_tokens": 64.0,
      "step": 88750
    },
    {
      "epoch": 17.04,
      "learning_rate": 0.001,
      "loss": 2.6088,
      "step": 88752
    },
    {
      "epoch": 17.04,
      "learning_rate": 0.001,
      "loss": 2.6073,
      "step": 88764
    },
    {
      "epoch": 17.05,
      "learning_rate": 0.001,
      "loss": 2.619,
      "step": 88776
    },
    {
      "epoch": 17.05,
      "learning_rate": 0.001,
      "loss": 2.6128,
      "step": 88788
    },
    {
      "epoch": 17.05,
      "learning_rate": 0.001,
      "loss": 2.6111,
      "step": 88800
    },
    {
      "epoch": 17.05,
      "learning_rate": 0.001,
      "loss": 2.6096,
      "step": 88812
    },
    {
      "epoch": 17.06,
      "learning_rate": 0.001,
      "loss": 2.6104,
      "step": 88824
    },
    {
      "epoch": 17.06,
      "learning_rate": 0.001,
      "loss": 2.6071,
      "step": 88836
    },
    {
      "epoch": 17.06,
      "learning_rate": 0.001,
      "loss": 2.612,
      "step": 88848
    },
    {
      "epoch": 17.06,
      "learning_rate": 0.001,
      "loss": 2.6068,
      "step": 88860
    },
    {
      "epoch": 17.06,
      "learning_rate": 0.001,
      "loss": 2.624,
      "step": 88872
    },
    {
      "epoch": 17.07,
      "learning_rate": 0.001,
      "loss": 2.6137,
      "step": 88884
    },
    {
      "epoch": 17.07,
      "learning_rate": 0.001,
      "loss": 2.6182,
      "step": 88896
    },
    {
      "epoch": 17.07,
      "learning_rate": 0.001,
      "loss": 2.6166,
      "step": 88908
    },
    {
      "epoch": 17.07,
      "learning_rate": 0.001,
      "loss": 2.6157,
      "step": 88920
    },
    {
      "epoch": 17.08,
      "learning_rate": 0.001,
      "loss": 2.6145,
      "step": 88932
    },
    {
      "epoch": 17.08,
      "learning_rate": 0.001,
      "loss": 2.62,
      "step": 88944
    },
    {
      "epoch": 17.08,
      "learning_rate": 0.001,
      "loss": 2.6154,
      "step": 88956
    },
    {
      "epoch": 17.08,
      "learning_rate": 0.001,
      "loss": 2.6093,
      "step": 88968
    },
    {
      "epoch": 17.09,
      "learning_rate": 0.001,
      "loss": 2.6231,
      "step": 88980
    },
    {
      "epoch": 17.09,
      "learning_rate": 0.001,
      "loss": 2.6107,
      "step": 88992
    },
    {
      "epoch": 17.09,
      "learning_rate": 0.001,
      "loss": 2.6167,
      "step": 89004
    },
    {
      "epoch": 17.09,
      "learning_rate": 0.001,
      "loss": 2.6161,
      "step": 89016
    },
    {
      "epoch": 17.09,
      "learning_rate": 0.001,
      "loss": 2.6162,
      "step": 89028
    },
    {
      "epoch": 17.1,
      "learning_rate": 0.001,
      "loss": 2.6138,
      "step": 89040
    },
    {
      "epoch": 17.1,
      "learning_rate": 0.001,
      "loss": 2.6074,
      "step": 89052
    },
    {
      "epoch": 17.1,
      "learning_rate": 0.001,
      "loss": 2.6065,
      "step": 89064
    },
    {
      "epoch": 17.1,
      "learning_rate": 0.001,
      "loss": 2.6103,
      "step": 89076
    },
    {
      "epoch": 17.11,
      "learning_rate": 0.001,
      "loss": 2.6114,
      "step": 89088
    },
    {
      "epoch": 17.11,
      "learning_rate": 0.001,
      "loss": 2.6103,
      "step": 89100
    },
    {
      "epoch": 17.11,
      "learning_rate": 0.001,
      "loss": 2.6131,
      "step": 89112
    },
    {
      "epoch": 17.11,
      "learning_rate": 0.001,
      "loss": 2.6238,
      "step": 89124
    },
    {
      "epoch": 17.12,
      "learning_rate": 0.001,
      "loss": 2.6013,
      "step": 89136
    },
    {
      "epoch": 17.12,
      "learning_rate": 0.001,
      "loss": 2.6099,
      "step": 89148
    },
    {
      "epoch": 17.12,
      "learning_rate": 0.001,
      "loss": 2.6164,
      "step": 89160
    },
    {
      "epoch": 17.12,
      "learning_rate": 0.001,
      "loss": 2.6151,
      "step": 89172
    },
    {
      "epoch": 17.12,
      "learning_rate": 0.001,
      "loss": 2.6078,
      "step": 89184
    },
    {
      "epoch": 17.13,
      "learning_rate": 0.001,
      "loss": 2.6167,
      "step": 89196
    },
    {
      "epoch": 17.13,
      "learning_rate": 0.001,
      "loss": 2.6181,
      "step": 89208
    },
    {
      "epoch": 17.13,
      "learning_rate": 0.001,
      "loss": 2.6267,
      "step": 89220
    },
    {
      "epoch": 17.13,
      "learning_rate": 0.001,
      "loss": 2.6087,
      "step": 89232
    },
    {
      "epoch": 17.14,
      "learning_rate": 0.001,
      "loss": 2.6252,
      "step": 89244
    },
    {
      "epoch": 17.14,
      "learning_rate": 0.001,
      "loss": 2.6192,
      "step": 89256
    },
    {
      "epoch": 17.14,
      "learning_rate": 0.001,
      "loss": 2.6178,
      "step": 89268
    },
    {
      "epoch": 17.14,
      "learning_rate": 0.001,
      "loss": 2.6094,
      "step": 89280
    },
    {
      "epoch": 17.15,
      "learning_rate": 0.001,
      "loss": 2.6052,
      "step": 89292
    },
    {
      "epoch": 17.15,
      "learning_rate": 0.001,
      "loss": 2.604,
      "step": 89304
    },
    {
      "epoch": 17.15,
      "learning_rate": 0.001,
      "loss": 2.6255,
      "step": 89316
    },
    {
      "epoch": 17.15,
      "learning_rate": 0.001,
      "loss": 2.6256,
      "step": 89328
    },
    {
      "epoch": 17.15,
      "learning_rate": 0.001,
      "loss": 2.6251,
      "step": 89340
    },
    {
      "epoch": 17.16,
      "learning_rate": 0.001,
      "loss": 2.6134,
      "step": 89352
    },
    {
      "epoch": 17.16,
      "learning_rate": 0.001,
      "loss": 2.6114,
      "step": 89364
    },
    {
      "epoch": 17.16,
      "eval_ag_news_accuracy": 0.31690625,
      "eval_ag_news_bleu_score": 4.694297971392216,
      "eval_ag_news_bleu_score_sem": 0.14418469822231234,
      "eval_ag_news_emb_cos_sim": 0.7981653213500977,
      "eval_ag_news_emb_cos_sim_sem": 0.0077829595353001765,
      "eval_ag_news_emb_top1_equal": 0.2421875,
      "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.63482666015625,
      "eval_ag_news_n_ngrams_match_1": 13.67,
      "eval_ag_news_n_ngrams_match_2": 2.96,
      "eval_ag_news_n_ngrams_match_3": 0.872,
      "eval_ag_news_num_pred_words": 46.352,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 37.89528356526304,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3386531153738642,
      "eval_ag_news_runtime": 9.8397,
      "eval_ag_news_samples_per_second": 50.814,
      "eval_ag_news_steps_per_second": 0.102,
      "eval_ag_news_token_set_f1": 0.3437386125918118,
      "eval_ag_news_token_set_f1_sem": 0.004223226291789117,
      "eval_ag_news_token_set_precision": 0.32422626731649756,
      "eval_ag_news_token_set_recall": 0.38526324839311427,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 89375
    },
    {
      "epoch": 17.16,
      "eval_anthropic_toxic_prompts_accuracy": 0.11253125,
      "eval_anthropic_toxic_prompts_bleu_score": 2.804244444386395,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.106576171121192,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6520543694496155,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010191310801642635,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.304748296737671,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.91,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.688,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.592,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.022,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 27.24168390507637,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20585357014902225,
      "eval_anthropic_toxic_prompts_runtime": 9.876,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.628,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.101,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3465561970843481,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006598468124445643,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4153035290859674,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32719297589939617,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 89375
    },
    {
      "epoch": 17.16,
      "eval_arxiv_accuracy": 0.33596875,
      "eval_arxiv_bleu_score": 4.1826069297446375,
      "eval_arxiv_bleu_score_sem": 0.12194631517516684,
      "eval_arxiv_emb_cos_sim": 0.7566304206848145,
      "eval_arxiv_emb_cos_sim_sem": 0.006831079547126714,
      "eval_arxiv_emb_top1_equal": 0.2890625,
      "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4935834407806396,
      "eval_arxiv_n_ngrams_match_1": 14.622,
      "eval_arxiv_n_ngrams_match_2": 2.816,
      "eval_arxiv_n_ngrams_match_3": 0.632,
      "eval_arxiv_num_pred_words": 40.368,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 32.90364496290266,
      "eval_arxiv_pred_num_tokens": 62.9296875,
      "eval_arxiv_rouge_score": 0.34803102864349195,
      "eval_arxiv_runtime": 10.1579,
      "eval_arxiv_samples_per_second": 49.223,
      "eval_arxiv_steps_per_second": 0.098,
      "eval_arxiv_token_set_f1": 0.3420449944327993,
      "eval_arxiv_token_set_f1_sem": 0.004321572434466679,
      "eval_arxiv_token_set_precision": 0.29222614572001665,
      "eval_arxiv_token_set_recall": 0.4304774748558225,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 89375
    },
    {
      "epoch": 17.16,
      "eval_python_code_alpaca_accuracy": 0.15809375,
      "eval_python_code_alpaca_bleu_score": 4.121970189101972,
      "eval_python_code_alpaca_bleu_score_sem": 0.1261764158826135,
      "eval_python_code_alpaca_emb_cos_sim": 0.7409356832504272,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010704806271930604,
      "eval_python_code_alpaca_emb_top1_equal": 0.125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.932617425918579,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.44,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.642,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.784,
      "eval_python_code_alpaca_num_pred_words": 43.396,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.7767128888696,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.31756882108525736,
      "eval_python_code_alpaca_runtime": 10.9009,
      "eval_python_code_alpaca_samples_per_second": 45.868,
      "eval_python_code_alpaca_steps_per_second": 0.092,
      "eval_python_code_alpaca_token_set_f1": 0.4612355606578279,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005953285676128513,
      "eval_python_code_alpaca_token_set_precision": 0.515498659707188,
      "eval_python_code_alpaca_token_set_recall": 0.44057147854144446,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 89375
    },
    {
      "epoch": 17.16,
      "eval_wikibio_accuracy": 0.3150625,
      "eval_wikibio_bleu_score": 5.533098819145493,
      "eval_wikibio_bleu_score_sem": 0.20389396864023673,
      "eval_wikibio_emb_cos_sim": 0.732222318649292,
      "eval_wikibio_emb_cos_sim_sem": 0.009723669126900994,
      "eval_wikibio_emb_top1_equal": 0.1875,
      "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.8134427070617676,
      "eval_wikibio_n_ngrams_match_1": 9.482,
      "eval_wikibio_n_ngrams_match_2": 3.156,
      "eval_wikibio_n_ngrams_match_3": 1.144,
      "eval_wikibio_num_pred_words": 35.762,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 45.30614647509766,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3319803673388446,
      "eval_wikibio_runtime": 9.9792,
      "eval_wikibio_samples_per_second": 50.104,
      "eval_wikibio_steps_per_second": 0.1,
      "eval_wikibio_token_set_f1": 0.2993725549059298,
      "eval_wikibio_token_set_f1_sem": 0.00593470346574923,
      "eval_wikibio_token_set_precision": 0.3055587030199752,
      "eval_wikibio_token_set_recall": 0.3096231074058095,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 89375
    },
    {
      "epoch": 17.16,
      "eval_nq_accuracy": 0.5186875,
      "eval_nq_bleu_score": 11.268336451329896,
      "eval_nq_bleu_score_sem": 0.47553609192078244,
      "eval_nq_emb_cos_sim": 0.8264628648757935,
      "eval_nq_emb_cos_sim_sem": 0.007219576684354484,
      "eval_nq_emb_top1_equal": 0.234375,
      "eval_nq_emb_top1_equal_sem": 0.03758909358128201,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.2470948696136475,
      "eval_nq_n_ngrams_match_1": 22.698,
      "eval_nq_n_ngrams_match_2": 8.252,
      "eval_nq_n_ngrams_match_3": 3.73,
      "eval_nq_num_pred_words": 49.194,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.460212725153399,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4379513194953767,
      "eval_nq_runtime": 10.5909,
      "eval_nq_samples_per_second": 47.21,
      "eval_nq_steps_per_second": 0.094,
      "eval_nq_token_set_f1": 0.45475631645148573,
      "eval_nq_token_set_f1_sem": 0.004952126246909426,
      "eval_nq_token_set_precision": 0.4104870432286527,
      "eval_nq_token_set_recall": 0.5182013174448464,
      "eval_nq_true_num_tokens": 64.0,
      "step": 89375
    },
    {
      "epoch": 17.16,
      "learning_rate": 0.001,
      "loss": 2.6117,
      "step": 89376
    },
    {
      "epoch": 17.16,
      "learning_rate": 0.001,
      "loss": 2.6114,
      "step": 89388
    },
    {
      "epoch": 17.17,
      "learning_rate": 0.001,
      "loss": 2.606,
      "step": 89400
    },
    {
      "epoch": 17.17,
      "learning_rate": 0.001,
      "loss": 2.6149,
      "step": 89412
    },
    {
      "epoch": 17.17,
      "learning_rate": 0.001,
      "loss": 2.6144,
      "step": 89424
    },
    {
      "epoch": 17.17,
      "learning_rate": 0.001,
      "loss": 2.6201,
      "step": 89436
    },
    {
      "epoch": 17.18,
      "learning_rate": 0.001,
      "loss": 2.6128,
      "step": 89448
    },
    {
      "epoch": 17.18,
      "learning_rate": 0.001,
      "loss": 2.6205,
      "step": 89460
    },
    {
      "epoch": 17.18,
      "learning_rate": 0.001,
      "loss": 2.6242,
      "step": 89472
    },
    {
      "epoch": 17.18,
      "learning_rate": 0.001,
      "loss": 2.6156,
      "step": 89484
    },
    {
      "epoch": 17.18,
      "learning_rate": 0.001,
      "loss": 2.6061,
      "step": 89496
    },
    {
      "epoch": 17.19,
      "learning_rate": 0.001,
      "loss": 2.6138,
      "step": 89508
    },
    {
      "epoch": 17.19,
      "learning_rate": 0.001,
      "loss": 2.6254,
      "step": 89520
    },
    {
      "epoch": 17.19,
      "learning_rate": 0.001,
      "loss": 2.6192,
      "step": 89532
    },
    {
      "epoch": 17.19,
      "learning_rate": 0.001,
      "loss": 2.6137,
      "step": 89544
    },
    {
      "epoch": 17.2,
      "learning_rate": 0.001,
      "loss": 2.6167,
      "step": 89556
    },
    {
      "epoch": 17.2,
      "learning_rate": 0.001,
      "loss": 2.6159,
      "step": 89568
    },
    {
      "epoch": 17.2,
      "learning_rate": 0.001,
      "loss": 2.6213,
      "step": 89580
    },
    {
      "epoch": 17.2,
      "learning_rate": 0.001,
      "loss": 2.6192,
      "step": 89592
    },
    {
      "epoch": 17.21,
      "learning_rate": 0.001,
      "loss": 2.6202,
      "step": 89604
    },
    {
      "epoch": 17.21,
      "learning_rate": 0.001,
      "loss": 2.6054,
      "step": 89616
    },
    {
      "epoch": 17.21,
      "learning_rate": 0.001,
      "loss": 2.6115,
      "step": 89628
    },
    {
      "epoch": 17.21,
      "learning_rate": 0.001,
      "loss": 2.6216,
      "step": 89640
    },
    {
      "epoch": 17.21,
      "learning_rate": 0.001,
      "loss": 2.621,
      "step": 89652
    },
    {
      "epoch": 17.22,
      "learning_rate": 0.001,
      "loss": 2.6148,
      "step": 89664
    },
    {
      "epoch": 17.22,
      "learning_rate": 0.001,
      "loss": 2.6173,
      "step": 89676
    },
    {
      "epoch": 17.22,
      "learning_rate": 0.001,
      "loss": 2.6144,
      "step": 89688
    },
    {
      "epoch": 17.22,
      "learning_rate": 0.001,
      "loss": 2.6248,
      "step": 89700
    },
    {
      "epoch": 17.23,
      "learning_rate": 0.001,
      "loss": 2.6197,
      "step": 89712
    },
    {
      "epoch": 17.23,
      "learning_rate": 0.001,
      "loss": 2.6108,
      "step": 89724
    },
    {
      "epoch": 17.23,
      "learning_rate": 0.001,
      "loss": 2.6146,
      "step": 89736
    },
    {
      "epoch": 17.23,
      "learning_rate": 0.001,
      "loss": 2.6135,
      "step": 89748
    },
    {
      "epoch": 17.24,
      "learning_rate": 0.001,
      "loss": 2.607,
      "step": 89760
    },
    {
      "epoch": 17.24,
      "learning_rate": 0.001,
      "loss": 2.6227,
      "step": 89772
    },
    {
      "epoch": 17.24,
      "learning_rate": 0.001,
      "loss": 2.6101,
      "step": 89784
    },
    {
      "epoch": 17.24,
      "learning_rate": 0.001,
      "loss": 2.6225,
      "step": 89796
    },
    {
      "epoch": 17.24,
      "learning_rate": 0.001,
      "loss": 2.6107,
      "step": 89808
    },
    {
      "epoch": 17.25,
      "learning_rate": 0.001,
      "loss": 2.6199,
      "step": 89820
    },
    {
      "epoch": 17.25,
      "learning_rate": 0.001,
      "loss": 2.6206,
      "step": 89832
    },
    {
      "epoch": 17.25,
      "learning_rate": 0.001,
      "loss": 2.6202,
      "step": 89844
    },
    {
      "epoch": 17.25,
      "learning_rate": 0.001,
      "loss": 2.607,
      "step": 89856
    },
    {
      "epoch": 17.26,
      "learning_rate": 0.001,
      "loss": 2.6057,
      "step": 89868
    },
    {
      "epoch": 17.26,
      "learning_rate": 0.001,
      "loss": 2.6111,
      "step": 89880
    },
    {
      "epoch": 17.26,
      "learning_rate": 0.001,
      "loss": 2.6072,
      "step": 89892
    },
    {
      "epoch": 17.26,
      "learning_rate": 0.001,
      "loss": 2.617,
      "step": 89904
    },
    {
      "epoch": 17.26,
      "learning_rate": 0.001,
      "loss": 2.6185,
      "step": 89916
    },
    {
      "epoch": 17.27,
      "learning_rate": 0.001,
      "loss": 2.6171,
      "step": 89928
    },
    {
      "epoch": 17.27,
      "learning_rate": 0.001,
      "loss": 2.6158,
      "step": 89940
    },
    {
      "epoch": 17.27,
      "learning_rate": 0.001,
      "loss": 2.6148,
      "step": 89952
    },
    {
      "epoch": 17.27,
      "learning_rate": 0.001,
      "loss": 2.6203,
      "step": 89964
    },
    {
      "epoch": 17.28,
      "learning_rate": 0.001,
      "loss": 2.6193,
      "step": 89976
    },
    {
      "epoch": 17.28,
      "learning_rate": 0.001,
      "loss": 2.6283,
      "step": 89988
    },
    {
      "epoch": 17.28,
      "learning_rate": 0.001,
      "loss": 2.6204,
      "step": 90000
    },
    {
      "epoch": 17.28,
      "eval_ag_news_accuracy": 0.31640625,
      "eval_ag_news_bleu_score": 4.7260294896508785,
      "eval_ag_news_bleu_score_sem": 0.14776170162887764,
      "eval_ag_news_emb_cos_sim": 0.795169472694397,
      "eval_ag_news_emb_cos_sim_sem": 0.007596495950587409,
      "eval_ag_news_emb_top1_equal": 0.21875,
      "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.6366219520568848,
      "eval_ag_news_n_ngrams_match_1": 13.712,
      "eval_ag_news_n_ngrams_match_2": 2.95,
      "eval_ag_news_n_ngrams_match_3": 0.852,
      "eval_ag_news_num_pred_words": 46.868,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 37.96337776711517,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3375020792953003,
      "eval_ag_news_runtime": 10.7034,
      "eval_ag_news_samples_per_second": 46.714,
      "eval_ag_news_steps_per_second": 0.093,
      "eval_ag_news_token_set_f1": 0.3427749737585387,
      "eval_ag_news_token_set_f1_sem": 0.004338805055273504,
      "eval_ag_news_token_set_precision": 0.3266052240158575,
      "eval_ag_news_token_set_recall": 0.3775923610504178,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 90000
    },
    {
      "epoch": 17.28,
      "eval_anthropic_toxic_prompts_accuracy": 0.1096875,
      "eval_anthropic_toxic_prompts_bleu_score": 2.9373206136223606,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10876827192405612,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6542224884033203,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009752020126514159,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.3060574531555176,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.042,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.806,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.646,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.884,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 27.27737088521623,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20829231220160277,
      "eval_anthropic_toxic_prompts_runtime": 19.0422,
      "eval_anthropic_toxic_prompts_samples_per_second": 26.257,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.053,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3478834925706295,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0064968851025868505,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.42644561128024494,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32165547307147596,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 90000
    },
    {
      "epoch": 17.28,
      "eval_arxiv_accuracy": 0.34090625,
      "eval_arxiv_bleu_score": 4.249045914147333,
      "eval_arxiv_bleu_score_sem": 0.12409575623378936,
      "eval_arxiv_emb_cos_sim": 0.7413960695266724,
      "eval_arxiv_emb_cos_sim_sem": 0.008937436790087473,
      "eval_arxiv_emb_top1_equal": 0.25,
      "eval_arxiv_emb_top1_equal_sem": 0.03842366440207048,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4815762042999268,
      "eval_arxiv_n_ngrams_match_1": 14.562,
      "eval_arxiv_n_ngrams_match_2": 2.82,
      "eval_arxiv_n_ngrams_match_3": 0.624,
      "eval_arxiv_num_pred_words": 40.066,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 32.51092557235896,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3487994924683863,
      "eval_arxiv_runtime": 10.0611,
      "eval_arxiv_samples_per_second": 49.697,
      "eval_arxiv_steps_per_second": 0.099,
      "eval_arxiv_token_set_f1": 0.34326679836964336,
      "eval_arxiv_token_set_f1_sem": 0.004248880201704165,
      "eval_arxiv_token_set_precision": 0.2917895595173516,
      "eval_arxiv_token_set_recall": 0.4298496292041814,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 90000
    },
    {
      "epoch": 17.28,
      "eval_python_code_alpaca_accuracy": 0.15740625,
      "eval_python_code_alpaca_bleu_score": 4.399076243455381,
      "eval_python_code_alpaca_bleu_score_sem": 0.13434979844042105,
      "eval_python_code_alpaca_emb_cos_sim": 0.7520021796226501,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009809162692016326,
      "eval_python_code_alpaca_emb_top1_equal": 0.1484375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.9550557136535645,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.664,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.812,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.868,
      "eval_python_code_alpaca_num_pred_words": 43.576,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 19.202792547762346,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.32213327366366057,
      "eval_python_code_alpaca_runtime": 9.7654,
      "eval_python_code_alpaca_samples_per_second": 51.201,
      "eval_python_code_alpaca_steps_per_second": 0.102,
      "eval_python_code_alpaca_token_set_f1": 0.45818732986380006,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005535650094250777,
      "eval_python_code_alpaca_token_set_precision": 0.5298475077770941,
      "eval_python_code_alpaca_token_set_recall": 0.4260776070673612,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 90000
    },
    {
      "epoch": 17.28,
      "eval_wikibio_accuracy": 0.31521875,
      "eval_wikibio_bleu_score": 5.514890354089845,
      "eval_wikibio_bleu_score_sem": 0.20984692919267317,
      "eval_wikibio_emb_cos_sim": 0.7312206625938416,
      "eval_wikibio_emb_cos_sim_sem": 0.009199000131946081,
      "eval_wikibio_emb_top1_equal": 0.1796875,
      "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.8664937019348145,
      "eval_wikibio_n_ngrams_match_1": 9.498,
      "eval_wikibio_n_ngrams_match_2": 3.084,
      "eval_wikibio_n_ngrams_match_3": 1.116,
      "eval_wikibio_num_pred_words": 36.028,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 47.7745801459949,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3289124809013327,
      "eval_wikibio_runtime": 19.3745,
      "eval_wikibio_samples_per_second": 25.807,
      "eval_wikibio_steps_per_second": 0.052,
      "eval_wikibio_token_set_f1": 0.30423374802300884,
      "eval_wikibio_token_set_f1_sem": 0.005436087489321035,
      "eval_wikibio_token_set_precision": 0.30820377615406147,
      "eval_wikibio_token_set_recall": 0.321600153976657,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 90000
    },
    {
      "epoch": 17.28,
      "eval_nq_accuracy": 0.5193125,
      "eval_nq_bleu_score": 11.064626359348138,
      "eval_nq_bleu_score_sem": 0.4438811018196554,
      "eval_nq_emb_cos_sim": 0.8243062496185303,
      "eval_nq_emb_cos_sim_sem": 0.00740522433242186,
      "eval_nq_emb_top1_equal": 0.2578125,
      "eval_nq_emb_top1_equal_sem": 0.038815656435002115,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.250351667404175,
      "eval_nq_n_ngrams_match_1": 22.512,
      "eval_nq_n_ngrams_match_2": 8.146,
      "eval_nq_n_ngrams_match_3": 3.646,
      "eval_nq_num_pred_words": 48.812,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.491072950534342,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.43625374932715244,
      "eval_nq_runtime": 10.2379,
      "eval_nq_samples_per_second": 48.838,
      "eval_nq_steps_per_second": 0.098,
      "eval_nq_token_set_f1": 0.4513238546504728,
      "eval_nq_token_set_f1_sem": 0.004952742793422712,
      "eval_nq_token_set_precision": 0.4073663963632933,
      "eval_nq_token_set_recall": 0.5142992826843792,
      "eval_nq_true_num_tokens": 64.0,
      "step": 90000
    },
    {
      "epoch": 17.28,
      "learning_rate": 0.001,
      "loss": 2.6264,
      "step": 90012
    },
    {
      "epoch": 17.29,
      "learning_rate": 0.001,
      "loss": 2.6256,
      "step": 90024
    },
    {
      "epoch": 17.29,
      "learning_rate": 0.001,
      "loss": 2.6117,
      "step": 90036
    },
    {
      "epoch": 17.29,
      "learning_rate": 0.001,
      "loss": 2.6142,
      "step": 90048
    },
    {
      "epoch": 17.29,
      "learning_rate": 0.001,
      "loss": 2.6216,
      "step": 90060
    },
    {
      "epoch": 17.29,
      "learning_rate": 0.001,
      "loss": 2.6087,
      "step": 90072
    },
    {
      "epoch": 17.3,
      "learning_rate": 0.001,
      "loss": 2.6109,
      "step": 90084
    },
    {
      "epoch": 17.3,
      "learning_rate": 0.001,
      "loss": 2.6042,
      "step": 90096
    },
    {
      "epoch": 17.3,
      "learning_rate": 0.001,
      "loss": 2.6122,
      "step": 90108
    },
    {
      "epoch": 17.3,
      "learning_rate": 0.001,
      "loss": 2.6001,
      "step": 90120
    },
    {
      "epoch": 17.31,
      "learning_rate": 0.001,
      "loss": 2.603,
      "step": 90132
    },
    {
      "epoch": 17.31,
      "learning_rate": 0.001,
      "loss": 2.6115,
      "step": 90144
    },
    {
      "epoch": 17.31,
      "learning_rate": 0.001,
      "loss": 2.6097,
      "step": 90156
    },
    {
      "epoch": 17.31,
      "learning_rate": 0.001,
      "loss": 2.6115,
      "step": 90168
    },
    {
      "epoch": 17.32,
      "learning_rate": 0.001,
      "loss": 2.6136,
      "step": 90180
    },
    {
      "epoch": 17.32,
      "learning_rate": 0.001,
      "loss": 2.6275,
      "step": 90192
    },
    {
      "epoch": 17.32,
      "learning_rate": 0.001,
      "loss": 2.6179,
      "step": 90204
    },
    {
      "epoch": 17.32,
      "learning_rate": 0.001,
      "loss": 2.6199,
      "step": 90216
    },
    {
      "epoch": 17.32,
      "learning_rate": 0.001,
      "loss": 2.6238,
      "step": 90228
    },
    {
      "epoch": 17.33,
      "learning_rate": 0.001,
      "loss": 2.6164,
      "step": 90240
    },
    {
      "epoch": 17.33,
      "learning_rate": 0.001,
      "loss": 2.6166,
      "step": 90252
    },
    {
      "epoch": 17.33,
      "learning_rate": 0.001,
      "loss": 2.6241,
      "step": 90264
    },
    {
      "epoch": 17.33,
      "learning_rate": 0.001,
      "loss": 2.6273,
      "step": 90276
    },
    {
      "epoch": 17.34,
      "learning_rate": 0.001,
      "loss": 2.6156,
      "step": 90288
    },
    {
      "epoch": 17.34,
      "learning_rate": 0.001,
      "loss": 2.6147,
      "step": 90300
    },
    {
      "epoch": 17.34,
      "learning_rate": 0.001,
      "loss": 2.6191,
      "step": 90312
    },
    {
      "epoch": 17.34,
      "learning_rate": 0.001,
      "loss": 2.6212,
      "step": 90324
    },
    {
      "epoch": 17.35,
      "learning_rate": 0.001,
      "loss": 2.6192,
      "step": 90336
    },
    {
      "epoch": 17.35,
      "learning_rate": 0.001,
      "loss": 2.6218,
      "step": 90348
    },
    {
      "epoch": 17.35,
      "learning_rate": 0.001,
      "loss": 2.6252,
      "step": 90360
    },
    {
      "epoch": 17.35,
      "learning_rate": 0.001,
      "loss": 2.6087,
      "step": 90372
    },
    {
      "epoch": 17.35,
      "learning_rate": 0.001,
      "loss": 2.6169,
      "step": 90384
    },
    {
      "epoch": 17.36,
      "learning_rate": 0.001,
      "loss": 2.6183,
      "step": 90396
    },
    {
      "epoch": 17.36,
      "learning_rate": 0.001,
      "loss": 2.6212,
      "step": 90408
    },
    {
      "epoch": 17.36,
      "learning_rate": 0.001,
      "loss": 2.6166,
      "step": 90420
    },
    {
      "epoch": 17.36,
      "learning_rate": 0.001,
      "loss": 2.6205,
      "step": 90432
    },
    {
      "epoch": 17.37,
      "learning_rate": 0.001,
      "loss": 2.6204,
      "step": 90444
    },
    {
      "epoch": 17.37,
      "learning_rate": 0.001,
      "loss": 2.6125,
      "step": 90456
    },
    {
      "epoch": 17.37,
      "learning_rate": 0.001,
      "loss": 2.6114,
      "step": 90468
    },
    {
      "epoch": 17.37,
      "learning_rate": 0.001,
      "loss": 2.6136,
      "step": 90480
    },
    {
      "epoch": 17.38,
      "learning_rate": 0.001,
      "loss": 2.6056,
      "step": 90492
    },
    {
      "epoch": 17.38,
      "learning_rate": 0.001,
      "loss": 2.6163,
      "step": 90504
    },
    {
      "epoch": 17.38,
      "learning_rate": 0.001,
      "loss": 2.6144,
      "step": 90516
    },
    {
      "epoch": 17.38,
      "learning_rate": 0.001,
      "loss": 2.62,
      "step": 90528
    },
    {
      "epoch": 17.38,
      "learning_rate": 0.001,
      "loss": 2.6213,
      "step": 90540
    },
    {
      "epoch": 17.39,
      "learning_rate": 0.001,
      "loss": 2.6146,
      "step": 90552
    },
    {
      "epoch": 17.39,
      "learning_rate": 0.001,
      "loss": 2.6233,
      "step": 90564
    },
    {
      "epoch": 17.39,
      "learning_rate": 0.001,
      "loss": 2.617,
      "step": 90576
    },
    {
      "epoch": 17.39,
      "learning_rate": 0.001,
      "loss": 2.6133,
      "step": 90588
    },
    {
      "epoch": 17.4,
      "learning_rate": 0.001,
      "loss": 2.6185,
      "step": 90600
    },
    {
      "epoch": 17.4,
      "learning_rate": 0.001,
      "loss": 2.6202,
      "step": 90612
    },
    {
      "epoch": 17.4,
      "learning_rate": 0.001,
      "loss": 2.6194,
      "step": 90624
    },
    {
      "epoch": 17.4,
      "eval_ag_news_accuracy": 0.316125,
      "eval_ag_news_bleu_score": 4.641612217623085,
      "eval_ag_news_bleu_score_sem": 0.1414392736342318,
      "eval_ag_news_emb_cos_sim": 0.7987799048423767,
      "eval_ag_news_emb_cos_sim_sem": 0.006944879989373176,
      "eval_ag_news_emb_top1_equal": 0.2890625,
      "eval_ag_news_emb_top1_equal_sem": 0.04022626667363519,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.6240322589874268,
      "eval_ag_news_n_ngrams_match_1": 13.586,
      "eval_ag_news_n_ngrams_match_2": 2.906,
      "eval_ag_news_n_ngrams_match_3": 0.84,
      "eval_ag_news_num_pred_words": 46.346,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 37.4884265117241,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3361633313863822,
      "eval_ag_news_runtime": 10.892,
      "eval_ag_news_samples_per_second": 45.905,
      "eval_ag_news_steps_per_second": 0.092,
      "eval_ag_news_token_set_f1": 0.3420781776701675,
      "eval_ag_news_token_set_f1_sem": 0.004369658769599392,
      "eval_ag_news_token_set_precision": 0.32478240547633636,
      "eval_ag_news_token_set_recall": 0.3755447077282753,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 90625
    },
    {
      "epoch": 17.4,
      "eval_anthropic_toxic_prompts_accuracy": 0.112125,
      "eval_anthropic_toxic_prompts_bleu_score": 2.9783182036535596,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11312830653422552,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6633878946304321,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008658115137026603,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2985990047454834,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.978,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.78,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.622,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.624,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 27.074680837948137,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2064232804810187,
      "eval_anthropic_toxic_prompts_runtime": 10.3476,
      "eval_anthropic_toxic_prompts_samples_per_second": 48.32,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.097,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3449693000200593,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006635802474549386,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4181623826714901,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3224460625316401,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 90625
    },
    {
      "epoch": 17.4,
      "eval_arxiv_accuracy": 0.341375,
      "eval_arxiv_bleu_score": 4.157524255818762,
      "eval_arxiv_bleu_score_sem": 0.12181247753442959,
      "eval_arxiv_emb_cos_sim": 0.750731348991394,
      "eval_arxiv_emb_cos_sim_sem": 0.00741885686228161,
      "eval_arxiv_emb_top1_equal": 0.296875,
      "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4882240295410156,
      "eval_arxiv_n_ngrams_match_1": 14.638,
      "eval_arxiv_n_ngrams_match_2": 2.78,
      "eval_arxiv_n_ngrams_match_3": 0.61,
      "eval_arxiv_num_pred_words": 40.312,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 32.72777250564538,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.35108606850933954,
      "eval_arxiv_runtime": 11.2193,
      "eval_arxiv_samples_per_second": 44.566,
      "eval_arxiv_steps_per_second": 0.089,
      "eval_arxiv_token_set_f1": 0.34465462107517303,
      "eval_arxiv_token_set_f1_sem": 0.004016034905708582,
      "eval_arxiv_token_set_precision": 0.2946193091793197,
      "eval_arxiv_token_set_recall": 0.4323113110917569,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 90625
    },
    {
      "epoch": 17.4,
      "eval_python_code_alpaca_accuracy": 0.1571875,
      "eval_python_code_alpaca_bleu_score": 4.174079899995925,
      "eval_python_code_alpaca_bleu_score_sem": 0.13273837925236556,
      "eval_python_code_alpaca_emb_cos_sim": 0.7373853325843811,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009413866082743636,
      "eval_python_code_alpaca_emb_top1_equal": 0.140625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.9618608951568604,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.5,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.612,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.786,
      "eval_python_code_alpaca_num_pred_words": 43.024,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 19.333916692186193,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3193054661167657,
      "eval_python_code_alpaca_runtime": 11.4481,
      "eval_python_code_alpaca_samples_per_second": 43.675,
      "eval_python_code_alpaca_steps_per_second": 0.087,
      "eval_python_code_alpaca_token_set_f1": 0.4565497441294427,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005878026862612197,
      "eval_python_code_alpaca_token_set_precision": 0.5182756178373794,
      "eval_python_code_alpaca_token_set_recall": 0.43228177861391315,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 90625
    },
    {
      "epoch": 17.4,
      "eval_wikibio_accuracy": 0.317,
      "eval_wikibio_bleu_score": 5.748530180680383,
      "eval_wikibio_bleu_score_sem": 0.20994803427481568,
      "eval_wikibio_emb_cos_sim": 0.7392998337745667,
      "eval_wikibio_emb_cos_sim_sem": 0.009507575649140561,
      "eval_wikibio_emb_top1_equal": 0.15625,
      "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.802290916442871,
      "eval_wikibio_n_ngrams_match_1": 9.934,
      "eval_wikibio_n_ngrams_match_2": 3.204,
      "eval_wikibio_n_ngrams_match_3": 1.154,
      "eval_wikibio_num_pred_words": 36.222,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 44.803708564072366,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3496231017465844,
      "eval_wikibio_runtime": 10.2385,
      "eval_wikibio_samples_per_second": 48.835,
      "eval_wikibio_steps_per_second": 0.098,
      "eval_wikibio_token_set_f1": 0.3143530622167457,
      "eval_wikibio_token_set_f1_sem": 0.0052439234527126895,
      "eval_wikibio_token_set_precision": 0.3220405710964759,
      "eval_wikibio_token_set_recall": 0.32165786468039875,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 90625
    },
    {
      "epoch": 17.4,
      "eval_nq_accuracy": 0.51896875,
      "eval_nq_bleu_score": 11.045099552220618,
      "eval_nq_bleu_score_sem": 0.45819793811311665,
      "eval_nq_emb_cos_sim": 0.8211745023727417,
      "eval_nq_emb_cos_sim_sem": 0.008357733889144953,
      "eval_nq_emb_top1_equal": 0.2734375,
      "eval_nq_emb_top1_equal_sem": 0.03955156411760461,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.246525287628174,
      "eval_nq_n_ngrams_match_1": 22.398,
      "eval_nq_n_ngrams_match_2": 8.124,
      "eval_nq_n_ngrams_match_3": 3.622,
      "eval_nq_num_pred_words": 48.816,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.454825892673409,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.43370102382407916,
      "eval_nq_runtime": 15.2015,
      "eval_nq_samples_per_second": 32.891,
      "eval_nq_steps_per_second": 0.066,
      "eval_nq_token_set_f1": 0.44947621353492245,
      "eval_nq_token_set_f1_sem": 0.005011708707236234,
      "eval_nq_token_set_precision": 0.4052253033166563,
      "eval_nq_token_set_recall": 0.5162832689652473,
      "eval_nq_true_num_tokens": 64.0,
      "step": 90625
    },
    {
      "epoch": 17.4,
      "learning_rate": 0.001,
      "loss": 2.6188,
      "step": 90636
    },
    {
      "epoch": 17.41,
      "learning_rate": 0.001,
      "loss": 2.6241,
      "step": 90648
    },
    {
      "epoch": 17.41,
      "learning_rate": 0.001,
      "loss": 2.6115,
      "step": 90660
    },
    {
      "epoch": 17.41,
      "learning_rate": 0.001,
      "loss": 2.6143,
      "step": 90672
    },
    {
      "epoch": 17.41,
      "learning_rate": 0.001,
      "loss": 2.6123,
      "step": 90684
    },
    {
      "epoch": 17.41,
      "learning_rate": 0.001,
      "loss": 2.6159,
      "step": 90696
    },
    {
      "epoch": 17.42,
      "learning_rate": 0.001,
      "loss": 2.6135,
      "step": 90708
    },
    {
      "epoch": 17.42,
      "learning_rate": 0.001,
      "loss": 2.6188,
      "step": 90720
    },
    {
      "epoch": 17.42,
      "learning_rate": 0.001,
      "loss": 2.6207,
      "step": 90732
    },
    {
      "epoch": 17.42,
      "learning_rate": 0.001,
      "loss": 2.6234,
      "step": 90744
    },
    {
      "epoch": 17.43,
      "learning_rate": 0.001,
      "loss": 2.6194,
      "step": 90756
    },
    {
      "epoch": 17.43,
      "learning_rate": 0.001,
      "loss": 2.6203,
      "step": 90768
    },
    {
      "epoch": 17.43,
      "learning_rate": 0.001,
      "loss": 2.6155,
      "step": 90780
    },
    {
      "epoch": 17.43,
      "learning_rate": 0.001,
      "loss": 2.6092,
      "step": 90792
    },
    {
      "epoch": 17.44,
      "learning_rate": 0.001,
      "loss": 2.6288,
      "step": 90804
    },
    {
      "epoch": 17.44,
      "learning_rate": 0.001,
      "loss": 2.6211,
      "step": 90816
    },
    {
      "epoch": 17.44,
      "learning_rate": 0.001,
      "loss": 2.6159,
      "step": 90828
    },
    {
      "epoch": 17.44,
      "learning_rate": 0.001,
      "loss": 2.6085,
      "step": 90840
    },
    {
      "epoch": 17.44,
      "learning_rate": 0.001,
      "loss": 2.6203,
      "step": 90852
    },
    {
      "epoch": 17.45,
      "learning_rate": 0.001,
      "loss": 2.6155,
      "step": 90864
    },
    {
      "epoch": 17.45,
      "learning_rate": 0.001,
      "loss": 2.6127,
      "step": 90876
    },
    {
      "epoch": 17.45,
      "learning_rate": 0.001,
      "loss": 2.6183,
      "step": 90888
    },
    {
      "epoch": 17.45,
      "learning_rate": 0.001,
      "loss": 2.6188,
      "step": 90900
    },
    {
      "epoch": 17.46,
      "learning_rate": 0.001,
      "loss": 2.6184,
      "step": 90912
    },
    {
      "epoch": 17.46,
      "learning_rate": 0.001,
      "loss": 2.6121,
      "step": 90924
    },
    {
      "epoch": 17.46,
      "learning_rate": 0.001,
      "loss": 2.6129,
      "step": 90936
    },
    {
      "epoch": 17.46,
      "learning_rate": 0.001,
      "loss": 2.6132,
      "step": 90948
    },
    {
      "epoch": 17.47,
      "learning_rate": 0.001,
      "loss": 2.6081,
      "step": 90960
    },
    {
      "epoch": 17.47,
      "learning_rate": 0.001,
      "loss": 2.6108,
      "step": 90972
    },
    {
      "epoch": 17.47,
      "learning_rate": 0.001,
      "loss": 2.6123,
      "step": 90984
    },
    {
      "epoch": 17.47,
      "learning_rate": 0.001,
      "loss": 2.6201,
      "step": 90996
    },
    {
      "epoch": 17.47,
      "learning_rate": 0.001,
      "loss": 2.6215,
      "step": 91008
    },
    {
      "epoch": 17.48,
      "learning_rate": 0.001,
      "loss": 2.6179,
      "step": 91020
    },
    {
      "epoch": 17.48,
      "learning_rate": 0.001,
      "loss": 2.6265,
      "step": 91032
    },
    {
      "epoch": 17.48,
      "learning_rate": 0.001,
      "loss": 2.6203,
      "step": 91044
    },
    {
      "epoch": 17.48,
      "learning_rate": 0.001,
      "loss": 2.6185,
      "step": 91056
    },
    {
      "epoch": 17.49,
      "learning_rate": 0.001,
      "loss": 2.6203,
      "step": 91068
    },
    {
      "epoch": 17.49,
      "learning_rate": 0.001,
      "loss": 2.6193,
      "step": 91080
    },
    {
      "epoch": 17.49,
      "learning_rate": 0.001,
      "loss": 2.6171,
      "step": 91092
    },
    {
      "epoch": 17.49,
      "learning_rate": 0.001,
      "loss": 2.6078,
      "step": 91104
    },
    {
      "epoch": 17.5,
      "learning_rate": 0.001,
      "loss": 2.6086,
      "step": 91116
    },
    {
      "epoch": 17.5,
      "learning_rate": 0.001,
      "loss": 2.6207,
      "step": 91128
    },
    {
      "epoch": 17.5,
      "learning_rate": 0.001,
      "loss": 2.6108,
      "step": 91140
    },
    {
      "epoch": 17.5,
      "learning_rate": 0.001,
      "loss": 2.6211,
      "step": 91152
    },
    {
      "epoch": 17.5,
      "learning_rate": 0.001,
      "loss": 2.622,
      "step": 91164
    },
    {
      "epoch": 17.51,
      "learning_rate": 0.001,
      "loss": 2.611,
      "step": 91176
    },
    {
      "epoch": 17.51,
      "learning_rate": 0.001,
      "loss": 2.6149,
      "step": 91188
    },
    {
      "epoch": 17.51,
      "learning_rate": 0.001,
      "loss": 2.6263,
      "step": 91200
    },
    {
      "epoch": 17.51,
      "learning_rate": 0.001,
      "loss": 2.6215,
      "step": 91212
    },
    {
      "epoch": 17.52,
      "learning_rate": 0.001,
      "loss": 2.6143,
      "step": 91224
    },
    {
      "epoch": 17.52,
      "learning_rate": 0.001,
      "loss": 2.6056,
      "step": 91236
    },
    {
      "epoch": 17.52,
      "learning_rate": 0.001,
      "loss": 2.6288,
      "step": 91248
    },
    {
      "epoch": 17.52,
      "eval_ag_news_accuracy": 0.31796875,
      "eval_ag_news_bleu_score": 4.6029982173523045,
      "eval_ag_news_bleu_score_sem": 0.1519063419266466,
      "eval_ag_news_emb_cos_sim": 0.7865685224533081,
      "eval_ag_news_emb_cos_sim_sem": 0.008511251150667852,
      "eval_ag_news_emb_top1_equal": 0.265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.6188931465148926,
      "eval_ag_news_n_ngrams_match_1": 13.436,
      "eval_ag_news_n_ngrams_match_2": 2.906,
      "eval_ag_news_n_ngrams_match_3": 0.826,
      "eval_ag_news_num_pred_words": 46.484,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 37.296263468135336,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.33267937227613475,
      "eval_ag_news_runtime": 10.3221,
      "eval_ag_news_samples_per_second": 48.44,
      "eval_ag_news_steps_per_second": 0.097,
      "eval_ag_news_token_set_f1": 0.3398246223304076,
      "eval_ag_news_token_set_f1_sem": 0.004511663921237267,
      "eval_ag_news_token_set_precision": 0.3192426771543661,
      "eval_ag_news_token_set_recall": 0.38440076752259994,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 91250
    },
    {
      "epoch": 17.52,
      "eval_anthropic_toxic_prompts_accuracy": 0.1125,
      "eval_anthropic_toxic_prompts_bleu_score": 2.905474631709576,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11543800793860712,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6496694087982178,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009517883846324014,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.286526918411255,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.842,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.702,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.618,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.7,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.749797906365554,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20415289339524428,
      "eval_anthropic_toxic_prompts_runtime": 9.824,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.896,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.102,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3449498213857672,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006677122648178251,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.41048496771632176,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3307222472771074,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 91250
    },
    {
      "epoch": 17.52,
      "eval_arxiv_accuracy": 0.3414375,
      "eval_arxiv_bleu_score": 4.060234639688883,
      "eval_arxiv_bleu_score_sem": 0.1168993043894669,
      "eval_arxiv_emb_cos_sim": 0.7421520948410034,
      "eval_arxiv_emb_cos_sim_sem": 0.007438515074389402,
      "eval_arxiv_emb_top1_equal": 0.265625,
      "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4817111492156982,
      "eval_arxiv_n_ngrams_match_1": 14.06,
      "eval_arxiv_n_ngrams_match_2": 2.71,
      "eval_arxiv_n_ngrams_match_3": 0.622,
      "eval_arxiv_num_pred_words": 38.91,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 32.51531305249939,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3380762485498364,
      "eval_arxiv_runtime": 10.0892,
      "eval_arxiv_samples_per_second": 49.558,
      "eval_arxiv_steps_per_second": 0.099,
      "eval_arxiv_token_set_f1": 0.33548037728916624,
      "eval_arxiv_token_set_f1_sem": 0.00429884608294297,
      "eval_arxiv_token_set_precision": 0.279878095000346,
      "eval_arxiv_token_set_recall": 0.44572049453007173,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 91250
    },
    {
      "epoch": 17.52,
      "eval_python_code_alpaca_accuracy": 0.15803125,
      "eval_python_code_alpaca_bleu_score": 4.292263844847255,
      "eval_python_code_alpaca_bleu_score_sem": 0.14895782797027105,
      "eval_python_code_alpaca_emb_cos_sim": 0.7248610258102417,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.011929689178610842,
      "eval_python_code_alpaca_emb_top1_equal": 0.109375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.9378304481506348,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.314,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.648,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.846,
      "eval_python_code_alpaca_num_pred_words": 42.422,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.87485188874955,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3164786315472602,
      "eval_python_code_alpaca_runtime": 10.1487,
      "eval_python_code_alpaca_samples_per_second": 49.268,
      "eval_python_code_alpaca_steps_per_second": 0.099,
      "eval_python_code_alpaca_token_set_f1": 0.45986225768384253,
      "eval_python_code_alpaca_token_set_f1_sem": 0.006071605000027662,
      "eval_python_code_alpaca_token_set_precision": 0.5071217546987413,
      "eval_python_code_alpaca_token_set_recall": 0.4501021686448808,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 91250
    },
    {
      "epoch": 17.52,
      "eval_wikibio_accuracy": 0.3185625,
      "eval_wikibio_bleu_score": 5.86293047052854,
      "eval_wikibio_bleu_score_sem": 0.2097571039965183,
      "eval_wikibio_emb_cos_sim": 0.730091392993927,
      "eval_wikibio_emb_cos_sim_sem": 0.00889433636912902,
      "eval_wikibio_emb_top1_equal": 0.1796875,
      "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7917020320892334,
      "eval_wikibio_n_ngrams_match_1": 9.942,
      "eval_wikibio_n_ngrams_match_2": 3.288,
      "eval_wikibio_n_ngrams_match_3": 1.204,
      "eval_wikibio_num_pred_words": 36.34,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 44.33179022926705,
      "eval_wikibio_pred_num_tokens": 62.953125,
      "eval_wikibio_rouge_score": 0.34520514630277793,
      "eval_wikibio_runtime": 10.3427,
      "eval_wikibio_samples_per_second": 48.343,
      "eval_wikibio_steps_per_second": 0.097,
      "eval_wikibio_token_set_f1": 0.31497136088705324,
      "eval_wikibio_token_set_f1_sem": 0.005374250899187345,
      "eval_wikibio_token_set_precision": 0.3209662348006424,
      "eval_wikibio_token_set_recall": 0.32667448256406834,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 91250
    },
    {
      "epoch": 17.52,
      "eval_nq_accuracy": 0.51715625,
      "eval_nq_bleu_score": 11.041352901905567,
      "eval_nq_bleu_score_sem": 0.44647207818452034,
      "eval_nq_emb_cos_sim": 0.8222669959068298,
      "eval_nq_emb_cos_sim_sem": 0.008171162303132877,
      "eval_nq_emb_top1_equal": 0.296875,
      "eval_nq_emb_top1_equal_sem": 0.04054163310179599,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.2518060207366943,
      "eval_nq_n_ngrams_match_1": 22.432,
      "eval_nq_n_ngrams_match_2": 8.094,
      "eval_nq_n_ngrams_match_3": 3.67,
      "eval_nq_num_pred_words": 48.786,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.504886366468106,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.434008837293269,
      "eval_nq_runtime": 11.9339,
      "eval_nq_samples_per_second": 41.898,
      "eval_nq_steps_per_second": 0.084,
      "eval_nq_token_set_f1": 0.45168001599255414,
      "eval_nq_token_set_f1_sem": 0.004802899756250061,
      "eval_nq_token_set_precision": 0.4046170641156355,
      "eval_nq_token_set_recall": 0.5225387277168005,
      "eval_nq_true_num_tokens": 64.0,
      "step": 91250
    },
    {
      "epoch": 17.52,
      "learning_rate": 0.001,
      "loss": 2.6229,
      "step": 91260
    },
    {
      "epoch": 17.53,
      "learning_rate": 0.001,
      "loss": 2.6157,
      "step": 91272
    },
    {
      "epoch": 17.53,
      "learning_rate": 0.001,
      "loss": 2.628,
      "step": 91284
    },
    {
      "epoch": 17.53,
      "learning_rate": 0.001,
      "loss": 2.6244,
      "step": 91296
    },
    {
      "epoch": 17.53,
      "learning_rate": 0.001,
      "loss": 2.6275,
      "step": 91308
    },
    {
      "epoch": 17.53,
      "learning_rate": 0.001,
      "loss": 2.6255,
      "step": 91320
    },
    {
      "epoch": 17.54,
      "learning_rate": 0.001,
      "loss": 2.62,
      "step": 91332
    },
    {
      "epoch": 17.54,
      "learning_rate": 0.001,
      "loss": 2.6294,
      "step": 91344
    },
    {
      "epoch": 17.54,
      "learning_rate": 0.001,
      "loss": 2.6266,
      "step": 91356
    },
    {
      "epoch": 17.54,
      "learning_rate": 0.001,
      "loss": 2.6319,
      "step": 91368
    },
    {
      "epoch": 17.55,
      "learning_rate": 0.001,
      "loss": 2.6238,
      "step": 91380
    },
    {
      "epoch": 17.55,
      "learning_rate": 0.001,
      "loss": 2.6186,
      "step": 91392
    },
    {
      "epoch": 17.55,
      "learning_rate": 0.001,
      "loss": 2.624,
      "step": 91404
    },
    {
      "epoch": 17.55,
      "learning_rate": 0.001,
      "loss": 2.6316,
      "step": 91416
    },
    {
      "epoch": 17.56,
      "learning_rate": 0.001,
      "loss": 2.62,
      "step": 91428
    },
    {
      "epoch": 17.56,
      "learning_rate": 0.001,
      "loss": 2.6328,
      "step": 91440
    },
    {
      "epoch": 17.56,
      "learning_rate": 0.001,
      "loss": 2.6154,
      "step": 91452
    },
    {
      "epoch": 17.56,
      "learning_rate": 0.001,
      "loss": 2.6239,
      "step": 91464
    },
    {
      "epoch": 17.56,
      "learning_rate": 0.001,
      "loss": 2.606,
      "step": 91476
    },
    {
      "epoch": 17.57,
      "learning_rate": 0.001,
      "loss": 2.6158,
      "step": 91488
    },
    {
      "epoch": 17.57,
      "learning_rate": 0.001,
      "loss": 2.6171,
      "step": 91500
    },
    {
      "epoch": 17.57,
      "learning_rate": 0.001,
      "loss": 2.6214,
      "step": 91512
    },
    {
      "epoch": 17.57,
      "learning_rate": 0.001,
      "loss": 2.6246,
      "step": 91524
    },
    {
      "epoch": 17.58,
      "learning_rate": 0.001,
      "loss": 2.6191,
      "step": 91536
    },
    {
      "epoch": 17.58,
      "learning_rate": 0.001,
      "loss": 2.6225,
      "step": 91548
    },
    {
      "epoch": 17.58,
      "learning_rate": 0.001,
      "loss": 2.6137,
      "step": 91560
    },
    {
      "epoch": 17.58,
      "learning_rate": 0.001,
      "loss": 2.6188,
      "step": 91572
    },
    {
      "epoch": 17.59,
      "learning_rate": 0.001,
      "loss": 2.6293,
      "step": 91584
    },
    {
      "epoch": 17.59,
      "learning_rate": 0.001,
      "loss": 2.6309,
      "step": 91596
    },
    {
      "epoch": 17.59,
      "learning_rate": 0.001,
      "loss": 2.6172,
      "step": 91608
    },
    {
      "epoch": 17.59,
      "learning_rate": 0.001,
      "loss": 2.6273,
      "step": 91620
    },
    {
      "epoch": 17.59,
      "learning_rate": 0.001,
      "loss": 2.618,
      "step": 91632
    },
    {
      "epoch": 17.6,
      "learning_rate": 0.001,
      "loss": 2.618,
      "step": 91644
    },
    {
      "epoch": 17.6,
      "learning_rate": 0.001,
      "loss": 2.6259,
      "step": 91656
    },
    {
      "epoch": 17.6,
      "learning_rate": 0.001,
      "loss": 2.6216,
      "step": 91668
    },
    {
      "epoch": 17.6,
      "learning_rate": 0.001,
      "loss": 2.6123,
      "step": 91680
    },
    {
      "epoch": 17.61,
      "learning_rate": 0.001,
      "loss": 2.6238,
      "step": 91692
    },
    {
      "epoch": 17.61,
      "learning_rate": 0.001,
      "loss": 2.6191,
      "step": 91704
    },
    {
      "epoch": 17.61,
      "learning_rate": 0.001,
      "loss": 2.6136,
      "step": 91716
    },
    {
      "epoch": 17.61,
      "learning_rate": 0.001,
      "loss": 2.6225,
      "step": 91728
    },
    {
      "epoch": 17.62,
      "learning_rate": 0.001,
      "loss": 2.6212,
      "step": 91740
    },
    {
      "epoch": 17.62,
      "learning_rate": 0.001,
      "loss": 2.6243,
      "step": 91752
    },
    {
      "epoch": 17.62,
      "learning_rate": 0.001,
      "loss": 2.6137,
      "step": 91764
    },
    {
      "epoch": 17.62,
      "learning_rate": 0.001,
      "loss": 2.6255,
      "step": 91776
    },
    {
      "epoch": 17.62,
      "learning_rate": 0.001,
      "loss": 2.6165,
      "step": 91788
    },
    {
      "epoch": 17.63,
      "learning_rate": 0.001,
      "loss": 2.6198,
      "step": 91800
    },
    {
      "epoch": 17.63,
      "learning_rate": 0.001,
      "loss": 2.6106,
      "step": 91812
    },
    {
      "epoch": 17.63,
      "learning_rate": 0.001,
      "loss": 2.6174,
      "step": 91824
    },
    {
      "epoch": 17.63,
      "learning_rate": 0.001,
      "loss": 2.6133,
      "step": 91836
    },
    {
      "epoch": 17.64,
      "learning_rate": 0.001,
      "loss": 2.6168,
      "step": 91848
    },
    {
      "epoch": 17.64,
      "learning_rate": 0.001,
      "loss": 2.6194,
      "step": 91860
    },
    {
      "epoch": 17.64,
      "learning_rate": 0.001,
      "loss": 2.6125,
      "step": 91872
    },
    {
      "epoch": 17.64,
      "eval_ag_news_accuracy": 0.315875,
      "eval_ag_news_bleu_score": 4.516791984956375,
      "eval_ag_news_bleu_score_sem": 0.14102461811447312,
      "eval_ag_news_emb_cos_sim": 0.792000412940979,
      "eval_ag_news_emb_cos_sim_sem": 0.008333077324221573,
      "eval_ag_news_emb_top1_equal": 0.25,
      "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.6316728591918945,
      "eval_ag_news_n_ngrams_match_1": 13.566,
      "eval_ag_news_n_ngrams_match_2": 2.942,
      "eval_ag_news_n_ngrams_match_3": 0.768,
      "eval_ag_news_num_pred_words": 46.216,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 37.77595764741365,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.33733370989732975,
      "eval_ag_news_runtime": 10.5509,
      "eval_ag_news_samples_per_second": 47.389,
      "eval_ag_news_steps_per_second": 0.095,
      "eval_ag_news_token_set_f1": 0.34093643159678505,
      "eval_ag_news_token_set_f1_sem": 0.0044661393665224765,
      "eval_ag_news_token_set_precision": 0.3231342489407503,
      "eval_ag_news_token_set_recall": 0.38076027189100853,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 91875
    },
    {
      "epoch": 17.64,
      "eval_anthropic_toxic_prompts_accuracy": 0.11075,
      "eval_anthropic_toxic_prompts_bleu_score": 2.9257154595032304,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12171122561972979,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6466835737228394,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009675006639028118,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.301609992980957,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.886,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.746,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.608,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.58,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 27.15632523691011,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 62.8671875,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20564758629015595,
      "eval_anthropic_toxic_prompts_runtime": 9.9567,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.217,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.1,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35088469545083006,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0065935221882133825,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.41868826503135637,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3326883772584242,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 91875
    },
    {
      "epoch": 17.64,
      "eval_arxiv_accuracy": 0.3396875,
      "eval_arxiv_bleu_score": 4.111235558623548,
      "eval_arxiv_bleu_score_sem": 0.11936061103201295,
      "eval_arxiv_emb_cos_sim": 0.7424968481063843,
      "eval_arxiv_emb_cos_sim_sem": 0.009177570435111816,
      "eval_arxiv_emb_top1_equal": 0.296875,
      "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.480323553085327,
      "eval_arxiv_n_ngrams_match_1": 14.51,
      "eval_arxiv_n_ngrams_match_2": 2.776,
      "eval_arxiv_n_ngrams_match_3": 0.588,
      "eval_arxiv_num_pred_words": 39.802,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 32.470226218322516,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.34747550565157614,
      "eval_arxiv_runtime": 10.4863,
      "eval_arxiv_samples_per_second": 47.681,
      "eval_arxiv_steps_per_second": 0.095,
      "eval_arxiv_token_set_f1": 0.3425256557378168,
      "eval_arxiv_token_set_f1_sem": 0.004308085624099615,
      "eval_arxiv_token_set_precision": 0.2889703238854412,
      "eval_arxiv_token_set_recall": 0.44020837430223325,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 91875
    },
    {
      "epoch": 17.64,
      "eval_python_code_alpaca_accuracy": 0.1571875,
      "eval_python_code_alpaca_bleu_score": 4.463552230485825,
      "eval_python_code_alpaca_bleu_score_sem": 0.14725582592292868,
      "eval_python_code_alpaca_emb_cos_sim": 0.738200306892395,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.011336023158465564,
      "eval_python_code_alpaca_emb_top1_equal": 0.1171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.9708402156829834,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.576,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.778,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.918,
      "eval_python_code_alpaca_num_pred_words": 43.52,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 19.508303894775644,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3198678981380734,
      "eval_python_code_alpaca_runtime": 9.4479,
      "eval_python_code_alpaca_samples_per_second": 52.922,
      "eval_python_code_alpaca_steps_per_second": 0.106,
      "eval_python_code_alpaca_token_set_f1": 0.4662726139896116,
      "eval_python_code_alpaca_token_set_f1_sem": 0.00580147080001791,
      "eval_python_code_alpaca_token_set_precision": 0.5197490051116582,
      "eval_python_code_alpaca_token_set_recall": 0.44510844452876325,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 91875
    },
    {
      "epoch": 17.64,
      "eval_wikibio_accuracy": 0.31534375,
      "eval_wikibio_bleu_score": 5.556776623621821,
      "eval_wikibio_bleu_score_sem": 0.20818131192861844,
      "eval_wikibio_emb_cos_sim": 0.7368065118789673,
      "eval_wikibio_emb_cos_sim_sem": 0.01115140942527572,
      "eval_wikibio_emb_top1_equal": 0.1640625,
      "eval_wikibio_emb_top1_equal_sem": 0.03286167651298939,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.79640531539917,
      "eval_wikibio_n_ngrams_match_1": 9.658,
      "eval_wikibio_n_ngrams_match_2": 3.196,
      "eval_wikibio_n_ngrams_match_3": 1.126,
      "eval_wikibio_num_pred_words": 35.94,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 44.54078629694595,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.33865040548455105,
      "eval_wikibio_runtime": 10.4336,
      "eval_wikibio_samples_per_second": 47.922,
      "eval_wikibio_steps_per_second": 0.096,
      "eval_wikibio_token_set_f1": 0.3051305001112317,
      "eval_wikibio_token_set_f1_sem": 0.005797644941532922,
      "eval_wikibio_token_set_precision": 0.3133082042377267,
      "eval_wikibio_token_set_recall": 0.3134608068816547,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 91875
    },
    {
      "epoch": 17.64,
      "eval_nq_accuracy": 0.51834375,
      "eval_nq_bleu_score": 11.256371756261961,
      "eval_nq_bleu_score_sem": 0.46382785723270537,
      "eval_nq_emb_cos_sim": 0.8236100077629089,
      "eval_nq_emb_cos_sim_sem": 0.007252571739837942,
      "eval_nq_emb_top1_equal": 0.21875,
      "eval_nq_emb_top1_equal_sem": 0.03668319712192295,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.2458207607269287,
      "eval_nq_n_ngrams_match_1": 22.554,
      "eval_nq_n_ngrams_match_2": 8.164,
      "eval_nq_n_ngrams_match_3": 3.736,
      "eval_nq_num_pred_words": 48.836,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.448167059424438,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4352164736291856,
      "eval_nq_runtime": 10.0587,
      "eval_nq_samples_per_second": 49.708,
      "eval_nq_steps_per_second": 0.099,
      "eval_nq_token_set_f1": 0.4530780111559071,
      "eval_nq_token_set_f1_sem": 0.00494427752895844,
      "eval_nq_token_set_precision": 0.4080953600425592,
      "eval_nq_token_set_recall": 0.5205275883457564,
      "eval_nq_true_num_tokens": 64.0,
      "step": 91875
    },
    {
      "epoch": 17.64,
      "learning_rate": 0.001,
      "loss": 2.6256,
      "step": 91884
    },
    {
      "epoch": 17.65,
      "learning_rate": 0.001,
      "loss": 2.629,
      "step": 91896
    },
    {
      "epoch": 17.65,
      "learning_rate": 0.001,
      "loss": 2.6062,
      "step": 91908
    },
    {
      "epoch": 17.65,
      "learning_rate": 0.001,
      "loss": 2.6179,
      "step": 91920
    },
    {
      "epoch": 17.65,
      "learning_rate": 0.001,
      "loss": 2.6205,
      "step": 91932
    },
    {
      "epoch": 17.65,
      "learning_rate": 0.001,
      "loss": 2.6213,
      "step": 91944
    },
    {
      "epoch": 17.66,
      "learning_rate": 0.001,
      "loss": 2.6211,
      "step": 91956
    },
    {
      "epoch": 17.66,
      "learning_rate": 0.001,
      "loss": 2.6248,
      "step": 91968
    },
    {
      "epoch": 17.66,
      "learning_rate": 0.001,
      "loss": 2.6149,
      "step": 91980
    },
    {
      "epoch": 17.66,
      "learning_rate": 0.001,
      "loss": 2.6345,
      "step": 91992
    },
    {
      "epoch": 17.67,
      "learning_rate": 0.001,
      "loss": 2.6224,
      "step": 92004
    },
    {
      "epoch": 17.67,
      "learning_rate": 0.001,
      "loss": 2.6156,
      "step": 92016
    },
    {
      "epoch": 17.67,
      "learning_rate": 0.001,
      "loss": 2.6164,
      "step": 92028
    },
    {
      "epoch": 17.67,
      "learning_rate": 0.001,
      "loss": 2.6179,
      "step": 92040
    },
    {
      "epoch": 17.68,
      "learning_rate": 0.001,
      "loss": 2.6132,
      "step": 92052
    },
    {
      "epoch": 17.68,
      "learning_rate": 0.001,
      "loss": 2.6053,
      "step": 92064
    },
    {
      "epoch": 17.68,
      "learning_rate": 0.001,
      "loss": 2.6062,
      "step": 92076
    },
    {
      "epoch": 17.68,
      "learning_rate": 0.001,
      "loss": 2.6104,
      "step": 92088
    },
    {
      "epoch": 17.68,
      "learning_rate": 0.001,
      "loss": 2.6137,
      "step": 92100
    },
    {
      "epoch": 17.69,
      "learning_rate": 0.001,
      "loss": 2.6192,
      "step": 92112
    },
    {
      "epoch": 17.69,
      "learning_rate": 0.001,
      "loss": 2.6272,
      "step": 92124
    },
    {
      "epoch": 17.69,
      "learning_rate": 0.001,
      "loss": 2.6189,
      "step": 92136
    },
    {
      "epoch": 17.69,
      "learning_rate": 0.001,
      "loss": 2.6212,
      "step": 92148
    },
    {
      "epoch": 17.7,
      "learning_rate": 0.001,
      "loss": 2.6176,
      "step": 92160
    },
    {
      "epoch": 17.7,
      "learning_rate": 0.001,
      "loss": 2.6266,
      "step": 92172
    },
    {
      "epoch": 17.7,
      "learning_rate": 0.001,
      "loss": 2.6233,
      "step": 92184
    },
    {
      "epoch": 17.7,
      "learning_rate": 0.001,
      "loss": 2.6166,
      "step": 92196
    },
    {
      "epoch": 17.71,
      "learning_rate": 0.001,
      "loss": 2.6185,
      "step": 92208
    },
    {
      "epoch": 17.71,
      "learning_rate": 0.001,
      "loss": 2.6131,
      "step": 92220
    },
    {
      "epoch": 17.71,
      "learning_rate": 0.001,
      "loss": 2.6316,
      "step": 92232
    },
    {
      "epoch": 17.71,
      "learning_rate": 0.001,
      "loss": 2.6215,
      "step": 92244
    },
    {
      "epoch": 17.71,
      "learning_rate": 0.001,
      "loss": 2.6143,
      "step": 92256
    },
    {
      "epoch": 17.72,
      "learning_rate": 0.001,
      "loss": 2.6252,
      "step": 92268
    },
    {
      "epoch": 17.72,
      "learning_rate": 0.001,
      "loss": 2.6208,
      "step": 92280
    },
    {
      "epoch": 17.72,
      "learning_rate": 0.001,
      "loss": 2.6159,
      "step": 92292
    },
    {
      "epoch": 17.72,
      "learning_rate": 0.001,
      "loss": 2.6335,
      "step": 92304
    },
    {
      "epoch": 17.73,
      "learning_rate": 0.001,
      "loss": 2.6136,
      "step": 92316
    },
    {
      "epoch": 17.73,
      "learning_rate": 0.001,
      "loss": 2.6184,
      "step": 92328
    },
    {
      "epoch": 17.73,
      "learning_rate": 0.001,
      "loss": 2.6164,
      "step": 92340
    },
    {
      "epoch": 17.73,
      "learning_rate": 0.001,
      "loss": 2.6185,
      "step": 92352
    },
    {
      "epoch": 17.74,
      "learning_rate": 0.001,
      "loss": 2.6112,
      "step": 92364
    },
    {
      "epoch": 17.74,
      "learning_rate": 0.001,
      "loss": 2.6207,
      "step": 92376
    },
    {
      "epoch": 17.74,
      "learning_rate": 0.001,
      "loss": 2.6166,
      "step": 92388
    },
    {
      "epoch": 17.74,
      "learning_rate": 0.001,
      "loss": 2.6164,
      "step": 92400
    },
    {
      "epoch": 17.74,
      "learning_rate": 0.001,
      "loss": 2.6232,
      "step": 92412
    },
    {
      "epoch": 17.75,
      "learning_rate": 0.001,
      "loss": 2.6263,
      "step": 92424
    },
    {
      "epoch": 17.75,
      "learning_rate": 0.001,
      "loss": 2.6137,
      "step": 92436
    },
    {
      "epoch": 17.75,
      "learning_rate": 0.001,
      "loss": 2.6185,
      "step": 92448
    },
    {
      "epoch": 17.75,
      "learning_rate": 0.001,
      "loss": 2.6181,
      "step": 92460
    },
    {
      "epoch": 17.76,
      "learning_rate": 0.001,
      "loss": 2.6138,
      "step": 92472
    },
    {
      "epoch": 17.76,
      "learning_rate": 0.001,
      "loss": 2.624,
      "step": 92484
    },
    {
      "epoch": 17.76,
      "learning_rate": 0.001,
      "loss": 2.6237,
      "step": 92496
    },
    {
      "epoch": 17.76,
      "eval_ag_news_accuracy": 0.31846875,
      "eval_ag_news_bleu_score": 4.863661073142848,
      "eval_ag_news_bleu_score_sem": 0.1577217389501848,
      "eval_ag_news_emb_cos_sim": 0.7971920967102051,
      "eval_ag_news_emb_cos_sim_sem": 0.008862593206755238,
      "eval_ag_news_emb_top1_equal": 0.2265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03714537682851538,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.6174674034118652,
      "eval_ag_news_n_ngrams_match_1": 13.882,
      "eval_ag_news_n_ngrams_match_2": 3.134,
      "eval_ag_news_n_ngrams_match_3": 0.886,
      "eval_ag_news_num_pred_words": 46.678,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 37.24312646658485,
      "eval_ag_news_pred_num_tokens": 62.8203125,
      "eval_ag_news_rouge_score": 0.34383639415423217,
      "eval_ag_news_runtime": 10.1795,
      "eval_ag_news_samples_per_second": 49.118,
      "eval_ag_news_steps_per_second": 0.098,
      "eval_ag_news_token_set_f1": 0.3475284364834532,
      "eval_ag_news_token_set_f1_sem": 0.0044591579225977266,
      "eval_ag_news_token_set_precision": 0.32829322948799167,
      "eval_ag_news_token_set_recall": 0.38575414978716244,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 92500
    },
    {
      "epoch": 17.76,
      "eval_anthropic_toxic_prompts_accuracy": 0.11146875,
      "eval_anthropic_toxic_prompts_bleu_score": 2.933481551177838,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11014743869809578,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6661948561668396,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009900308544643676,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.3102123737335205,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.122,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.808,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.646,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.54,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 27.390941970856552,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2083862066969581,
      "eval_anthropic_toxic_prompts_runtime": 10.3347,
      "eval_anthropic_toxic_prompts_samples_per_second": 48.381,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.097,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3443945950596044,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006369789903044341,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.42680924062817843,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3168531000228397,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 92500
    },
    {
      "epoch": 17.76,
      "eval_arxiv_accuracy": 0.34125,
      "eval_arxiv_bleu_score": 4.274683281880005,
      "eval_arxiv_bleu_score_sem": 0.11992070575359076,
      "eval_arxiv_emb_cos_sim": 0.7586016654968262,
      "eval_arxiv_emb_cos_sim_sem": 0.007546569057259207,
      "eval_arxiv_emb_top1_equal": 0.25,
      "eval_arxiv_emb_top1_equal_sem": 0.03842366440207048,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.47928524017334,
      "eval_arxiv_n_ngrams_match_1": 14.936,
      "eval_arxiv_n_ngrams_match_2": 2.934,
      "eval_arxiv_n_ngrams_match_3": 0.67,
      "eval_arxiv_num_pred_words": 40.892,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 32.436529460101816,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.35374131799567277,
      "eval_arxiv_runtime": 10.0127,
      "eval_arxiv_samples_per_second": 49.937,
      "eval_arxiv_steps_per_second": 0.1,
      "eval_arxiv_token_set_f1": 0.34634656403300706,
      "eval_arxiv_token_set_f1_sem": 0.004408067823719984,
      "eval_arxiv_token_set_precision": 0.2966732492359857,
      "eval_arxiv_token_set_recall": 0.4351693458902045,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 92500
    },
    {
      "epoch": 17.76,
      "eval_python_code_alpaca_accuracy": 0.159,
      "eval_python_code_alpaca_bleu_score": 4.662633093449265,
      "eval_python_code_alpaca_bleu_score_sem": 0.14992216460871804,
      "eval_python_code_alpaca_emb_cos_sim": 0.7485463619232178,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009183555946144872,
      "eval_python_code_alpaca_emb_top1_equal": 0.078125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.023813825516515504,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.9587080478668213,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.846,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.022,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.024,
      "eval_python_code_alpaca_num_pred_words": 44.854,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 19.273055798304927,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.32336975255755696,
      "eval_python_code_alpaca_runtime": 10.6849,
      "eval_python_code_alpaca_samples_per_second": 46.795,
      "eval_python_code_alpaca_steps_per_second": 0.094,
      "eval_python_code_alpaca_token_set_f1": 0.4798251319211579,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005362554241441699,
      "eval_python_code_alpaca_token_set_precision": 0.5381995679116696,
      "eval_python_code_alpaca_token_set_recall": 0.4534734870417638,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 92500
    },
    {
      "epoch": 17.76,
      "eval_wikibio_accuracy": 0.314875,
      "eval_wikibio_bleu_score": 5.802102949940852,
      "eval_wikibio_bleu_score_sem": 0.1873798564058999,
      "eval_wikibio_emb_cos_sim": 0.7352509498596191,
      "eval_wikibio_emb_cos_sim_sem": 0.010587505665624214,
      "eval_wikibio_emb_top1_equal": 0.1484375,
      "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.8203341960906982,
      "eval_wikibio_n_ngrams_match_1": 10.366,
      "eval_wikibio_n_ngrams_match_2": 3.37,
      "eval_wikibio_n_ngrams_match_3": 1.2,
      "eval_wikibio_num_pred_words": 37.412,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 45.6194516159719,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3602097375353768,
      "eval_wikibio_runtime": 10.3351,
      "eval_wikibio_samples_per_second": 48.379,
      "eval_wikibio_steps_per_second": 0.097,
      "eval_wikibio_token_set_f1": 0.3225081604573831,
      "eval_wikibio_token_set_f1_sem": 0.0050452955774064195,
      "eval_wikibio_token_set_precision": 0.3356725988627342,
      "eval_wikibio_token_set_recall": 0.32386358128472925,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 92500
    },
    {
      "epoch": 17.76,
      "eval_nq_accuracy": 0.5194375,
      "eval_nq_bleu_score": 11.440735583902782,
      "eval_nq_bleu_score_sem": 0.48206147354742257,
      "eval_nq_emb_cos_sim": 0.8215693235397339,
      "eval_nq_emb_cos_sim_sem": 0.00816039510042736,
      "eval_nq_emb_top1_equal": 0.2578125,
      "eval_nq_emb_top1_equal_sem": 0.038815656435002115,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.2431929111480713,
      "eval_nq_n_ngrams_match_1": 22.926,
      "eval_nq_n_ngrams_match_2": 8.31,
      "eval_nq_n_ngrams_match_3": 3.812,
      "eval_nq_num_pred_words": 49.48,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.423371291639443,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4424263255194434,
      "eval_nq_runtime": 10.0777,
      "eval_nq_samples_per_second": 49.614,
      "eval_nq_steps_per_second": 0.099,
      "eval_nq_token_set_f1": 0.457561549292863,
      "eval_nq_token_set_f1_sem": 0.005089719355175184,
      "eval_nq_token_set_precision": 0.4140746048219013,
      "eval_nq_token_set_recall": 0.5210391169790486,
      "eval_nq_true_num_tokens": 64.0,
      "step": 92500
    },
    {
      "epoch": 17.76,
      "learning_rate": 0.001,
      "loss": 2.6206,
      "step": 92508
    },
    {
      "epoch": 17.76,
      "learning_rate": 0.001,
      "loss": 2.6175,
      "step": 92520
    },
    {
      "epoch": 17.77,
      "learning_rate": 0.001,
      "loss": 2.6143,
      "step": 92532
    },
    {
      "epoch": 17.77,
      "learning_rate": 0.001,
      "loss": 2.6146,
      "step": 92544
    },
    {
      "epoch": 17.77,
      "learning_rate": 0.001,
      "loss": 2.6086,
      "step": 92556
    },
    {
      "epoch": 17.77,
      "learning_rate": 0.001,
      "loss": 2.6142,
      "step": 92568
    },
    {
      "epoch": 17.78,
      "learning_rate": 0.001,
      "loss": 2.62,
      "step": 92580
    },
    {
      "epoch": 17.78,
      "learning_rate": 0.001,
      "loss": 2.612,
      "step": 92592
    },
    {
      "epoch": 17.78,
      "learning_rate": 0.001,
      "loss": 2.618,
      "step": 92604
    },
    {
      "epoch": 17.78,
      "learning_rate": 0.001,
      "loss": 2.6164,
      "step": 92616
    },
    {
      "epoch": 17.79,
      "learning_rate": 0.001,
      "loss": 2.625,
      "step": 92628
    },
    {
      "epoch": 17.79,
      "learning_rate": 0.001,
      "loss": 2.6163,
      "step": 92640
    },
    {
      "epoch": 17.79,
      "learning_rate": 0.001,
      "loss": 2.6151,
      "step": 92652
    },
    {
      "epoch": 17.79,
      "learning_rate": 0.001,
      "loss": 2.6236,
      "step": 92664
    },
    {
      "epoch": 17.79,
      "learning_rate": 0.001,
      "loss": 2.6201,
      "step": 92676
    },
    {
      "epoch": 17.8,
      "learning_rate": 0.001,
      "loss": 2.6246,
      "step": 92688
    },
    {
      "epoch": 17.8,
      "learning_rate": 0.001,
      "loss": 2.6206,
      "step": 92700
    },
    {
      "epoch": 17.8,
      "learning_rate": 0.001,
      "loss": 2.6025,
      "step": 92712
    },
    {
      "epoch": 17.8,
      "learning_rate": 0.001,
      "loss": 2.617,
      "step": 92724
    },
    {
      "epoch": 17.81,
      "learning_rate": 0.001,
      "loss": 2.62,
      "step": 92736
    },
    {
      "epoch": 17.81,
      "learning_rate": 0.001,
      "loss": 2.618,
      "step": 92748
    },
    {
      "epoch": 17.81,
      "learning_rate": 0.001,
      "loss": 2.617,
      "step": 92760
    },
    {
      "epoch": 17.81,
      "learning_rate": 0.001,
      "loss": 2.6165,
      "step": 92772
    },
    {
      "epoch": 17.82,
      "learning_rate": 0.001,
      "loss": 2.6093,
      "step": 92784
    },
    {
      "epoch": 17.82,
      "learning_rate": 0.001,
      "loss": 2.6139,
      "step": 92796
    },
    {
      "epoch": 17.82,
      "learning_rate": 0.001,
      "loss": 2.607,
      "step": 92808
    },
    {
      "epoch": 17.82,
      "learning_rate": 0.001,
      "loss": 2.6267,
      "step": 92820
    },
    {
      "epoch": 17.82,
      "learning_rate": 0.001,
      "loss": 2.6203,
      "step": 92832
    },
    {
      "epoch": 17.83,
      "learning_rate": 0.001,
      "loss": 2.618,
      "step": 92844
    },
    {
      "epoch": 17.83,
      "learning_rate": 0.001,
      "loss": 2.6199,
      "step": 92856
    },
    {
      "epoch": 17.83,
      "learning_rate": 0.001,
      "loss": 2.6177,
      "step": 92868
    },
    {
      "epoch": 17.83,
      "learning_rate": 0.001,
      "loss": 2.6215,
      "step": 92880
    },
    {
      "epoch": 17.84,
      "learning_rate": 0.001,
      "loss": 2.6193,
      "step": 92892
    },
    {
      "epoch": 17.84,
      "learning_rate": 0.001,
      "loss": 2.627,
      "step": 92904
    },
    {
      "epoch": 17.84,
      "learning_rate": 0.001,
      "loss": 2.6171,
      "step": 92916
    },
    {
      "epoch": 17.84,
      "learning_rate": 0.001,
      "loss": 2.6184,
      "step": 92928
    },
    {
      "epoch": 17.85,
      "learning_rate": 0.001,
      "loss": 2.6226,
      "step": 92940
    },
    {
      "epoch": 17.85,
      "learning_rate": 0.001,
      "loss": 2.6156,
      "step": 92952
    },
    {
      "epoch": 17.85,
      "learning_rate": 0.001,
      "loss": 2.6134,
      "step": 92964
    },
    {
      "epoch": 17.85,
      "learning_rate": 0.001,
      "loss": 2.6086,
      "step": 92976
    },
    {
      "epoch": 17.85,
      "learning_rate": 0.001,
      "loss": 2.6049,
      "step": 92988
    },
    {
      "epoch": 17.86,
      "learning_rate": 0.001,
      "loss": 2.6202,
      "step": 93000
    },
    {
      "epoch": 17.86,
      "learning_rate": 0.001,
      "loss": 2.6233,
      "step": 93012
    },
    {
      "epoch": 17.86,
      "learning_rate": 0.001,
      "loss": 2.6227,
      "step": 93024
    },
    {
      "epoch": 17.86,
      "learning_rate": 0.001,
      "loss": 2.6335,
      "step": 93036
    },
    {
      "epoch": 17.87,
      "learning_rate": 0.001,
      "loss": 2.6117,
      "step": 93048
    },
    {
      "epoch": 17.87,
      "learning_rate": 0.001,
      "loss": 2.6058,
      "step": 93060
    },
    {
      "epoch": 17.87,
      "learning_rate": 0.001,
      "loss": 2.6109,
      "step": 93072
    },
    {
      "epoch": 17.87,
      "learning_rate": 0.001,
      "loss": 2.615,
      "step": 93084
    },
    {
      "epoch": 17.88,
      "learning_rate": 0.001,
      "loss": 2.614,
      "step": 93096
    },
    {
      "epoch": 17.88,
      "learning_rate": 0.001,
      "loss": 2.6219,
      "step": 93108
    },
    {
      "epoch": 17.88,
      "learning_rate": 0.001,
      "loss": 2.6199,
      "step": 93120
    },
    {
      "epoch": 17.88,
      "eval_ag_news_accuracy": 0.31634375,
      "eval_ag_news_bleu_score": 4.837314789298008,
      "eval_ag_news_bleu_score_sem": 0.158196908235372,
      "eval_ag_news_emb_cos_sim": 0.7934457063674927,
      "eval_ag_news_emb_cos_sim_sem": 0.008294545226514996,
      "eval_ag_news_emb_top1_equal": 0.2265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03714537682851538,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.62282133102417,
      "eval_ag_news_n_ngrams_match_1": 13.692,
      "eval_ag_news_n_ngrams_match_2": 3.03,
      "eval_ag_news_n_ngrams_match_3": 0.91,
      "eval_ag_news_num_pred_words": 46.386,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 37.44305820218369,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.34048091626799803,
      "eval_ag_news_runtime": 11.2359,
      "eval_ag_news_samples_per_second": 44.5,
      "eval_ag_news_steps_per_second": 0.089,
      "eval_ag_news_token_set_f1": 0.3415275626137268,
      "eval_ag_news_token_set_f1_sem": 0.004491170834461802,
      "eval_ag_news_token_set_precision": 0.32531268967090105,
      "eval_ag_news_token_set_recall": 0.37416549819451767,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 93125
    },
    {
      "epoch": 17.88,
      "eval_anthropic_toxic_prompts_accuracy": 0.111375,
      "eval_anthropic_toxic_prompts_bleu_score": 2.9985999175076103,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11096133851322557,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.670669436454773,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009866299989162431,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1484375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.031548465007086954,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2762463092803955,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.208,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.826,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.662,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.19,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.476202463374108,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21131656501208573,
      "eval_anthropic_toxic_prompts_runtime": 14.6574,
      "eval_anthropic_toxic_prompts_samples_per_second": 34.112,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.068,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3548089564822094,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00666789748832613,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.43700918760625473,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3255814772326906,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 93125
    },
    {
      "epoch": 17.88,
      "eval_arxiv_accuracy": 0.3411875,
      "eval_arxiv_bleu_score": 4.19622171537826,
      "eval_arxiv_bleu_score_sem": 0.12138220514264243,
      "eval_arxiv_emb_cos_sim": 0.7518219947814941,
      "eval_arxiv_emb_cos_sim_sem": 0.008517964353382229,
      "eval_arxiv_emb_top1_equal": 0.2421875,
      "eval_arxiv_emb_top1_equal_sem": 0.038014990119662626,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4892160892486572,
      "eval_arxiv_n_ngrams_match_1": 14.716,
      "eval_arxiv_n_ngrams_match_2": 2.828,
      "eval_arxiv_n_ngrams_match_3": 0.64,
      "eval_arxiv_num_pred_words": 41.174,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 32.76025652044602,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.34704146581365936,
      "eval_arxiv_runtime": 10.5427,
      "eval_arxiv_samples_per_second": 47.426,
      "eval_arxiv_steps_per_second": 0.095,
      "eval_arxiv_token_set_f1": 0.3422334342223793,
      "eval_arxiv_token_set_f1_sem": 0.004257148718547928,
      "eval_arxiv_token_set_precision": 0.2932820736842361,
      "eval_arxiv_token_set_recall": 0.4266267073825341,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 93125
    },
    {
      "epoch": 17.88,
      "eval_python_code_alpaca_accuracy": 0.15846875,
      "eval_python_code_alpaca_bleu_score": 4.301077048283588,
      "eval_python_code_alpaca_bleu_score_sem": 0.14803714003648313,
      "eval_python_code_alpaca_emb_cos_sim": 0.732406497001648,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010670660080647192,
      "eval_python_code_alpaca_emb_top1_equal": 0.1328125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.952526569366455,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.51,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.684,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.83,
      "eval_python_code_alpaca_num_pred_words": 43.302,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 19.154287278960982,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.31905578682799896,
      "eval_python_code_alpaca_runtime": 9.8969,
      "eval_python_code_alpaca_samples_per_second": 50.521,
      "eval_python_code_alpaca_steps_per_second": 0.101,
      "eval_python_code_alpaca_token_set_f1": 0.45926936281659464,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005632772764396882,
      "eval_python_code_alpaca_token_set_precision": 0.5169281572811251,
      "eval_python_code_alpaca_token_set_recall": 0.43414018076482214,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 93125
    },
    {
      "epoch": 17.88,
      "eval_wikibio_accuracy": 0.31653125,
      "eval_wikibio_bleu_score": 5.873538809761199,
      "eval_wikibio_bleu_score_sem": 0.19959044635202375,
      "eval_wikibio_emb_cos_sim": 0.7330521941184998,
      "eval_wikibio_emb_cos_sim_sem": 0.009765970733371264,
      "eval_wikibio_emb_top1_equal": 0.2421875,
      "eval_wikibio_emb_top1_equal_sem": 0.038014990119662626,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.8268773555755615,
      "eval_wikibio_n_ngrams_match_1": 10.268,
      "eval_wikibio_n_ngrams_match_2": 3.394,
      "eval_wikibio_n_ngrams_match_3": 1.24,
      "eval_wikibio_num_pred_words": 37.502,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 45.918925648238435,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.35415308537137075,
      "eval_wikibio_runtime": 10.4693,
      "eval_wikibio_samples_per_second": 47.759,
      "eval_wikibio_steps_per_second": 0.096,
      "eval_wikibio_token_set_f1": 0.3200020451912247,
      "eval_wikibio_token_set_f1_sem": 0.005271703323204975,
      "eval_wikibio_token_set_precision": 0.33303423938450916,
      "eval_wikibio_token_set_recall": 0.3221129450327614,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 93125
    },
    {
      "epoch": 17.88,
      "eval_nq_accuracy": 0.52028125,
      "eval_nq_bleu_score": 11.282592584182439,
      "eval_nq_bleu_score_sem": 0.4686162741904307,
      "eval_nq_emb_cos_sim": 0.8212836980819702,
      "eval_nq_emb_cos_sim_sem": 0.007685375751562789,
      "eval_nq_emb_top1_equal": 0.2578125,
      "eval_nq_emb_top1_equal_sem": 0.038815656435002115,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.2405052185058594,
      "eval_nq_n_ngrams_match_1": 22.62,
      "eval_nq_n_ngrams_match_2": 8.214,
      "eval_nq_n_ngrams_match_3": 3.722,
      "eval_nq_num_pred_words": 49.15,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.39807817124667,
      "eval_nq_pred_num_tokens": 62.9453125,
      "eval_nq_rouge_score": 0.4364852563185425,
      "eval_nq_runtime": 10.808,
      "eval_nq_samples_per_second": 46.262,
      "eval_nq_steps_per_second": 0.093,
      "eval_nq_token_set_f1": 0.4522558518762581,
      "eval_nq_token_set_f1_sem": 0.004922945423360847,
      "eval_nq_token_set_precision": 0.40761799053898984,
      "eval_nq_token_set_recall": 0.5183607239670648,
      "eval_nq_true_num_tokens": 64.0,
      "step": 93125
    },
    {
      "epoch": 17.88,
      "learning_rate": 0.001,
      "loss": 2.6237,
      "step": 93132
    },
    {
      "epoch": 17.88,
      "learning_rate": 0.001,
      "loss": 2.6273,
      "step": 93144
    },
    {
      "epoch": 17.89,
      "learning_rate": 0.001,
      "loss": 2.6215,
      "step": 93156
    },
    {
      "epoch": 17.89,
      "learning_rate": 0.001,
      "loss": 2.6119,
      "step": 93168
    },
    {
      "epoch": 17.89,
      "learning_rate": 0.001,
      "loss": 2.6199,
      "step": 93180
    },
    {
      "epoch": 17.89,
      "learning_rate": 0.001,
      "loss": 2.6234,
      "step": 93192
    },
    {
      "epoch": 17.9,
      "learning_rate": 0.001,
      "loss": 2.6257,
      "step": 93204
    },
    {
      "epoch": 17.9,
      "learning_rate": 0.001,
      "loss": 2.6225,
      "step": 93216
    },
    {
      "epoch": 17.9,
      "learning_rate": 0.001,
      "loss": 2.6186,
      "step": 93228
    },
    {
      "epoch": 17.9,
      "learning_rate": 0.001,
      "loss": 2.6217,
      "step": 93240
    },
    {
      "epoch": 17.91,
      "learning_rate": 0.001,
      "loss": 2.6167,
      "step": 93252
    },
    {
      "epoch": 17.91,
      "learning_rate": 0.001,
      "loss": 2.6303,
      "step": 93264
    },
    {
      "epoch": 17.91,
      "learning_rate": 0.001,
      "loss": 2.6195,
      "step": 93276
    },
    {
      "epoch": 17.91,
      "learning_rate": 0.001,
      "loss": 2.6265,
      "step": 93288
    },
    {
      "epoch": 17.91,
      "learning_rate": 0.001,
      "loss": 2.6109,
      "step": 93300
    },
    {
      "epoch": 17.92,
      "learning_rate": 0.001,
      "loss": 2.6246,
      "step": 93312
    },
    {
      "epoch": 17.92,
      "learning_rate": 0.001,
      "loss": 2.629,
      "step": 93324
    },
    {
      "epoch": 17.92,
      "learning_rate": 0.001,
      "loss": 2.61,
      "step": 93336
    },
    {
      "epoch": 17.92,
      "learning_rate": 0.001,
      "loss": 2.6157,
      "step": 93348
    },
    {
      "epoch": 17.93,
      "learning_rate": 0.001,
      "loss": 2.6135,
      "step": 93360
    },
    {
      "epoch": 17.93,
      "learning_rate": 0.001,
      "loss": 2.6127,
      "step": 93372
    },
    {
      "epoch": 17.93,
      "learning_rate": 0.001,
      "loss": 2.6211,
      "step": 93384
    },
    {
      "epoch": 17.93,
      "learning_rate": 0.001,
      "loss": 2.629,
      "step": 93396
    },
    {
      "epoch": 17.94,
      "learning_rate": 0.001,
      "loss": 2.6163,
      "step": 93408
    },
    {
      "epoch": 17.94,
      "learning_rate": 0.001,
      "loss": 2.6233,
      "step": 93420
    },
    {
      "epoch": 17.94,
      "learning_rate": 0.001,
      "loss": 2.6155,
      "step": 93432
    },
    {
      "epoch": 17.94,
      "learning_rate": 0.001,
      "loss": 2.6162,
      "step": 93444
    },
    {
      "epoch": 17.94,
      "learning_rate": 0.001,
      "loss": 2.6221,
      "step": 93456
    },
    {
      "epoch": 17.95,
      "learning_rate": 0.001,
      "loss": 2.6153,
      "step": 93468
    },
    {
      "epoch": 17.95,
      "learning_rate": 0.001,
      "loss": 2.6181,
      "step": 93480
    },
    {
      "epoch": 17.95,
      "learning_rate": 0.001,
      "loss": 2.6173,
      "step": 93492
    },
    {
      "epoch": 17.95,
      "learning_rate": 0.001,
      "loss": 2.6198,
      "step": 93504
    },
    {
      "epoch": 17.96,
      "learning_rate": 0.001,
      "loss": 2.6162,
      "step": 93516
    },
    {
      "epoch": 17.96,
      "learning_rate": 0.001,
      "loss": 2.6087,
      "step": 93528
    },
    {
      "epoch": 17.96,
      "learning_rate": 0.001,
      "loss": 2.6197,
      "step": 93540
    },
    {
      "epoch": 17.96,
      "learning_rate": 0.001,
      "loss": 2.6284,
      "step": 93552
    },
    {
      "epoch": 17.97,
      "learning_rate": 0.001,
      "loss": 2.6104,
      "step": 93564
    },
    {
      "epoch": 17.97,
      "learning_rate": 0.001,
      "loss": 2.6175,
      "step": 93576
    },
    {
      "epoch": 17.97,
      "learning_rate": 0.001,
      "loss": 2.6229,
      "step": 93588
    },
    {
      "epoch": 17.97,
      "learning_rate": 0.001,
      "loss": 2.619,
      "step": 93600
    },
    {
      "epoch": 17.97,
      "learning_rate": 0.001,
      "loss": 2.6118,
      "step": 93612
    },
    {
      "epoch": 17.98,
      "learning_rate": 0.001,
      "loss": 2.6205,
      "step": 93624
    },
    {
      "epoch": 17.98,
      "learning_rate": 0.001,
      "loss": 2.6155,
      "step": 93636
    },
    {
      "epoch": 17.98,
      "learning_rate": 0.001,
      "loss": 2.6202,
      "step": 93648
    },
    {
      "epoch": 17.98,
      "learning_rate": 0.001,
      "loss": 2.6176,
      "step": 93660
    },
    {
      "epoch": 17.99,
      "learning_rate": 0.001,
      "loss": 2.6097,
      "step": 93672
    },
    {
      "epoch": 17.99,
      "learning_rate": 0.001,
      "loss": 2.6208,
      "step": 93684
    },
    {
      "epoch": 17.99,
      "learning_rate": 0.001,
      "loss": 2.6108,
      "step": 93696
    },
    {
      "epoch": 17.99,
      "learning_rate": 0.001,
      "loss": 2.6232,
      "step": 93708
    },
    {
      "epoch": 18.0,
      "learning_rate": 0.001,
      "loss": 2.6141,
      "step": 93720
    },
    {
      "epoch": 18.0,
      "learning_rate": 0.001,
      "loss": 2.6085,
      "step": 93732
    },
    {
      "epoch": 18.0,
      "learning_rate": 0.001,
      "loss": 2.6281,
      "step": 93744
    },
    {
      "epoch": 18.0,
      "eval_ag_news_accuracy": 0.3144375,
      "eval_ag_news_bleu_score": 4.574494344549086,
      "eval_ag_news_bleu_score_sem": 0.1435212392138452,
      "eval_ag_news_emb_cos_sim": 0.793586790561676,
      "eval_ag_news_emb_cos_sim_sem": 0.007458764000920855,
      "eval_ag_news_emb_top1_equal": 0.2421875,
      "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.6420984268188477,
      "eval_ag_news_n_ngrams_match_1": 13.376,
      "eval_ag_news_n_ngrams_match_2": 2.882,
      "eval_ag_news_n_ngrams_match_3": 0.798,
      "eval_ag_news_num_pred_words": 46.32,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 38.17185358256012,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.33207449811473866,
      "eval_ag_news_runtime": 11.2693,
      "eval_ag_news_samples_per_second": 44.368,
      "eval_ag_news_steps_per_second": 0.089,
      "eval_ag_news_token_set_f1": 0.3363561703668599,
      "eval_ag_news_token_set_f1_sem": 0.004340804453614188,
      "eval_ag_news_token_set_precision": 0.3172241798990339,
      "eval_ag_news_token_set_recall": 0.37277672892563407,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 93750
    },
    {
      "epoch": 18.0,
      "eval_anthropic_toxic_prompts_accuracy": 0.1110625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.103489918119735,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11824354450817112,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6579990386962891,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009372159800508583,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2885220050811768,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.088,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.894,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.72,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.064,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.803219344071017,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21089693352220318,
      "eval_anthropic_toxic_prompts_runtime": 10.5303,
      "eval_anthropic_toxic_prompts_samples_per_second": 47.482,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.095,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3536362828815651,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006643735997345817,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4300869633408589,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3266644538346673,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 93750
    },
    {
      "epoch": 18.0,
      "eval_arxiv_accuracy": 0.3426875,
      "eval_arxiv_bleu_score": 4.077023537311002,
      "eval_arxiv_bleu_score_sem": 0.10955470998965908,
      "eval_arxiv_emb_cos_sim": 0.7496046423912048,
      "eval_arxiv_emb_cos_sim_sem": 0.00712119540321793,
      "eval_arxiv_emb_top1_equal": 0.265625,
      "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4744839668273926,
      "eval_arxiv_n_ngrams_match_1": 14.696,
      "eval_arxiv_n_ngrams_match_2": 2.752,
      "eval_arxiv_n_ngrams_match_3": 0.566,
      "eval_arxiv_num_pred_words": 40.868,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 32.281166085241175,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.34951689833591937,
      "eval_arxiv_runtime": 11.7101,
      "eval_arxiv_samples_per_second": 42.698,
      "eval_arxiv_steps_per_second": 0.085,
      "eval_arxiv_token_set_f1": 0.34436050614848424,
      "eval_arxiv_token_set_f1_sem": 0.004123611640607539,
      "eval_arxiv_token_set_precision": 0.2943533131553631,
      "eval_arxiv_token_set_recall": 0.4335014152226167,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 93750
    },
    {
      "epoch": 18.0,
      "eval_python_code_alpaca_accuracy": 0.15496875,
      "eval_python_code_alpaca_bleu_score": 4.313322415328112,
      "eval_python_code_alpaca_bleu_score_sem": 0.13878868386029222,
      "eval_python_code_alpaca_emb_cos_sim": 0.7393602132797241,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010233132329166585,
      "eval_python_code_alpaca_emb_top1_equal": 0.15625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.95182466506958,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.572,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.678,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.858,
      "eval_python_code_alpaca_num_pred_words": 43.4,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 19.140847519680477,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.32137747886032575,
      "eval_python_code_alpaca_runtime": 10.6162,
      "eval_python_code_alpaca_samples_per_second": 47.098,
      "eval_python_code_alpaca_steps_per_second": 0.094,
      "eval_python_code_alpaca_token_set_f1": 0.4648644059294784,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005675868936735658,
      "eval_python_code_alpaca_token_set_precision": 0.5258356326515998,
      "eval_python_code_alpaca_token_set_recall": 0.4371973371560503,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 93750
    },
    {
      "epoch": 18.0,
      "eval_wikibio_accuracy": 0.317875,
      "eval_wikibio_bleu_score": 5.756219311939541,
      "eval_wikibio_bleu_score_sem": 0.20848427871141167,
      "eval_wikibio_emb_cos_sim": 0.7319413423538208,
      "eval_wikibio_emb_cos_sim_sem": 0.009697173905711325,
      "eval_wikibio_emb_top1_equal": 0.1328125,
      "eval_wikibio_emb_top1_equal_sem": 0.030114394778901498,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7880618572235107,
      "eval_wikibio_n_ngrams_match_1": 9.79,
      "eval_wikibio_n_ngrams_match_2": 3.26,
      "eval_wikibio_n_ngrams_match_3": 1.194,
      "eval_wikibio_num_pred_words": 36.05,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 44.17070812211405,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3384459607199994,
      "eval_wikibio_runtime": 11.2505,
      "eval_wikibio_samples_per_second": 44.443,
      "eval_wikibio_steps_per_second": 0.089,
      "eval_wikibio_token_set_f1": 0.3108921579174648,
      "eval_wikibio_token_set_f1_sem": 0.0056522056164034756,
      "eval_wikibio_token_set_precision": 0.3179812734568046,
      "eval_wikibio_token_set_recall": 0.3198856093096847,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 93750
    },
    {
      "epoch": 18.0,
      "eval_nq_accuracy": 0.51978125,
      "eval_nq_bleu_score": 11.54615205377183,
      "eval_nq_bleu_score_sem": 0.4849643492232392,
      "eval_nq_emb_cos_sim": 0.8235622048377991,
      "eval_nq_emb_cos_sim_sem": 0.008237769470303774,
      "eval_nq_emb_top1_equal": 0.21875,
      "eval_nq_emb_top1_equal_sem": 0.03668319712192295,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.240114212036133,
      "eval_nq_n_ngrams_match_1": 22.596,
      "eval_nq_n_ngrams_match_2": 8.252,
      "eval_nq_n_ngrams_match_3": 3.846,
      "eval_nq_num_pred_words": 48.882,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.394404180202658,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4356193735828876,
      "eval_nq_runtime": 11.7636,
      "eval_nq_samples_per_second": 42.504,
      "eval_nq_steps_per_second": 0.085,
      "eval_nq_token_set_f1": 0.4528296441176887,
      "eval_nq_token_set_f1_sem": 0.004923525097325228,
      "eval_nq_token_set_precision": 0.4080762323221383,
      "eval_nq_token_set_recall": 0.5193516682844476,
      "eval_nq_true_num_tokens": 64.0,
      "step": 93750
    },
    {
      "epoch": 18.0,
      "learning_rate": 0.001,
      "loss": 2.6004,
      "step": 93756
    },
    {
      "epoch": 18.0,
      "learning_rate": 0.001,
      "loss": 2.5943,
      "step": 93768
    },
    {
      "epoch": 18.01,
      "learning_rate": 0.001,
      "loss": 2.6037,
      "step": 93780
    },
    {
      "epoch": 18.01,
      "learning_rate": 0.001,
      "loss": 2.6072,
      "step": 93792
    },
    {
      "epoch": 18.01,
      "learning_rate": 0.001,
      "loss": 2.5967,
      "step": 93804
    },
    {
      "epoch": 18.01,
      "learning_rate": 0.001,
      "loss": 2.5966,
      "step": 93816
    },
    {
      "epoch": 18.02,
      "learning_rate": 0.001,
      "loss": 2.5948,
      "step": 93828
    },
    {
      "epoch": 18.02,
      "learning_rate": 0.001,
      "loss": 2.5989,
      "step": 93840
    },
    {
      "epoch": 18.02,
      "learning_rate": 0.001,
      "loss": 2.5943,
      "step": 93852
    },
    {
      "epoch": 18.02,
      "learning_rate": 0.001,
      "loss": 2.5957,
      "step": 93864
    },
    {
      "epoch": 18.03,
      "learning_rate": 0.001,
      "loss": 2.6068,
      "step": 93876
    },
    {
      "epoch": 18.03,
      "learning_rate": 0.001,
      "loss": 2.6083,
      "step": 93888
    },
    {
      "epoch": 18.03,
      "learning_rate": 0.001,
      "loss": 2.6072,
      "step": 93900
    },
    {
      "epoch": 18.03,
      "learning_rate": 0.001,
      "loss": 2.6034,
      "step": 93912
    },
    {
      "epoch": 18.03,
      "learning_rate": 0.001,
      "loss": 2.6041,
      "step": 93924
    },
    {
      "epoch": 18.04,
      "learning_rate": 0.001,
      "loss": 2.6046,
      "step": 93936
    },
    {
      "epoch": 18.04,
      "learning_rate": 0.001,
      "loss": 2.5969,
      "step": 93948
    },
    {
      "epoch": 18.04,
      "learning_rate": 0.001,
      "loss": 2.6026,
      "step": 93960
    },
    {
      "epoch": 18.04,
      "learning_rate": 0.001,
      "loss": 2.6032,
      "step": 93972
    },
    {
      "epoch": 18.05,
      "learning_rate": 0.001,
      "loss": 2.6057,
      "step": 93984
    },
    {
      "epoch": 18.05,
      "learning_rate": 0.001,
      "loss": 2.5933,
      "step": 93996
    },
    {
      "epoch": 18.05,
      "learning_rate": 0.001,
      "loss": 2.6049,
      "step": 94008
    },
    {
      "epoch": 18.05,
      "learning_rate": 0.001,
      "loss": 2.6041,
      "step": 94020
    },
    {
      "epoch": 18.06,
      "learning_rate": 0.001,
      "loss": 2.5942,
      "step": 94032
    },
    {
      "epoch": 18.06,
      "learning_rate": 0.001,
      "loss": 2.5981,
      "step": 94044
    },
    {
      "epoch": 18.06,
      "learning_rate": 0.001,
      "loss": 2.5913,
      "step": 94056
    },
    {
      "epoch": 18.06,
      "learning_rate": 0.001,
      "loss": 2.6033,
      "step": 94068
    },
    {
      "epoch": 18.06,
      "learning_rate": 0.001,
      "loss": 2.6011,
      "step": 94080
    },
    {
      "epoch": 18.07,
      "learning_rate": 0.001,
      "loss": 2.6016,
      "step": 94092
    },
    {
      "epoch": 18.07,
      "learning_rate": 0.001,
      "loss": 2.602,
      "step": 94104
    },
    {
      "epoch": 18.07,
      "learning_rate": 0.001,
      "loss": 2.6126,
      "step": 94116
    },
    {
      "epoch": 18.07,
      "learning_rate": 0.001,
      "loss": 2.6116,
      "step": 94128
    },
    {
      "epoch": 18.08,
      "learning_rate": 0.001,
      "loss": 2.6027,
      "step": 94140
    },
    {
      "epoch": 18.08,
      "learning_rate": 0.001,
      "loss": 2.6098,
      "step": 94152
    },
    {
      "epoch": 18.08,
      "learning_rate": 0.001,
      "loss": 2.5967,
      "step": 94164
    },
    {
      "epoch": 18.08,
      "learning_rate": 0.001,
      "loss": 2.6029,
      "step": 94176
    },
    {
      "epoch": 18.09,
      "learning_rate": 0.001,
      "loss": 2.5989,
      "step": 94188
    },
    {
      "epoch": 18.09,
      "learning_rate": 0.001,
      "loss": 2.5977,
      "step": 94200
    },
    {
      "epoch": 18.09,
      "learning_rate": 0.001,
      "loss": 2.6058,
      "step": 94212
    },
    {
      "epoch": 18.09,
      "learning_rate": 0.001,
      "loss": 2.6043,
      "step": 94224
    },
    {
      "epoch": 18.09,
      "learning_rate": 0.001,
      "loss": 2.6086,
      "step": 94236
    },
    {
      "epoch": 18.1,
      "learning_rate": 0.001,
      "loss": 2.6047,
      "step": 94248
    },
    {
      "epoch": 18.1,
      "learning_rate": 0.001,
      "loss": 2.612,
      "step": 94260
    },
    {
      "epoch": 18.1,
      "learning_rate": 0.001,
      "loss": 2.6017,
      "step": 94272
    },
    {
      "epoch": 18.1,
      "learning_rate": 0.001,
      "loss": 2.6039,
      "step": 94284
    },
    {
      "epoch": 18.11,
      "learning_rate": 0.001,
      "loss": 2.6159,
      "step": 94296
    },
    {
      "epoch": 18.11,
      "learning_rate": 0.001,
      "loss": 2.5975,
      "step": 94308
    },
    {
      "epoch": 18.11,
      "learning_rate": 0.001,
      "loss": 2.607,
      "step": 94320
    },
    {
      "epoch": 18.11,
      "learning_rate": 0.001,
      "loss": 2.6046,
      "step": 94332
    },
    {
      "epoch": 18.12,
      "learning_rate": 0.001,
      "loss": 2.6034,
      "step": 94344
    },
    {
      "epoch": 18.12,
      "learning_rate": 0.001,
      "loss": 2.6036,
      "step": 94356
    },
    {
      "epoch": 18.12,
      "learning_rate": 0.001,
      "loss": 2.6064,
      "step": 94368
    },
    {
      "epoch": 18.12,
      "eval_ag_news_accuracy": 0.3145,
      "eval_ag_news_bleu_score": 4.8065221613972495,
      "eval_ag_news_bleu_score_sem": 0.15450619368618343,
      "eval_ag_news_emb_cos_sim": 0.7999658584594727,
      "eval_ag_news_emb_cos_sim_sem": 0.007387206554683453,
      "eval_ag_news_emb_top1_equal": 0.21875,
      "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.629249334335327,
      "eval_ag_news_n_ngrams_match_1": 13.714,
      "eval_ag_news_n_ngrams_match_2": 3.122,
      "eval_ag_news_n_ngrams_match_3": 0.9,
      "eval_ag_news_num_pred_words": 46.654,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 37.684517523536975,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.33747093125143024,
      "eval_ag_news_runtime": 11.3175,
      "eval_ag_news_samples_per_second": 44.179,
      "eval_ag_news_steps_per_second": 0.088,
      "eval_ag_news_token_set_f1": 0.34560309648479715,
      "eval_ag_news_token_set_f1_sem": 0.004468138765622358,
      "eval_ag_news_token_set_precision": 0.32737728981366854,
      "eval_ag_news_token_set_recall": 0.38181135263852184,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 94375
    },
    {
      "epoch": 18.12,
      "eval_anthropic_toxic_prompts_accuracy": 0.11028125,
      "eval_anthropic_toxic_prompts_bleu_score": 2.975410458576226,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11331345214952575,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6517369151115417,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010762586967061009,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.313065767288208,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.89,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.784,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.63,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.876,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 27.469210720801932,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20248132834157756,
      "eval_anthropic_toxic_prompts_runtime": 10.8508,
      "eval_anthropic_toxic_prompts_samples_per_second": 46.079,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.092,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3423611679932183,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006497291578651429,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.41437085722812783,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3227532348392972,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 94375
    },
    {
      "epoch": 18.12,
      "eval_arxiv_accuracy": 0.34103125,
      "eval_arxiv_bleu_score": 4.138464325490081,
      "eval_arxiv_bleu_score_sem": 0.12211145482656759,
      "eval_arxiv_emb_cos_sim": 0.7511368989944458,
      "eval_arxiv_emb_cos_sim_sem": 0.008781480938251514,
      "eval_arxiv_emb_top1_equal": 0.2890625,
      "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4760892391204834,
      "eval_arxiv_n_ngrams_match_1": 14.762,
      "eval_arxiv_n_ngrams_match_2": 2.748,
      "eval_arxiv_n_ngrams_match_3": 0.608,
      "eval_arxiv_num_pred_words": 40.776,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 32.33302776166574,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.35050254989762897,
      "eval_arxiv_runtime": 11.1117,
      "eval_arxiv_samples_per_second": 44.998,
      "eval_arxiv_steps_per_second": 0.09,
      "eval_arxiv_token_set_f1": 0.34647991406169265,
      "eval_arxiv_token_set_f1_sem": 0.004276592512614056,
      "eval_arxiv_token_set_precision": 0.29635678169109236,
      "eval_arxiv_token_set_recall": 0.43613273544248893,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 94375
    },
    {
      "epoch": 18.12,
      "eval_python_code_alpaca_accuracy": 0.1585625,
      "eval_python_code_alpaca_bleu_score": 4.331375591198053,
      "eval_python_code_alpaca_bleu_score_sem": 0.13471041632555336,
      "eval_python_code_alpaca_emb_cos_sim": 0.7365143299102783,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010550341364860718,
      "eval_python_code_alpaca_emb_top1_equal": 0.140625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.9513700008392334,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.258,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.708,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.874,
      "eval_python_code_alpaca_num_pred_words": 42.838,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 19.132146839068767,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3116014285408749,
      "eval_python_code_alpaca_runtime": 10.5574,
      "eval_python_code_alpaca_samples_per_second": 47.36,
      "eval_python_code_alpaca_steps_per_second": 0.095,
      "eval_python_code_alpaca_token_set_f1": 0.4536628685738009,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005900277468102476,
      "eval_python_code_alpaca_token_set_precision": 0.5088881956056012,
      "eval_python_code_alpaca_token_set_recall": 0.4367756972270833,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 94375
    },
    {
      "epoch": 18.12,
      "eval_wikibio_accuracy": 0.3124375,
      "eval_wikibio_bleu_score": 5.463337977675738,
      "eval_wikibio_bleu_score_sem": 0.19585233495029952,
      "eval_wikibio_emb_cos_sim": 0.7196528911590576,
      "eval_wikibio_emb_cos_sim_sem": 0.009854681288408093,
      "eval_wikibio_emb_top1_equal": 0.1171875,
      "eval_wikibio_emb_top1_equal_sem": 0.02854125312152025,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.8302905559539795,
      "eval_wikibio_n_ngrams_match_1": 9.738,
      "eval_wikibio_n_ngrams_match_2": 3.178,
      "eval_wikibio_n_ngrams_match_3": 1.102,
      "eval_wikibio_num_pred_words": 36.014,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 46.07592392350552,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3391892565673838,
      "eval_wikibio_runtime": 10.9082,
      "eval_wikibio_samples_per_second": 45.837,
      "eval_wikibio_steps_per_second": 0.092,
      "eval_wikibio_token_set_f1": 0.3073926674740038,
      "eval_wikibio_token_set_f1_sem": 0.005887596659974254,
      "eval_wikibio_token_set_precision": 0.3140139764942013,
      "eval_wikibio_token_set_recall": 0.31815646353670773,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 94375
    },
    {
      "epoch": 18.12,
      "eval_nq_accuracy": 0.51971875,
      "eval_nq_bleu_score": 11.383309887274182,
      "eval_nq_bleu_score_sem": 0.48382154270143746,
      "eval_nq_emb_cos_sim": 0.8248996734619141,
      "eval_nq_emb_cos_sim_sem": 0.007240632986452724,
      "eval_nq_emb_top1_equal": 0.234375,
      "eval_nq_emb_top1_equal_sem": 0.03758909358128201,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.243713617324829,
      "eval_nq_n_ngrams_match_1": 22.716,
      "eval_nq_n_ngrams_match_2": 8.26,
      "eval_nq_n_ngrams_match_3": 3.8,
      "eval_nq_num_pred_words": 49.358,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.428279377001168,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.43690430494291854,
      "eval_nq_runtime": 11.5556,
      "eval_nq_samples_per_second": 43.269,
      "eval_nq_steps_per_second": 0.087,
      "eval_nq_token_set_f1": 0.45260147590760574,
      "eval_nq_token_set_f1_sem": 0.005032885431939979,
      "eval_nq_token_set_precision": 0.40921770882235925,
      "eval_nq_token_set_recall": 0.5149665276817206,
      "eval_nq_true_num_tokens": 64.0,
      "step": 94375
    },
    {
      "epoch": 18.12,
      "learning_rate": 0.001,
      "loss": 2.6184,
      "step": 94380
    },
    {
      "epoch": 18.12,
      "learning_rate": 0.001,
      "loss": 2.6144,
      "step": 94392
    },
    {
      "epoch": 18.13,
      "learning_rate": 0.001,
      "loss": 2.6055,
      "step": 94404
    },
    {
      "epoch": 18.13,
      "learning_rate": 0.001,
      "loss": 2.6063,
      "step": 94416
    },
    {
      "epoch": 18.13,
      "learning_rate": 0.001,
      "loss": 2.6035,
      "step": 94428
    },
    {
      "epoch": 18.13,
      "learning_rate": 0.001,
      "loss": 2.6109,
      "step": 94440
    },
    {
      "epoch": 18.14,
      "learning_rate": 0.001,
      "loss": 2.6147,
      "step": 94452
    },
    {
      "epoch": 18.14,
      "learning_rate": 0.001,
      "loss": 2.6091,
      "step": 94464
    },
    {
      "epoch": 18.14,
      "learning_rate": 0.001,
      "loss": 2.6069,
      "step": 94476
    },
    {
      "epoch": 18.14,
      "learning_rate": 0.001,
      "loss": 2.6091,
      "step": 94488
    },
    {
      "epoch": 18.15,
      "learning_rate": 0.001,
      "loss": 2.6069,
      "step": 94500
    },
    {
      "epoch": 18.15,
      "learning_rate": 0.001,
      "loss": 2.5967,
      "step": 94512
    },
    {
      "epoch": 18.15,
      "learning_rate": 0.001,
      "loss": 2.5919,
      "step": 94524
    },
    {
      "epoch": 18.15,
      "learning_rate": 0.001,
      "loss": 2.6039,
      "step": 94536
    },
    {
      "epoch": 18.15,
      "learning_rate": 0.001,
      "loss": 2.6032,
      "step": 94548
    },
    {
      "epoch": 18.16,
      "learning_rate": 0.001,
      "loss": 2.5983,
      "step": 94560
    },
    {
      "epoch": 18.16,
      "learning_rate": 0.001,
      "loss": 2.6032,
      "step": 94572
    },
    {
      "epoch": 18.16,
      "learning_rate": 0.001,
      "loss": 2.6019,
      "step": 94584
    },
    {
      "epoch": 18.16,
      "learning_rate": 0.001,
      "loss": 2.6008,
      "step": 94596
    },
    {
      "epoch": 18.17,
      "learning_rate": 0.001,
      "loss": 2.6038,
      "step": 94608
    },
    {
      "epoch": 18.17,
      "learning_rate": 0.001,
      "loss": 2.6119,
      "step": 94620
    },
    {
      "epoch": 18.17,
      "learning_rate": 0.001,
      "loss": 2.5952,
      "step": 94632
    },
    {
      "epoch": 18.17,
      "learning_rate": 0.001,
      "loss": 2.6052,
      "step": 94644
    },
    {
      "epoch": 18.18,
      "learning_rate": 0.001,
      "loss": 2.6077,
      "step": 94656
    },
    {
      "epoch": 18.18,
      "learning_rate": 0.001,
      "loss": 2.606,
      "step": 94668
    },
    {
      "epoch": 18.18,
      "learning_rate": 0.001,
      "loss": 2.6067,
      "step": 94680
    },
    {
      "epoch": 18.18,
      "learning_rate": 0.001,
      "loss": 2.6005,
      "step": 94692
    },
    {
      "epoch": 18.18,
      "learning_rate": 0.001,
      "loss": 2.6127,
      "step": 94704
    },
    {
      "epoch": 18.19,
      "learning_rate": 0.001,
      "loss": 2.6138,
      "step": 94716
    },
    {
      "epoch": 18.19,
      "learning_rate": 0.001,
      "loss": 2.6006,
      "step": 94728
    },
    {
      "epoch": 18.19,
      "learning_rate": 0.001,
      "loss": 2.6043,
      "step": 94740
    },
    {
      "epoch": 18.19,
      "learning_rate": 0.001,
      "loss": 2.6068,
      "step": 94752
    },
    {
      "epoch": 18.2,
      "learning_rate": 0.001,
      "loss": 2.6113,
      "step": 94764
    },
    {
      "epoch": 18.2,
      "learning_rate": 0.001,
      "loss": 2.607,
      "step": 94776
    },
    {
      "epoch": 18.2,
      "learning_rate": 0.001,
      "loss": 2.6117,
      "step": 94788
    },
    {
      "epoch": 18.2,
      "learning_rate": 0.001,
      "loss": 2.5984,
      "step": 94800
    },
    {
      "epoch": 18.21,
      "learning_rate": 0.001,
      "loss": 2.6152,
      "step": 94812
    },
    {
      "epoch": 18.21,
      "learning_rate": 0.001,
      "loss": 2.6095,
      "step": 94824
    },
    {
      "epoch": 18.21,
      "learning_rate": 0.001,
      "loss": 2.5988,
      "step": 94836
    },
    {
      "epoch": 18.21,
      "learning_rate": 0.001,
      "loss": 2.6058,
      "step": 94848
    },
    {
      "epoch": 18.21,
      "learning_rate": 0.001,
      "loss": 2.612,
      "step": 94860
    },
    {
      "epoch": 18.22,
      "learning_rate": 0.001,
      "loss": 2.6034,
      "step": 94872
    },
    {
      "epoch": 18.22,
      "learning_rate": 0.001,
      "loss": 2.6033,
      "step": 94884
    },
    {
      "epoch": 18.22,
      "learning_rate": 0.001,
      "loss": 2.6052,
      "step": 94896
    },
    {
      "epoch": 18.22,
      "learning_rate": 0.001,
      "loss": 2.6066,
      "step": 94908
    },
    {
      "epoch": 18.23,
      "learning_rate": 0.001,
      "loss": 2.6096,
      "step": 94920
    },
    {
      "epoch": 18.23,
      "learning_rate": 0.001,
      "loss": 2.6054,
      "step": 94932
    },
    {
      "epoch": 18.23,
      "learning_rate": 0.001,
      "loss": 2.6092,
      "step": 94944
    },
    {
      "epoch": 18.23,
      "learning_rate": 0.001,
      "loss": 2.6025,
      "step": 94956
    },
    {
      "epoch": 18.24,
      "learning_rate": 0.001,
      "loss": 2.6085,
      "step": 94968
    },
    {
      "epoch": 18.24,
      "learning_rate": 0.001,
      "loss": 2.6029,
      "step": 94980
    },
    {
      "epoch": 18.24,
      "learning_rate": 0.001,
      "loss": 2.6043,
      "step": 94992
    },
    {
      "epoch": 18.24,
      "eval_ag_news_accuracy": 0.3154375,
      "eval_ag_news_bleu_score": 4.783146663537894,
      "eval_ag_news_bleu_score_sem": 0.15633000150700754,
      "eval_ag_news_emb_cos_sim": 0.7934670448303223,
      "eval_ag_news_emb_cos_sim_sem": 0.008029166936321644,
      "eval_ag_news_emb_top1_equal": 0.1953125,
      "eval_ag_news_emb_top1_equal_sem": 0.035178457165496856,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.6278457641601562,
      "eval_ag_news_n_ngrams_match_1": 13.704,
      "eval_ag_news_n_ngrams_match_2": 2.988,
      "eval_ag_news_n_ngrams_match_3": 0.884,
      "eval_ag_news_num_pred_words": 46.606,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 37.63166176073855,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.33781881099991656,
      "eval_ag_news_runtime": 11.0857,
      "eval_ag_news_samples_per_second": 45.103,
      "eval_ag_news_steps_per_second": 0.09,
      "eval_ag_news_token_set_f1": 0.34362471604045963,
      "eval_ag_news_token_set_f1_sem": 0.004504580187691374,
      "eval_ag_news_token_set_precision": 0.32633752948756295,
      "eval_ag_news_token_set_recall": 0.37860933095874616,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 95000
    },
    {
      "epoch": 18.24,
      "eval_anthropic_toxic_prompts_accuracy": 0.1110625,
      "eval_anthropic_toxic_prompts_bleu_score": 2.8900259276062106,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1136314521746121,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6522161960601807,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.01041171906301937,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.140625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030847557647994725,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.28578782081604,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.004,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.788,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.626,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.608,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.73003449950339,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2038527069402539,
      "eval_anthropic_toxic_prompts_runtime": 10.5468,
      "eval_anthropic_toxic_prompts_samples_per_second": 47.408,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.095,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3549872699450282,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0065296038233517645,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.42315681055129517,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3393227364247595,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 95000
    },
    {
      "epoch": 18.24,
      "eval_arxiv_accuracy": 0.34046875,
      "eval_arxiv_bleu_score": 4.205228001935656,
      "eval_arxiv_bleu_score_sem": 0.115815289133178,
      "eval_arxiv_emb_cos_sim": 0.7570281624794006,
      "eval_arxiv_emb_cos_sim_sem": 0.007559703727470923,
      "eval_arxiv_emb_top1_equal": 0.3125,
      "eval_arxiv_emb_top1_equal_sem": 0.041130074229814934,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.479388952255249,
      "eval_arxiv_n_ngrams_match_1": 14.94,
      "eval_arxiv_n_ngrams_match_2": 2.88,
      "eval_arxiv_n_ngrams_match_3": 0.618,
      "eval_arxiv_num_pred_words": 40.93,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 32.4398936945549,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3540739966092163,
      "eval_arxiv_runtime": 10.9365,
      "eval_arxiv_samples_per_second": 45.718,
      "eval_arxiv_steps_per_second": 0.091,
      "eval_arxiv_token_set_f1": 0.34910766433947127,
      "eval_arxiv_token_set_f1_sem": 0.004090998537275445,
      "eval_arxiv_token_set_precision": 0.29942840317882663,
      "eval_arxiv_token_set_recall": 0.43679318830735475,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 95000
    },
    {
      "epoch": 18.24,
      "eval_python_code_alpaca_accuracy": 0.15625,
      "eval_python_code_alpaca_bleu_score": 4.3538373229199445,
      "eval_python_code_alpaca_bleu_score_sem": 0.1407386582802524,
      "eval_python_code_alpaca_emb_cos_sim": 0.7455194592475891,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009571122559982862,
      "eval_python_code_alpaca_emb_top1_equal": 0.1484375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.9710068702697754,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.658,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.682,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.836,
      "eval_python_code_alpaca_num_pred_words": 43.674,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 19.51155531402468,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3226625391226031,
      "eval_python_code_alpaca_runtime": 10.4435,
      "eval_python_code_alpaca_samples_per_second": 47.877,
      "eval_python_code_alpaca_steps_per_second": 0.096,
      "eval_python_code_alpaca_token_set_f1": 0.4712154631934445,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005424239542164947,
      "eval_python_code_alpaca_token_set_precision": 0.5265152919415134,
      "eval_python_code_alpaca_token_set_recall": 0.45126111251285717,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 95000
    },
    {
      "epoch": 18.24,
      "eval_wikibio_accuracy": 0.31821875,
      "eval_wikibio_bleu_score": 5.486517132495208,
      "eval_wikibio_bleu_score_sem": 0.20212476746254804,
      "eval_wikibio_emb_cos_sim": 0.7030634880065918,
      "eval_wikibio_emb_cos_sim_sem": 0.012838128936859731,
      "eval_wikibio_emb_top1_equal": 0.15625,
      "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7831408977508545,
      "eval_wikibio_n_ngrams_match_1": 9.25,
      "eval_wikibio_n_ngrams_match_2": 3.1,
      "eval_wikibio_n_ngrams_match_3": 1.096,
      "eval_wikibio_num_pred_words": 34.852,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 43.95387979682376,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3253598902247037,
      "eval_wikibio_runtime": 10.4965,
      "eval_wikibio_samples_per_second": 47.635,
      "eval_wikibio_steps_per_second": 0.095,
      "eval_wikibio_token_set_f1": 0.300403327658984,
      "eval_wikibio_token_set_f1_sem": 0.005999880441165681,
      "eval_wikibio_token_set_precision": 0.30070859794356924,
      "eval_wikibio_token_set_recall": 0.32042129537599146,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 95000
    },
    {
      "epoch": 18.24,
      "eval_nq_accuracy": 0.51909375,
      "eval_nq_bleu_score": 11.440870655561664,
      "eval_nq_bleu_score_sem": 0.49320907865324376,
      "eval_nq_emb_cos_sim": 0.8287546038627625,
      "eval_nq_emb_cos_sim_sem": 0.006802683766487008,
      "eval_nq_emb_top1_equal": 0.2578125,
      "eval_nq_emb_top1_equal_sem": 0.038815656435002115,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.2402915954589844,
      "eval_nq_n_ngrams_match_1": 22.792,
      "eval_nq_n_ngrams_match_2": 8.188,
      "eval_nq_n_ngrams_match_3": 3.812,
      "eval_nq_num_pred_words": 48.918,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.396070739577427,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4416670845091659,
      "eval_nq_runtime": 12.1546,
      "eval_nq_samples_per_second": 41.137,
      "eval_nq_steps_per_second": 0.082,
      "eval_nq_token_set_f1": 0.4556921188722675,
      "eval_nq_token_set_f1_sem": 0.005034046263896343,
      "eval_nq_token_set_precision": 0.41308612085370944,
      "eval_nq_token_set_recall": 0.516794620460981,
      "eval_nq_true_num_tokens": 64.0,
      "step": 95000
    },
    {
      "epoch": 18.24,
      "learning_rate": 0.001,
      "loss": 2.6112,
      "step": 95004
    },
    {
      "epoch": 18.24,
      "learning_rate": 0.001,
      "loss": 2.5988,
      "step": 95016
    },
    {
      "epoch": 18.25,
      "learning_rate": 0.001,
      "loss": 2.6152,
      "step": 95028
    },
    {
      "epoch": 18.25,
      "learning_rate": 0.001,
      "loss": 2.6205,
      "step": 95040
    },
    {
      "epoch": 18.25,
      "learning_rate": 0.001,
      "loss": 2.6002,
      "step": 95052
    },
    {
      "epoch": 18.25,
      "learning_rate": 0.001,
      "loss": 2.5957,
      "step": 95064
    },
    {
      "epoch": 18.26,
      "learning_rate": 0.001,
      "loss": 2.6038,
      "step": 95076
    },
    {
      "epoch": 18.26,
      "learning_rate": 0.001,
      "loss": 2.6009,
      "step": 95088
    },
    {
      "epoch": 18.26,
      "learning_rate": 0.001,
      "loss": 2.597,
      "step": 95100
    },
    {
      "epoch": 18.26,
      "learning_rate": 0.001,
      "loss": 2.6073,
      "step": 95112
    },
    {
      "epoch": 18.26,
      "learning_rate": 0.001,
      "loss": 2.5937,
      "step": 95124
    },
    {
      "epoch": 18.27,
      "learning_rate": 0.001,
      "loss": 2.606,
      "step": 95136
    },
    {
      "epoch": 18.27,
      "learning_rate": 0.001,
      "loss": 2.6134,
      "step": 95148
    },
    {
      "epoch": 18.27,
      "learning_rate": 0.001,
      "loss": 2.6033,
      "step": 95160
    },
    {
      "epoch": 18.27,
      "learning_rate": 0.001,
      "loss": 2.6043,
      "step": 95172
    },
    {
      "epoch": 18.28,
      "learning_rate": 0.001,
      "loss": 2.61,
      "step": 95184
    },
    {
      "epoch": 18.28,
      "learning_rate": 0.001,
      "loss": 2.6113,
      "step": 95196
    },
    {
      "epoch": 18.28,
      "learning_rate": 0.001,
      "loss": 2.6035,
      "step": 95208
    },
    {
      "epoch": 18.28,
      "learning_rate": 0.001,
      "loss": 2.6044,
      "step": 95220
    },
    {
      "epoch": 18.29,
      "learning_rate": 0.001,
      "loss": 2.6115,
      "step": 95232
    },
    {
      "epoch": 18.29,
      "learning_rate": 0.001,
      "loss": 2.6202,
      "step": 95244
    },
    {
      "epoch": 18.29,
      "learning_rate": 0.001,
      "loss": 2.6102,
      "step": 95256
    },
    {
      "epoch": 18.29,
      "learning_rate": 0.001,
      "loss": 2.6039,
      "step": 95268
    },
    {
      "epoch": 18.29,
      "learning_rate": 0.001,
      "loss": 2.6029,
      "step": 95280
    },
    {
      "epoch": 18.3,
      "learning_rate": 0.001,
      "loss": 2.6149,
      "step": 95292
    },
    {
      "epoch": 18.3,
      "learning_rate": 0.001,
      "loss": 2.6131,
      "step": 95304
    },
    {
      "epoch": 18.3,
      "learning_rate": 0.001,
      "loss": 2.6151,
      "step": 95316
    },
    {
      "epoch": 18.3,
      "learning_rate": 0.001,
      "loss": 2.6,
      "step": 95328
    },
    {
      "epoch": 18.31,
      "learning_rate": 0.001,
      "loss": 2.6045,
      "step": 95340
    },
    {
      "epoch": 18.31,
      "learning_rate": 0.001,
      "loss": 2.6142,
      "step": 95352
    },
    {
      "epoch": 18.31,
      "learning_rate": 0.001,
      "loss": 2.6059,
      "step": 95364
    },
    {
      "epoch": 18.31,
      "learning_rate": 0.001,
      "loss": 2.6091,
      "step": 95376
    },
    {
      "epoch": 18.32,
      "learning_rate": 0.001,
      "loss": 2.5962,
      "step": 95388
    },
    {
      "epoch": 18.32,
      "learning_rate": 0.001,
      "loss": 2.6112,
      "step": 95400
    },
    {
      "epoch": 18.32,
      "learning_rate": 0.001,
      "loss": 2.6074,
      "step": 95412
    },
    {
      "epoch": 18.32,
      "learning_rate": 0.001,
      "loss": 2.5956,
      "step": 95424
    },
    {
      "epoch": 18.32,
      "learning_rate": 0.001,
      "loss": 2.6096,
      "step": 95436
    },
    {
      "epoch": 18.33,
      "learning_rate": 0.001,
      "loss": 2.617,
      "step": 95448
    },
    {
      "epoch": 18.33,
      "learning_rate": 0.001,
      "loss": 2.6029,
      "step": 95460
    },
    {
      "epoch": 18.33,
      "learning_rate": 0.001,
      "loss": 2.617,
      "step": 95472
    },
    {
      "epoch": 18.33,
      "learning_rate": 0.001,
      "loss": 2.613,
      "step": 95484
    },
    {
      "epoch": 18.34,
      "learning_rate": 0.001,
      "loss": 2.6111,
      "step": 95496
    },
    {
      "epoch": 18.34,
      "learning_rate": 0.001,
      "loss": 2.6016,
      "step": 95508
    },
    {
      "epoch": 18.34,
      "learning_rate": 0.001,
      "loss": 2.6088,
      "step": 95520
    },
    {
      "epoch": 18.34,
      "learning_rate": 0.001,
      "loss": 2.6126,
      "step": 95532
    },
    {
      "epoch": 18.35,
      "learning_rate": 0.001,
      "loss": 2.6079,
      "step": 95544
    },
    {
      "epoch": 18.35,
      "learning_rate": 0.001,
      "loss": 2.6067,
      "step": 95556
    },
    {
      "epoch": 18.35,
      "learning_rate": 0.001,
      "loss": 2.6024,
      "step": 95568
    },
    {
      "epoch": 18.35,
      "learning_rate": 0.001,
      "loss": 2.6035,
      "step": 95580
    },
    {
      "epoch": 18.35,
      "learning_rate": 0.001,
      "loss": 2.6027,
      "step": 95592
    },
    {
      "epoch": 18.36,
      "learning_rate": 0.001,
      "loss": 2.6139,
      "step": 95604
    },
    {
      "epoch": 18.36,
      "learning_rate": 0.001,
      "loss": 2.6109,
      "step": 95616
    },
    {
      "epoch": 18.36,
      "eval_ag_news_accuracy": 0.31484375,
      "eval_ag_news_bleu_score": 4.6691743342691145,
      "eval_ag_news_bleu_score_sem": 0.15190602213001148,
      "eval_ag_news_emb_cos_sim": 0.7999417781829834,
      "eval_ag_news_emb_cos_sim_sem": 0.007424792982476567,
      "eval_ag_news_emb_top1_equal": 0.2421875,
      "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.621875524520874,
      "eval_ag_news_n_ngrams_match_1": 13.69,
      "eval_ag_news_n_ngrams_match_2": 2.924,
      "eval_ag_news_n_ngrams_match_3": 0.854,
      "eval_ag_news_num_pred_words": 47.118,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 37.40766105629688,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.33940541760522486,
      "eval_ag_news_runtime": 13.7506,
      "eval_ag_news_samples_per_second": 36.362,
      "eval_ag_news_steps_per_second": 0.073,
      "eval_ag_news_token_set_f1": 0.3432769915525782,
      "eval_ag_news_token_set_f1_sem": 0.004322283844567325,
      "eval_ag_news_token_set_precision": 0.32585468129250833,
      "eval_ag_news_token_set_recall": 0.3809083381372029,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 95625
    },
    {
      "epoch": 18.36,
      "eval_anthropic_toxic_prompts_accuracy": 0.11215625,
      "eval_anthropic_toxic_prompts_bleu_score": 2.908802248532186,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11383107203567008,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6600983738899231,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010381937704734868,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.297804594039917,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.96,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.822,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.638,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.514,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 27.053180962636926,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2029485573796751,
      "eval_anthropic_toxic_prompts_runtime": 12.1911,
      "eval_anthropic_toxic_prompts_samples_per_second": 41.013,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.082,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.34661106786176654,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0067618686121811815,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4214624193266087,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3258114548960384,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 95625
    },
    {
      "epoch": 18.36,
      "eval_arxiv_accuracy": 0.34303125,
      "eval_arxiv_bleu_score": 4.205971410617861,
      "eval_arxiv_bleu_score_sem": 0.11394144713960389,
      "eval_arxiv_emb_cos_sim": 0.7626786231994629,
      "eval_arxiv_emb_cos_sim_sem": 0.007923761619462946,
      "eval_arxiv_emb_top1_equal": 0.296875,
      "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4587795734405518,
      "eval_arxiv_n_ngrams_match_1": 14.976,
      "eval_arxiv_n_ngrams_match_2": 2.86,
      "eval_arxiv_n_ngrams_match_3": 0.62,
      "eval_arxiv_num_pred_words": 41.592,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 31.778169916606007,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.35088619096447243,
      "eval_arxiv_runtime": 11.8202,
      "eval_arxiv_samples_per_second": 42.301,
      "eval_arxiv_steps_per_second": 0.085,
      "eval_arxiv_token_set_f1": 0.3453943120528482,
      "eval_arxiv_token_set_f1_sem": 0.0042224657524059335,
      "eval_arxiv_token_set_precision": 0.2990934165056068,
      "eval_arxiv_token_set_recall": 0.4227365150516084,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 95625
    },
    {
      "epoch": 18.36,
      "eval_python_code_alpaca_accuracy": 0.15903125,
      "eval_python_code_alpaca_bleu_score": 4.330235826821296,
      "eval_python_code_alpaca_bleu_score_sem": 0.13610601802067374,
      "eval_python_code_alpaca_emb_cos_sim": 0.7526489496231079,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008678642627884932,
      "eval_python_code_alpaca_emb_top1_equal": 0.109375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.9327666759490967,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.512,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.702,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.852,
      "eval_python_code_alpaca_num_pred_words": 43.332,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.779515522982695,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.31768589357022037,
      "eval_python_code_alpaca_runtime": 10.8573,
      "eval_python_code_alpaca_samples_per_second": 46.052,
      "eval_python_code_alpaca_steps_per_second": 0.092,
      "eval_python_code_alpaca_token_set_f1": 0.46245710827433906,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005804947650448553,
      "eval_python_code_alpaca_token_set_precision": 0.5227187278007261,
      "eval_python_code_alpaca_token_set_recall": 0.4397650423476415,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 95625
    },
    {
      "epoch": 18.36,
      "eval_wikibio_accuracy": 0.31603125,
      "eval_wikibio_bleu_score": 5.542624458993231,
      "eval_wikibio_bleu_score_sem": 0.20689460327959286,
      "eval_wikibio_emb_cos_sim": 0.7380982637405396,
      "eval_wikibio_emb_cos_sim_sem": 0.00941926087935303,
      "eval_wikibio_emb_top1_equal": 0.171875,
      "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7812247276306152,
      "eval_wikibio_n_ngrams_match_1": 9.95,
      "eval_wikibio_n_ngrams_match_2": 3.204,
      "eval_wikibio_n_ngrams_match_3": 1.114,
      "eval_wikibio_num_pred_words": 37.102,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 43.86973732707724,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3423724842590822,
      "eval_wikibio_runtime": 10.8719,
      "eval_wikibio_samples_per_second": 45.99,
      "eval_wikibio_steps_per_second": 0.092,
      "eval_wikibio_token_set_f1": 0.3123894949471945,
      "eval_wikibio_token_set_f1_sem": 0.005552994663794415,
      "eval_wikibio_token_set_precision": 0.3201717083277487,
      "eval_wikibio_token_set_recall": 0.3240208272144956,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 95625
    },
    {
      "epoch": 18.36,
      "eval_nq_accuracy": 0.5205625,
      "eval_nq_bleu_score": 11.345577577378592,
      "eval_nq_bleu_score_sem": 0.4706911061260375,
      "eval_nq_emb_cos_sim": 0.82608962059021,
      "eval_nq_emb_cos_sim_sem": 0.007153023517833242,
      "eval_nq_emb_top1_equal": 0.265625,
      "eval_nq_emb_top1_equal_sem": 0.03919146934646163,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.236826181411743,
      "eval_nq_n_ngrams_match_1": 22.714,
      "eval_nq_n_ngrams_match_2": 8.268,
      "eval_nq_n_ngrams_match_3": 3.744,
      "eval_nq_num_pred_words": 48.984,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.363565818082948,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.43735779080035103,
      "eval_nq_runtime": 10.9841,
      "eval_nq_samples_per_second": 45.52,
      "eval_nq_steps_per_second": 0.091,
      "eval_nq_token_set_f1": 0.4541292172230679,
      "eval_nq_token_set_f1_sem": 0.005104829263973392,
      "eval_nq_token_set_precision": 0.41178179183125596,
      "eval_nq_token_set_recall": 0.5140669627958959,
      "eval_nq_true_num_tokens": 64.0,
      "step": 95625
    },
    {
      "epoch": 18.36,
      "learning_rate": 0.001,
      "loss": 2.605,
      "step": 95628
    },
    {
      "epoch": 18.36,
      "learning_rate": 0.001,
      "loss": 2.6075,
      "step": 95640
    },
    {
      "epoch": 18.37,
      "learning_rate": 0.001,
      "loss": 2.5935,
      "step": 95652
    },
    {
      "epoch": 18.37,
      "learning_rate": 0.001,
      "loss": 2.609,
      "step": 95664
    },
    {
      "epoch": 18.37,
      "learning_rate": 0.001,
      "loss": 2.6216,
      "step": 95676
    },
    {
      "epoch": 18.37,
      "learning_rate": 0.001,
      "loss": 2.6099,
      "step": 95688
    },
    {
      "epoch": 18.38,
      "learning_rate": 0.001,
      "loss": 2.6087,
      "step": 95700
    },
    {
      "epoch": 18.38,
      "learning_rate": 0.001,
      "loss": 2.6047,
      "step": 95712
    },
    {
      "epoch": 18.38,
      "learning_rate": 0.001,
      "loss": 2.6215,
      "step": 95724
    },
    {
      "epoch": 18.38,
      "learning_rate": 0.001,
      "loss": 2.6056,
      "step": 95736
    },
    {
      "epoch": 18.38,
      "learning_rate": 0.001,
      "loss": 2.6079,
      "step": 95748
    },
    {
      "epoch": 18.39,
      "learning_rate": 0.001,
      "loss": 2.6124,
      "step": 95760
    },
    {
      "epoch": 18.39,
      "learning_rate": 0.001,
      "loss": 2.6174,
      "step": 95772
    },
    {
      "epoch": 18.39,
      "learning_rate": 0.001,
      "loss": 2.6194,
      "step": 95784
    },
    {
      "epoch": 18.39,
      "learning_rate": 0.001,
      "loss": 2.6152,
      "step": 95796
    },
    {
      "epoch": 18.4,
      "learning_rate": 0.001,
      "loss": 2.611,
      "step": 95808
    },
    {
      "epoch": 18.4,
      "learning_rate": 0.001,
      "loss": 2.5986,
      "step": 95820
    },
    {
      "epoch": 18.4,
      "learning_rate": 0.001,
      "loss": 2.6141,
      "step": 95832
    },
    {
      "epoch": 18.4,
      "learning_rate": 0.001,
      "loss": 2.6113,
      "step": 95844
    },
    {
      "epoch": 18.41,
      "learning_rate": 0.001,
      "loss": 2.617,
      "step": 95856
    },
    {
      "epoch": 18.41,
      "learning_rate": 0.001,
      "loss": 2.6106,
      "step": 95868
    },
    {
      "epoch": 18.41,
      "learning_rate": 0.001,
      "loss": 2.6008,
      "step": 95880
    },
    {
      "epoch": 18.41,
      "learning_rate": 0.001,
      "loss": 2.6066,
      "step": 95892
    },
    {
      "epoch": 18.41,
      "learning_rate": 0.001,
      "loss": 2.5966,
      "step": 95904
    },
    {
      "epoch": 18.42,
      "learning_rate": 0.001,
      "loss": 2.6114,
      "step": 95916
    },
    {
      "epoch": 18.42,
      "learning_rate": 0.001,
      "loss": 2.614,
      "step": 95928
    },
    {
      "epoch": 18.42,
      "learning_rate": 0.001,
      "loss": 2.6143,
      "step": 95940
    },
    {
      "epoch": 18.42,
      "learning_rate": 0.001,
      "loss": 2.6024,
      "step": 95952
    },
    {
      "epoch": 18.43,
      "learning_rate": 0.001,
      "loss": 2.6139,
      "step": 95964
    },
    {
      "epoch": 18.43,
      "learning_rate": 0.001,
      "loss": 2.6074,
      "step": 95976
    },
    {
      "epoch": 18.43,
      "learning_rate": 0.001,
      "loss": 2.6075,
      "step": 95988
    },
    {
      "epoch": 18.43,
      "learning_rate": 0.001,
      "loss": 2.6061,
      "step": 96000
    },
    {
      "epoch": 18.44,
      "learning_rate": 0.001,
      "loss": 2.6003,
      "step": 96012
    },
    {
      "epoch": 18.44,
      "learning_rate": 0.001,
      "loss": 2.6047,
      "step": 96024
    },
    {
      "epoch": 18.44,
      "learning_rate": 0.001,
      "loss": 2.6124,
      "step": 96036
    },
    {
      "epoch": 18.44,
      "learning_rate": 0.001,
      "loss": 2.5965,
      "step": 96048
    },
    {
      "epoch": 18.44,
      "learning_rate": 0.001,
      "loss": 2.6062,
      "step": 96060
    },
    {
      "epoch": 18.45,
      "learning_rate": 0.001,
      "loss": 2.6042,
      "step": 96072
    },
    {
      "epoch": 18.45,
      "learning_rate": 0.001,
      "loss": 2.612,
      "step": 96084
    },
    {
      "epoch": 18.45,
      "learning_rate": 0.001,
      "loss": 2.6099,
      "step": 96096
    },
    {
      "epoch": 18.45,
      "learning_rate": 0.001,
      "loss": 2.6141,
      "step": 96108
    },
    {
      "epoch": 18.46,
      "learning_rate": 0.001,
      "loss": 2.6099,
      "step": 96120
    },
    {
      "epoch": 18.46,
      "learning_rate": 0.001,
      "loss": 2.5956,
      "step": 96132
    },
    {
      "epoch": 18.46,
      "learning_rate": 0.001,
      "loss": 2.6142,
      "step": 96144
    },
    {
      "epoch": 18.46,
      "learning_rate": 0.001,
      "loss": 2.5985,
      "step": 96156
    },
    {
      "epoch": 18.47,
      "learning_rate": 0.001,
      "loss": 2.6056,
      "step": 96168
    },
    {
      "epoch": 18.47,
      "learning_rate": 0.001,
      "loss": 2.5973,
      "step": 96180
    },
    {
      "epoch": 18.47,
      "learning_rate": 0.001,
      "loss": 2.6124,
      "step": 96192
    },
    {
      "epoch": 18.47,
      "learning_rate": 0.001,
      "loss": 2.6116,
      "step": 96204
    },
    {
      "epoch": 18.47,
      "learning_rate": 0.001,
      "loss": 2.6044,
      "step": 96216
    },
    {
      "epoch": 18.48,
      "learning_rate": 0.001,
      "loss": 2.6078,
      "step": 96228
    },
    {
      "epoch": 18.48,
      "learning_rate": 0.001,
      "loss": 2.6053,
      "step": 96240
    },
    {
      "epoch": 18.48,
      "eval_ag_news_accuracy": 0.31578125,
      "eval_ag_news_bleu_score": 4.742773185263093,
      "eval_ag_news_bleu_score_sem": 0.14767151838953968,
      "eval_ag_news_emb_cos_sim": 0.797610878944397,
      "eval_ag_news_emb_cos_sim_sem": 0.007353679395237545,
      "eval_ag_news_emb_top1_equal": 0.25,
      "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.614140510559082,
      "eval_ag_news_n_ngrams_match_1": 13.82,
      "eval_ag_news_n_ngrams_match_2": 3.034,
      "eval_ag_news_n_ngrams_match_3": 0.85,
      "eval_ag_news_num_pred_words": 46.712,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 37.11942845443713,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.340488347183705,
      "eval_ag_news_runtime": 11.1712,
      "eval_ag_news_samples_per_second": 44.758,
      "eval_ag_news_steps_per_second": 0.09,
      "eval_ag_news_token_set_f1": 0.34519639819405223,
      "eval_ag_news_token_set_f1_sem": 0.004369930500342174,
      "eval_ag_news_token_set_precision": 0.3280042408013696,
      "eval_ag_news_token_set_recall": 0.37912332160173745,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 96250
    },
    {
      "epoch": 18.48,
      "eval_anthropic_toxic_prompts_accuracy": 0.1115,
      "eval_anthropic_toxic_prompts_bleu_score": 2.986602460368294,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11548935349472776,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.675180196762085,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009086568803731716,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.140625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030847557647994725,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.291154623031616,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.968,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.78,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.65,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.616,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.873874944238842,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20837498481236738,
      "eval_anthropic_toxic_prompts_runtime": 10.5555,
      "eval_anthropic_toxic_prompts_samples_per_second": 47.369,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.095,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.34639541159399195,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0064582298334139,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4221812706910981,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32334515757111487,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 96250
    },
    {
      "epoch": 18.48,
      "eval_arxiv_accuracy": 0.3426875,
      "eval_arxiv_bleu_score": 4.164852716596905,
      "eval_arxiv_bleu_score_sem": 0.12307658990951294,
      "eval_arxiv_emb_cos_sim": 0.7445935010910034,
      "eval_arxiv_emb_cos_sim_sem": 0.009263523664293722,
      "eval_arxiv_emb_top1_equal": 0.234375,
      "eval_arxiv_emb_top1_equal_sem": 0.03758909358128201,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.461890935897827,
      "eval_arxiv_n_ngrams_match_1": 14.508,
      "eval_arxiv_n_ngrams_match_2": 2.79,
      "eval_arxiv_n_ngrams_match_3": 0.618,
      "eval_arxiv_num_pred_words": 39.576,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 31.877197296594794,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3475244341900761,
      "eval_arxiv_runtime": 10.6587,
      "eval_arxiv_samples_per_second": 46.91,
      "eval_arxiv_steps_per_second": 0.094,
      "eval_arxiv_token_set_f1": 0.3401370926019041,
      "eval_arxiv_token_set_f1_sem": 0.004517323822640119,
      "eval_arxiv_token_set_precision": 0.29032364400816774,
      "eval_arxiv_token_set_recall": 0.4309384057692911,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 96250
    },
    {
      "epoch": 18.48,
      "eval_python_code_alpaca_accuracy": 0.15696875,
      "eval_python_code_alpaca_bleu_score": 4.380251750551483,
      "eval_python_code_alpaca_bleu_score_sem": 0.1424218613949205,
      "eval_python_code_alpaca_emb_cos_sim": 0.7421876192092896,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010029880469311815,
      "eval_python_code_alpaca_emb_top1_equal": 0.1015625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.026804565886848545,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.933974504470825,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.544,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.69,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.874,
      "eval_python_code_alpaca_num_pred_words": 42.79,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.802211661217957,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.32092510218041737,
      "eval_python_code_alpaca_runtime": 10.9966,
      "eval_python_code_alpaca_samples_per_second": 45.468,
      "eval_python_code_alpaca_steps_per_second": 0.091,
      "eval_python_code_alpaca_token_set_f1": 0.4613884551477792,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0059963971616164785,
      "eval_python_code_alpaca_token_set_precision": 0.5199470829741978,
      "eval_python_code_alpaca_token_set_recall": 0.43742905957189904,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 96250
    },
    {
      "epoch": 18.48,
      "eval_wikibio_accuracy": 0.31628125,
      "eval_wikibio_bleu_score": 5.921618232308941,
      "eval_wikibio_bleu_score_sem": 0.21290516918769772,
      "eval_wikibio_emb_cos_sim": 0.7494040727615356,
      "eval_wikibio_emb_cos_sim_sem": 0.008387679883556452,
      "eval_wikibio_emb_top1_equal": 0.1796875,
      "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7998430728912354,
      "eval_wikibio_n_ngrams_match_1": 10.242,
      "eval_wikibio_n_ngrams_match_2": 3.402,
      "eval_wikibio_n_ngrams_match_3": 1.224,
      "eval_wikibio_num_pred_words": 36.68,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 44.69417021603945,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3592193326205584,
      "eval_wikibio_runtime": 11.0505,
      "eval_wikibio_samples_per_second": 45.247,
      "eval_wikibio_steps_per_second": 0.09,
      "eval_wikibio_token_set_f1": 0.3231720357029772,
      "eval_wikibio_token_set_f1_sem": 0.0051847495570598555,
      "eval_wikibio_token_set_precision": 0.3333367686439334,
      "eval_wikibio_token_set_recall": 0.32798864990216137,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 96250
    },
    {
      "epoch": 18.48,
      "eval_nq_accuracy": 0.52121875,
      "eval_nq_bleu_score": 11.53860695635088,
      "eval_nq_bleu_score_sem": 0.47311330166610505,
      "eval_nq_emb_cos_sim": 0.8242154121398926,
      "eval_nq_emb_cos_sim_sem": 0.007524452498503116,
      "eval_nq_emb_top1_equal": 0.21875,
      "eval_nq_emb_top1_equal_sem": 0.03668319712192295,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.235832929611206,
      "eval_nq_n_ngrams_match_1": 22.966,
      "eval_nq_n_ngrams_match_2": 8.292,
      "eval_nq_n_ngrams_match_3": 3.822,
      "eval_nq_num_pred_words": 49.316,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.354270056754755,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4425937425243177,
      "eval_nq_runtime": 10.9955,
      "eval_nq_samples_per_second": 45.473,
      "eval_nq_steps_per_second": 0.091,
      "eval_nq_token_set_f1": 0.4588045710573252,
      "eval_nq_token_set_f1_sem": 0.004750594426790293,
      "eval_nq_token_set_precision": 0.4151368708397352,
      "eval_nq_token_set_recall": 0.5214414714224554,
      "eval_nq_true_num_tokens": 64.0,
      "step": 96250
    },
    {
      "epoch": 18.48,
      "learning_rate": 0.001,
      "loss": 2.603,
      "step": 96252
    },
    {
      "epoch": 18.48,
      "learning_rate": 0.001,
      "loss": 2.6207,
      "step": 96264
    },
    {
      "epoch": 18.49,
      "learning_rate": 0.001,
      "loss": 2.5999,
      "step": 96276
    },
    {
      "epoch": 18.49,
      "learning_rate": 0.001,
      "loss": 2.5944,
      "step": 96288
    },
    {
      "epoch": 18.49,
      "learning_rate": 0.001,
      "loss": 2.6081,
      "step": 96300
    },
    {
      "epoch": 18.49,
      "learning_rate": 0.001,
      "loss": 2.6095,
      "step": 96312
    },
    {
      "epoch": 18.5,
      "learning_rate": 0.001,
      "loss": 2.6171,
      "step": 96324
    },
    {
      "epoch": 18.5,
      "learning_rate": 0.001,
      "loss": 2.6027,
      "step": 96336
    },
    {
      "epoch": 18.5,
      "learning_rate": 0.001,
      "loss": 2.6214,
      "step": 96348
    },
    {
      "epoch": 18.5,
      "learning_rate": 0.001,
      "loss": 2.6028,
      "step": 96360
    },
    {
      "epoch": 18.5,
      "learning_rate": 0.001,
      "loss": 2.6122,
      "step": 96372
    },
    {
      "epoch": 18.51,
      "learning_rate": 0.001,
      "loss": 2.6161,
      "step": 96384
    },
    {
      "epoch": 18.51,
      "learning_rate": 0.001,
      "loss": 2.609,
      "step": 96396
    },
    {
      "epoch": 18.51,
      "learning_rate": 0.001,
      "loss": 2.6114,
      "step": 96408
    },
    {
      "epoch": 18.51,
      "learning_rate": 0.001,
      "loss": 2.6113,
      "step": 96420
    },
    {
      "epoch": 18.52,
      "learning_rate": 0.001,
      "loss": 2.6072,
      "step": 96432
    },
    {
      "epoch": 18.52,
      "learning_rate": 0.001,
      "loss": 2.6151,
      "step": 96444
    },
    {
      "epoch": 18.52,
      "learning_rate": 0.001,
      "loss": 2.6153,
      "step": 96456
    },
    {
      "epoch": 18.52,
      "learning_rate": 0.001,
      "loss": 2.6078,
      "step": 96468
    },
    {
      "epoch": 18.53,
      "learning_rate": 0.001,
      "loss": 2.6103,
      "step": 96480
    },
    {
      "epoch": 18.53,
      "learning_rate": 0.001,
      "loss": 2.6051,
      "step": 96492
    },
    {
      "epoch": 18.53,
      "learning_rate": 0.001,
      "loss": 2.6097,
      "step": 96504
    },
    {
      "epoch": 18.53,
      "learning_rate": 0.001,
      "loss": 2.6099,
      "step": 96516
    },
    {
      "epoch": 18.53,
      "learning_rate": 0.001,
      "loss": 2.6046,
      "step": 96528
    },
    {
      "epoch": 18.54,
      "learning_rate": 0.001,
      "loss": 2.6159,
      "step": 96540
    },
    {
      "epoch": 18.54,
      "learning_rate": 0.001,
      "loss": 2.6068,
      "step": 96552
    },
    {
      "epoch": 18.54,
      "learning_rate": 0.001,
      "loss": 2.6094,
      "step": 96564
    },
    {
      "epoch": 18.54,
      "learning_rate": 0.001,
      "loss": 2.606,
      "step": 96576
    },
    {
      "epoch": 18.55,
      "learning_rate": 0.001,
      "loss": 2.6045,
      "step": 96588
    },
    {
      "epoch": 18.55,
      "learning_rate": 0.001,
      "loss": 2.6083,
      "step": 96600
    },
    {
      "epoch": 18.55,
      "learning_rate": 0.001,
      "loss": 2.6131,
      "step": 96612
    },
    {
      "epoch": 18.55,
      "learning_rate": 0.001,
      "loss": 2.6072,
      "step": 96624
    },
    {
      "epoch": 18.56,
      "learning_rate": 0.001,
      "loss": 2.6046,
      "step": 96636
    },
    {
      "epoch": 18.56,
      "learning_rate": 0.001,
      "loss": 2.6,
      "step": 96648
    },
    {
      "epoch": 18.56,
      "learning_rate": 0.001,
      "loss": 2.6041,
      "step": 96660
    },
    {
      "epoch": 18.56,
      "learning_rate": 0.001,
      "loss": 2.6175,
      "step": 96672
    },
    {
      "epoch": 18.56,
      "learning_rate": 0.001,
      "loss": 2.6146,
      "step": 96684
    },
    {
      "epoch": 18.57,
      "learning_rate": 0.001,
      "loss": 2.6225,
      "step": 96696
    },
    {
      "epoch": 18.57,
      "learning_rate": 0.001,
      "loss": 2.6086,
      "step": 96708
    },
    {
      "epoch": 18.57,
      "learning_rate": 0.001,
      "loss": 2.5985,
      "step": 96720
    },
    {
      "epoch": 18.57,
      "learning_rate": 0.001,
      "loss": 2.6153,
      "step": 96732
    },
    {
      "epoch": 18.58,
      "learning_rate": 0.001,
      "loss": 2.6064,
      "step": 96744
    },
    {
      "epoch": 18.58,
      "learning_rate": 0.001,
      "loss": 2.6024,
      "step": 96756
    },
    {
      "epoch": 18.58,
      "learning_rate": 0.001,
      "loss": 2.6106,
      "step": 96768
    },
    {
      "epoch": 18.58,
      "learning_rate": 0.001,
      "loss": 2.6136,
      "step": 96780
    },
    {
      "epoch": 18.59,
      "learning_rate": 0.001,
      "loss": 2.6163,
      "step": 96792
    },
    {
      "epoch": 18.59,
      "learning_rate": 0.001,
      "loss": 2.6076,
      "step": 96804
    },
    {
      "epoch": 18.59,
      "learning_rate": 0.001,
      "loss": 2.6039,
      "step": 96816
    },
    {
      "epoch": 18.59,
      "learning_rate": 0.001,
      "loss": 2.6025,
      "step": 96828
    },
    {
      "epoch": 18.59,
      "learning_rate": 0.001,
      "loss": 2.6188,
      "step": 96840
    },
    {
      "epoch": 18.6,
      "learning_rate": 0.001,
      "loss": 2.6186,
      "step": 96852
    },
    {
      "epoch": 18.6,
      "learning_rate": 0.001,
      "loss": 2.6255,
      "step": 96864
    },
    {
      "epoch": 18.6,
      "eval_ag_news_accuracy": 0.318,
      "eval_ag_news_bleu_score": 4.64113285841821,
      "eval_ag_news_bleu_score_sem": 0.14392170725688638,
      "eval_ag_news_emb_cos_sim": 0.7965982556343079,
      "eval_ag_news_emb_cos_sim_sem": 0.008046987150707657,
      "eval_ag_news_emb_top1_equal": 0.2109375,
      "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.6175808906555176,
      "eval_ag_news_n_ngrams_match_1": 13.668,
      "eval_ag_news_n_ngrams_match_2": 2.98,
      "eval_ag_news_n_ngrams_match_3": 0.836,
      "eval_ag_news_num_pred_words": 46.512,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 37.247353326195324,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3365215909701731,
      "eval_ag_news_runtime": 11.4947,
      "eval_ag_news_samples_per_second": 43.498,
      "eval_ag_news_steps_per_second": 0.087,
      "eval_ag_news_token_set_f1": 0.3421697712491056,
      "eval_ag_news_token_set_f1_sem": 0.004293380207232115,
      "eval_ag_news_token_set_precision": 0.3263349073328334,
      "eval_ag_news_token_set_recall": 0.37532573581921147,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 96875
    },
    {
      "epoch": 18.6,
      "eval_anthropic_toxic_prompts_accuracy": 0.11290625,
      "eval_anthropic_toxic_prompts_bleu_score": 2.908590531022566,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10859501232520109,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6552438735961914,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010150922925512562,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.272285223007202,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.93,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.746,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.602,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.01,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.371535375766104,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20447244582343949,
      "eval_anthropic_toxic_prompts_runtime": 10.8011,
      "eval_anthropic_toxic_prompts_samples_per_second": 46.292,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.093,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3496548281326282,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006531837971644752,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4181213031442762,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3295576475243872,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 96875
    },
    {
      "epoch": 18.6,
      "eval_arxiv_accuracy": 0.3429375,
      "eval_arxiv_bleu_score": 4.060446591726533,
      "eval_arxiv_bleu_score_sem": 0.11535480298229991,
      "eval_arxiv_emb_cos_sim": 0.7464751601219177,
      "eval_arxiv_emb_cos_sim_sem": 0.009393524301436558,
      "eval_arxiv_emb_top1_equal": 0.265625,
      "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4663569927215576,
      "eval_arxiv_n_ngrams_match_1": 14.526,
      "eval_arxiv_n_ngrams_match_2": 2.732,
      "eval_arxiv_n_ngrams_match_3": 0.56,
      "eval_arxiv_num_pred_words": 40.552,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 32.01988105081965,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.34454573858859844,
      "eval_arxiv_runtime": 11.2522,
      "eval_arxiv_samples_per_second": 44.436,
      "eval_arxiv_steps_per_second": 0.089,
      "eval_arxiv_token_set_f1": 0.3404924784033526,
      "eval_arxiv_token_set_f1_sem": 0.004156770269250585,
      "eval_arxiv_token_set_precision": 0.2909309788150816,
      "eval_arxiv_token_set_recall": 0.426627479222847,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 96875
    },
    {
      "epoch": 18.6,
      "eval_python_code_alpaca_accuracy": 0.158375,
      "eval_python_code_alpaca_bleu_score": 4.398422704861474,
      "eval_python_code_alpaca_bleu_score_sem": 0.13905094607098825,
      "eval_python_code_alpaca_emb_cos_sim": 0.7390080690383911,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009435710662617931,
      "eval_python_code_alpaca_emb_top1_equal": 0.1015625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.026804565886848545,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.922437906265259,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.466,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.678,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.886,
      "eval_python_code_alpaca_num_pred_words": 43.454,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.58654452443671,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.31563238790502973,
      "eval_python_code_alpaca_runtime": 11.1047,
      "eval_python_code_alpaca_samples_per_second": 45.026,
      "eval_python_code_alpaca_steps_per_second": 0.09,
      "eval_python_code_alpaca_token_set_f1": 0.46549719927832806,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005822094713937672,
      "eval_python_code_alpaca_token_set_precision": 0.5142298271878352,
      "eval_python_code_alpaca_token_set_recall": 0.44985480221678237,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 96875
    },
    {
      "epoch": 18.6,
      "eval_wikibio_accuracy": 0.31534375,
      "eval_wikibio_bleu_score": 5.599645037244387,
      "eval_wikibio_bleu_score_sem": 0.20597665951295108,
      "eval_wikibio_emb_cos_sim": 0.7339651584625244,
      "eval_wikibio_emb_cos_sim_sem": 0.009255253003817967,
      "eval_wikibio_emb_top1_equal": 0.203125,
      "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.796966314315796,
      "eval_wikibio_n_ngrams_match_1": 9.772,
      "eval_wikibio_n_ngrams_match_2": 3.148,
      "eval_wikibio_n_ngrams_match_3": 1.11,
      "eval_wikibio_num_pred_words": 36.32,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 44.565780640048395,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3400387721418663,
      "eval_wikibio_runtime": 10.7954,
      "eval_wikibio_samples_per_second": 46.316,
      "eval_wikibio_steps_per_second": 0.093,
      "eval_wikibio_token_set_f1": 0.3101382149910275,
      "eval_wikibio_token_set_f1_sem": 0.005630899690839322,
      "eval_wikibio_token_set_precision": 0.3173958890489613,
      "eval_wikibio_token_set_recall": 0.3181035535584828,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 96875
    },
    {
      "epoch": 18.6,
      "eval_nq_accuracy": 0.52065625,
      "eval_nq_bleu_score": 11.279353191465116,
      "eval_nq_bleu_score_sem": 0.4598831181620717,
      "eval_nq_emb_cos_sim": 0.819989025592804,
      "eval_nq_emb_cos_sim_sem": 0.007851256527678556,
      "eval_nq_emb_top1_equal": 0.2578125,
      "eval_nq_emb_top1_equal_sem": 0.038815656435002115,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.2385220527648926,
      "eval_nq_n_ngrams_match_1": 22.504,
      "eval_nq_n_ngrams_match_2": 8.152,
      "eval_nq_n_ngrams_match_3": 3.74,
      "eval_nq_num_pred_words": 49.114,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.37945869344422,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4346066736600779,
      "eval_nq_runtime": 11.1562,
      "eval_nq_samples_per_second": 44.818,
      "eval_nq_steps_per_second": 0.09,
      "eval_nq_token_set_f1": 0.4526704917998639,
      "eval_nq_token_set_f1_sem": 0.0051285257330828836,
      "eval_nq_token_set_precision": 0.4100285015794644,
      "eval_nq_token_set_recall": 0.5144674801327048,
      "eval_nq_true_num_tokens": 64.0,
      "step": 96875
    },
    {
      "epoch": 18.6,
      "learning_rate": 0.001,
      "loss": 2.6136,
      "step": 96876
    },
    {
      "epoch": 18.6,
      "learning_rate": 0.001,
      "loss": 2.6038,
      "step": 96888
    },
    {
      "epoch": 18.61,
      "learning_rate": 0.001,
      "loss": 2.6057,
      "step": 96900
    },
    {
      "epoch": 18.61,
      "learning_rate": 0.001,
      "loss": 2.6103,
      "step": 96912
    },
    {
      "epoch": 18.61,
      "learning_rate": 0.001,
      "loss": 2.6101,
      "step": 96924
    },
    {
      "epoch": 18.61,
      "learning_rate": 0.001,
      "loss": 2.6121,
      "step": 96936
    },
    {
      "epoch": 18.62,
      "learning_rate": 0.001,
      "loss": 2.6057,
      "step": 96948
    },
    {
      "epoch": 18.62,
      "learning_rate": 0.001,
      "loss": 2.6066,
      "step": 96960
    },
    {
      "epoch": 18.62,
      "learning_rate": 0.001,
      "loss": 2.6212,
      "step": 96972
    },
    {
      "epoch": 18.62,
      "learning_rate": 0.001,
      "loss": 2.613,
      "step": 96984
    },
    {
      "epoch": 18.62,
      "learning_rate": 0.001,
      "loss": 2.6131,
      "step": 96996
    },
    {
      "epoch": 18.63,
      "learning_rate": 0.001,
      "loss": 2.6114,
      "step": 97008
    },
    {
      "epoch": 18.63,
      "learning_rate": 0.001,
      "loss": 2.6172,
      "step": 97020
    },
    {
      "epoch": 18.63,
      "learning_rate": 0.001,
      "loss": 2.616,
      "step": 97032
    },
    {
      "epoch": 18.63,
      "learning_rate": 0.001,
      "loss": 2.6174,
      "step": 97044
    },
    {
      "epoch": 18.64,
      "learning_rate": 0.001,
      "loss": 2.6212,
      "step": 97056
    },
    {
      "epoch": 18.64,
      "learning_rate": 0.001,
      "loss": 2.6131,
      "step": 97068
    },
    {
      "epoch": 18.64,
      "learning_rate": 0.001,
      "loss": 2.6058,
      "step": 97080
    },
    {
      "epoch": 18.64,
      "learning_rate": 0.001,
      "loss": 2.6178,
      "step": 97092
    },
    {
      "epoch": 18.65,
      "learning_rate": 0.001,
      "loss": 2.6171,
      "step": 97104
    },
    {
      "epoch": 18.65,
      "learning_rate": 0.001,
      "loss": 2.6169,
      "step": 97116
    },
    {
      "epoch": 18.65,
      "learning_rate": 0.001,
      "loss": 2.608,
      "step": 97128
    },
    {
      "epoch": 18.65,
      "learning_rate": 0.001,
      "loss": 2.6083,
      "step": 97140
    },
    {
      "epoch": 18.65,
      "learning_rate": 0.001,
      "loss": 2.6132,
      "step": 97152
    },
    {
      "epoch": 18.66,
      "learning_rate": 0.001,
      "loss": 2.6058,
      "step": 97164
    },
    {
      "epoch": 18.66,
      "learning_rate": 0.001,
      "loss": 2.6055,
      "step": 97176
    },
    {
      "epoch": 18.66,
      "learning_rate": 0.001,
      "loss": 2.6118,
      "step": 97188
    },
    {
      "epoch": 18.66,
      "learning_rate": 0.001,
      "loss": 2.6261,
      "step": 97200
    },
    {
      "epoch": 18.67,
      "learning_rate": 0.001,
      "loss": 2.6129,
      "step": 97212
    },
    {
      "epoch": 18.67,
      "learning_rate": 0.001,
      "loss": 2.6032,
      "step": 97224
    },
    {
      "epoch": 18.67,
      "learning_rate": 0.001,
      "loss": 2.6204,
      "step": 97236
    },
    {
      "epoch": 18.67,
      "learning_rate": 0.001,
      "loss": 2.5961,
      "step": 97248
    },
    {
      "epoch": 18.68,
      "learning_rate": 0.001,
      "loss": 2.6098,
      "step": 97260
    },
    {
      "epoch": 18.68,
      "learning_rate": 0.001,
      "loss": 2.6017,
      "step": 97272
    },
    {
      "epoch": 18.68,
      "learning_rate": 0.001,
      "loss": 2.6181,
      "step": 97284
    },
    {
      "epoch": 18.68,
      "learning_rate": 0.001,
      "loss": 2.6091,
      "step": 97296
    },
    {
      "epoch": 18.68,
      "learning_rate": 0.001,
      "loss": 2.6009,
      "step": 97308
    },
    {
      "epoch": 18.69,
      "learning_rate": 0.001,
      "loss": 2.6159,
      "step": 97320
    },
    {
      "epoch": 18.69,
      "learning_rate": 0.001,
      "loss": 2.6171,
      "step": 97332
    },
    {
      "epoch": 18.69,
      "learning_rate": 0.001,
      "loss": 2.6073,
      "step": 97344
    },
    {
      "epoch": 18.69,
      "learning_rate": 0.001,
      "loss": 2.6011,
      "step": 97356
    },
    {
      "epoch": 18.7,
      "learning_rate": 0.001,
      "loss": 2.6134,
      "step": 97368
    },
    {
      "epoch": 18.7,
      "learning_rate": 0.001,
      "loss": 2.6242,
      "step": 97380
    },
    {
      "epoch": 18.7,
      "learning_rate": 0.001,
      "loss": 2.6147,
      "step": 97392
    },
    {
      "epoch": 18.7,
      "learning_rate": 0.001,
      "loss": 2.6124,
      "step": 97404
    },
    {
      "epoch": 18.71,
      "learning_rate": 0.001,
      "loss": 2.6087,
      "step": 97416
    },
    {
      "epoch": 18.71,
      "learning_rate": 0.001,
      "loss": 2.6023,
      "step": 97428
    },
    {
      "epoch": 18.71,
      "learning_rate": 0.001,
      "loss": 2.6037,
      "step": 97440
    },
    {
      "epoch": 18.71,
      "learning_rate": 0.001,
      "loss": 2.6026,
      "step": 97452
    },
    {
      "epoch": 18.71,
      "learning_rate": 0.001,
      "loss": 2.6107,
      "step": 97464
    },
    {
      "epoch": 18.72,
      "learning_rate": 0.001,
      "loss": 2.6013,
      "step": 97476
    },
    {
      "epoch": 18.72,
      "learning_rate": 0.001,
      "loss": 2.6075,
      "step": 97488
    },
    {
      "epoch": 18.72,
      "learning_rate": 0.001,
      "loss": 2.6195,
      "step": 97500
    },
    {
      "epoch": 18.72,
      "eval_ag_news_accuracy": 0.317375,
      "eval_ag_news_bleu_score": 4.656647181707273,
      "eval_ag_news_bleu_score_sem": 0.14977508035398074,
      "eval_ag_news_emb_cos_sim": 0.8073785305023193,
      "eval_ag_news_emb_cos_sim_sem": 0.007234375496537871,
      "eval_ag_news_emb_top1_equal": 0.25,
      "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.6215946674346924,
      "eval_ag_news_n_ngrams_match_1": 13.906,
      "eval_ag_news_n_ngrams_match_2": 3.052,
      "eval_ag_news_n_ngrams_match_3": 0.826,
      "eval_ag_news_num_pred_words": 46.916,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 37.39715632484492,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.341368211273631,
      "eval_ag_news_runtime": 11.2426,
      "eval_ag_news_samples_per_second": 44.474,
      "eval_ag_news_steps_per_second": 0.089,
      "eval_ag_news_token_set_f1": 0.3464814273669507,
      "eval_ag_news_token_set_f1_sem": 0.004372458516239519,
      "eval_ag_news_token_set_precision": 0.3322177730898681,
      "eval_ag_news_token_set_recall": 0.38016865945177786,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 97500
    },
    {
      "epoch": 18.72,
      "eval_anthropic_toxic_prompts_accuracy": 0.1115,
      "eval_anthropic_toxic_prompts_bleu_score": 2.878984964061356,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10305757242988817,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6545363664627075,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010594322918370851,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.140625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030847557647994725,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.3094899654388428,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.098,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.784,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.606,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.01,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 27.37116167276371,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20810156779703265,
      "eval_anthropic_toxic_prompts_runtime": 10.6781,
      "eval_anthropic_toxic_prompts_samples_per_second": 46.825,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.094,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.34719224782010083,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0064782320772376785,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.43294426130365504,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.31978646034245195,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 97500
    },
    {
      "epoch": 18.72,
      "eval_arxiv_accuracy": 0.34496875,
      "eval_arxiv_bleu_score": 4.185301798018157,
      "eval_arxiv_bleu_score_sem": 0.12049250810336702,
      "eval_arxiv_emb_cos_sim": 0.7472431659698486,
      "eval_arxiv_emb_cos_sim_sem": 0.010150255161381468,
      "eval_arxiv_emb_top1_equal": 0.2890625,
      "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4600818157196045,
      "eval_arxiv_n_ngrams_match_1": 14.714,
      "eval_arxiv_n_ngrams_match_2": 2.794,
      "eval_arxiv_n_ngrams_match_3": 0.62,
      "eval_arxiv_num_pred_words": 40.526,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 31.81957974998821,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.34878776997319716,
      "eval_arxiv_runtime": 10.8051,
      "eval_arxiv_samples_per_second": 46.274,
      "eval_arxiv_steps_per_second": 0.093,
      "eval_arxiv_token_set_f1": 0.343695321821954,
      "eval_arxiv_token_set_f1_sem": 0.004273697233242668,
      "eval_arxiv_token_set_precision": 0.2964980753229387,
      "eval_arxiv_token_set_recall": 0.4292485531260051,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 97500
    },
    {
      "epoch": 18.72,
      "eval_python_code_alpaca_accuracy": 0.156375,
      "eval_python_code_alpaca_bleu_score": 4.517288316973969,
      "eval_python_code_alpaca_bleu_score_sem": 0.14832750578827467,
      "eval_python_code_alpaca_emb_cos_sim": 0.7501220107078552,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009231878625051627,
      "eval_python_code_alpaca_emb_top1_equal": 0.1328125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.935863971710205,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.7,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.816,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.922,
      "eval_python_code_alpaca_num_pred_words": 43.316,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.837771408088813,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3275757944672905,
      "eval_python_code_alpaca_runtime": 11.9482,
      "eval_python_code_alpaca_samples_per_second": 41.847,
      "eval_python_code_alpaca_steps_per_second": 0.084,
      "eval_python_code_alpaca_token_set_f1": 0.4627990740990327,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005695619749963124,
      "eval_python_code_alpaca_token_set_precision": 0.5301099673004801,
      "eval_python_code_alpaca_token_set_recall": 0.434069374935861,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 97500
    },
    {
      "epoch": 18.72,
      "eval_wikibio_accuracy": 0.31596875,
      "eval_wikibio_bleu_score": 5.7350808101546376,
      "eval_wikibio_bleu_score_sem": 0.20187394227353678,
      "eval_wikibio_emb_cos_sim": 0.7440035343170166,
      "eval_wikibio_emb_cos_sim_sem": 0.009912491618041355,
      "eval_wikibio_emb_top1_equal": 0.2265625,
      "eval_wikibio_emb_top1_equal_sem": 0.03714537682851538,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.784790515899658,
      "eval_wikibio_n_ngrams_match_1": 10.23,
      "eval_wikibio_n_ngrams_match_2": 3.386,
      "eval_wikibio_n_ngrams_match_3": 1.164,
      "eval_wikibio_num_pred_words": 36.79,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 44.02644675207413,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.35233759034088086,
      "eval_wikibio_runtime": 11.4649,
      "eval_wikibio_samples_per_second": 43.611,
      "eval_wikibio_steps_per_second": 0.087,
      "eval_wikibio_token_set_f1": 0.31787960493167655,
      "eval_wikibio_token_set_f1_sem": 0.005363299164133791,
      "eval_wikibio_token_set_precision": 0.33064591379734604,
      "eval_wikibio_token_set_recall": 0.318451294053591,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 97500
    },
    {
      "epoch": 18.72,
      "eval_nq_accuracy": 0.520375,
      "eval_nq_bleu_score": 11.471308349842047,
      "eval_nq_bleu_score_sem": 0.47913630570444005,
      "eval_nq_emb_cos_sim": 0.8260822296142578,
      "eval_nq_emb_cos_sim_sem": 0.007098766694365054,
      "eval_nq_emb_top1_equal": 0.28125,
      "eval_nq_emb_top1_equal_sem": 0.039896367485272234,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.2381880283355713,
      "eval_nq_n_ngrams_match_1": 22.806,
      "eval_nq_n_ngrams_match_2": 8.282,
      "eval_nq_n_ngrams_match_3": 3.832,
      "eval_nq_num_pred_words": 49.224,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.376326248292528,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.43946255256202194,
      "eval_nq_runtime": 11.4775,
      "eval_nq_samples_per_second": 43.563,
      "eval_nq_steps_per_second": 0.087,
      "eval_nq_token_set_f1": 0.4543488653194585,
      "eval_nq_token_set_f1_sem": 0.004926261914301938,
      "eval_nq_token_set_precision": 0.4124802873188905,
      "eval_nq_token_set_recall": 0.5141252058303717,
      "eval_nq_true_num_tokens": 64.0,
      "step": 97500
    },
    {
      "epoch": 18.72,
      "learning_rate": 0.001,
      "loss": 2.6049,
      "step": 97512
    },
    {
      "epoch": 18.73,
      "learning_rate": 0.001,
      "loss": 2.6167,
      "step": 97524
    },
    {
      "epoch": 18.73,
      "learning_rate": 0.001,
      "loss": 2.6102,
      "step": 97536
    },
    {
      "epoch": 18.73,
      "learning_rate": 0.001,
      "loss": 2.6116,
      "step": 97548
    },
    {
      "epoch": 18.73,
      "learning_rate": 0.001,
      "loss": 2.6251,
      "step": 97560
    },
    {
      "epoch": 18.74,
      "learning_rate": 0.001,
      "loss": 2.6118,
      "step": 97572
    },
    {
      "epoch": 18.74,
      "learning_rate": 0.001,
      "loss": 2.6041,
      "step": 97584
    },
    {
      "epoch": 18.74,
      "learning_rate": 0.001,
      "loss": 2.593,
      "step": 97596
    },
    {
      "epoch": 18.74,
      "learning_rate": 0.001,
      "loss": 2.6046,
      "step": 97608
    },
    {
      "epoch": 18.74,
      "learning_rate": 0.001,
      "loss": 2.6124,
      "step": 97620
    },
    {
      "epoch": 18.75,
      "learning_rate": 0.001,
      "loss": 2.6093,
      "step": 97632
    },
    {
      "epoch": 18.75,
      "learning_rate": 0.001,
      "loss": 2.6122,
      "step": 97644
    },
    {
      "epoch": 18.75,
      "learning_rate": 0.001,
      "loss": 2.6156,
      "step": 97656
    },
    {
      "epoch": 18.75,
      "learning_rate": 0.001,
      "loss": 2.6121,
      "step": 97668
    },
    {
      "epoch": 18.76,
      "learning_rate": 0.001,
      "loss": 2.6202,
      "step": 97680
    },
    {
      "epoch": 18.76,
      "learning_rate": 0.001,
      "loss": 2.6125,
      "step": 97692
    },
    {
      "epoch": 18.76,
      "learning_rate": 0.001,
      "loss": 2.6084,
      "step": 97704
    },
    {
      "epoch": 18.76,
      "learning_rate": 0.001,
      "loss": 2.6156,
      "step": 97716
    },
    {
      "epoch": 18.76,
      "learning_rate": 0.001,
      "loss": 2.6145,
      "step": 97728
    },
    {
      "epoch": 18.77,
      "learning_rate": 0.001,
      "loss": 2.606,
      "step": 97740
    },
    {
      "epoch": 18.77,
      "learning_rate": 0.001,
      "loss": 2.608,
      "step": 97752
    },
    {
      "epoch": 18.77,
      "learning_rate": 0.001,
      "loss": 2.6111,
      "step": 97764
    },
    {
      "epoch": 18.77,
      "learning_rate": 0.001,
      "loss": 2.6094,
      "step": 97776
    },
    {
      "epoch": 18.78,
      "learning_rate": 0.001,
      "loss": 2.6085,
      "step": 97788
    },
    {
      "epoch": 18.78,
      "learning_rate": 0.001,
      "loss": 2.6035,
      "step": 97800
    },
    {
      "epoch": 18.78,
      "learning_rate": 0.001,
      "loss": 2.6077,
      "step": 97812
    },
    {
      "epoch": 18.78,
      "learning_rate": 0.001,
      "loss": 2.6106,
      "step": 97824
    },
    {
      "epoch": 18.79,
      "learning_rate": 0.001,
      "loss": 2.6105,
      "step": 97836
    },
    {
      "epoch": 18.79,
      "learning_rate": 0.001,
      "loss": 2.6003,
      "step": 97848
    },
    {
      "epoch": 18.79,
      "learning_rate": 0.001,
      "loss": 2.6069,
      "step": 97860
    },
    {
      "epoch": 18.79,
      "learning_rate": 0.001,
      "loss": 2.6067,
      "step": 97872
    },
    {
      "epoch": 18.79,
      "learning_rate": 0.001,
      "loss": 2.6129,
      "step": 97884
    },
    {
      "epoch": 18.8,
      "learning_rate": 0.001,
      "loss": 2.6108,
      "step": 97896
    },
    {
      "epoch": 18.8,
      "learning_rate": 0.001,
      "loss": 2.6032,
      "step": 97908
    },
    {
      "epoch": 18.8,
      "learning_rate": 0.001,
      "loss": 2.6162,
      "step": 97920
    },
    {
      "epoch": 18.8,
      "learning_rate": 0.001,
      "loss": 2.6087,
      "step": 97932
    },
    {
      "epoch": 18.81,
      "learning_rate": 0.001,
      "loss": 2.6236,
      "step": 97944
    },
    {
      "epoch": 18.81,
      "learning_rate": 0.001,
      "loss": 2.5985,
      "step": 97956
    },
    {
      "epoch": 18.81,
      "learning_rate": 0.001,
      "loss": 2.6083,
      "step": 97968
    },
    {
      "epoch": 18.81,
      "learning_rate": 0.001,
      "loss": 2.6107,
      "step": 97980
    },
    {
      "epoch": 18.82,
      "learning_rate": 0.001,
      "loss": 2.6052,
      "step": 97992
    },
    {
      "epoch": 18.82,
      "learning_rate": 0.001,
      "loss": 2.6117,
      "step": 98004
    },
    {
      "epoch": 18.82,
      "learning_rate": 0.001,
      "loss": 2.6164,
      "step": 98016
    },
    {
      "epoch": 18.82,
      "learning_rate": 0.001,
      "loss": 2.6024,
      "step": 98028
    },
    {
      "epoch": 18.82,
      "learning_rate": 0.001,
      "loss": 2.6109,
      "step": 98040
    },
    {
      "epoch": 18.83,
      "learning_rate": 0.001,
      "loss": 2.608,
      "step": 98052
    },
    {
      "epoch": 18.83,
      "learning_rate": 0.001,
      "loss": 2.6123,
      "step": 98064
    },
    {
      "epoch": 18.83,
      "learning_rate": 0.001,
      "loss": 2.6167,
      "step": 98076
    },
    {
      "epoch": 18.83,
      "learning_rate": 0.001,
      "loss": 2.6024,
      "step": 98088
    },
    {
      "epoch": 18.84,
      "learning_rate": 0.001,
      "loss": 2.6192,
      "step": 98100
    },
    {
      "epoch": 18.84,
      "learning_rate": 0.001,
      "loss": 2.6136,
      "step": 98112
    },
    {
      "epoch": 18.84,
      "learning_rate": 0.001,
      "loss": 2.6011,
      "step": 98124
    },
    {
      "epoch": 18.84,
      "eval_ag_news_accuracy": 0.3163125,
      "eval_ag_news_bleu_score": 4.893583411326613,
      "eval_ag_news_bleu_score_sem": 0.1602519833001547,
      "eval_ag_news_emb_cos_sim": 0.799976110458374,
      "eval_ag_news_emb_cos_sim_sem": 0.007589054397647205,
      "eval_ag_news_emb_top1_equal": 0.21875,
      "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.618302345275879,
      "eval_ag_news_n_ngrams_match_1": 13.816,
      "eval_ag_news_n_ngrams_match_2": 3.046,
      "eval_ag_news_n_ngrams_match_3": 0.9,
      "eval_ag_news_num_pred_words": 46.352,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 37.27423529724385,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3430798967658262,
      "eval_ag_news_runtime": 12.3353,
      "eval_ag_news_samples_per_second": 40.534,
      "eval_ag_news_steps_per_second": 0.081,
      "eval_ag_news_token_set_f1": 0.34465290333492693,
      "eval_ag_news_token_set_f1_sem": 0.004500714902887902,
      "eval_ag_news_token_set_precision": 0.32818629919558523,
      "eval_ag_news_token_set_recall": 0.37674486971843524,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 98125
    },
    {
      "epoch": 18.84,
      "eval_anthropic_toxic_prompts_accuracy": 0.113,
      "eval_anthropic_toxic_prompts_bleu_score": 2.978666812159319,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11180006814330817,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6559618711471558,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.01104389940016951,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2950711250305176,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.898,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.804,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.658,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.402,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.979332907536705,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20266106536294742,
      "eval_anthropic_toxic_prompts_runtime": 10.5559,
      "eval_anthropic_toxic_prompts_samples_per_second": 47.367,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.095,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.34200163667029326,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006734019669357172,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.41423840448185606,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3220011582193597,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 98125
    },
    {
      "epoch": 18.84,
      "eval_arxiv_accuracy": 0.34328125,
      "eval_arxiv_bleu_score": 4.196377540087479,
      "eval_arxiv_bleu_score_sem": 0.1167786961682254,
      "eval_arxiv_emb_cos_sim": 0.7467535734176636,
      "eval_arxiv_emb_cos_sim_sem": 0.00884328929159274,
      "eval_arxiv_emb_top1_equal": 0.265625,
      "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.457857847213745,
      "eval_arxiv_n_ngrams_match_1": 14.758,
      "eval_arxiv_n_ngrams_match_2": 2.84,
      "eval_arxiv_n_ngrams_match_3": 0.628,
      "eval_arxiv_num_pred_words": 39.878,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 31.748892638844115,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3547328129271238,
      "eval_arxiv_runtime": 10.8097,
      "eval_arxiv_samples_per_second": 46.255,
      "eval_arxiv_steps_per_second": 0.093,
      "eval_arxiv_token_set_f1": 0.3460723869447406,
      "eval_arxiv_token_set_f1_sem": 0.004204699816453167,
      "eval_arxiv_token_set_precision": 0.29641136100843846,
      "eval_arxiv_token_set_recall": 0.4356137055554718,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 98125
    },
    {
      "epoch": 18.84,
      "eval_python_code_alpaca_accuracy": 0.159,
      "eval_python_code_alpaca_bleu_score": 4.515639884363277,
      "eval_python_code_alpaca_bleu_score_sem": 0.14828602698629056,
      "eval_python_code_alpaca_emb_cos_sim": 0.7527774572372437,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008795256372269496,
      "eval_python_code_alpaca_emb_top1_equal": 0.1328125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.9217469692230225,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.66,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.788,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.89,
      "eval_python_code_alpaca_num_pred_words": 42.986,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.57370682786837,
      "eval_python_code_alpaca_pred_num_tokens": 62.8984375,
      "eval_python_code_alpaca_rouge_score": 0.32571275162015967,
      "eval_python_code_alpaca_runtime": 10.4971,
      "eval_python_code_alpaca_samples_per_second": 47.632,
      "eval_python_code_alpaca_steps_per_second": 0.095,
      "eval_python_code_alpaca_token_set_f1": 0.4754397297562614,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0056081020812104206,
      "eval_python_code_alpaca_token_set_precision": 0.5303777641992341,
      "eval_python_code_alpaca_token_set_recall": 0.45439303003643294,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 98125
    },
    {
      "epoch": 18.84,
      "eval_wikibio_accuracy": 0.318375,
      "eval_wikibio_bleu_score": 5.489666883691228,
      "eval_wikibio_bleu_score_sem": 0.1946471806560131,
      "eval_wikibio_emb_cos_sim": 0.7177799940109253,
      "eval_wikibio_emb_cos_sim_sem": 0.011559783411387933,
      "eval_wikibio_emb_top1_equal": 0.1484375,
      "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.772843837738037,
      "eval_wikibio_n_ngrams_match_1": 9.522,
      "eval_wikibio_n_ngrams_match_2": 3.154,
      "eval_wikibio_n_ngrams_match_3": 1.114,
      "eval_wikibio_num_pred_words": 35.61,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 43.5036062839656,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3313459483886061,
      "eval_wikibio_runtime": 10.6348,
      "eval_wikibio_samples_per_second": 47.015,
      "eval_wikibio_steps_per_second": 0.094,
      "eval_wikibio_token_set_f1": 0.3013661245541971,
      "eval_wikibio_token_set_f1_sem": 0.005964780329996843,
      "eval_wikibio_token_set_precision": 0.30579228178006457,
      "eval_wikibio_token_set_recall": 0.3159434351039579,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 98125
    },
    {
      "epoch": 18.84,
      "eval_nq_accuracy": 0.521375,
      "eval_nq_bleu_score": 11.239000215810501,
      "eval_nq_bleu_score_sem": 0.4673154678829763,
      "eval_nq_emb_cos_sim": 0.8265373706817627,
      "eval_nq_emb_cos_sim_sem": 0.007860237757678424,
      "eval_nq_emb_top1_equal": 0.25,
      "eval_nq_emb_top1_equal_sem": 0.03842366440207048,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.2307910919189453,
      "eval_nq_n_ngrams_match_1": 22.588,
      "eval_nq_n_ngrams_match_2": 8.166,
      "eval_nq_n_ngrams_match_3": 3.694,
      "eval_nq_num_pred_words": 48.918,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.30722603920483,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.43660076894666,
      "eval_nq_runtime": 11.0284,
      "eval_nq_samples_per_second": 45.337,
      "eval_nq_steps_per_second": 0.091,
      "eval_nq_token_set_f1": 0.45180013152361204,
      "eval_nq_token_set_f1_sem": 0.004944779576918083,
      "eval_nq_token_set_precision": 0.408124341946281,
      "eval_nq_token_set_recall": 0.5149068385648281,
      "eval_nq_true_num_tokens": 64.0,
      "step": 98125
    },
    {
      "epoch": 18.84,
      "learning_rate": 0.001,
      "loss": 2.6088,
      "step": 98136
    },
    {
      "epoch": 18.85,
      "learning_rate": 0.001,
      "loss": 2.6091,
      "step": 98148
    },
    {
      "epoch": 18.85,
      "learning_rate": 0.001,
      "loss": 2.6115,
      "step": 98160
    },
    {
      "epoch": 18.85,
      "learning_rate": 0.001,
      "loss": 2.6132,
      "step": 98172
    },
    {
      "epoch": 18.85,
      "learning_rate": 0.001,
      "loss": 2.6056,
      "step": 98184
    },
    {
      "epoch": 18.85,
      "learning_rate": 0.001,
      "loss": 2.6079,
      "step": 98196
    },
    {
      "epoch": 18.86,
      "learning_rate": 0.001,
      "loss": 2.6147,
      "step": 98208
    },
    {
      "epoch": 18.86,
      "learning_rate": 0.001,
      "loss": 2.6173,
      "step": 98220
    },
    {
      "epoch": 18.86,
      "learning_rate": 0.001,
      "loss": 2.6103,
      "step": 98232
    },
    {
      "epoch": 18.86,
      "learning_rate": 0.001,
      "loss": 2.6103,
      "step": 98244
    },
    {
      "epoch": 18.87,
      "learning_rate": 0.001,
      "loss": 2.6104,
      "step": 98256
    },
    {
      "epoch": 18.87,
      "learning_rate": 0.001,
      "loss": 2.6145,
      "step": 98268
    },
    {
      "epoch": 18.87,
      "learning_rate": 0.001,
      "loss": 2.6055,
      "step": 98280
    },
    {
      "epoch": 18.87,
      "learning_rate": 0.001,
      "loss": 2.6117,
      "step": 98292
    },
    {
      "epoch": 18.88,
      "learning_rate": 0.001,
      "loss": 2.6127,
      "step": 98304
    },
    {
      "epoch": 18.88,
      "learning_rate": 0.001,
      "loss": 2.597,
      "step": 98316
    },
    {
      "epoch": 18.88,
      "learning_rate": 0.001,
      "loss": 2.6118,
      "step": 98328
    },
    {
      "epoch": 18.88,
      "learning_rate": 0.001,
      "loss": 2.6107,
      "step": 98340
    },
    {
      "epoch": 18.88,
      "learning_rate": 0.001,
      "loss": 2.6129,
      "step": 98352
    },
    {
      "epoch": 18.89,
      "learning_rate": 0.001,
      "loss": 2.6034,
      "step": 98364
    },
    {
      "epoch": 18.89,
      "learning_rate": 0.001,
      "loss": 2.5941,
      "step": 98376
    },
    {
      "epoch": 18.89,
      "learning_rate": 0.001,
      "loss": 2.606,
      "step": 98388
    },
    {
      "epoch": 18.89,
      "learning_rate": 0.001,
      "loss": 2.5963,
      "step": 98400
    },
    {
      "epoch": 18.9,
      "learning_rate": 0.001,
      "loss": 2.618,
      "step": 98412
    },
    {
      "epoch": 18.9,
      "learning_rate": 0.001,
      "loss": 2.6036,
      "step": 98424
    },
    {
      "epoch": 18.9,
      "learning_rate": 0.001,
      "loss": 2.6207,
      "step": 98436
    },
    {
      "epoch": 18.9,
      "learning_rate": 0.001,
      "loss": 2.6092,
      "step": 98448
    },
    {
      "epoch": 18.91,
      "learning_rate": 0.001,
      "loss": 2.6078,
      "step": 98460
    },
    {
      "epoch": 18.91,
      "learning_rate": 0.001,
      "loss": 2.6122,
      "step": 98472
    },
    {
      "epoch": 18.91,
      "learning_rate": 0.001,
      "loss": 2.6079,
      "step": 98484
    },
    {
      "epoch": 18.91,
      "learning_rate": 0.001,
      "loss": 2.6117,
      "step": 98496
    },
    {
      "epoch": 18.91,
      "learning_rate": 0.001,
      "loss": 2.6041,
      "step": 98508
    },
    {
      "epoch": 18.92,
      "learning_rate": 0.001,
      "loss": 2.6091,
      "step": 98520
    },
    {
      "epoch": 18.92,
      "learning_rate": 0.001,
      "loss": 2.6116,
      "step": 98532
    },
    {
      "epoch": 18.92,
      "learning_rate": 0.001,
      "loss": 2.6173,
      "step": 98544
    },
    {
      "epoch": 18.92,
      "learning_rate": 0.001,
      "loss": 2.6082,
      "step": 98556
    },
    {
      "epoch": 18.93,
      "learning_rate": 0.001,
      "loss": 2.6039,
      "step": 98568
    },
    {
      "epoch": 18.93,
      "learning_rate": 0.001,
      "loss": 2.614,
      "step": 98580
    },
    {
      "epoch": 18.93,
      "learning_rate": 0.001,
      "loss": 2.6188,
      "step": 98592
    },
    {
      "epoch": 18.93,
      "learning_rate": 0.001,
      "loss": 2.6268,
      "step": 98604
    },
    {
      "epoch": 18.94,
      "learning_rate": 0.001,
      "loss": 2.6102,
      "step": 98616
    },
    {
      "epoch": 18.94,
      "learning_rate": 0.001,
      "loss": 2.6184,
      "step": 98628
    },
    {
      "epoch": 18.94,
      "learning_rate": 0.001,
      "loss": 2.6138,
      "step": 98640
    },
    {
      "epoch": 18.94,
      "learning_rate": 0.001,
      "loss": 2.6227,
      "step": 98652
    },
    {
      "epoch": 18.94,
      "learning_rate": 0.001,
      "loss": 2.6038,
      "step": 98664
    },
    {
      "epoch": 18.95,
      "learning_rate": 0.001,
      "loss": 2.6143,
      "step": 98676
    },
    {
      "epoch": 18.95,
      "learning_rate": 0.001,
      "loss": 2.6167,
      "step": 98688
    },
    {
      "epoch": 18.95,
      "learning_rate": 0.001,
      "loss": 2.618,
      "step": 98700
    },
    {
      "epoch": 18.95,
      "learning_rate": 0.001,
      "loss": 2.6055,
      "step": 98712
    },
    {
      "epoch": 18.96,
      "learning_rate": 0.001,
      "loss": 2.6141,
      "step": 98724
    },
    {
      "epoch": 18.96,
      "learning_rate": 0.001,
      "loss": 2.6051,
      "step": 98736
    },
    {
      "epoch": 18.96,
      "learning_rate": 0.001,
      "loss": 2.6072,
      "step": 98748
    },
    {
      "epoch": 18.96,
      "eval_ag_news_accuracy": 0.317,
      "eval_ag_news_bleu_score": 4.639039641028421,
      "eval_ag_news_bleu_score_sem": 0.14148395008546424,
      "eval_ag_news_emb_cos_sim": 0.7964121103286743,
      "eval_ag_news_emb_cos_sim_sem": 0.007701351382781942,
      "eval_ag_news_emb_top1_equal": 0.21875,
      "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.6184489727020264,
      "eval_ag_news_n_ngrams_match_1": 13.676,
      "eval_ag_news_n_ngrams_match_2": 3.044,
      "eval_ag_news_n_ngrams_match_3": 0.858,
      "eval_ag_news_num_pred_words": 46.496,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 37.27970112313729,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.33992006339498293,
      "eval_ag_news_runtime": 10.7139,
      "eval_ag_news_samples_per_second": 46.668,
      "eval_ag_news_steps_per_second": 0.093,
      "eval_ag_news_token_set_f1": 0.3419462202753087,
      "eval_ag_news_token_set_f1_sem": 0.004548638895730784,
      "eval_ag_news_token_set_precision": 0.3240014773087771,
      "eval_ag_news_token_set_recall": 0.3785108601336519,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 98750
    },
    {
      "epoch": 18.96,
      "eval_anthropic_toxic_prompts_accuracy": 0.11096875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.0972500817187685,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1276785346267627,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6599158048629761,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009879960836436025,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02147948148198014,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2998158931732178,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.92,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.852,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.694,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.044,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 27.107647758208152,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20362812406492492,
      "eval_anthropic_toxic_prompts_runtime": 11.5543,
      "eval_anthropic_toxic_prompts_samples_per_second": 43.274,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.087,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3520620159799845,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0065037939618324625,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4215860478372545,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.33424358581275976,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 98750
    },
    {
      "epoch": 18.96,
      "eval_arxiv_accuracy": 0.3435,
      "eval_arxiv_bleu_score": 4.225538367916897,
      "eval_arxiv_bleu_score_sem": 0.11828177777566976,
      "eval_arxiv_emb_cos_sim": 0.7533184289932251,
      "eval_arxiv_emb_cos_sim_sem": 0.007184629044404857,
      "eval_arxiv_emb_top1_equal": 0.25,
      "eval_arxiv_emb_top1_equal_sem": 0.03842366440207048,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.44754695892334,
      "eval_arxiv_n_ngrams_match_1": 14.748,
      "eval_arxiv_n_ngrams_match_2": 2.838,
      "eval_arxiv_n_ngrams_match_3": 0.634,
      "eval_arxiv_num_pred_words": 40.39,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 31.423215250431277,
      "eval_arxiv_pred_num_tokens": 62.96875,
      "eval_arxiv_rouge_score": 0.35245989038110115,
      "eval_arxiv_runtime": 10.721,
      "eval_arxiv_samples_per_second": 46.638,
      "eval_arxiv_steps_per_second": 0.093,
      "eval_arxiv_token_set_f1": 0.34649696710682854,
      "eval_arxiv_token_set_f1_sem": 0.004167536004941654,
      "eval_arxiv_token_set_precision": 0.29497048474841564,
      "eval_arxiv_token_set_recall": 0.43866842880839385,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 98750
    },
    {
      "epoch": 18.96,
      "eval_python_code_alpaca_accuracy": 0.15796875,
      "eval_python_code_alpaca_bleu_score": 4.283376921883962,
      "eval_python_code_alpaca_bleu_score_sem": 0.14634451987151456,
      "eval_python_code_alpaca_emb_cos_sim": 0.7345660924911499,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010276673974945387,
      "eval_python_code_alpaca_emb_top1_equal": 0.109375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.933269739151001,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.234,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.592,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.83,
      "eval_python_code_alpaca_num_pred_words": 43.082,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.788965182880695,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3084574351601737,
      "eval_python_code_alpaca_runtime": 10.7032,
      "eval_python_code_alpaca_samples_per_second": 46.715,
      "eval_python_code_alpaca_steps_per_second": 0.093,
      "eval_python_code_alpaca_token_set_f1": 0.4612948208534952,
      "eval_python_code_alpaca_token_set_f1_sem": 0.006030784850113751,
      "eval_python_code_alpaca_token_set_precision": 0.5039474581549664,
      "eval_python_code_alpaca_token_set_recall": 0.4529264734351371,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 98750
    },
    {
      "epoch": 18.96,
      "eval_wikibio_accuracy": 0.31671875,
      "eval_wikibio_bleu_score": 5.675754276078412,
      "eval_wikibio_bleu_score_sem": 0.20724148934950135,
      "eval_wikibio_emb_cos_sim": 0.7287104725837708,
      "eval_wikibio_emb_cos_sim_sem": 0.01082472656828975,
      "eval_wikibio_emb_top1_equal": 0.1484375,
      "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.8048083782196045,
      "eval_wikibio_n_ngrams_match_1": 9.8,
      "eval_wikibio_n_ngrams_match_2": 3.222,
      "eval_wikibio_n_ngrams_match_3": 1.174,
      "eval_wikibio_num_pred_words": 36.288,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 44.916642281352416,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3458388164725216,
      "eval_wikibio_runtime": 10.6871,
      "eval_wikibio_samples_per_second": 46.786,
      "eval_wikibio_steps_per_second": 0.094,
      "eval_wikibio_token_set_f1": 0.30751947622658277,
      "eval_wikibio_token_set_f1_sem": 0.0056847269734363725,
      "eval_wikibio_token_set_precision": 0.3154249383294301,
      "eval_wikibio_token_set_recall": 0.3165907024421341,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 98750
    },
    {
      "epoch": 18.96,
      "eval_nq_accuracy": 0.5194375,
      "eval_nq_bleu_score": 11.596588212619437,
      "eval_nq_bleu_score_sem": 0.47057519459940067,
      "eval_nq_emb_cos_sim": 0.8211898803710938,
      "eval_nq_emb_cos_sim_sem": 0.007924290430702856,
      "eval_nq_emb_top1_equal": 0.2578125,
      "eval_nq_emb_top1_equal_sem": 0.038815656435002115,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.231948137283325,
      "eval_nq_n_ngrams_match_1": 22.838,
      "eval_nq_n_ngrams_match_2": 8.412,
      "eval_nq_n_ngrams_match_3": 3.864,
      "eval_nq_num_pred_words": 49.208,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.318001154395166,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.43907608242608354,
      "eval_nq_runtime": 11.0048,
      "eval_nq_samples_per_second": 45.435,
      "eval_nq_steps_per_second": 0.091,
      "eval_nq_token_set_f1": 0.4584433124858162,
      "eval_nq_token_set_f1_sem": 0.004895067730193653,
      "eval_nq_token_set_precision": 0.4145020439959143,
      "eval_nq_token_set_recall": 0.5216332944121967,
      "eval_nq_true_num_tokens": 64.0,
      "step": 98750
    },
    {
      "epoch": 18.96,
      "learning_rate": 0.001,
      "loss": 2.6086,
      "step": 98760
    },
    {
      "epoch": 18.97,
      "learning_rate": 0.001,
      "loss": 2.6068,
      "step": 98772
    },
    {
      "epoch": 18.97,
      "learning_rate": 0.001,
      "loss": 2.6221,
      "step": 98784
    },
    {
      "epoch": 18.97,
      "learning_rate": 0.001,
      "loss": 2.6123,
      "step": 98796
    },
    {
      "epoch": 18.97,
      "learning_rate": 0.001,
      "loss": 2.628,
      "step": 98808
    },
    {
      "epoch": 18.97,
      "learning_rate": 0.001,
      "loss": 2.6204,
      "step": 98820
    },
    {
      "epoch": 18.98,
      "learning_rate": 0.001,
      "loss": 2.6194,
      "step": 98832
    },
    {
      "epoch": 18.98,
      "learning_rate": 0.001,
      "loss": 2.6176,
      "step": 98844
    },
    {
      "epoch": 18.98,
      "learning_rate": 0.001,
      "loss": 2.6202,
      "step": 98856
    },
    {
      "epoch": 18.98,
      "learning_rate": 0.001,
      "loss": 2.6173,
      "step": 98868
    },
    {
      "epoch": 18.99,
      "learning_rate": 0.001,
      "loss": 2.6294,
      "step": 98880
    },
    {
      "epoch": 18.99,
      "learning_rate": 0.001,
      "loss": 2.6163,
      "step": 98892
    },
    {
      "epoch": 18.99,
      "learning_rate": 0.001,
      "loss": 2.6058,
      "step": 98904
    },
    {
      "epoch": 18.99,
      "learning_rate": 0.001,
      "loss": 2.6067,
      "step": 98916
    },
    {
      "epoch": 19.0,
      "learning_rate": 0.001,
      "loss": 2.617,
      "step": 98928
    },
    {
      "epoch": 19.0,
      "learning_rate": 0.001,
      "loss": 2.6157,
      "step": 98940
    },
    {
      "epoch": 19.0,
      "learning_rate": 0.001,
      "loss": 2.6122,
      "step": 98952
    },
    {
      "epoch": 19.0,
      "learning_rate": 0.001,
      "loss": 2.5956,
      "step": 98964
    },
    {
      "epoch": 19.0,
      "learning_rate": 0.001,
      "loss": 2.6057,
      "step": 98976
    },
    {
      "epoch": 19.01,
      "learning_rate": 0.001,
      "loss": 2.5963,
      "step": 98988
    },
    {
      "epoch": 19.01,
      "learning_rate": 0.001,
      "loss": 2.5941,
      "step": 99000
    },
    {
      "epoch": 19.01,
      "learning_rate": 0.001,
      "loss": 2.5905,
      "step": 99012
    },
    {
      "epoch": 19.01,
      "learning_rate": 0.001,
      "loss": 2.5911,
      "step": 99024
    },
    {
      "epoch": 19.02,
      "learning_rate": 0.001,
      "loss": 2.5849,
      "step": 99036
    },
    {
      "epoch": 19.02,
      "learning_rate": 0.001,
      "loss": 2.5854,
      "step": 99048
    },
    {
      "epoch": 19.02,
      "learning_rate": 0.001,
      "loss": 2.6008,
      "step": 99060
    },
    {
      "epoch": 19.02,
      "learning_rate": 0.001,
      "loss": 2.6004,
      "step": 99072
    },
    {
      "epoch": 19.03,
      "learning_rate": 0.001,
      "loss": 2.5953,
      "step": 99084
    },
    {
      "epoch": 19.03,
      "learning_rate": 0.001,
      "loss": 2.6051,
      "step": 99096
    },
    {
      "epoch": 19.03,
      "learning_rate": 0.001,
      "loss": 2.5995,
      "step": 99108
    },
    {
      "epoch": 19.03,
      "learning_rate": 0.001,
      "loss": 2.5966,
      "step": 99120
    },
    {
      "epoch": 19.03,
      "learning_rate": 0.001,
      "loss": 2.5986,
      "step": 99132
    },
    {
      "epoch": 19.04,
      "learning_rate": 0.001,
      "loss": 2.603,
      "step": 99144
    },
    {
      "epoch": 19.04,
      "learning_rate": 0.001,
      "loss": 2.5982,
      "step": 99156
    },
    {
      "epoch": 19.04,
      "learning_rate": 0.001,
      "loss": 2.6001,
      "step": 99168
    },
    {
      "epoch": 19.04,
      "learning_rate": 0.001,
      "loss": 2.5978,
      "step": 99180
    },
    {
      "epoch": 19.05,
      "learning_rate": 0.001,
      "loss": 2.5911,
      "step": 99192
    },
    {
      "epoch": 19.05,
      "learning_rate": 0.001,
      "loss": 2.5974,
      "step": 99204
    },
    {
      "epoch": 19.05,
      "learning_rate": 0.001,
      "loss": 2.5962,
      "step": 99216
    },
    {
      "epoch": 19.05,
      "learning_rate": 0.001,
      "loss": 2.5901,
      "step": 99228
    },
    {
      "epoch": 19.06,
      "learning_rate": 0.001,
      "loss": 2.5936,
      "step": 99240
    },
    {
      "epoch": 19.06,
      "learning_rate": 0.001,
      "loss": 2.5901,
      "step": 99252
    },
    {
      "epoch": 19.06,
      "learning_rate": 0.001,
      "loss": 2.5891,
      "step": 99264
    },
    {
      "epoch": 19.06,
      "learning_rate": 0.001,
      "loss": 2.5897,
      "step": 99276
    },
    {
      "epoch": 19.06,
      "learning_rate": 0.001,
      "loss": 2.5926,
      "step": 99288
    },
    {
      "epoch": 19.07,
      "learning_rate": 0.001,
      "loss": 2.6003,
      "step": 99300
    },
    {
      "epoch": 19.07,
      "learning_rate": 0.001,
      "loss": 2.5813,
      "step": 99312
    },
    {
      "epoch": 19.07,
      "learning_rate": 0.001,
      "loss": 2.5963,
      "step": 99324
    },
    {
      "epoch": 19.07,
      "learning_rate": 0.001,
      "loss": 2.6078,
      "step": 99336
    },
    {
      "epoch": 19.08,
      "learning_rate": 0.001,
      "loss": 2.5982,
      "step": 99348
    },
    {
      "epoch": 19.08,
      "learning_rate": 0.001,
      "loss": 2.5907,
      "step": 99360
    },
    {
      "epoch": 19.08,
      "learning_rate": 0.001,
      "loss": 2.6052,
      "step": 99372
    },
    {
      "epoch": 19.08,
      "eval_ag_news_accuracy": 0.317625,
      "eval_ag_news_bleu_score": 4.712855237771482,
      "eval_ag_news_bleu_score_sem": 0.15325488685056488,
      "eval_ag_news_emb_cos_sim": 0.7918837070465088,
      "eval_ag_news_emb_cos_sim_sem": 0.008232398381296666,
      "eval_ag_news_emb_top1_equal": 0.21875,
      "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.6211390495300293,
      "eval_ag_news_n_ngrams_match_1": 13.594,
      "eval_ag_news_n_ngrams_match_2": 2.928,
      "eval_ag_news_n_ngrams_match_3": 0.854,
      "eval_ag_news_num_pred_words": 46.238,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 37.38012139184477,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.33735291546956647,
      "eval_ag_news_runtime": 11.0364,
      "eval_ag_news_samples_per_second": 45.304,
      "eval_ag_news_steps_per_second": 0.091,
      "eval_ag_news_token_set_f1": 0.34168681669394485,
      "eval_ag_news_token_set_f1_sem": 0.004451713103573308,
      "eval_ag_news_token_set_precision": 0.3239396791391766,
      "eval_ag_news_token_set_recall": 0.3780113150446624,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 99375
    },
    {
      "epoch": 19.08,
      "eval_anthropic_toxic_prompts_accuracy": 0.11165625,
      "eval_anthropic_toxic_prompts_bleu_score": 2.8753528057104587,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11200038990985718,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.658082127571106,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009482624714822111,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2821171283721924,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.878,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.706,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.616,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.5,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.632096623720326,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20352867002517688,
      "eval_anthropic_toxic_prompts_runtime": 10.6035,
      "eval_anthropic_toxic_prompts_samples_per_second": 47.154,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.094,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.34948299988562537,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00660868124375518,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4169705698003172,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32867003411548057,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 99375
    },
    {
      "epoch": 19.08,
      "eval_arxiv_accuracy": 0.3436875,
      "eval_arxiv_bleu_score": 4.216196809279154,
      "eval_arxiv_bleu_score_sem": 0.11575719857956805,
      "eval_arxiv_emb_cos_sim": 0.7440145015716553,
      "eval_arxiv_emb_cos_sim_sem": 0.009824001675414702,
      "eval_arxiv_emb_top1_equal": 0.2265625,
      "eval_arxiv_emb_top1_equal_sem": 0.03714537682851538,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4522480964660645,
      "eval_arxiv_n_ngrams_match_1": 14.718,
      "eval_arxiv_n_ngrams_match_2": 2.814,
      "eval_arxiv_n_ngrams_match_3": 0.63,
      "eval_arxiv_num_pred_words": 40.53,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 31.57128788957074,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3484484329841866,
      "eval_arxiv_runtime": 10.9272,
      "eval_arxiv_samples_per_second": 45.758,
      "eval_arxiv_steps_per_second": 0.092,
      "eval_arxiv_token_set_f1": 0.34736528048826953,
      "eval_arxiv_token_set_f1_sem": 0.00430616281862336,
      "eval_arxiv_token_set_precision": 0.29433303516015946,
      "eval_arxiv_token_set_recall": 0.44004642289141843,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 99375
    },
    {
      "epoch": 19.08,
      "eval_python_code_alpaca_accuracy": 0.15725,
      "eval_python_code_alpaca_bleu_score": 4.166026646744993,
      "eval_python_code_alpaca_bleu_score_sem": 0.12735284290301357,
      "eval_python_code_alpaca_emb_cos_sim": 0.7433856725692749,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009554076135393729,
      "eval_python_code_alpaca_emb_top1_equal": 0.1328125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.9262959957122803,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.504,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.636,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.78,
      "eval_python_code_alpaca_num_pred_words": 43.364,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.65839158279246,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3208857702007807,
      "eval_python_code_alpaca_runtime": 10.6558,
      "eval_python_code_alpaca_samples_per_second": 46.923,
      "eval_python_code_alpaca_steps_per_second": 0.094,
      "eval_python_code_alpaca_token_set_f1": 0.47143257862849414,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005703811651620781,
      "eval_python_code_alpaca_token_set_precision": 0.5195892191390774,
      "eval_python_code_alpaca_token_set_recall": 0.4500885233241683,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 99375
    },
    {
      "epoch": 19.08,
      "eval_wikibio_accuracy": 0.318125,
      "eval_wikibio_bleu_score": 5.860758974171116,
      "eval_wikibio_bleu_score_sem": 0.19817267944771857,
      "eval_wikibio_emb_cos_sim": 0.7324365377426147,
      "eval_wikibio_emb_cos_sim_sem": 0.009563551273774585,
      "eval_wikibio_emb_top1_equal": 0.1640625,
      "eval_wikibio_emb_top1_equal_sem": 0.03286167651298939,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.8286211490631104,
      "eval_wikibio_n_ngrams_match_1": 10.298,
      "eval_wikibio_n_ngrams_match_2": 3.386,
      "eval_wikibio_n_ngrams_match_3": 1.222,
      "eval_wikibio_num_pred_words": 37.216,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 45.99906862783368,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3545744978027274,
      "eval_wikibio_runtime": 15.6524,
      "eval_wikibio_samples_per_second": 31.944,
      "eval_wikibio_steps_per_second": 0.064,
      "eval_wikibio_token_set_f1": 0.3235636798898564,
      "eval_wikibio_token_set_f1_sem": 0.005042012481950076,
      "eval_wikibio_token_set_precision": 0.3329991077921081,
      "eval_wikibio_token_set_recall": 0.3292837209337521,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 99375
    },
    {
      "epoch": 19.08,
      "eval_nq_accuracy": 0.5224375,
      "eval_nq_bleu_score": 11.598419595691514,
      "eval_nq_bleu_score_sem": 0.47969006378495227,
      "eval_nq_emb_cos_sim": 0.8233333826065063,
      "eval_nq_emb_cos_sim_sem": 0.007962877846148103,
      "eval_nq_emb_top1_equal": 0.3125,
      "eval_nq_emb_top1_equal_sem": 0.041130074229814934,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.2288315296173096,
      "eval_nq_n_ngrams_match_1": 22.732,
      "eval_nq_n_ngrams_match_2": 8.392,
      "eval_nq_n_ngrams_match_3": 3.9,
      "eval_nq_num_pred_words": 49.068,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.289005807595395,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4385976121395514,
      "eval_nq_runtime": 11.1057,
      "eval_nq_samples_per_second": 45.022,
      "eval_nq_steps_per_second": 0.09,
      "eval_nq_token_set_f1": 0.45589403224100833,
      "eval_nq_token_set_f1_sem": 0.005147651387653305,
      "eval_nq_token_set_precision": 0.41086280519093743,
      "eval_nq_token_set_recall": 0.5211958959232188,
      "eval_nq_true_num_tokens": 64.0,
      "step": 99375
    },
    {
      "epoch": 19.08,
      "learning_rate": 0.001,
      "loss": 2.5921,
      "step": 99384
    },
    {
      "epoch": 19.09,
      "learning_rate": 0.001,
      "loss": 2.5944,
      "step": 99396
    },
    {
      "epoch": 19.09,
      "learning_rate": 0.001,
      "loss": 2.5987,
      "step": 99408
    },
    {
      "epoch": 19.09,
      "learning_rate": 0.001,
      "loss": 2.5999,
      "step": 99420
    },
    {
      "epoch": 19.09,
      "learning_rate": 0.001,
      "loss": 2.5997,
      "step": 99432
    },
    {
      "epoch": 19.09,
      "learning_rate": 0.001,
      "loss": 2.5934,
      "step": 99444
    },
    {
      "epoch": 19.1,
      "learning_rate": 0.001,
      "loss": 2.5931,
      "step": 99456
    },
    {
      "epoch": 19.1,
      "learning_rate": 0.001,
      "loss": 2.5979,
      "step": 99468
    },
    {
      "epoch": 19.1,
      "learning_rate": 0.001,
      "loss": 2.5973,
      "step": 99480
    },
    {
      "epoch": 19.1,
      "learning_rate": 0.001,
      "loss": 2.6049,
      "step": 99492
    },
    {
      "epoch": 19.11,
      "learning_rate": 0.001,
      "loss": 2.6011,
      "step": 99504
    },
    {
      "epoch": 19.11,
      "learning_rate": 0.001,
      "loss": 2.5911,
      "step": 99516
    },
    {
      "epoch": 19.11,
      "learning_rate": 0.001,
      "loss": 2.5884,
      "step": 99528
    },
    {
      "epoch": 19.11,
      "learning_rate": 0.001,
      "loss": 2.5947,
      "step": 99540
    },
    {
      "epoch": 19.12,
      "learning_rate": 0.001,
      "loss": 2.5941,
      "step": 99552
    },
    {
      "epoch": 19.12,
      "learning_rate": 0.001,
      "loss": 2.5863,
      "step": 99564
    },
    {
      "epoch": 19.12,
      "learning_rate": 0.001,
      "loss": 2.5904,
      "step": 99576
    },
    {
      "epoch": 19.12,
      "learning_rate": 0.001,
      "loss": 2.5975,
      "step": 99588
    },
    {
      "epoch": 19.12,
      "learning_rate": 0.001,
      "loss": 2.5994,
      "step": 99600
    },
    {
      "epoch": 19.13,
      "learning_rate": 0.001,
      "loss": 2.6068,
      "step": 99612
    },
    {
      "epoch": 19.13,
      "learning_rate": 0.001,
      "loss": 2.5928,
      "step": 99624
    },
    {
      "epoch": 19.13,
      "learning_rate": 0.001,
      "loss": 2.5895,
      "step": 99636
    },
    {
      "epoch": 19.13,
      "learning_rate": 0.001,
      "loss": 2.5998,
      "step": 99648
    },
    {
      "epoch": 19.14,
      "learning_rate": 0.001,
      "loss": 2.5972,
      "step": 99660
    },
    {
      "epoch": 19.14,
      "learning_rate": 0.001,
      "loss": 2.601,
      "step": 99672
    },
    {
      "epoch": 19.14,
      "learning_rate": 0.001,
      "loss": 2.5941,
      "step": 99684
    },
    {
      "epoch": 19.14,
      "learning_rate": 0.001,
      "loss": 2.5965,
      "step": 99696
    },
    {
      "epoch": 19.15,
      "learning_rate": 0.001,
      "loss": 2.6021,
      "step": 99708
    },
    {
      "epoch": 19.15,
      "learning_rate": 0.001,
      "loss": 2.5908,
      "step": 99720
    },
    {
      "epoch": 19.15,
      "learning_rate": 0.001,
      "loss": 2.5906,
      "step": 99732
    },
    {
      "epoch": 19.15,
      "learning_rate": 0.001,
      "loss": 2.6066,
      "step": 99744
    },
    {
      "epoch": 19.15,
      "learning_rate": 0.001,
      "loss": 2.6026,
      "step": 99756
    },
    {
      "epoch": 19.16,
      "learning_rate": 0.001,
      "loss": 2.5869,
      "step": 99768
    },
    {
      "epoch": 19.16,
      "learning_rate": 0.001,
      "loss": 2.5925,
      "step": 99780
    },
    {
      "epoch": 19.16,
      "learning_rate": 0.001,
      "loss": 2.5909,
      "step": 99792
    },
    {
      "epoch": 19.16,
      "learning_rate": 0.001,
      "loss": 2.5873,
      "step": 99804
    },
    {
      "epoch": 19.17,
      "learning_rate": 0.001,
      "loss": 2.5933,
      "step": 99816
    },
    {
      "epoch": 19.17,
      "learning_rate": 0.001,
      "loss": 2.5898,
      "step": 99828
    },
    {
      "epoch": 19.17,
      "learning_rate": 0.001,
      "loss": 2.6027,
      "step": 99840
    },
    {
      "epoch": 19.17,
      "learning_rate": 0.001,
      "loss": 2.5929,
      "step": 99852
    },
    {
      "epoch": 19.18,
      "learning_rate": 0.001,
      "loss": 2.5975,
      "step": 99864
    },
    {
      "epoch": 19.18,
      "learning_rate": 0.001,
      "loss": 2.6031,
      "step": 99876
    },
    {
      "epoch": 19.18,
      "learning_rate": 0.001,
      "loss": 2.6079,
      "step": 99888
    },
    {
      "epoch": 19.18,
      "learning_rate": 0.001,
      "loss": 2.5843,
      "step": 99900
    },
    {
      "epoch": 19.18,
      "learning_rate": 0.001,
      "loss": 2.5978,
      "step": 99912
    },
    {
      "epoch": 19.19,
      "learning_rate": 0.001,
      "loss": 2.5982,
      "step": 99924
    },
    {
      "epoch": 19.19,
      "learning_rate": 0.001,
      "loss": 2.5919,
      "step": 99936
    },
    {
      "epoch": 19.19,
      "learning_rate": 0.001,
      "loss": 2.5967,
      "step": 99948
    },
    {
      "epoch": 19.19,
      "learning_rate": 0.001,
      "loss": 2.5942,
      "step": 99960
    },
    {
      "epoch": 19.2,
      "learning_rate": 0.001,
      "loss": 2.5979,
      "step": 99972
    },
    {
      "epoch": 19.2,
      "learning_rate": 0.001,
      "loss": 2.5973,
      "step": 99984
    },
    {
      "epoch": 19.2,
      "learning_rate": 0.001,
      "loss": 2.608,
      "step": 99996
    },
    {
      "epoch": 19.2,
      "eval_ag_news_accuracy": 0.31725,
      "eval_ag_news_bleu_score": 4.669043197200898,
      "eval_ag_news_bleu_score_sem": 0.1508911951991967,
      "eval_ag_news_emb_cos_sim": 0.8073618412017822,
      "eval_ag_news_emb_cos_sim_sem": 0.0064244289719918935,
      "eval_ag_news_emb_top1_equal": 0.2265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03714537682851538,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.624054193496704,
      "eval_ag_news_n_ngrams_match_1": 13.868,
      "eval_ag_news_n_ngrams_match_2": 3.068,
      "eval_ag_news_n_ngrams_match_3": 0.85,
      "eval_ag_news_num_pred_words": 46.62,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 37.48924881098155,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.34281124891200276,
      "eval_ag_news_runtime": 11.0121,
      "eval_ag_news_samples_per_second": 45.405,
      "eval_ag_news_steps_per_second": 0.091,
      "eval_ag_news_token_set_f1": 0.34887217658055775,
      "eval_ag_news_token_set_f1_sem": 0.004424521074341578,
      "eval_ag_news_token_set_precision": 0.3319184970220668,
      "eval_ag_news_token_set_recall": 0.3841653317804411,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 100000
    },
    {
      "epoch": 19.2,
      "eval_anthropic_toxic_prompts_accuracy": 0.1116875,
      "eval_anthropic_toxic_prompts_bleu_score": 2.9737272823415504,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10541475031914696,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6692122220993042,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009298105812041418,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1640625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.03286167651298939,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.3108067512512207,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.998,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.872,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.666,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.548,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 27.40722737031086,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20394249011862936,
      "eval_anthropic_toxic_prompts_runtime": 10.5835,
      "eval_anthropic_toxic_prompts_samples_per_second": 47.244,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.094,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3476096325210851,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006783717493097351,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.42239181915534135,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3264111233596592,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 100000
    },
    {
      "epoch": 19.2,
      "eval_arxiv_accuracy": 0.3455625,
      "eval_arxiv_bleu_score": 4.39862195394743,
      "eval_arxiv_bleu_score_sem": 0.13023088493931498,
      "eval_arxiv_emb_cos_sim": 0.7599332928657532,
      "eval_arxiv_emb_cos_sim_sem": 0.008488366729015453,
      "eval_arxiv_emb_top1_equal": 0.2578125,
      "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4565768241882324,
      "eval_arxiv_n_ngrams_match_1": 14.918,
      "eval_arxiv_n_ngrams_match_2": 2.89,
      "eval_arxiv_n_ngrams_match_3": 0.694,
      "eval_arxiv_num_pred_words": 40.35,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 31.708247615502884,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.35304818574208807,
      "eval_arxiv_runtime": 11.2435,
      "eval_arxiv_samples_per_second": 44.47,
      "eval_arxiv_steps_per_second": 0.089,
      "eval_arxiv_token_set_f1": 0.3500005621030236,
      "eval_arxiv_token_set_f1_sem": 0.004217378558848126,
      "eval_arxiv_token_set_precision": 0.2987826934657279,
      "eval_arxiv_token_set_recall": 0.4412166514947108,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 100000
    },
    {
      "epoch": 19.2,
      "eval_python_code_alpaca_accuracy": 0.15534375,
      "eval_python_code_alpaca_bleu_score": 4.153210658729626,
      "eval_python_code_alpaca_bleu_score_sem": 0.1297311063473779,
      "eval_python_code_alpaca_emb_cos_sim": 0.7312737703323364,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.011410933913337615,
      "eval_python_code_alpaca_emb_top1_equal": 0.1171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.951820135116577,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.462,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.666,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.812,
      "eval_python_code_alpaca_num_pred_words": 43.754,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 19.140760812737167,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.31531171332182556,
      "eval_python_code_alpaca_runtime": 10.547,
      "eval_python_code_alpaca_samples_per_second": 47.407,
      "eval_python_code_alpaca_steps_per_second": 0.095,
      "eval_python_code_alpaca_token_set_f1": 0.46519926251865085,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005925378375173091,
      "eval_python_code_alpaca_token_set_precision": 0.517281067157932,
      "eval_python_code_alpaca_token_set_recall": 0.4425828900974685,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 100000
    },
    {
      "epoch": 19.2,
      "eval_wikibio_accuracy": 0.31925,
      "eval_wikibio_bleu_score": 5.865238303211395,
      "eval_wikibio_bleu_score_sem": 0.20600500077897738,
      "eval_wikibio_emb_cos_sim": 0.7343774437904358,
      "eval_wikibio_emb_cos_sim_sem": 0.008475737162441361,
      "eval_wikibio_emb_top1_equal": 0.203125,
      "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.823119640350342,
      "eval_wikibio_n_ngrams_match_1": 10.14,
      "eval_wikibio_n_ngrams_match_2": 3.288,
      "eval_wikibio_n_ngrams_match_3": 1.202,
      "eval_wikibio_num_pred_words": 37.136,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 45.7466991938484,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.35239889671330227,
      "eval_wikibio_runtime": 10.9485,
      "eval_wikibio_samples_per_second": 45.668,
      "eval_wikibio_steps_per_second": 0.091,
      "eval_wikibio_token_set_f1": 0.3188697627561331,
      "eval_wikibio_token_set_f1_sem": 0.0051094826566727486,
      "eval_wikibio_token_set_precision": 0.3285978075009286,
      "eval_wikibio_token_set_recall": 0.32486593198486036,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 100000
    },
    {
      "epoch": 19.2,
      "eval_nq_accuracy": 0.5225625,
      "eval_nq_bleu_score": 11.372865626136264,
      "eval_nq_bleu_score_sem": 0.4754612606808897,
      "eval_nq_emb_cos_sim": 0.8224873542785645,
      "eval_nq_emb_cos_sim_sem": 0.007489033340685773,
      "eval_nq_emb_top1_equal": 0.265625,
      "eval_nq_emb_top1_equal_sem": 0.03919146934646163,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.229804277420044,
      "eval_nq_n_ngrams_match_1": 22.954,
      "eval_nq_n_ngrams_match_2": 8.292,
      "eval_nq_n_ngrams_match_3": 3.736,
      "eval_nq_num_pred_words": 49.326,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.298046063816154,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4418118193963947,
      "eval_nq_runtime": 10.9782,
      "eval_nq_samples_per_second": 45.545,
      "eval_nq_steps_per_second": 0.091,
      "eval_nq_token_set_f1": 0.458000988883285,
      "eval_nq_token_set_f1_sem": 0.004996959045427728,
      "eval_nq_token_set_precision": 0.41627873774746343,
      "eval_nq_token_set_recall": 0.5168683928399,
      "eval_nq_true_num_tokens": 64.0,
      "step": 100000
    },
    {
      "epoch": 19.2,
      "learning_rate": 0.001,
      "loss": 2.5868,
      "step": 100008
    },
    {
      "epoch": 19.21,
      "learning_rate": 0.001,
      "loss": 2.5922,
      "step": 100020
    },
    {
      "epoch": 19.21,
      "learning_rate": 0.001,
      "loss": 2.6018,
      "step": 100032
    },
    {
      "epoch": 19.21,
      "learning_rate": 0.001,
      "loss": 2.5974,
      "step": 100044
    },
    {
      "epoch": 19.21,
      "learning_rate": 0.001,
      "loss": 2.6042,
      "step": 100056
    },
    {
      "epoch": 19.21,
      "learning_rate": 0.001,
      "loss": 2.605,
      "step": 100068
    },
    {
      "epoch": 19.22,
      "learning_rate": 0.001,
      "loss": 2.605,
      "step": 100080
    },
    {
      "epoch": 19.22,
      "learning_rate": 0.001,
      "loss": 2.6,
      "step": 100092
    },
    {
      "epoch": 19.22,
      "learning_rate": 0.001,
      "loss": 2.5973,
      "step": 100104
    },
    {
      "epoch": 19.22,
      "learning_rate": 0.001,
      "loss": 2.599,
      "step": 100116
    },
    {
      "epoch": 19.23,
      "learning_rate": 0.001,
      "loss": 2.6065,
      "step": 100128
    },
    {
      "epoch": 19.23,
      "learning_rate": 0.001,
      "loss": 2.6066,
      "step": 100140
    },
    {
      "epoch": 19.23,
      "learning_rate": 0.001,
      "loss": 2.5916,
      "step": 100152
    },
    {
      "epoch": 19.23,
      "learning_rate": 0.001,
      "loss": 2.605,
      "step": 100164
    },
    {
      "epoch": 19.24,
      "learning_rate": 0.001,
      "loss": 2.601,
      "step": 100176
    },
    {
      "epoch": 19.24,
      "learning_rate": 0.001,
      "loss": 2.6045,
      "step": 100188
    },
    {
      "epoch": 19.24,
      "learning_rate": 0.001,
      "loss": 2.5953,
      "step": 100200
    },
    {
      "epoch": 19.24,
      "learning_rate": 0.001,
      "loss": 2.595,
      "step": 100212
    },
    {
      "epoch": 19.24,
      "learning_rate": 0.001,
      "loss": 2.6054,
      "step": 100224
    },
    {
      "epoch": 19.25,
      "learning_rate": 0.001,
      "loss": 2.5933,
      "step": 100236
    },
    {
      "epoch": 19.25,
      "learning_rate": 0.001,
      "loss": 2.5874,
      "step": 100248
    },
    {
      "epoch": 19.25,
      "learning_rate": 0.001,
      "loss": 2.5985,
      "step": 100260
    },
    {
      "epoch": 19.25,
      "learning_rate": 0.001,
      "loss": 2.5913,
      "step": 100272
    },
    {
      "epoch": 19.26,
      "learning_rate": 0.001,
      "loss": 2.5906,
      "step": 100284
    },
    {
      "epoch": 19.26,
      "learning_rate": 0.001,
      "loss": 2.5983,
      "step": 100296
    },
    {
      "epoch": 19.26,
      "learning_rate": 0.001,
      "loss": 2.6015,
      "step": 100308
    },
    {
      "epoch": 19.26,
      "learning_rate": 0.001,
      "loss": 2.6036,
      "step": 100320
    },
    {
      "epoch": 19.26,
      "learning_rate": 0.001,
      "loss": 2.5961,
      "step": 100332
    },
    {
      "epoch": 19.27,
      "learning_rate": 0.001,
      "loss": 2.5982,
      "step": 100344
    },
    {
      "epoch": 19.27,
      "learning_rate": 0.001,
      "loss": 2.5939,
      "step": 100356
    },
    {
      "epoch": 19.27,
      "learning_rate": 0.001,
      "loss": 2.6015,
      "step": 100368
    },
    {
      "epoch": 19.27,
      "learning_rate": 0.001,
      "loss": 2.593,
      "step": 100380
    },
    {
      "epoch": 19.28,
      "learning_rate": 0.001,
      "loss": 2.6088,
      "step": 100392
    },
    {
      "epoch": 19.28,
      "learning_rate": 0.001,
      "loss": 2.6081,
      "step": 100404
    },
    {
      "epoch": 19.28,
      "learning_rate": 0.001,
      "loss": 2.6071,
      "step": 100416
    },
    {
      "epoch": 19.28,
      "learning_rate": 0.001,
      "loss": 2.609,
      "step": 100428
    },
    {
      "epoch": 19.29,
      "learning_rate": 0.001,
      "loss": 2.607,
      "step": 100440
    },
    {
      "epoch": 19.29,
      "learning_rate": 0.001,
      "loss": 2.5966,
      "step": 100452
    },
    {
      "epoch": 19.29,
      "learning_rate": 0.001,
      "loss": 2.5987,
      "step": 100464
    },
    {
      "epoch": 19.29,
      "learning_rate": 0.001,
      "loss": 2.5967,
      "step": 100476
    },
    {
      "epoch": 19.29,
      "learning_rate": 0.001,
      "loss": 2.5976,
      "step": 100488
    },
    {
      "epoch": 19.3,
      "learning_rate": 0.001,
      "loss": 2.5996,
      "step": 100500
    },
    {
      "epoch": 19.3,
      "learning_rate": 0.001,
      "loss": 2.5981,
      "step": 100512
    },
    {
      "epoch": 19.3,
      "learning_rate": 0.001,
      "loss": 2.6017,
      "step": 100524
    },
    {
      "epoch": 19.3,
      "learning_rate": 0.001,
      "loss": 2.6033,
      "step": 100536
    },
    {
      "epoch": 19.31,
      "learning_rate": 0.001,
      "loss": 2.5974,
      "step": 100548
    },
    {
      "epoch": 19.31,
      "learning_rate": 0.001,
      "loss": 2.5976,
      "step": 100560
    },
    {
      "epoch": 19.31,
      "learning_rate": 0.001,
      "loss": 2.605,
      "step": 100572
    },
    {
      "epoch": 19.31,
      "learning_rate": 0.001,
      "loss": 2.6116,
      "step": 100584
    },
    {
      "epoch": 19.32,
      "learning_rate": 0.001,
      "loss": 2.5884,
      "step": 100596
    },
    {
      "epoch": 19.32,
      "learning_rate": 0.001,
      "loss": 2.6021,
      "step": 100608
    },
    {
      "epoch": 19.32,
      "learning_rate": 0.001,
      "loss": 2.6087,
      "step": 100620
    },
    {
      "epoch": 19.32,
      "eval_ag_news_accuracy": 0.31703125,
      "eval_ag_news_bleu_score": 4.591186233958706,
      "eval_ag_news_bleu_score_sem": 0.13914706106809424,
      "eval_ag_news_emb_cos_sim": 0.8013380765914917,
      "eval_ag_news_emb_cos_sim_sem": 0.007497183487516567,
      "eval_ag_news_emb_top1_equal": 0.203125,
      "eval_ag_news_emb_top1_equal_sem": 0.03570055125142555,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.620837926864624,
      "eval_ag_news_n_ngrams_match_1": 13.58,
      "eval_ag_news_n_ngrams_match_2": 2.954,
      "eval_ag_news_n_ngrams_match_3": 0.814,
      "eval_ag_news_num_pred_words": 46.866,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 37.36886708460662,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3356389783524234,
      "eval_ag_news_runtime": 11.275,
      "eval_ag_news_samples_per_second": 44.346,
      "eval_ag_news_steps_per_second": 0.089,
      "eval_ag_news_token_set_f1": 0.34021547828123866,
      "eval_ag_news_token_set_f1_sem": 0.004377350883325469,
      "eval_ag_news_token_set_precision": 0.324636686330962,
      "eval_ag_news_token_set_recall": 0.3711949891491547,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 100625
    },
    {
      "epoch": 19.32,
      "eval_anthropic_toxic_prompts_accuracy": 0.11203125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.044232461191681,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11817635203924869,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6540555357933044,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010932708111263676,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.3046493530273438,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.022,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.856,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.678,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.528,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 27.238988645136953,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20568465539913397,
      "eval_anthropic_toxic_prompts_runtime": 11.386,
      "eval_anthropic_toxic_prompts_samples_per_second": 43.913,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.088,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3536839649941146,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006587056755202966,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.42552078491173967,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.33493448942325127,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 100625
    },
    {
      "epoch": 19.32,
      "eval_arxiv_accuracy": 0.34175,
      "eval_arxiv_bleu_score": 4.139237394545484,
      "eval_arxiv_bleu_score_sem": 0.10746435437629161,
      "eval_arxiv_emb_cos_sim": 0.752979576587677,
      "eval_arxiv_emb_cos_sim_sem": 0.008673793106128149,
      "eval_arxiv_emb_top1_equal": 0.234375,
      "eval_arxiv_emb_top1_equal_sem": 0.03758909358128201,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.471484899520874,
      "eval_arxiv_n_ngrams_match_1": 14.94,
      "eval_arxiv_n_ngrams_match_2": 2.85,
      "eval_arxiv_n_ngrams_match_3": 0.59,
      "eval_arxiv_num_pred_words": 41.05,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 32.184497725333706,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3529319485637437,
      "eval_arxiv_runtime": 11.3186,
      "eval_arxiv_samples_per_second": 44.175,
      "eval_arxiv_steps_per_second": 0.088,
      "eval_arxiv_token_set_f1": 0.3476968869613086,
      "eval_arxiv_token_set_f1_sem": 0.0039758746415613,
      "eval_arxiv_token_set_precision": 0.2994408277086143,
      "eval_arxiv_token_set_recall": 0.42842181448168554,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 100625
    },
    {
      "epoch": 19.32,
      "eval_python_code_alpaca_accuracy": 0.15928125,
      "eval_python_code_alpaca_bleu_score": 4.256231394956872,
      "eval_python_code_alpaca_bleu_score_sem": 0.13961291586215355,
      "eval_python_code_alpaca_emb_cos_sim": 0.7518568634986877,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008534636724690828,
      "eval_python_code_alpaca_emb_top1_equal": 0.125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.9363017082214355,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.668,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.712,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.894,
      "eval_python_code_alpaca_num_pred_words": 44.758,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.846019193471047,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.316631569495196,
      "eval_python_code_alpaca_runtime": 11.4226,
      "eval_python_code_alpaca_samples_per_second": 43.773,
      "eval_python_code_alpaca_steps_per_second": 0.088,
      "eval_python_code_alpaca_token_set_f1": 0.46861993383758355,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005677920666004615,
      "eval_python_code_alpaca_token_set_precision": 0.526921459478536,
      "eval_python_code_alpaca_token_set_recall": 0.44183253948713774,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 100625
    },
    {
      "epoch": 19.32,
      "eval_wikibio_accuracy": 0.316625,
      "eval_wikibio_bleu_score": 5.700380231621618,
      "eval_wikibio_bleu_score_sem": 0.21207477886091958,
      "eval_wikibio_emb_cos_sim": 0.7466273307800293,
      "eval_wikibio_emb_cos_sim_sem": 0.008035836675098492,
      "eval_wikibio_emb_top1_equal": 0.171875,
      "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.847433567047119,
      "eval_wikibio_n_ngrams_match_1": 10.198,
      "eval_wikibio_n_ngrams_match_2": 3.322,
      "eval_wikibio_n_ngrams_match_3": 1.138,
      "eval_wikibio_num_pred_words": 37.086,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 46.87261331497524,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3532399034144793,
      "eval_wikibio_runtime": 10.6071,
      "eval_wikibio_samples_per_second": 47.138,
      "eval_wikibio_steps_per_second": 0.094,
      "eval_wikibio_token_set_f1": 0.31856224041107567,
      "eval_wikibio_token_set_f1_sem": 0.005105270663572564,
      "eval_wikibio_token_set_precision": 0.3298059710300844,
      "eval_wikibio_token_set_recall": 0.32245833767170023,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 100625
    },
    {
      "epoch": 19.32,
      "eval_nq_accuracy": 0.52084375,
      "eval_nq_bleu_score": 11.489144494353248,
      "eval_nq_bleu_score_sem": 0.48727283262135757,
      "eval_nq_emb_cos_sim": 0.8208481073379517,
      "eval_nq_emb_cos_sim_sem": 0.007484526920617647,
      "eval_nq_emb_top1_equal": 0.296875,
      "eval_nq_emb_top1_equal_sem": 0.04054163310179599,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.231174945831299,
      "eval_nq_n_ngrams_match_1": 22.694,
      "eval_nq_n_ngrams_match_2": 8.304,
      "eval_nq_n_ngrams_match_3": 3.826,
      "eval_nq_num_pred_words": 49.176,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.310799340102026,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.43799142477360303,
      "eval_nq_runtime": 11.0215,
      "eval_nq_samples_per_second": 45.366,
      "eval_nq_steps_per_second": 0.091,
      "eval_nq_token_set_f1": 0.4524631400963689,
      "eval_nq_token_set_f1_sem": 0.005023237513942982,
      "eval_nq_token_set_precision": 0.41001493006649997,
      "eval_nq_token_set_recall": 0.5134872668866016,
      "eval_nq_true_num_tokens": 64.0,
      "step": 100625
    },
    {
      "epoch": 19.32,
      "learning_rate": 0.001,
      "loss": 2.6002,
      "step": 100632
    },
    {
      "epoch": 19.32,
      "learning_rate": 0.001,
      "loss": 2.5928,
      "step": 100644
    },
    {
      "epoch": 19.33,
      "learning_rate": 0.001,
      "loss": 2.5891,
      "step": 100656
    },
    {
      "epoch": 19.33,
      "learning_rate": 0.001,
      "loss": 2.6001,
      "step": 100668
    },
    {
      "epoch": 19.33,
      "learning_rate": 0.001,
      "loss": 2.6033,
      "step": 100680
    },
    {
      "epoch": 19.33,
      "learning_rate": 0.001,
      "loss": 2.6068,
      "step": 100692
    },
    {
      "epoch": 19.34,
      "learning_rate": 0.001,
      "loss": 2.6047,
      "step": 100704
    },
    {
      "epoch": 19.34,
      "learning_rate": 0.001,
      "loss": 2.6049,
      "step": 100716
    },
    {
      "epoch": 19.34,
      "learning_rate": 0.001,
      "loss": 2.5925,
      "step": 100728
    },
    {
      "epoch": 19.34,
      "learning_rate": 0.001,
      "loss": 2.6,
      "step": 100740
    },
    {
      "epoch": 19.35,
      "learning_rate": 0.001,
      "loss": 2.6001,
      "step": 100752
    },
    {
      "epoch": 19.35,
      "learning_rate": 0.001,
      "loss": 2.6011,
      "step": 100764
    },
    {
      "epoch": 19.35,
      "learning_rate": 0.001,
      "loss": 2.5945,
      "step": 100776
    },
    {
      "epoch": 19.35,
      "learning_rate": 0.001,
      "loss": 2.5971,
      "step": 100788
    },
    {
      "epoch": 19.35,
      "learning_rate": 0.001,
      "loss": 2.599,
      "step": 100800
    },
    {
      "epoch": 19.36,
      "learning_rate": 0.001,
      "loss": 2.5923,
      "step": 100812
    },
    {
      "epoch": 19.36,
      "learning_rate": 0.001,
      "loss": 2.6074,
      "step": 100824
    },
    {
      "epoch": 19.36,
      "learning_rate": 0.001,
      "loss": 2.595,
      "step": 100836
    },
    {
      "epoch": 19.36,
      "learning_rate": 0.001,
      "loss": 2.6008,
      "step": 100848
    },
    {
      "epoch": 19.37,
      "learning_rate": 0.001,
      "loss": 2.5972,
      "step": 100860
    },
    {
      "epoch": 19.37,
      "learning_rate": 0.001,
      "loss": 2.5999,
      "step": 100872
    },
    {
      "epoch": 19.37,
      "learning_rate": 0.001,
      "loss": 2.5996,
      "step": 100884
    },
    {
      "epoch": 19.37,
      "learning_rate": 0.001,
      "loss": 2.5911,
      "step": 100896
    },
    {
      "epoch": 19.38,
      "learning_rate": 0.001,
      "loss": 2.61,
      "step": 100908
    },
    {
      "epoch": 19.38,
      "learning_rate": 0.001,
      "loss": 2.5872,
      "step": 100920
    },
    {
      "epoch": 19.38,
      "learning_rate": 0.001,
      "loss": 2.5941,
      "step": 100932
    },
    {
      "epoch": 19.38,
      "learning_rate": 0.001,
      "loss": 2.6022,
      "step": 100944
    },
    {
      "epoch": 19.38,
      "learning_rate": 0.001,
      "loss": 2.6003,
      "step": 100956
    },
    {
      "epoch": 19.39,
      "learning_rate": 0.001,
      "loss": 2.6016,
      "step": 100968
    },
    {
      "epoch": 19.39,
      "learning_rate": 0.001,
      "loss": 2.5941,
      "step": 100980
    },
    {
      "epoch": 19.39,
      "learning_rate": 0.001,
      "loss": 2.5918,
      "step": 100992
    },
    {
      "epoch": 19.39,
      "learning_rate": 0.001,
      "loss": 2.6033,
      "step": 101004
    },
    {
      "epoch": 19.4,
      "learning_rate": 0.001,
      "loss": 2.5971,
      "step": 101016
    },
    {
      "epoch": 19.4,
      "learning_rate": 0.001,
      "loss": 2.6012,
      "step": 101028
    },
    {
      "epoch": 19.4,
      "learning_rate": 0.001,
      "loss": 2.5965,
      "step": 101040
    },
    {
      "epoch": 19.4,
      "learning_rate": 0.001,
      "loss": 2.5916,
      "step": 101052
    },
    {
      "epoch": 19.41,
      "learning_rate": 0.001,
      "loss": 2.5967,
      "step": 101064
    },
    {
      "epoch": 19.41,
      "learning_rate": 0.001,
      "loss": 2.6005,
      "step": 101076
    },
    {
      "epoch": 19.41,
      "learning_rate": 0.001,
      "loss": 2.6005,
      "step": 101088
    },
    {
      "epoch": 19.41,
      "learning_rate": 0.001,
      "loss": 2.5863,
      "step": 101100
    },
    {
      "epoch": 19.41,
      "learning_rate": 0.001,
      "loss": 2.5929,
      "step": 101112
    },
    {
      "epoch": 19.42,
      "learning_rate": 0.001,
      "loss": 2.5892,
      "step": 101124
    },
    {
      "epoch": 19.42,
      "learning_rate": 0.001,
      "loss": 2.6017,
      "step": 101136
    },
    {
      "epoch": 19.42,
      "learning_rate": 0.001,
      "loss": 2.6024,
      "step": 101148
    },
    {
      "epoch": 19.42,
      "learning_rate": 0.001,
      "loss": 2.5972,
      "step": 101160
    },
    {
      "epoch": 19.43,
      "learning_rate": 0.001,
      "loss": 2.5931,
      "step": 101172
    },
    {
      "epoch": 19.43,
      "learning_rate": 0.001,
      "loss": 2.6115,
      "step": 101184
    },
    {
      "epoch": 19.43,
      "learning_rate": 0.001,
      "loss": 2.6051,
      "step": 101196
    },
    {
      "epoch": 19.43,
      "learning_rate": 0.001,
      "loss": 2.5975,
      "step": 101208
    },
    {
      "epoch": 19.44,
      "learning_rate": 0.001,
      "loss": 2.5996,
      "step": 101220
    },
    {
      "epoch": 19.44,
      "learning_rate": 0.001,
      "loss": 2.6016,
      "step": 101232
    },
    {
      "epoch": 19.44,
      "learning_rate": 0.001,
      "loss": 2.5922,
      "step": 101244
    },
    {
      "epoch": 19.44,
      "eval_ag_news_accuracy": 0.31871875,
      "eval_ag_news_bleu_score": 4.614082710993376,
      "eval_ag_news_bleu_score_sem": 0.1413972830854405,
      "eval_ag_news_emb_cos_sim": 0.7978829145431519,
      "eval_ag_news_emb_cos_sim_sem": 0.00735936460997496,
      "eval_ag_news_emb_top1_equal": 0.203125,
      "eval_ag_news_emb_top1_equal_sem": 0.03570055125142555,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5990102291107178,
      "eval_ag_news_n_ngrams_match_1": 13.86,
      "eval_ag_news_n_ngrams_match_2": 2.986,
      "eval_ag_news_n_ngrams_match_3": 0.81,
      "eval_ag_news_num_pred_words": 47.018,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 36.562028497378094,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3409600664235183,
      "eval_ag_news_runtime": 11.3183,
      "eval_ag_news_samples_per_second": 44.176,
      "eval_ag_news_steps_per_second": 0.088,
      "eval_ag_news_token_set_f1": 0.34765595066331473,
      "eval_ag_news_token_set_f1_sem": 0.0043305966468352155,
      "eval_ag_news_token_set_precision": 0.33083606207194277,
      "eval_ag_news_token_set_recall": 0.38215795902851046,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 101250
    },
    {
      "epoch": 19.44,
      "eval_anthropic_toxic_prompts_accuracy": 0.1124375,
      "eval_anthropic_toxic_prompts_bleu_score": 2.9389724036588585,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11049165821692555,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6543651223182678,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010366423713215812,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.269542932510376,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.964,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.8,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.64,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.854,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.29931603338497,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20547194012002334,
      "eval_anthropic_toxic_prompts_runtime": 10.6576,
      "eval_anthropic_toxic_prompts_samples_per_second": 46.915,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.094,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3467155327361048,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006458522233706072,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.42202739861726735,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.323628123225362,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 101250
    },
    {
      "epoch": 19.44,
      "eval_arxiv_accuracy": 0.3418125,
      "eval_arxiv_bleu_score": 4.391075653340749,
      "eval_arxiv_bleu_score_sem": 0.12009580445282984,
      "eval_arxiv_emb_cos_sim": 0.7571845054626465,
      "eval_arxiv_emb_cos_sim_sem": 0.006659691363290688,
      "eval_arxiv_emb_top1_equal": 0.2890625,
      "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4609336853027344,
      "eval_arxiv_n_ngrams_match_1": 15.096,
      "eval_arxiv_n_ngrams_match_2": 2.952,
      "eval_arxiv_n_ngrams_match_3": 0.682,
      "eval_arxiv_num_pred_words": 41.31,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 31.846697430849048,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3576581303122379,
      "eval_arxiv_runtime": 11.1349,
      "eval_arxiv_samples_per_second": 44.904,
      "eval_arxiv_steps_per_second": 0.09,
      "eval_arxiv_token_set_f1": 0.35162833277337363,
      "eval_arxiv_token_set_f1_sem": 0.004082675359765557,
      "eval_arxiv_token_set_precision": 0.30323196926083246,
      "eval_arxiv_token_set_recall": 0.4329976120264988,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 101250
    },
    {
      "epoch": 19.44,
      "eval_python_code_alpaca_accuracy": 0.157375,
      "eval_python_code_alpaca_bleu_score": 4.542335939915411,
      "eval_python_code_alpaca_bleu_score_sem": 0.15324601552301118,
      "eval_python_code_alpaca_emb_cos_sim": 0.7516502141952515,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007781579226011444,
      "eval_python_code_alpaca_emb_top1_equal": 0.125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.940692663192749,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.658,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.812,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.938,
      "eval_python_code_alpaca_num_pred_words": 44.206,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.928953161547238,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3248172631746263,
      "eval_python_code_alpaca_runtime": 11.2646,
      "eval_python_code_alpaca_samples_per_second": 44.387,
      "eval_python_code_alpaca_steps_per_second": 0.089,
      "eval_python_code_alpaca_token_set_f1": 0.47216346999724057,
      "eval_python_code_alpaca_token_set_f1_sem": 0.00526295892577975,
      "eval_python_code_alpaca_token_set_precision": 0.531452324383423,
      "eval_python_code_alpaca_token_set_recall": 0.4457535644804907,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 101250
    },
    {
      "epoch": 19.44,
      "eval_wikibio_accuracy": 0.31678125,
      "eval_wikibio_bleu_score": 5.742427462420765,
      "eval_wikibio_bleu_score_sem": 0.19567733220639472,
      "eval_wikibio_emb_cos_sim": 0.7237722873687744,
      "eval_wikibio_emb_cos_sim_sem": 0.010059310823373562,
      "eval_wikibio_emb_top1_equal": 0.1796875,
      "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.8146474361419678,
      "eval_wikibio_n_ngrams_match_1": 10.138,
      "eval_wikibio_n_ngrams_match_2": 3.416,
      "eval_wikibio_n_ngrams_match_3": 1.178,
      "eval_wikibio_num_pred_words": 36.86,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 45.360760998514806,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3513548987769217,
      "eval_wikibio_runtime": 10.8437,
      "eval_wikibio_samples_per_second": 46.11,
      "eval_wikibio_steps_per_second": 0.092,
      "eval_wikibio_token_set_f1": 0.31881149895725247,
      "eval_wikibio_token_set_f1_sem": 0.00526758127228664,
      "eval_wikibio_token_set_precision": 0.3293605004608683,
      "eval_wikibio_token_set_recall": 0.32457429815562655,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 101250
    },
    {
      "epoch": 19.44,
      "eval_nq_accuracy": 0.52209375,
      "eval_nq_bleu_score": 11.725716604927474,
      "eval_nq_bleu_score_sem": 0.4813356634441808,
      "eval_nq_emb_cos_sim": 0.8301323652267456,
      "eval_nq_emb_cos_sim_sem": 0.006866184579213749,
      "eval_nq_emb_top1_equal": 0.2421875,
      "eval_nq_emb_top1_equal_sem": 0.038014990119662626,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.2232437133789062,
      "eval_nq_n_ngrams_match_1": 23.148,
      "eval_nq_n_ngrams_match_2": 8.564,
      "eval_nq_n_ngrams_match_3": 3.906,
      "eval_nq_num_pred_words": 49.472,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.237245298890054,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.44575824014735066,
      "eval_nq_runtime": 11.2649,
      "eval_nq_samples_per_second": 44.386,
      "eval_nq_steps_per_second": 0.089,
      "eval_nq_token_set_f1": 0.46191403786435464,
      "eval_nq_token_set_f1_sem": 0.005112620923326513,
      "eval_nq_token_set_precision": 0.41927793908568967,
      "eval_nq_token_set_recall": 0.5215706163570686,
      "eval_nq_true_num_tokens": 64.0,
      "step": 101250
    },
    {
      "epoch": 19.44,
      "learning_rate": 0.001,
      "loss": 2.5934,
      "step": 101256
    },
    {
      "epoch": 19.44,
      "learning_rate": 0.001,
      "loss": 2.591,
      "step": 101268
    },
    {
      "epoch": 19.45,
      "learning_rate": 0.001,
      "loss": 2.5899,
      "step": 101280
    },
    {
      "epoch": 19.45,
      "learning_rate": 0.001,
      "loss": 2.5953,
      "step": 101292
    },
    {
      "epoch": 19.45,
      "learning_rate": 0.001,
      "loss": 2.5971,
      "step": 101304
    },
    {
      "epoch": 19.45,
      "learning_rate": 0.001,
      "loss": 2.6034,
      "step": 101316
    },
    {
      "epoch": 19.46,
      "learning_rate": 0.001,
      "loss": 2.5992,
      "step": 101328
    },
    {
      "epoch": 19.46,
      "learning_rate": 0.001,
      "loss": 2.5907,
      "step": 101340
    },
    {
      "epoch": 19.46,
      "learning_rate": 0.001,
      "loss": 2.5917,
      "step": 101352
    },
    {
      "epoch": 19.46,
      "learning_rate": 0.001,
      "loss": 2.5977,
      "step": 101364
    },
    {
      "epoch": 19.47,
      "learning_rate": 0.001,
      "loss": 2.5986,
      "step": 101376
    },
    {
      "epoch": 19.47,
      "learning_rate": 0.001,
      "loss": 2.5994,
      "step": 101388
    },
    {
      "epoch": 19.47,
      "learning_rate": 0.001,
      "loss": 2.592,
      "step": 101400
    },
    {
      "epoch": 19.47,
      "learning_rate": 0.001,
      "loss": 2.5924,
      "step": 101412
    },
    {
      "epoch": 19.47,
      "learning_rate": 0.001,
      "loss": 2.5973,
      "step": 101424
    },
    {
      "epoch": 19.48,
      "learning_rate": 0.001,
      "loss": 2.6041,
      "step": 101436
    },
    {
      "epoch": 19.48,
      "learning_rate": 0.001,
      "loss": 2.6014,
      "step": 101448
    },
    {
      "epoch": 19.48,
      "learning_rate": 0.001,
      "loss": 2.602,
      "step": 101460
    },
    {
      "epoch": 19.48,
      "learning_rate": 0.001,
      "loss": 2.615,
      "step": 101472
    },
    {
      "epoch": 19.49,
      "learning_rate": 0.001,
      "loss": 2.5996,
      "step": 101484
    },
    {
      "epoch": 19.49,
      "learning_rate": 0.001,
      "loss": 2.6026,
      "step": 101496
    },
    {
      "epoch": 19.49,
      "learning_rate": 0.001,
      "loss": 2.6084,
      "step": 101508
    },
    {
      "epoch": 19.49,
      "learning_rate": 0.001,
      "loss": 2.6021,
      "step": 101520
    },
    {
      "epoch": 19.5,
      "learning_rate": 0.001,
      "loss": 2.6056,
      "step": 101532
    },
    {
      "epoch": 19.5,
      "learning_rate": 0.001,
      "loss": 2.6031,
      "step": 101544
    },
    {
      "epoch": 19.5,
      "learning_rate": 0.001,
      "loss": 2.6007,
      "step": 101556
    },
    {
      "epoch": 19.5,
      "learning_rate": 0.001,
      "loss": 2.5972,
      "step": 101568
    },
    {
      "epoch": 19.5,
      "learning_rate": 0.001,
      "loss": 2.5995,
      "step": 101580
    },
    {
      "epoch": 19.51,
      "learning_rate": 0.001,
      "loss": 2.5982,
      "step": 101592
    },
    {
      "epoch": 19.51,
      "learning_rate": 0.001,
      "loss": 2.6039,
      "step": 101604
    },
    {
      "epoch": 19.51,
      "learning_rate": 0.001,
      "loss": 2.5908,
      "step": 101616
    },
    {
      "epoch": 19.51,
      "learning_rate": 0.001,
      "loss": 2.6104,
      "step": 101628
    },
    {
      "epoch": 19.52,
      "learning_rate": 0.001,
      "loss": 2.5909,
      "step": 101640
    },
    {
      "epoch": 19.52,
      "learning_rate": 0.001,
      "loss": 2.5996,
      "step": 101652
    },
    {
      "epoch": 19.52,
      "learning_rate": 0.001,
      "loss": 2.5962,
      "step": 101664
    },
    {
      "epoch": 19.52,
      "learning_rate": 0.001,
      "loss": 2.5991,
      "step": 101676
    },
    {
      "epoch": 19.53,
      "learning_rate": 0.001,
      "loss": 2.6001,
      "step": 101688
    },
    {
      "epoch": 19.53,
      "learning_rate": 0.001,
      "loss": 2.6006,
      "step": 101700
    },
    {
      "epoch": 19.53,
      "learning_rate": 0.001,
      "loss": 2.5991,
      "step": 101712
    },
    {
      "epoch": 19.53,
      "learning_rate": 0.001,
      "loss": 2.6124,
      "step": 101724
    },
    {
      "epoch": 19.53,
      "learning_rate": 0.001,
      "loss": 2.6046,
      "step": 101736
    },
    {
      "epoch": 19.54,
      "learning_rate": 0.001,
      "loss": 2.6032,
      "step": 101748
    },
    {
      "epoch": 19.54,
      "learning_rate": 0.001,
      "loss": 2.6062,
      "step": 101760
    },
    {
      "epoch": 19.54,
      "learning_rate": 0.001,
      "loss": 2.5897,
      "step": 101772
    },
    {
      "epoch": 19.54,
      "learning_rate": 0.001,
      "loss": 2.5892,
      "step": 101784
    },
    {
      "epoch": 19.55,
      "learning_rate": 0.001,
      "loss": 2.6013,
      "step": 101796
    },
    {
      "epoch": 19.55,
      "learning_rate": 0.001,
      "loss": 2.6073,
      "step": 101808
    },
    {
      "epoch": 19.55,
      "learning_rate": 0.001,
      "loss": 2.5973,
      "step": 101820
    },
    {
      "epoch": 19.55,
      "learning_rate": 0.001,
      "loss": 2.5941,
      "step": 101832
    },
    {
      "epoch": 19.56,
      "learning_rate": 0.001,
      "loss": 2.6025,
      "step": 101844
    },
    {
      "epoch": 19.56,
      "learning_rate": 0.001,
      "loss": 2.5958,
      "step": 101856
    },
    {
      "epoch": 19.56,
      "learning_rate": 0.001,
      "loss": 2.6,
      "step": 101868
    },
    {
      "epoch": 19.56,
      "eval_ag_news_accuracy": 0.319,
      "eval_ag_news_bleu_score": 4.704650097639762,
      "eval_ag_news_bleu_score_sem": 0.15210916543100397,
      "eval_ag_news_emb_cos_sim": 0.8024437427520752,
      "eval_ag_news_emb_cos_sim_sem": 0.007323365932990592,
      "eval_ag_news_emb_top1_equal": 0.28125,
      "eval_ag_news_emb_top1_equal_sem": 0.039896367485272234,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5986874103546143,
      "eval_ag_news_n_ngrams_match_1": 13.676,
      "eval_ag_news_n_ngrams_match_2": 3.044,
      "eval_ag_news_n_ngrams_match_3": 0.858,
      "eval_ag_news_num_pred_words": 46.918,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 36.550227493713095,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3385121464470692,
      "eval_ag_news_runtime": 11.4467,
      "eval_ag_news_samples_per_second": 43.681,
      "eval_ag_news_steps_per_second": 0.087,
      "eval_ag_news_token_set_f1": 0.34422470334528726,
      "eval_ag_news_token_set_f1_sem": 0.004504203028635901,
      "eval_ag_news_token_set_precision": 0.3248866218184864,
      "eval_ag_news_token_set_recall": 0.3855057975397497,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 101875
    },
    {
      "epoch": 19.56,
      "eval_anthropic_toxic_prompts_accuracy": 0.11259375,
      "eval_anthropic_toxic_prompts_bleu_score": 3.002170282392957,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11276446455027171,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6671350002288818,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00911122405156608,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.296062707901001,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.066,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.834,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.66,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.59,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 27.00609841982357,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20791827402255775,
      "eval_anthropic_toxic_prompts_runtime": 10.7394,
      "eval_anthropic_toxic_prompts_samples_per_second": 46.557,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.093,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3571139406219928,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006809083476808562,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.42852530404330225,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3345380694582043,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 101875
    },
    {
      "epoch": 19.56,
      "eval_arxiv_accuracy": 0.3444375,
      "eval_arxiv_bleu_score": 4.254292047209247,
      "eval_arxiv_bleu_score_sem": 0.12241767385326932,
      "eval_arxiv_emb_cos_sim": 0.7594126462936401,
      "eval_arxiv_emb_cos_sim_sem": 0.007300154873262233,
      "eval_arxiv_emb_top1_equal": 0.21875,
      "eval_arxiv_emb_top1_equal_sem": 0.03668319712192295,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4384498596191406,
      "eval_arxiv_n_ngrams_match_1": 14.698,
      "eval_arxiv_n_ngrams_match_2": 2.834,
      "eval_arxiv_n_ngrams_match_3": 0.668,
      "eval_arxiv_num_pred_words": 39.974,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 31.13865145586257,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3499436135823866,
      "eval_arxiv_runtime": 11.0466,
      "eval_arxiv_samples_per_second": 45.263,
      "eval_arxiv_steps_per_second": 0.091,
      "eval_arxiv_token_set_f1": 0.3462160504998833,
      "eval_arxiv_token_set_f1_sem": 0.00428577945847585,
      "eval_arxiv_token_set_precision": 0.29505246950577924,
      "eval_arxiv_token_set_recall": 0.4372360848286974,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 101875
    },
    {
      "epoch": 19.56,
      "eval_python_code_alpaca_accuracy": 0.1566875,
      "eval_python_code_alpaca_bleu_score": 4.185241386519374,
      "eval_python_code_alpaca_bleu_score_sem": 0.1276186431473251,
      "eval_python_code_alpaca_emb_cos_sim": 0.7340362668037415,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010707734819357603,
      "eval_python_code_alpaca_emb_top1_equal": 0.140625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.967298984527588,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.368,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.626,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.768,
      "eval_python_code_alpaca_num_pred_words": 42.766,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 19.439342657142458,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3179384309369828,
      "eval_python_code_alpaca_runtime": 10.686,
      "eval_python_code_alpaca_samples_per_second": 46.79,
      "eval_python_code_alpaca_steps_per_second": 0.094,
      "eval_python_code_alpaca_token_set_f1": 0.4667173101722582,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005810604308319629,
      "eval_python_code_alpaca_token_set_precision": 0.5093183807723372,
      "eval_python_code_alpaca_token_set_recall": 0.4568380514323313,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 101875
    },
    {
      "epoch": 19.56,
      "eval_wikibio_accuracy": 0.31896875,
      "eval_wikibio_bleu_score": 5.992545877183331,
      "eval_wikibio_bleu_score_sem": 0.21551527606006293,
      "eval_wikibio_emb_cos_sim": 0.7440139055252075,
      "eval_wikibio_emb_cos_sim_sem": 0.008596680836965912,
      "eval_wikibio_emb_top1_equal": 0.1875,
      "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.797386407852173,
      "eval_wikibio_n_ngrams_match_1": 10.366,
      "eval_wikibio_n_ngrams_match_2": 3.462,
      "eval_wikibio_n_ngrams_match_3": 1.258,
      "eval_wikibio_num_pred_words": 37.002,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 44.58450636944242,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.35732610885301463,
      "eval_wikibio_runtime": 10.89,
      "eval_wikibio_samples_per_second": 45.914,
      "eval_wikibio_steps_per_second": 0.092,
      "eval_wikibio_token_set_f1": 0.32272176503670524,
      "eval_wikibio_token_set_f1_sem": 0.005234541721493347,
      "eval_wikibio_token_set_precision": 0.33427859464187826,
      "eval_wikibio_token_set_recall": 0.32624952345419367,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 101875
    },
    {
      "epoch": 19.56,
      "eval_nq_accuracy": 0.52353125,
      "eval_nq_bleu_score": 11.569392119807638,
      "eval_nq_bleu_score_sem": 0.47966337178858387,
      "eval_nq_emb_cos_sim": 0.8315171003341675,
      "eval_nq_emb_cos_sim_sem": 0.007283858530867693,
      "eval_nq_emb_top1_equal": 0.3046875,
      "eval_nq_emb_top1_equal_sem": 0.04084279867618665,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.2224278450012207,
      "eval_nq_n_ngrams_match_1": 22.988,
      "eval_nq_n_ngrams_match_2": 8.41,
      "eval_nq_n_ngrams_match_3": 3.81,
      "eval_nq_num_pred_words": 49.364,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.229711996063415,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4425375167605566,
      "eval_nq_runtime": 12.1825,
      "eval_nq_samples_per_second": 41.042,
      "eval_nq_steps_per_second": 0.082,
      "eval_nq_token_set_f1": 0.4591295117525607,
      "eval_nq_token_set_f1_sem": 0.00494357544052548,
      "eval_nq_token_set_precision": 0.41454146807500586,
      "eval_nq_token_set_recall": 0.5213796359466645,
      "eval_nq_true_num_tokens": 64.0,
      "step": 101875
    },
    {
      "epoch": 19.56,
      "learning_rate": 0.001,
      "loss": 2.598,
      "step": 101880
    },
    {
      "epoch": 19.56,
      "learning_rate": 0.001,
      "loss": 2.6049,
      "step": 101892
    },
    {
      "epoch": 19.57,
      "learning_rate": 0.001,
      "loss": 2.5985,
      "step": 101904
    },
    {
      "epoch": 19.57,
      "learning_rate": 0.001,
      "loss": 2.6028,
      "step": 101916
    },
    {
      "epoch": 19.57,
      "learning_rate": 0.001,
      "loss": 2.6106,
      "step": 101928
    },
    {
      "epoch": 19.57,
      "learning_rate": 0.001,
      "loss": 2.5947,
      "step": 101940
    },
    {
      "epoch": 19.58,
      "learning_rate": 0.001,
      "loss": 2.6036,
      "step": 101952
    },
    {
      "epoch": 19.58,
      "learning_rate": 0.001,
      "loss": 2.6085,
      "step": 101964
    },
    {
      "epoch": 19.58,
      "learning_rate": 0.001,
      "loss": 2.5751,
      "step": 101976
    },
    {
      "epoch": 19.58,
      "learning_rate": 0.001,
      "loss": 2.5955,
      "step": 101988
    },
    {
      "epoch": 19.59,
      "learning_rate": 0.001,
      "loss": 2.6069,
      "step": 102000
    },
    {
      "epoch": 19.59,
      "learning_rate": 0.001,
      "loss": 2.6089,
      "step": 102012
    },
    {
      "epoch": 19.59,
      "learning_rate": 0.001,
      "loss": 2.6046,
      "step": 102024
    },
    {
      "epoch": 19.59,
      "learning_rate": 0.001,
      "loss": 2.5961,
      "step": 102036
    },
    {
      "epoch": 19.59,
      "learning_rate": 0.001,
      "loss": 2.6012,
      "step": 102048
    },
    {
      "epoch": 19.6,
      "learning_rate": 0.001,
      "loss": 2.6039,
      "step": 102060
    },
    {
      "epoch": 19.6,
      "learning_rate": 0.001,
      "loss": 2.6086,
      "step": 102072
    },
    {
      "epoch": 19.6,
      "learning_rate": 0.001,
      "loss": 2.5996,
      "step": 102084
    },
    {
      "epoch": 19.6,
      "learning_rate": 0.001,
      "loss": 2.6006,
      "step": 102096
    },
    {
      "epoch": 19.61,
      "learning_rate": 0.001,
      "loss": 2.6014,
      "step": 102108
    },
    {
      "epoch": 19.61,
      "learning_rate": 0.001,
      "loss": 2.601,
      "step": 102120
    },
    {
      "epoch": 19.61,
      "learning_rate": 0.001,
      "loss": 2.5975,
      "step": 102132
    },
    {
      "epoch": 19.61,
      "learning_rate": 0.001,
      "loss": 2.6014,
      "step": 102144
    },
    {
      "epoch": 19.62,
      "learning_rate": 0.001,
      "loss": 2.6065,
      "step": 102156
    },
    {
      "epoch": 19.62,
      "learning_rate": 0.001,
      "loss": 2.6078,
      "step": 102168
    },
    {
      "epoch": 19.62,
      "learning_rate": 0.001,
      "loss": 2.5905,
      "step": 102180
    },
    {
      "epoch": 19.62,
      "learning_rate": 0.001,
      "loss": 2.6107,
      "step": 102192
    },
    {
      "epoch": 19.62,
      "learning_rate": 0.001,
      "loss": 2.5995,
      "step": 102204
    },
    {
      "epoch": 19.63,
      "learning_rate": 0.001,
      "loss": 2.6061,
      "step": 102216
    },
    {
      "epoch": 19.63,
      "learning_rate": 0.001,
      "loss": 2.6102,
      "step": 102228
    },
    {
      "epoch": 19.63,
      "learning_rate": 0.001,
      "loss": 2.5967,
      "step": 102240
    },
    {
      "epoch": 19.63,
      "learning_rate": 0.001,
      "loss": 2.5952,
      "step": 102252
    },
    {
      "epoch": 19.64,
      "learning_rate": 0.001,
      "loss": 2.609,
      "step": 102264
    },
    {
      "epoch": 19.64,
      "learning_rate": 0.001,
      "loss": 2.6074,
      "step": 102276
    },
    {
      "epoch": 19.64,
      "learning_rate": 0.001,
      "loss": 2.6021,
      "step": 102288
    },
    {
      "epoch": 19.64,
      "learning_rate": 0.001,
      "loss": 2.5935,
      "step": 102300
    },
    {
      "epoch": 19.65,
      "learning_rate": 0.001,
      "loss": 2.5947,
      "step": 102312
    },
    {
      "epoch": 19.65,
      "learning_rate": 0.001,
      "loss": 2.6023,
      "step": 102324
    },
    {
      "epoch": 19.65,
      "learning_rate": 0.001,
      "loss": 2.6119,
      "step": 102336
    },
    {
      "epoch": 19.65,
      "learning_rate": 0.001,
      "loss": 2.5924,
      "step": 102348
    },
    {
      "epoch": 19.65,
      "learning_rate": 0.001,
      "loss": 2.5955,
      "step": 102360
    },
    {
      "epoch": 19.66,
      "learning_rate": 0.001,
      "loss": 2.603,
      "step": 102372
    },
    {
      "epoch": 19.66,
      "learning_rate": 0.001,
      "loss": 2.5935,
      "step": 102384
    },
    {
      "epoch": 19.66,
      "learning_rate": 0.001,
      "loss": 2.6022,
      "step": 102396
    },
    {
      "epoch": 19.66,
      "learning_rate": 0.001,
      "loss": 2.5943,
      "step": 102408
    },
    {
      "epoch": 19.67,
      "learning_rate": 0.001,
      "loss": 2.6027,
      "step": 102420
    },
    {
      "epoch": 19.67,
      "learning_rate": 0.001,
      "loss": 2.5944,
      "step": 102432
    },
    {
      "epoch": 19.67,
      "learning_rate": 0.001,
      "loss": 2.5984,
      "step": 102444
    },
    {
      "epoch": 19.67,
      "learning_rate": 0.001,
      "loss": 2.5968,
      "step": 102456
    },
    {
      "epoch": 19.68,
      "learning_rate": 0.001,
      "loss": 2.5983,
      "step": 102468
    },
    {
      "epoch": 19.68,
      "learning_rate": 0.001,
      "loss": 2.6033,
      "step": 102480
    },
    {
      "epoch": 19.68,
      "learning_rate": 0.001,
      "loss": 2.6004,
      "step": 102492
    },
    {
      "epoch": 19.68,
      "eval_ag_news_accuracy": 0.3173125,
      "eval_ag_news_bleu_score": 4.691423623628973,
      "eval_ag_news_bleu_score_sem": 0.15466133768682222,
      "eval_ag_news_emb_cos_sim": 0.7932641506195068,
      "eval_ag_news_emb_cos_sim_sem": 0.008623128642951006,
      "eval_ag_news_emb_top1_equal": 0.203125,
      "eval_ag_news_emb_top1_equal_sem": 0.03570055125142555,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.60044527053833,
      "eval_ag_news_n_ngrams_match_1": 13.744,
      "eval_ag_news_n_ngrams_match_2": 2.996,
      "eval_ag_news_n_ngrams_match_3": 0.846,
      "eval_ag_news_num_pred_words": 46.362,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 36.61453418785928,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3402795388194745,
      "eval_ag_news_runtime": 10.5343,
      "eval_ag_news_samples_per_second": 47.464,
      "eval_ag_news_steps_per_second": 0.095,
      "eval_ag_news_token_set_f1": 0.3445436018691345,
      "eval_ag_news_token_set_f1_sem": 0.004439842698434328,
      "eval_ag_news_token_set_precision": 0.32660452770689047,
      "eval_ag_news_token_set_recall": 0.382334889985528,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 102500
    },
    {
      "epoch": 19.68,
      "eval_anthropic_toxic_prompts_accuracy": 0.11165625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.038927182508756,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1166048093642569,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6699259281158447,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008529760861153724,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.3088090419769287,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.014,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.846,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.674,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.698,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 27.352530350572962,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20770925195551268,
      "eval_anthropic_toxic_prompts_runtime": 11.0456,
      "eval_anthropic_toxic_prompts_samples_per_second": 45.267,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.091,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3506336904106731,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006533185214663563,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.42733017352350927,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3232101722749988,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 102500
    },
    {
      "epoch": 19.68,
      "eval_arxiv_accuracy": 0.34353125,
      "eval_arxiv_bleu_score": 4.381685962133321,
      "eval_arxiv_bleu_score_sem": 0.12864199554866626,
      "eval_arxiv_emb_cos_sim": 0.7539971470832825,
      "eval_arxiv_emb_cos_sim_sem": 0.008171694407095328,
      "eval_arxiv_emb_top1_equal": 0.2578125,
      "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4614906311035156,
      "eval_arxiv_n_ngrams_match_1": 14.888,
      "eval_arxiv_n_ngrams_match_2": 2.958,
      "eval_arxiv_n_ngrams_match_3": 0.682,
      "eval_arxiv_num_pred_words": 40.188,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 31.864439255410645,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3545552164503677,
      "eval_arxiv_runtime": 12.2991,
      "eval_arxiv_samples_per_second": 40.653,
      "eval_arxiv_steps_per_second": 0.081,
      "eval_arxiv_token_set_f1": 0.35171876288349385,
      "eval_arxiv_token_set_f1_sem": 0.004232513574492042,
      "eval_arxiv_token_set_precision": 0.3019476420818245,
      "eval_arxiv_token_set_recall": 0.4430720504923636,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 102500
    },
    {
      "epoch": 19.68,
      "eval_python_code_alpaca_accuracy": 0.156875,
      "eval_python_code_alpaca_bleu_score": 4.579654516055341,
      "eval_python_code_alpaca_bleu_score_sem": 0.1453812580765918,
      "eval_python_code_alpaca_emb_cos_sim": 0.749637246131897,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.01052159984835432,
      "eval_python_code_alpaca_emb_top1_equal": 0.15625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.9196114540100098,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.754,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.87,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.996,
      "eval_python_code_alpaca_num_pred_words": 44.162,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.534084716244298,
      "eval_python_code_alpaca_pred_num_tokens": 62.9921875,
      "eval_python_code_alpaca_rouge_score": 0.3267327343477824,
      "eval_python_code_alpaca_runtime": 10.2488,
      "eval_python_code_alpaca_samples_per_second": 48.786,
      "eval_python_code_alpaca_steps_per_second": 0.098,
      "eval_python_code_alpaca_token_set_f1": 0.4749646198966025,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005584243114920346,
      "eval_python_code_alpaca_token_set_precision": 0.5326308208430273,
      "eval_python_code_alpaca_token_set_recall": 0.4489898794251858,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 102500
    },
    {
      "epoch": 19.68,
      "eval_wikibio_accuracy": 0.31878125,
      "eval_wikibio_bleu_score": 5.8178783076919345,
      "eval_wikibio_bleu_score_sem": 0.2051005964325302,
      "eval_wikibio_emb_cos_sim": 0.7350831031799316,
      "eval_wikibio_emb_cos_sim_sem": 0.009765998392240599,
      "eval_wikibio_emb_top1_equal": 0.140625,
      "eval_wikibio_emb_top1_equal_sem": 0.030847557647994725,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.8103599548339844,
      "eval_wikibio_n_ngrams_match_1": 10.184,
      "eval_wikibio_n_ngrams_match_2": 3.398,
      "eval_wikibio_n_ngrams_match_3": 1.202,
      "eval_wikibio_num_pred_words": 37.13,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 45.1666939104105,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3548268860363404,
      "eval_wikibio_runtime": 11.5237,
      "eval_wikibio_samples_per_second": 43.389,
      "eval_wikibio_steps_per_second": 0.087,
      "eval_wikibio_token_set_f1": 0.31853611382067815,
      "eval_wikibio_token_set_f1_sem": 0.005338818709232502,
      "eval_wikibio_token_set_precision": 0.331798315823681,
      "eval_wikibio_token_set_recall": 0.321959454461384,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 102500
    },
    {
      "epoch": 19.68,
      "eval_nq_accuracy": 0.52290625,
      "eval_nq_bleu_score": 11.631830093739987,
      "eval_nq_bleu_score_sem": 0.4752784839567903,
      "eval_nq_emb_cos_sim": 0.8363233804702759,
      "eval_nq_emb_cos_sim_sem": 0.006605236317925597,
      "eval_nq_emb_top1_equal": 0.296875,
      "eval_nq_emb_top1_equal_sem": 0.04054163310179599,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.2226545810699463,
      "eval_nq_n_ngrams_match_1": 23.05,
      "eval_nq_n_ngrams_match_2": 8.416,
      "eval_nq_n_ngrams_match_3": 3.862,
      "eval_nq_num_pred_words": 49.088,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.231804941941066,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4451820902385734,
      "eval_nq_runtime": 10.5124,
      "eval_nq_samples_per_second": 47.563,
      "eval_nq_steps_per_second": 0.095,
      "eval_nq_token_set_f1": 0.45973034526634554,
      "eval_nq_token_set_f1_sem": 0.004803582203352779,
      "eval_nq_token_set_precision": 0.41600693268306,
      "eval_nq_token_set_recall": 0.5203950705862183,
      "eval_nq_true_num_tokens": 64.0,
      "step": 102500
    },
    {
      "epoch": 19.68,
      "learning_rate": 0.001,
      "loss": 2.59,
      "step": 102504
    },
    {
      "epoch": 19.68,
      "learning_rate": 0.001,
      "loss": 2.6049,
      "step": 102516
    },
    {
      "epoch": 19.69,
      "learning_rate": 0.001,
      "loss": 2.6069,
      "step": 102528
    },
    {
      "epoch": 19.69,
      "learning_rate": 0.001,
      "loss": 2.5974,
      "step": 102540
    },
    {
      "epoch": 19.69,
      "learning_rate": 0.001,
      "loss": 2.6002,
      "step": 102552
    },
    {
      "epoch": 19.69,
      "learning_rate": 0.001,
      "loss": 2.6024,
      "step": 102564
    },
    {
      "epoch": 19.7,
      "learning_rate": 0.001,
      "loss": 2.6021,
      "step": 102576
    },
    {
      "epoch": 19.7,
      "learning_rate": 0.001,
      "loss": 2.6002,
      "step": 102588
    },
    {
      "epoch": 19.7,
      "learning_rate": 0.001,
      "loss": 2.5889,
      "step": 102600
    },
    {
      "epoch": 19.7,
      "learning_rate": 0.001,
      "loss": 2.6066,
      "step": 102612
    },
    {
      "epoch": 19.71,
      "learning_rate": 0.001,
      "loss": 2.6015,
      "step": 102624
    },
    {
      "epoch": 19.71,
      "learning_rate": 0.001,
      "loss": 2.5972,
      "step": 102636
    },
    {
      "epoch": 19.71,
      "learning_rate": 0.001,
      "loss": 2.6029,
      "step": 102648
    },
    {
      "epoch": 19.71,
      "learning_rate": 0.001,
      "loss": 2.599,
      "step": 102660
    },
    {
      "epoch": 19.71,
      "learning_rate": 0.001,
      "loss": 2.5969,
      "step": 102672
    },
    {
      "epoch": 19.72,
      "learning_rate": 0.001,
      "loss": 2.5982,
      "step": 102684
    },
    {
      "epoch": 19.72,
      "learning_rate": 0.001,
      "loss": 2.596,
      "step": 102696
    },
    {
      "epoch": 19.72,
      "learning_rate": 0.001,
      "loss": 2.5877,
      "step": 102708
    },
    {
      "epoch": 19.72,
      "learning_rate": 0.001,
      "loss": 2.6028,
      "step": 102720
    },
    {
      "epoch": 19.73,
      "learning_rate": 0.001,
      "loss": 2.6034,
      "step": 102732
    },
    {
      "epoch": 19.73,
      "learning_rate": 0.001,
      "loss": 2.5918,
      "step": 102744
    },
    {
      "epoch": 19.73,
      "learning_rate": 0.001,
      "loss": 2.5882,
      "step": 102756
    },
    {
      "epoch": 19.73,
      "learning_rate": 0.001,
      "loss": 2.6035,
      "step": 102768
    },
    {
      "epoch": 19.74,
      "learning_rate": 0.001,
      "loss": 2.5902,
      "step": 102780
    },
    {
      "epoch": 19.74,
      "learning_rate": 0.001,
      "loss": 2.6089,
      "step": 102792
    },
    {
      "epoch": 19.74,
      "learning_rate": 0.001,
      "loss": 2.5955,
      "step": 102804
    },
    {
      "epoch": 19.74,
      "learning_rate": 0.001,
      "loss": 2.5986,
      "step": 102816
    },
    {
      "epoch": 19.74,
      "learning_rate": 0.001,
      "loss": 2.5908,
      "step": 102828
    },
    {
      "epoch": 19.75,
      "learning_rate": 0.001,
      "loss": 2.6112,
      "step": 102840
    },
    {
      "epoch": 19.75,
      "learning_rate": 0.001,
      "loss": 2.5974,
      "step": 102852
    },
    {
      "epoch": 19.75,
      "learning_rate": 0.001,
      "loss": 2.6036,
      "step": 102864
    },
    {
      "epoch": 19.75,
      "learning_rate": 0.001,
      "loss": 2.5963,
      "step": 102876
    },
    {
      "epoch": 19.76,
      "learning_rate": 0.001,
      "loss": 2.5966,
      "step": 102888
    },
    {
      "epoch": 19.76,
      "learning_rate": 0.001,
      "loss": 2.6023,
      "step": 102900
    },
    {
      "epoch": 19.76,
      "learning_rate": 0.001,
      "loss": 2.5957,
      "step": 102912
    },
    {
      "epoch": 19.76,
      "learning_rate": 0.001,
      "loss": 2.5988,
      "step": 102924
    },
    {
      "epoch": 19.76,
      "learning_rate": 0.001,
      "loss": 2.5974,
      "step": 102936
    },
    {
      "epoch": 19.77,
      "learning_rate": 0.001,
      "loss": 2.5908,
      "step": 102948
    },
    {
      "epoch": 19.77,
      "learning_rate": 0.001,
      "loss": 2.5999,
      "step": 102960
    },
    {
      "epoch": 19.77,
      "learning_rate": 0.001,
      "loss": 2.6026,
      "step": 102972
    },
    {
      "epoch": 19.77,
      "learning_rate": 0.001,
      "loss": 2.6008,
      "step": 102984
    },
    {
      "epoch": 19.78,
      "learning_rate": 0.001,
      "loss": 2.599,
      "step": 102996
    },
    {
      "epoch": 19.78,
      "learning_rate": 0.001,
      "loss": 2.6026,
      "step": 103008
    },
    {
      "epoch": 19.78,
      "learning_rate": 0.001,
      "loss": 2.6083,
      "step": 103020
    },
    {
      "epoch": 19.78,
      "learning_rate": 0.001,
      "loss": 2.606,
      "step": 103032
    },
    {
      "epoch": 19.79,
      "learning_rate": 0.001,
      "loss": 2.5947,
      "step": 103044
    },
    {
      "epoch": 19.79,
      "learning_rate": 0.001,
      "loss": 2.6033,
      "step": 103056
    },
    {
      "epoch": 19.79,
      "learning_rate": 0.001,
      "loss": 2.6062,
      "step": 103068
    },
    {
      "epoch": 19.79,
      "learning_rate": 0.001,
      "loss": 2.6004,
      "step": 103080
    },
    {
      "epoch": 19.79,
      "learning_rate": 0.001,
      "loss": 2.6022,
      "step": 103092
    },
    {
      "epoch": 19.8,
      "learning_rate": 0.001,
      "loss": 2.6023,
      "step": 103104
    },
    {
      "epoch": 19.8,
      "learning_rate": 0.001,
      "loss": 2.5889,
      "step": 103116
    },
    {
      "epoch": 19.8,
      "eval_ag_news_accuracy": 0.31709375,
      "eval_ag_news_bleu_score": 4.607937684504936,
      "eval_ag_news_bleu_score_sem": 0.14321047998175232,
      "eval_ag_news_emb_cos_sim": 0.802483081817627,
      "eval_ag_news_emb_cos_sim_sem": 0.0072418084883994725,
      "eval_ag_news_emb_top1_equal": 0.2421875,
      "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.601728916168213,
      "eval_ag_news_n_ngrams_match_1": 13.78,
      "eval_ag_news_n_ngrams_match_2": 2.916,
      "eval_ag_news_n_ngrams_match_3": 0.77,
      "eval_ag_news_num_pred_words": 46.534,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 36.66156445329925,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.34030002597252956,
      "eval_ag_news_runtime": 12.3707,
      "eval_ag_news_samples_per_second": 40.418,
      "eval_ag_news_steps_per_second": 0.081,
      "eval_ag_news_token_set_f1": 0.34443955534310594,
      "eval_ag_news_token_set_f1_sem": 0.004312122583042378,
      "eval_ag_news_token_set_precision": 0.3280786270056416,
      "eval_ag_news_token_set_recall": 0.37649928616567674,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 103125
    },
    {
      "epoch": 19.8,
      "eval_anthropic_toxic_prompts_accuracy": 0.11309375,
      "eval_anthropic_toxic_prompts_bleu_score": 3.0478889488619254,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12645050526843735,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6633448600769043,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009141090362092114,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2605907917022705,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.052,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.838,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.664,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.392,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.064931539903483,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20870707256624718,
      "eval_anthropic_toxic_prompts_runtime": 9.9192,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.407,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.101,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3507397207628983,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006486808460927362,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.42381584074695283,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3283925144215763,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 103125
    },
    {
      "epoch": 19.8,
      "eval_arxiv_accuracy": 0.34275,
      "eval_arxiv_bleu_score": 4.288245254818205,
      "eval_arxiv_bleu_score_sem": 0.11968165039450031,
      "eval_arxiv_emb_cos_sim": 0.7569453716278076,
      "eval_arxiv_emb_cos_sim_sem": 0.008195620646159356,
      "eval_arxiv_emb_top1_equal": 0.2578125,
      "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4457015991210938,
      "eval_arxiv_n_ngrams_match_1": 14.974,
      "eval_arxiv_n_ngrams_match_2": 2.942,
      "eval_arxiv_n_ngrams_match_3": 0.648,
      "eval_arxiv_num_pred_words": 40.822,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 31.365281582821886,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3566035159297212,
      "eval_arxiv_runtime": 10.1589,
      "eval_arxiv_samples_per_second": 49.218,
      "eval_arxiv_steps_per_second": 0.098,
      "eval_arxiv_token_set_f1": 0.34963592612515726,
      "eval_arxiv_token_set_f1_sem": 0.004042362921905531,
      "eval_arxiv_token_set_precision": 0.30014976224361234,
      "eval_arxiv_token_set_recall": 0.4386803905042103,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 103125
    },
    {
      "epoch": 19.8,
      "eval_python_code_alpaca_accuracy": 0.15715625,
      "eval_python_code_alpaca_bleu_score": 4.326761162920418,
      "eval_python_code_alpaca_bleu_score_sem": 0.14400794714798654,
      "eval_python_code_alpaca_emb_cos_sim": 0.7477049231529236,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.01158838136519159,
      "eval_python_code_alpaca_emb_top1_equal": 0.140625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.9304184913635254,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.566,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.766,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.856,
      "eval_python_code_alpaca_num_pred_words": 43.656,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.73546948845584,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3205807638372131,
      "eval_python_code_alpaca_runtime": 10.042,
      "eval_python_code_alpaca_samples_per_second": 49.791,
      "eval_python_code_alpaca_steps_per_second": 0.1,
      "eval_python_code_alpaca_token_set_f1": 0.46741444420144385,
      "eval_python_code_alpaca_token_set_f1_sem": 0.006132715892511715,
      "eval_python_code_alpaca_token_set_precision": 0.5229858419161856,
      "eval_python_code_alpaca_token_set_recall": 0.4422477184574977,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 103125
    },
    {
      "epoch": 19.8,
      "eval_wikibio_accuracy": 0.31846875,
      "eval_wikibio_bleu_score": 5.731702906131469,
      "eval_wikibio_bleu_score_sem": 0.20794995462695767,
      "eval_wikibio_emb_cos_sim": 0.7460880875587463,
      "eval_wikibio_emb_cos_sim_sem": 0.00848050700031268,
      "eval_wikibio_emb_top1_equal": 0.203125,
      "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.798103094100952,
      "eval_wikibio_n_ngrams_match_1": 9.7,
      "eval_wikibio_n_ngrams_match_2": 3.188,
      "eval_wikibio_n_ngrams_match_3": 1.146,
      "eval_wikibio_num_pred_words": 35.244,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 44.616470924976525,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.34378730685016656,
      "eval_wikibio_runtime": 9.9569,
      "eval_wikibio_samples_per_second": 50.216,
      "eval_wikibio_steps_per_second": 0.1,
      "eval_wikibio_token_set_f1": 0.30734571230261204,
      "eval_wikibio_token_set_f1_sem": 0.005821506104100853,
      "eval_wikibio_token_set_precision": 0.3133343318682684,
      "eval_wikibio_token_set_recall": 0.3177849725596456,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 103125
    },
    {
      "epoch": 19.8,
      "eval_nq_accuracy": 0.52165625,
      "eval_nq_bleu_score": 11.19061555193629,
      "eval_nq_bleu_score_sem": 0.46693828374467233,
      "eval_nq_emb_cos_sim": 0.8303855657577515,
      "eval_nq_emb_cos_sim_sem": 0.007060565186001485,
      "eval_nq_emb_top1_equal": 0.25,
      "eval_nq_emb_top1_equal_sem": 0.03842366440207048,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.220346689224243,
      "eval_nq_n_ngrams_match_1": 22.662,
      "eval_nq_n_ngrams_match_2": 8.196,
      "eval_nq_n_ngrams_match_3": 3.686,
      "eval_nq_num_pred_words": 49.054,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.210523501671728,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.43750425848509755,
      "eval_nq_runtime": 10.4023,
      "eval_nq_samples_per_second": 48.066,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.45334682762444267,
      "eval_nq_token_set_f1_sem": 0.0049616936657383695,
      "eval_nq_token_set_precision": 0.4100297675442324,
      "eval_nq_token_set_recall": 0.5151700213104357,
      "eval_nq_true_num_tokens": 64.0,
      "step": 103125
    },
    {
      "epoch": 19.8,
      "learning_rate": 0.001,
      "loss": 2.5988,
      "step": 103128
    },
    {
      "epoch": 19.8,
      "learning_rate": 0.001,
      "loss": 2.6066,
      "step": 103140
    },
    {
      "epoch": 19.81,
      "learning_rate": 0.001,
      "loss": 2.6019,
      "step": 103152
    },
    {
      "epoch": 19.81,
      "learning_rate": 0.001,
      "loss": 2.5935,
      "step": 103164
    },
    {
      "epoch": 19.81,
      "learning_rate": 0.001,
      "loss": 2.5946,
      "step": 103176
    },
    {
      "epoch": 19.81,
      "learning_rate": 0.001,
      "loss": 2.6025,
      "step": 103188
    },
    {
      "epoch": 19.82,
      "learning_rate": 0.001,
      "loss": 2.6019,
      "step": 103200
    },
    {
      "epoch": 19.82,
      "learning_rate": 0.001,
      "loss": 2.5965,
      "step": 103212
    },
    {
      "epoch": 19.82,
      "learning_rate": 0.001,
      "loss": 2.6026,
      "step": 103224
    },
    {
      "epoch": 19.82,
      "learning_rate": 0.001,
      "loss": 2.5891,
      "step": 103236
    },
    {
      "epoch": 19.82,
      "learning_rate": 0.001,
      "loss": 2.601,
      "step": 103248
    },
    {
      "epoch": 19.83,
      "learning_rate": 0.001,
      "loss": 2.5996,
      "step": 103260
    },
    {
      "epoch": 19.83,
      "learning_rate": 0.001,
      "loss": 2.5999,
      "step": 103272
    },
    {
      "epoch": 19.83,
      "learning_rate": 0.001,
      "loss": 2.5884,
      "step": 103284
    },
    {
      "epoch": 19.83,
      "learning_rate": 0.001,
      "loss": 2.6001,
      "step": 103296
    },
    {
      "epoch": 19.84,
      "learning_rate": 0.001,
      "loss": 2.5985,
      "step": 103308
    },
    {
      "epoch": 19.84,
      "learning_rate": 0.001,
      "loss": 2.6041,
      "step": 103320
    },
    {
      "epoch": 19.84,
      "learning_rate": 0.001,
      "loss": 2.5999,
      "step": 103332
    },
    {
      "epoch": 19.84,
      "learning_rate": 0.001,
      "loss": 2.5856,
      "step": 103344
    },
    {
      "epoch": 19.85,
      "learning_rate": 0.001,
      "loss": 2.5925,
      "step": 103356
    },
    {
      "epoch": 19.85,
      "learning_rate": 0.001,
      "loss": 2.6027,
      "step": 103368
    },
    {
      "epoch": 19.85,
      "learning_rate": 0.001,
      "loss": 2.5984,
      "step": 103380
    },
    {
      "epoch": 19.85,
      "learning_rate": 0.001,
      "loss": 2.6,
      "step": 103392
    },
    {
      "epoch": 19.85,
      "learning_rate": 0.001,
      "loss": 2.5915,
      "step": 103404
    },
    {
      "epoch": 19.86,
      "learning_rate": 0.001,
      "loss": 2.5929,
      "step": 103416
    },
    {
      "epoch": 19.86,
      "learning_rate": 0.001,
      "loss": 2.5969,
      "step": 103428
    },
    {
      "epoch": 19.86,
      "learning_rate": 0.001,
      "loss": 2.6028,
      "step": 103440
    },
    {
      "epoch": 19.86,
      "learning_rate": 0.001,
      "loss": 2.6007,
      "step": 103452
    },
    {
      "epoch": 19.87,
      "learning_rate": 0.001,
      "loss": 2.6043,
      "step": 103464
    },
    {
      "epoch": 19.87,
      "learning_rate": 0.001,
      "loss": 2.597,
      "step": 103476
    },
    {
      "epoch": 19.87,
      "learning_rate": 0.001,
      "loss": 2.6008,
      "step": 103488
    },
    {
      "epoch": 19.87,
      "learning_rate": 0.001,
      "loss": 2.601,
      "step": 103500
    },
    {
      "epoch": 19.88,
      "learning_rate": 0.001,
      "loss": 2.6046,
      "step": 103512
    },
    {
      "epoch": 19.88,
      "learning_rate": 0.001,
      "loss": 2.5902,
      "step": 103524
    },
    {
      "epoch": 19.88,
      "learning_rate": 0.001,
      "loss": 2.5969,
      "step": 103536
    },
    {
      "epoch": 19.88,
      "learning_rate": 0.001,
      "loss": 2.6042,
      "step": 103548
    },
    {
      "epoch": 19.88,
      "learning_rate": 0.001,
      "loss": 2.5924,
      "step": 103560
    },
    {
      "epoch": 19.89,
      "learning_rate": 0.001,
      "loss": 2.604,
      "step": 103572
    },
    {
      "epoch": 19.89,
      "learning_rate": 0.001,
      "loss": 2.5983,
      "step": 103584
    },
    {
      "epoch": 19.89,
      "learning_rate": 0.001,
      "loss": 2.5933,
      "step": 103596
    },
    {
      "epoch": 19.89,
      "learning_rate": 0.001,
      "loss": 2.6005,
      "step": 103608
    },
    {
      "epoch": 19.9,
      "learning_rate": 0.001,
      "loss": 2.5921,
      "step": 103620
    },
    {
      "epoch": 19.9,
      "learning_rate": 0.001,
      "loss": 2.5911,
      "step": 103632
    },
    {
      "epoch": 19.9,
      "learning_rate": 0.001,
      "loss": 2.607,
      "step": 103644
    },
    {
      "epoch": 19.9,
      "learning_rate": 0.001,
      "loss": 2.6071,
      "step": 103656
    },
    {
      "epoch": 19.91,
      "learning_rate": 0.001,
      "loss": 2.6034,
      "step": 103668
    },
    {
      "epoch": 19.91,
      "learning_rate": 0.001,
      "loss": 2.591,
      "step": 103680
    },
    {
      "epoch": 19.91,
      "learning_rate": 0.001,
      "loss": 2.6005,
      "step": 103692
    },
    {
      "epoch": 19.91,
      "learning_rate": 0.001,
      "loss": 2.6062,
      "step": 103704
    },
    {
      "epoch": 19.91,
      "learning_rate": 0.001,
      "loss": 2.594,
      "step": 103716
    },
    {
      "epoch": 19.92,
      "learning_rate": 0.001,
      "loss": 2.5969,
      "step": 103728
    },
    {
      "epoch": 19.92,
      "learning_rate": 0.001,
      "loss": 2.595,
      "step": 103740
    },
    {
      "epoch": 19.92,
      "eval_ag_news_accuracy": 0.3181875,
      "eval_ag_news_bleu_score": 4.715667647533455,
      "eval_ag_news_bleu_score_sem": 0.15630175575770888,
      "eval_ag_news_emb_cos_sim": 0.8031496405601501,
      "eval_ag_news_emb_cos_sim_sem": 0.006938623816547341,
      "eval_ag_news_emb_top1_equal": 0.2421875,
      "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5962557792663574,
      "eval_ag_news_n_ngrams_match_1": 13.736,
      "eval_ag_news_n_ngrams_match_2": 2.966,
      "eval_ag_news_n_ngrams_match_3": 0.84,
      "eval_ag_news_num_pred_words": 46.312,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 36.46145879436054,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.34195502514304654,
      "eval_ag_news_runtime": 10.6348,
      "eval_ag_news_samples_per_second": 47.015,
      "eval_ag_news_steps_per_second": 0.094,
      "eval_ag_news_token_set_f1": 0.3434867870513179,
      "eval_ag_news_token_set_f1_sem": 0.004414470275763657,
      "eval_ag_news_token_set_precision": 0.32764298635347155,
      "eval_ag_news_token_set_recall": 0.3793269588079093,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 103750
    },
    {
      "epoch": 19.92,
      "eval_anthropic_toxic_prompts_accuracy": 0.1115625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.018182871168191,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11276513876030779,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6696054935455322,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00884096858074661,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.3192715644836426,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.056,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.83,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.648,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.0,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 27.640209113537587,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21006779873229692,
      "eval_anthropic_toxic_prompts_runtime": 9.9518,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.242,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.1,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3520671543164431,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006309386139710659,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.43008164766729573,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3258277909550858,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 103750
    },
    {
      "epoch": 19.92,
      "eval_arxiv_accuracy": 0.34215625,
      "eval_arxiv_bleu_score": 4.176736975809541,
      "eval_arxiv_bleu_score_sem": 0.12467495953943461,
      "eval_arxiv_emb_cos_sim": 0.7531987428665161,
      "eval_arxiv_emb_cos_sim_sem": 0.0075409055744905555,
      "eval_arxiv_emb_top1_equal": 0.2734375,
      "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.453075408935547,
      "eval_arxiv_n_ngrams_match_1": 14.544,
      "eval_arxiv_n_ngrams_match_2": 2.8,
      "eval_arxiv_n_ngrams_match_3": 0.638,
      "eval_arxiv_num_pred_words": 39.818,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 31.59741801711919,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.34731129323909604,
      "eval_arxiv_runtime": 10.0994,
      "eval_arxiv_samples_per_second": 49.508,
      "eval_arxiv_steps_per_second": 0.099,
      "eval_arxiv_token_set_f1": 0.34377904849398316,
      "eval_arxiv_token_set_f1_sem": 0.004463704423932529,
      "eval_arxiv_token_set_precision": 0.29239258164571863,
      "eval_arxiv_token_set_recall": 0.4403505401885056,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 103750
    },
    {
      "epoch": 19.92,
      "eval_python_code_alpaca_accuracy": 0.1574375,
      "eval_python_code_alpaca_bleu_score": 4.403554212258782,
      "eval_python_code_alpaca_bleu_score_sem": 0.13585818842600816,
      "eval_python_code_alpaca_emb_cos_sim": 0.7534340023994446,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008355750353086909,
      "eval_python_code_alpaca_emb_top1_equal": 0.078125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.023813825516515504,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.9384117126464844,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.674,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.866,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.934,
      "eval_python_code_alpaca_num_pred_words": 43.554,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.885826359242937,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3256599766462427,
      "eval_python_code_alpaca_runtime": 9.6925,
      "eval_python_code_alpaca_samples_per_second": 51.586,
      "eval_python_code_alpaca_steps_per_second": 0.103,
      "eval_python_code_alpaca_token_set_f1": 0.4706860314796137,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005952787845075403,
      "eval_python_code_alpaca_token_set_precision": 0.528415100865822,
      "eval_python_code_alpaca_token_set_recall": 0.4505679422166955,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 103750
    },
    {
      "epoch": 19.92,
      "eval_wikibio_accuracy": 0.31825,
      "eval_wikibio_bleu_score": 5.666073048865579,
      "eval_wikibio_bleu_score_sem": 0.21024273837489288,
      "eval_wikibio_emb_cos_sim": 0.7306700944900513,
      "eval_wikibio_emb_cos_sim_sem": 0.010345679561214356,
      "eval_wikibio_emb_top1_equal": 0.2109375,
      "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7904253005981445,
      "eval_wikibio_n_ngrams_match_1": 9.45,
      "eval_wikibio_n_ngrams_match_2": 3.178,
      "eval_wikibio_n_ngrams_match_3": 1.176,
      "eval_wikibio_num_pred_words": 34.446,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 44.27522655262205,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.33124776114616805,
      "eval_wikibio_runtime": 10.5162,
      "eval_wikibio_samples_per_second": 47.546,
      "eval_wikibio_steps_per_second": 0.095,
      "eval_wikibio_token_set_f1": 0.30305201187485425,
      "eval_wikibio_token_set_f1_sem": 0.006091675061500554,
      "eval_wikibio_token_set_precision": 0.305604218844721,
      "eval_wikibio_token_set_recall": 0.31958889759391196,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 103750
    },
    {
      "epoch": 19.92,
      "eval_nq_accuracy": 0.523625,
      "eval_nq_bleu_score": 11.562758117945734,
      "eval_nq_bleu_score_sem": 0.494669787310588,
      "eval_nq_emb_cos_sim": 0.827433168888092,
      "eval_nq_emb_cos_sim_sem": 0.008588306126457895,
      "eval_nq_emb_top1_equal": 0.3046875,
      "eval_nq_emb_top1_equal_sem": 0.04084279867618665,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.2214205265045166,
      "eval_nq_n_ngrams_match_1": 22.974,
      "eval_nq_n_ngrams_match_2": 8.374,
      "eval_nq_n_ngrams_match_3": 3.82,
      "eval_nq_num_pred_words": 48.762,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.220419417529406,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.44541622854136464,
      "eval_nq_runtime": 10.4349,
      "eval_nq_samples_per_second": 47.916,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.4598731638988288,
      "eval_nq_token_set_f1_sem": 0.005011778048939061,
      "eval_nq_token_set_precision": 0.41685872662466955,
      "eval_nq_token_set_recall": 0.5217251813728859,
      "eval_nq_true_num_tokens": 64.0,
      "step": 103750
    },
    {
      "epoch": 19.92,
      "learning_rate": 0.001,
      "loss": 2.6056,
      "step": 103752
    },
    {
      "epoch": 19.92,
      "learning_rate": 0.001,
      "loss": 2.5962,
      "step": 103764
    },
    {
      "epoch": 19.93,
      "learning_rate": 0.001,
      "loss": 2.6014,
      "step": 103776
    },
    {
      "epoch": 19.93,
      "learning_rate": 0.001,
      "loss": 2.5973,
      "step": 103788
    },
    {
      "epoch": 19.93,
      "learning_rate": 0.001,
      "loss": 2.5865,
      "step": 103800
    },
    {
      "epoch": 19.93,
      "learning_rate": 0.001,
      "loss": 2.6059,
      "step": 103812
    },
    {
      "epoch": 19.94,
      "learning_rate": 0.001,
      "loss": 2.5984,
      "step": 103824
    },
    {
      "epoch": 19.94,
      "learning_rate": 0.001,
      "loss": 2.5989,
      "step": 103836
    },
    {
      "epoch": 19.94,
      "learning_rate": 0.001,
      "loss": 2.6043,
      "step": 103848
    },
    {
      "epoch": 19.94,
      "learning_rate": 0.001,
      "loss": 2.5983,
      "step": 103860
    },
    {
      "epoch": 19.94,
      "learning_rate": 0.001,
      "loss": 2.6014,
      "step": 103872
    },
    {
      "epoch": 19.95,
      "learning_rate": 0.001,
      "loss": 2.6042,
      "step": 103884
    },
    {
      "epoch": 19.95,
      "learning_rate": 0.001,
      "loss": 2.5932,
      "step": 103896
    },
    {
      "epoch": 19.95,
      "learning_rate": 0.001,
      "loss": 2.5925,
      "step": 103908
    },
    {
      "epoch": 19.95,
      "learning_rate": 0.001,
      "loss": 2.5947,
      "step": 103920
    },
    {
      "epoch": 19.96,
      "learning_rate": 0.001,
      "loss": 2.5945,
      "step": 103932
    },
    {
      "epoch": 19.96,
      "learning_rate": 0.001,
      "loss": 2.5927,
      "step": 103944
    },
    {
      "epoch": 19.96,
      "learning_rate": 0.001,
      "loss": 2.5941,
      "step": 103956
    },
    {
      "epoch": 19.96,
      "learning_rate": 0.001,
      "loss": 2.5943,
      "step": 103968
    },
    {
      "epoch": 19.97,
      "learning_rate": 0.001,
      "loss": 2.5984,
      "step": 103980
    },
    {
      "epoch": 19.97,
      "learning_rate": 0.001,
      "loss": 2.6047,
      "step": 103992
    },
    {
      "epoch": 19.97,
      "learning_rate": 0.001,
      "loss": 2.6054,
      "step": 104004
    },
    {
      "epoch": 19.97,
      "learning_rate": 0.001,
      "loss": 2.6002,
      "step": 104016
    },
    {
      "epoch": 19.97,
      "learning_rate": 0.001,
      "loss": 2.5994,
      "step": 104028
    },
    {
      "epoch": 19.98,
      "learning_rate": 0.001,
      "loss": 2.6086,
      "step": 104040
    },
    {
      "epoch": 19.98,
      "learning_rate": 0.001,
      "loss": 2.5946,
      "step": 104052
    },
    {
      "epoch": 19.98,
      "learning_rate": 0.001,
      "loss": 2.5961,
      "step": 104064
    },
    {
      "epoch": 19.98,
      "learning_rate": 0.001,
      "loss": 2.597,
      "step": 104076
    },
    {
      "epoch": 19.99,
      "learning_rate": 0.001,
      "loss": 2.6066,
      "step": 104088
    },
    {
      "epoch": 19.99,
      "learning_rate": 0.001,
      "loss": 2.6019,
      "step": 104100
    },
    {
      "epoch": 19.99,
      "learning_rate": 0.001,
      "loss": 2.5918,
      "step": 104112
    },
    {
      "epoch": 19.99,
      "learning_rate": 0.001,
      "loss": 2.596,
      "step": 104124
    },
    {
      "epoch": 20.0,
      "learning_rate": 0.001,
      "loss": 2.5922,
      "step": 104136
    },
    {
      "epoch": 20.0,
      "learning_rate": 0.001,
      "loss": 2.6008,
      "step": 104148
    },
    {
      "epoch": 20.0,
      "learning_rate": 0.001,
      "loss": 2.5896,
      "step": 104160
    },
    {
      "epoch": 20.0,
      "learning_rate": 0.001,
      "loss": 2.5917,
      "step": 104172
    },
    {
      "epoch": 20.0,
      "learning_rate": 0.001,
      "loss": 2.5753,
      "step": 104184
    },
    {
      "epoch": 20.01,
      "learning_rate": 0.001,
      "loss": 2.5787,
      "step": 104196
    },
    {
      "epoch": 20.01,
      "learning_rate": 0.001,
      "loss": 2.5833,
      "step": 104208
    },
    {
      "epoch": 20.01,
      "learning_rate": 0.001,
      "loss": 2.581,
      "step": 104220
    },
    {
      "epoch": 20.01,
      "learning_rate": 0.001,
      "loss": 2.5944,
      "step": 104232
    },
    {
      "epoch": 20.02,
      "learning_rate": 0.001,
      "loss": 2.5791,
      "step": 104244
    },
    {
      "epoch": 20.02,
      "learning_rate": 0.001,
      "loss": 2.5846,
      "step": 104256
    },
    {
      "epoch": 20.02,
      "learning_rate": 0.001,
      "loss": 2.5908,
      "step": 104268
    },
    {
      "epoch": 20.02,
      "learning_rate": 0.001,
      "loss": 2.5882,
      "step": 104280
    },
    {
      "epoch": 20.03,
      "learning_rate": 0.001,
      "loss": 2.594,
      "step": 104292
    },
    {
      "epoch": 20.03,
      "learning_rate": 0.001,
      "loss": 2.5724,
      "step": 104304
    },
    {
      "epoch": 20.03,
      "learning_rate": 0.001,
      "loss": 2.5755,
      "step": 104316
    },
    {
      "epoch": 20.03,
      "learning_rate": 0.001,
      "loss": 2.5702,
      "step": 104328
    },
    {
      "epoch": 20.03,
      "learning_rate": 0.001,
      "loss": 2.5751,
      "step": 104340
    },
    {
      "epoch": 20.04,
      "learning_rate": 0.001,
      "loss": 2.5809,
      "step": 104352
    },
    {
      "epoch": 20.04,
      "learning_rate": 0.001,
      "loss": 2.5806,
      "step": 104364
    },
    {
      "epoch": 20.04,
      "eval_ag_news_accuracy": 0.31740625,
      "eval_ag_news_bleu_score": 4.883588666029329,
      "eval_ag_news_bleu_score_sem": 0.15723113542537812,
      "eval_ag_news_emb_cos_sim": 0.8079344034194946,
      "eval_ag_news_emb_cos_sim_sem": 0.007645888105775129,
      "eval_ag_news_emb_top1_equal": 0.28125,
      "eval_ag_news_emb_top1_equal_sem": 0.039896367485272234,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5977940559387207,
      "eval_ag_news_n_ngrams_match_1": 14.032,
      "eval_ag_news_n_ngrams_match_2": 3.154,
      "eval_ag_news_n_ngrams_match_3": 0.89,
      "eval_ag_news_num_pred_words": 46.416,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 36.51758976727882,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3469472651954559,
      "eval_ag_news_runtime": 11.2562,
      "eval_ag_news_samples_per_second": 44.42,
      "eval_ag_news_steps_per_second": 0.089,
      "eval_ag_news_token_set_f1": 0.3525941620996267,
      "eval_ag_news_token_set_f1_sem": 0.004374450540016678,
      "eval_ag_news_token_set_precision": 0.3365449646854891,
      "eval_ag_news_token_set_recall": 0.38861080053122043,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 104375
    },
    {
      "epoch": 20.04,
      "eval_anthropic_toxic_prompts_accuracy": 0.11059375,
      "eval_anthropic_toxic_prompts_bleu_score": 2.9586549768073547,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11195752254001154,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6649320721626282,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008099481050527975,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.3028931617736816,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.094,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.834,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.654,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.154,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 27.191193752290268,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21091802648932717,
      "eval_anthropic_toxic_prompts_runtime": 10.8255,
      "eval_anthropic_toxic_prompts_samples_per_second": 46.187,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.092,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35448495895375165,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006407197503116182,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.43185959562365966,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32764787968156023,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 104375
    },
    {
      "epoch": 20.04,
      "eval_arxiv_accuracy": 0.34171875,
      "eval_arxiv_bleu_score": 4.284346862346857,
      "eval_arxiv_bleu_score_sem": 0.12288335203105014,
      "eval_arxiv_emb_cos_sim": 0.7581665515899658,
      "eval_arxiv_emb_cos_sim_sem": 0.009112410748769466,
      "eval_arxiv_emb_top1_equal": 0.3203125,
      "eval_arxiv_emb_top1_equal_sem": 0.041403754790620424,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4507367610931396,
      "eval_arxiv_n_ngrams_match_1": 15.064,
      "eval_arxiv_n_ngrams_match_2": 2.904,
      "eval_arxiv_n_ngrams_match_3": 0.636,
      "eval_arxiv_num_pred_words": 41.05,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 31.52360912379246,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3551188882625711,
      "eval_arxiv_runtime": 10.1463,
      "eval_arxiv_samples_per_second": 49.279,
      "eval_arxiv_steps_per_second": 0.099,
      "eval_arxiv_token_set_f1": 0.3484516683462858,
      "eval_arxiv_token_set_f1_sem": 0.004204622078277267,
      "eval_arxiv_token_set_precision": 0.30191216204063465,
      "eval_arxiv_token_set_recall": 0.429590681575259,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 104375
    },
    {
      "epoch": 20.04,
      "eval_python_code_alpaca_accuracy": 0.1565625,
      "eval_python_code_alpaca_bleu_score": 4.4566096427278215,
      "eval_python_code_alpaca_bleu_score_sem": 0.1318644154584627,
      "eval_python_code_alpaca_emb_cos_sim": 0.739457368850708,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.011631139343005893,
      "eval_python_code_alpaca_emb_top1_equal": 0.125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.9366791248321533,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.678,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.822,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.916,
      "eval_python_code_alpaca_num_pred_words": 43.132,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.85313333657401,
      "eval_python_code_alpaca_pred_num_tokens": 62.9921875,
      "eval_python_code_alpaca_rouge_score": 0.3266450614411218,
      "eval_python_code_alpaca_runtime": 10.9889,
      "eval_python_code_alpaca_samples_per_second": 45.5,
      "eval_python_code_alpaca_steps_per_second": 0.091,
      "eval_python_code_alpaca_token_set_f1": 0.4623365543211727,
      "eval_python_code_alpaca_token_set_f1_sem": 0.00579049073921156,
      "eval_python_code_alpaca_token_set_precision": 0.5247487697138802,
      "eval_python_code_alpaca_token_set_recall": 0.43969554453997806,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 104375
    },
    {
      "epoch": 20.04,
      "eval_wikibio_accuracy": 0.32203125,
      "eval_wikibio_bleu_score": 5.693586258426564,
      "eval_wikibio_bleu_score_sem": 0.21401222196896194,
      "eval_wikibio_emb_cos_sim": 0.7210272550582886,
      "eval_wikibio_emb_cos_sim_sem": 0.011277541771978667,
      "eval_wikibio_emb_top1_equal": 0.09375,
      "eval_wikibio_emb_top1_equal_sem": 0.025864720141013958,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7538135051727295,
      "eval_wikibio_n_ngrams_match_1": 9.782,
      "eval_wikibio_n_ngrams_match_2": 3.264,
      "eval_wikibio_n_ngrams_match_3": 1.184,
      "eval_wikibio_num_pred_words": 35.648,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 42.68354594788247,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.33653755581064637,
      "eval_wikibio_runtime": 10.6279,
      "eval_wikibio_samples_per_second": 47.046,
      "eval_wikibio_steps_per_second": 0.094,
      "eval_wikibio_token_set_f1": 0.30889792740431826,
      "eval_wikibio_token_set_f1_sem": 0.006045139571051343,
      "eval_wikibio_token_set_precision": 0.3135824512940988,
      "eval_wikibio_token_set_recall": 0.32193176092263764,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 104375
    },
    {
      "epoch": 20.04,
      "eval_nq_accuracy": 0.52159375,
      "eval_nq_bleu_score": 11.51127286391578,
      "eval_nq_bleu_score_sem": 0.4610427434499334,
      "eval_nq_emb_cos_sim": 0.8304370641708374,
      "eval_nq_emb_cos_sim_sem": 0.006957488482523013,
      "eval_nq_emb_top1_equal": 0.3046875,
      "eval_nq_emb_top1_equal_sem": 0.04084279867618665,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.2188475131988525,
      "eval_nq_n_ngrams_match_1": 22.922,
      "eval_nq_n_ngrams_match_2": 8.428,
      "eval_nq_n_ngrams_match_3": 3.796,
      "eval_nq_num_pred_words": 48.972,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.196725650944487,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4410085262379106,
      "eval_nq_runtime": 10.6301,
      "eval_nq_samples_per_second": 47.036,
      "eval_nq_steps_per_second": 0.094,
      "eval_nq_token_set_f1": 0.45868090157178976,
      "eval_nq_token_set_f1_sem": 0.004903544929834385,
      "eval_nq_token_set_precision": 0.4157491712440606,
      "eval_nq_token_set_recall": 0.5192416185417322,
      "eval_nq_true_num_tokens": 64.0,
      "step": 104375
    },
    {
      "epoch": 20.04,
      "learning_rate": 0.001,
      "loss": 2.5943,
      "step": 104376
    },
    {
      "epoch": 20.04,
      "learning_rate": 0.001,
      "loss": 2.5822,
      "step": 104388
    },
    {
      "epoch": 20.05,
      "learning_rate": 0.001,
      "loss": 2.5842,
      "step": 104400
    },
    {
      "epoch": 20.05,
      "learning_rate": 0.001,
      "loss": 2.5877,
      "step": 104412
    },
    {
      "epoch": 20.05,
      "learning_rate": 0.001,
      "loss": 2.5825,
      "step": 104424
    },
    {
      "epoch": 20.05,
      "learning_rate": 0.001,
      "loss": 2.5847,
      "step": 104436
    },
    {
      "epoch": 20.06,
      "learning_rate": 0.001,
      "loss": 2.5892,
      "step": 104448
    },
    {
      "epoch": 20.06,
      "learning_rate": 0.001,
      "loss": 2.5888,
      "step": 104460
    },
    {
      "epoch": 20.06,
      "learning_rate": 0.001,
      "loss": 2.5753,
      "step": 104472
    },
    {
      "epoch": 20.06,
      "learning_rate": 0.001,
      "loss": 2.5832,
      "step": 104484
    },
    {
      "epoch": 20.06,
      "learning_rate": 0.001,
      "loss": 2.5793,
      "step": 104496
    },
    {
      "epoch": 20.07,
      "learning_rate": 0.001,
      "loss": 2.5809,
      "step": 104508
    },
    {
      "epoch": 20.07,
      "learning_rate": 0.001,
      "loss": 2.5828,
      "step": 104520
    },
    {
      "epoch": 20.07,
      "learning_rate": 0.001,
      "loss": 2.5648,
      "step": 104532
    },
    {
      "epoch": 20.07,
      "learning_rate": 0.001,
      "loss": 2.5822,
      "step": 104544
    },
    {
      "epoch": 20.08,
      "learning_rate": 0.001,
      "loss": 2.5809,
      "step": 104556
    },
    {
      "epoch": 20.08,
      "learning_rate": 0.001,
      "loss": 2.5846,
      "step": 104568
    },
    {
      "epoch": 20.08,
      "learning_rate": 0.001,
      "loss": 2.5818,
      "step": 104580
    },
    {
      "epoch": 20.08,
      "learning_rate": 0.001,
      "loss": 2.5852,
      "step": 104592
    },
    {
      "epoch": 20.09,
      "learning_rate": 0.001,
      "loss": 2.5708,
      "step": 104604
    },
    {
      "epoch": 20.09,
      "learning_rate": 0.001,
      "loss": 2.5804,
      "step": 104616
    },
    {
      "epoch": 20.09,
      "learning_rate": 0.001,
      "loss": 2.5734,
      "step": 104628
    },
    {
      "epoch": 20.09,
      "learning_rate": 0.001,
      "loss": 2.59,
      "step": 104640
    },
    {
      "epoch": 20.09,
      "learning_rate": 0.001,
      "loss": 2.5806,
      "step": 104652
    },
    {
      "epoch": 20.1,
      "learning_rate": 0.001,
      "loss": 2.5679,
      "step": 104664
    },
    {
      "epoch": 20.1,
      "learning_rate": 0.001,
      "loss": 2.5897,
      "step": 104676
    },
    {
      "epoch": 20.1,
      "learning_rate": 0.001,
      "loss": 2.5846,
      "step": 104688
    },
    {
      "epoch": 20.1,
      "learning_rate": 0.001,
      "loss": 2.5961,
      "step": 104700
    },
    {
      "epoch": 20.11,
      "learning_rate": 0.001,
      "loss": 2.582,
      "step": 104712
    },
    {
      "epoch": 20.11,
      "learning_rate": 0.001,
      "loss": 2.5898,
      "step": 104724
    },
    {
      "epoch": 20.11,
      "learning_rate": 0.001,
      "loss": 2.5806,
      "step": 104736
    },
    {
      "epoch": 20.11,
      "learning_rate": 0.001,
      "loss": 2.5939,
      "step": 104748
    },
    {
      "epoch": 20.12,
      "learning_rate": 0.001,
      "loss": 2.5822,
      "step": 104760
    },
    {
      "epoch": 20.12,
      "learning_rate": 0.001,
      "loss": 2.5928,
      "step": 104772
    },
    {
      "epoch": 20.12,
      "learning_rate": 0.001,
      "loss": 2.5863,
      "step": 104784
    },
    {
      "epoch": 20.12,
      "learning_rate": 0.001,
      "loss": 2.5867,
      "step": 104796
    },
    {
      "epoch": 20.12,
      "learning_rate": 0.001,
      "loss": 2.5942,
      "step": 104808
    },
    {
      "epoch": 20.13,
      "learning_rate": 0.001,
      "loss": 2.5794,
      "step": 104820
    },
    {
      "epoch": 20.13,
      "learning_rate": 0.001,
      "loss": 2.5802,
      "step": 104832
    },
    {
      "epoch": 20.13,
      "learning_rate": 0.001,
      "loss": 2.5813,
      "step": 104844
    },
    {
      "epoch": 20.13,
      "learning_rate": 0.001,
      "loss": 2.5888,
      "step": 104856
    },
    {
      "epoch": 20.14,
      "learning_rate": 0.001,
      "loss": 2.5831,
      "step": 104868
    },
    {
      "epoch": 20.14,
      "learning_rate": 0.001,
      "loss": 2.5878,
      "step": 104880
    },
    {
      "epoch": 20.14,
      "learning_rate": 0.001,
      "loss": 2.5879,
      "step": 104892
    },
    {
      "epoch": 20.14,
      "learning_rate": 0.001,
      "loss": 2.5904,
      "step": 104904
    },
    {
      "epoch": 20.15,
      "learning_rate": 0.001,
      "loss": 2.5825,
      "step": 104916
    },
    {
      "epoch": 20.15,
      "learning_rate": 0.001,
      "loss": 2.5799,
      "step": 104928
    },
    {
      "epoch": 20.15,
      "learning_rate": 0.001,
      "loss": 2.5824,
      "step": 104940
    },
    {
      "epoch": 20.15,
      "learning_rate": 0.001,
      "loss": 2.5853,
      "step": 104952
    },
    {
      "epoch": 20.15,
      "learning_rate": 0.001,
      "loss": 2.5834,
      "step": 104964
    },
    {
      "epoch": 20.16,
      "learning_rate": 0.001,
      "loss": 2.5789,
      "step": 104976
    },
    {
      "epoch": 20.16,
      "learning_rate": 0.001,
      "loss": 2.5852,
      "step": 104988
    },
    {
      "epoch": 20.16,
      "learning_rate": 0.001,
      "loss": 2.5932,
      "step": 105000
    },
    {
      "epoch": 20.16,
      "eval_ag_news_accuracy": 0.319,
      "eval_ag_news_bleu_score": 4.605429809274004,
      "eval_ag_news_bleu_score_sem": 0.14796464979030508,
      "eval_ag_news_emb_cos_sim": 0.7997758388519287,
      "eval_ag_news_emb_cos_sim_sem": 0.008311400014652305,
      "eval_ag_news_emb_top1_equal": 0.28125,
      "eval_ag_news_emb_top1_equal_sem": 0.039896367485272234,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.59835147857666,
      "eval_ag_news_n_ngrams_match_1": 13.786,
      "eval_ag_news_n_ngrams_match_2": 2.982,
      "eval_ag_news_n_ngrams_match_3": 0.814,
      "eval_ag_news_num_pred_words": 46.552,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 36.53795117292509,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3422152509367118,
      "eval_ag_news_runtime": 10.3613,
      "eval_ag_news_samples_per_second": 48.256,
      "eval_ag_news_steps_per_second": 0.097,
      "eval_ag_news_token_set_f1": 0.3448444164429131,
      "eval_ag_news_token_set_f1_sem": 0.004375114955950184,
      "eval_ag_news_token_set_precision": 0.3272199511481177,
      "eval_ag_news_token_set_recall": 0.383210601694993,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 105000
    },
    {
      "epoch": 20.16,
      "eval_anthropic_toxic_prompts_accuracy": 0.1111875,
      "eval_anthropic_toxic_prompts_bleu_score": 2.913957418262643,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11354240886446086,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6618475914001465,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008768023581230888,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.27228045463562,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.968,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.81,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.664,
      "eval_anthropic_toxic_prompts_num_pred_words": 48.572,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.37140962678605,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20260705874664958,
      "eval_anthropic_toxic_prompts_runtime": 10.2024,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.008,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.098,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35556414073326476,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006566697430293288,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4219906214910666,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3388754652470632,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 105000
    },
    {
      "epoch": 20.16,
      "eval_arxiv_accuracy": 0.34475,
      "eval_arxiv_bleu_score": 4.208309160536787,
      "eval_arxiv_bleu_score_sem": 0.12308498120915533,
      "eval_arxiv_emb_cos_sim": 0.7577486634254456,
      "eval_arxiv_emb_cos_sim_sem": 0.006845360743326828,
      "eval_arxiv_emb_top1_equal": 0.2734375,
      "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.426072359085083,
      "eval_arxiv_n_ngrams_match_1": 14.822,
      "eval_arxiv_n_ngrams_match_2": 2.89,
      "eval_arxiv_n_ngrams_match_3": 0.636,
      "eval_arxiv_num_pred_words": 40.15,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 30.755608229957517,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3521257509215361,
      "eval_arxiv_runtime": 10.2874,
      "eval_arxiv_samples_per_second": 48.603,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.34755364501066693,
      "eval_arxiv_token_set_f1_sem": 0.004244639601194786,
      "eval_arxiv_token_set_precision": 0.29681714022247974,
      "eval_arxiv_token_set_recall": 0.439418931454802,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 105000
    },
    {
      "epoch": 20.16,
      "eval_python_code_alpaca_accuracy": 0.15621875,
      "eval_python_code_alpaca_bleu_score": 4.263972644966267,
      "eval_python_code_alpaca_bleu_score_sem": 0.13122937793061254,
      "eval_python_code_alpaca_emb_cos_sim": 0.7547144293785095,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009145565830568364,
      "eval_python_code_alpaca_emb_top1_equal": 0.0859375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02487009666300537,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.944267511367798,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.67,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.85,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.91,
      "eval_python_code_alpaca_num_pred_words": 46.294,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.996742391121284,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3192542589726646,
      "eval_python_code_alpaca_runtime": 10.4067,
      "eval_python_code_alpaca_samples_per_second": 48.046,
      "eval_python_code_alpaca_steps_per_second": 0.096,
      "eval_python_code_alpaca_token_set_f1": 0.47121015489205725,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005519328262978709,
      "eval_python_code_alpaca_token_set_precision": 0.5266123175871972,
      "eval_python_code_alpaca_token_set_recall": 0.4544299950246113,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 105000
    },
    {
      "epoch": 20.16,
      "eval_wikibio_accuracy": 0.32009375,
      "eval_wikibio_bleu_score": 5.812003918938086,
      "eval_wikibio_bleu_score_sem": 0.2018049464266148,
      "eval_wikibio_emb_cos_sim": 0.733515739440918,
      "eval_wikibio_emb_cos_sim_sem": 0.01020074642735301,
      "eval_wikibio_emb_top1_equal": 0.1640625,
      "eval_wikibio_emb_top1_equal_sem": 0.03286167651298939,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.782012701034546,
      "eval_wikibio_n_ngrams_match_1": 10.268,
      "eval_wikibio_n_ngrams_match_2": 3.452,
      "eval_wikibio_n_ngrams_match_3": 1.258,
      "eval_wikibio_num_pred_words": 37.706,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 43.90431913631204,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.35194199644964386,
      "eval_wikibio_runtime": 10.1156,
      "eval_wikibio_samples_per_second": 49.428,
      "eval_wikibio_steps_per_second": 0.099,
      "eval_wikibio_token_set_f1": 0.3190888122402507,
      "eval_wikibio_token_set_f1_sem": 0.005576558664879203,
      "eval_wikibio_token_set_precision": 0.33124807985049276,
      "eval_wikibio_token_set_recall": 0.321457263469454,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 105000
    },
    {
      "epoch": 20.16,
      "eval_nq_accuracy": 0.52303125,
      "eval_nq_bleu_score": 11.609777055328264,
      "eval_nq_bleu_score_sem": 0.4788533672022871,
      "eval_nq_emb_cos_sim": 0.8259741067886353,
      "eval_nq_emb_cos_sim_sem": 0.007167097931057849,
      "eval_nq_emb_top1_equal": 0.28125,
      "eval_nq_emb_top1_equal_sem": 0.039896367485272234,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.2192955017089844,
      "eval_nq_n_ngrams_match_1": 22.978,
      "eval_nq_n_ngrams_match_2": 8.426,
      "eval_nq_n_ngrams_match_3": 3.912,
      "eval_nq_num_pred_words": 49.236,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.200846601367244,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4417233456326265,
      "eval_nq_runtime": 11.0886,
      "eval_nq_samples_per_second": 45.092,
      "eval_nq_steps_per_second": 0.09,
      "eval_nq_token_set_f1": 0.45956217890785706,
      "eval_nq_token_set_f1_sem": 0.005096092215346017,
      "eval_nq_token_set_precision": 0.41619126249112176,
      "eval_nq_token_set_recall": 0.5219188661556968,
      "eval_nq_true_num_tokens": 64.0,
      "step": 105000
    },
    {
      "epoch": 20.16,
      "learning_rate": 0.001,
      "loss": 2.5869,
      "step": 105012
    },
    {
      "epoch": 20.17,
      "learning_rate": 0.001,
      "loss": 2.5809,
      "step": 105024
    },
    {
      "epoch": 20.17,
      "learning_rate": 0.001,
      "loss": 2.5819,
      "step": 105036
    },
    {
      "epoch": 20.17,
      "learning_rate": 0.001,
      "loss": 2.5961,
      "step": 105048
    },
    {
      "epoch": 20.17,
      "learning_rate": 0.001,
      "loss": 2.5904,
      "step": 105060
    },
    {
      "epoch": 20.18,
      "learning_rate": 0.001,
      "loss": 2.5849,
      "step": 105072
    },
    {
      "epoch": 20.18,
      "learning_rate": 0.001,
      "loss": 2.5872,
      "step": 105084
    },
    {
      "epoch": 20.18,
      "learning_rate": 0.001,
      "loss": 2.5938,
      "step": 105096
    },
    {
      "epoch": 20.18,
      "learning_rate": 0.001,
      "loss": 2.5827,
      "step": 105108
    },
    {
      "epoch": 20.18,
      "learning_rate": 0.001,
      "loss": 2.5817,
      "step": 105120
    },
    {
      "epoch": 20.19,
      "learning_rate": 0.001,
      "loss": 2.5864,
      "step": 105132
    },
    {
      "epoch": 20.19,
      "learning_rate": 0.001,
      "loss": 2.5787,
      "step": 105144
    },
    {
      "epoch": 20.19,
      "learning_rate": 0.001,
      "loss": 2.5811,
      "step": 105156
    },
    {
      "epoch": 20.19,
      "learning_rate": 0.001,
      "loss": 2.5943,
      "step": 105168
    },
    {
      "epoch": 20.2,
      "learning_rate": 0.001,
      "loss": 2.5767,
      "step": 105180
    },
    {
      "epoch": 20.2,
      "learning_rate": 0.001,
      "loss": 2.5862,
      "step": 105192
    },
    {
      "epoch": 20.2,
      "learning_rate": 0.001,
      "loss": 2.5905,
      "step": 105204
    },
    {
      "epoch": 20.2,
      "learning_rate": 0.001,
      "loss": 2.5866,
      "step": 105216
    },
    {
      "epoch": 20.21,
      "learning_rate": 0.001,
      "loss": 2.5946,
      "step": 105228
    },
    {
      "epoch": 20.21,
      "learning_rate": 0.001,
      "loss": 2.5888,
      "step": 105240
    },
    {
      "epoch": 20.21,
      "learning_rate": 0.001,
      "loss": 2.5823,
      "step": 105252
    },
    {
      "epoch": 20.21,
      "learning_rate": 0.001,
      "loss": 2.5978,
      "step": 105264
    },
    {
      "epoch": 20.21,
      "learning_rate": 0.001,
      "loss": 2.5971,
      "step": 105276
    },
    {
      "epoch": 20.22,
      "learning_rate": 0.001,
      "loss": 2.5861,
      "step": 105288
    },
    {
      "epoch": 20.22,
      "learning_rate": 0.001,
      "loss": 2.5929,
      "step": 105300
    },
    {
      "epoch": 20.22,
      "learning_rate": 0.001,
      "loss": 2.584,
      "step": 105312
    },
    {
      "epoch": 20.22,
      "learning_rate": 0.001,
      "loss": 2.5924,
      "step": 105324
    },
    {
      "epoch": 20.23,
      "learning_rate": 0.001,
      "loss": 2.5857,
      "step": 105336
    },
    {
      "epoch": 20.23,
      "learning_rate": 0.001,
      "loss": 2.5836,
      "step": 105348
    },
    {
      "epoch": 20.23,
      "learning_rate": 0.001,
      "loss": 2.5951,
      "step": 105360
    },
    {
      "epoch": 20.23,
      "learning_rate": 0.001,
      "loss": 2.59,
      "step": 105372
    },
    {
      "epoch": 20.24,
      "learning_rate": 0.001,
      "loss": 2.5871,
      "step": 105384
    },
    {
      "epoch": 20.24,
      "learning_rate": 0.001,
      "loss": 2.5871,
      "step": 105396
    },
    {
      "epoch": 20.24,
      "learning_rate": 0.001,
      "loss": 2.5813,
      "step": 105408
    },
    {
      "epoch": 20.24,
      "learning_rate": 0.001,
      "loss": 2.5915,
      "step": 105420
    },
    {
      "epoch": 20.24,
      "learning_rate": 0.001,
      "loss": 2.581,
      "step": 105432
    },
    {
      "epoch": 20.25,
      "learning_rate": 0.001,
      "loss": 2.5691,
      "step": 105444
    },
    {
      "epoch": 20.25,
      "learning_rate": 0.001,
      "loss": 2.5891,
      "step": 105456
    },
    {
      "epoch": 20.25,
      "learning_rate": 0.001,
      "loss": 2.5906,
      "step": 105468
    },
    {
      "epoch": 20.25,
      "learning_rate": 0.001,
      "loss": 2.5941,
      "step": 105480
    },
    {
      "epoch": 20.26,
      "learning_rate": 0.001,
      "loss": 2.5834,
      "step": 105492
    },
    {
      "epoch": 20.26,
      "learning_rate": 0.001,
      "loss": 2.5892,
      "step": 105504
    },
    {
      "epoch": 20.26,
      "learning_rate": 0.001,
      "loss": 2.5869,
      "step": 105516
    },
    {
      "epoch": 20.26,
      "learning_rate": 0.001,
      "loss": 2.5923,
      "step": 105528
    },
    {
      "epoch": 20.26,
      "learning_rate": 0.001,
      "loss": 2.592,
      "step": 105540
    },
    {
      "epoch": 20.27,
      "learning_rate": 0.001,
      "loss": 2.5774,
      "step": 105552
    },
    {
      "epoch": 20.27,
      "learning_rate": 0.001,
      "loss": 2.5837,
      "step": 105564
    },
    {
      "epoch": 20.27,
      "learning_rate": 0.001,
      "loss": 2.5849,
      "step": 105576
    },
    {
      "epoch": 20.27,
      "learning_rate": 0.001,
      "loss": 2.5905,
      "step": 105588
    },
    {
      "epoch": 20.28,
      "learning_rate": 0.001,
      "loss": 2.5853,
      "step": 105600
    },
    {
      "epoch": 20.28,
      "learning_rate": 0.001,
      "loss": 2.5835,
      "step": 105612
    },
    {
      "epoch": 20.28,
      "learning_rate": 0.001,
      "loss": 2.5939,
      "step": 105624
    },
    {
      "epoch": 20.28,
      "eval_ag_news_accuracy": 0.3175,
      "eval_ag_news_bleu_score": 4.686586673134966,
      "eval_ag_news_bleu_score_sem": 0.1487195148458065,
      "eval_ag_news_emb_cos_sim": 0.8074089884757996,
      "eval_ag_news_emb_cos_sim_sem": 0.006657484580644438,
      "eval_ag_news_emb_top1_equal": 0.25,
      "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.597015380859375,
      "eval_ag_news_n_ngrams_match_1": 13.786,
      "eval_ag_news_n_ngrams_match_2": 2.988,
      "eval_ag_news_n_ngrams_match_3": 0.862,
      "eval_ag_news_num_pred_words": 46.744,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 36.48916549824047,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.34167332181654797,
      "eval_ag_news_runtime": 10.2887,
      "eval_ag_news_samples_per_second": 48.597,
      "eval_ag_news_steps_per_second": 0.097,
      "eval_ag_news_token_set_f1": 0.3476416951360769,
      "eval_ag_news_token_set_f1_sem": 0.004279736414009697,
      "eval_ag_news_token_set_precision": 0.33120236761040905,
      "eval_ag_news_token_set_recall": 0.3804365707214545,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 105625
    },
    {
      "epoch": 20.28,
      "eval_anthropic_toxic_prompts_accuracy": 0.11240625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.114800385741145,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.13139770303858317,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6710171699523926,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00954596681832243,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2607951164245605,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.162,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.89,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.698,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.83,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.07025779392604,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2103906065837262,
      "eval_anthropic_toxic_prompts_runtime": 10.1147,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.433,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.099,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35165873890380994,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006430648021044267,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.43630196424075235,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3222309206911361,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 105625
    },
    {
      "epoch": 20.28,
      "eval_arxiv_accuracy": 0.342125,
      "eval_arxiv_bleu_score": 4.163515466711427,
      "eval_arxiv_bleu_score_sem": 0.11898672585469931,
      "eval_arxiv_emb_cos_sim": 0.7568680644035339,
      "eval_arxiv_emb_cos_sim_sem": 0.008085180756537129,
      "eval_arxiv_emb_top1_equal": 0.2890625,
      "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4533305168151855,
      "eval_arxiv_n_ngrams_match_1": 14.824,
      "eval_arxiv_n_ngrams_match_2": 2.832,
      "eval_arxiv_n_ngrams_match_3": 0.62,
      "eval_arxiv_num_pred_words": 40.538,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 31.60547979569949,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3536696118573205,
      "eval_arxiv_runtime": 10.7478,
      "eval_arxiv_samples_per_second": 46.521,
      "eval_arxiv_steps_per_second": 0.093,
      "eval_arxiv_token_set_f1": 0.347638054299798,
      "eval_arxiv_token_set_f1_sem": 0.004396368419476976,
      "eval_arxiv_token_set_precision": 0.29900263163404556,
      "eval_arxiv_token_set_recall": 0.4334344934610621,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 105625
    },
    {
      "epoch": 20.28,
      "eval_python_code_alpaca_accuracy": 0.15675,
      "eval_python_code_alpaca_bleu_score": 4.312690157876314,
      "eval_python_code_alpaca_bleu_score_sem": 0.13894895255538506,
      "eval_python_code_alpaca_emb_cos_sim": 0.7477904558181763,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.01032910465449294,
      "eval_python_code_alpaca_emb_top1_equal": 0.15625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.9216020107269287,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.634,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.784,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.908,
      "eval_python_code_alpaca_num_pred_words": 44.446,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.571014606394616,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.32147926756059025,
      "eval_python_code_alpaca_runtime": 9.9425,
      "eval_python_code_alpaca_samples_per_second": 50.289,
      "eval_python_code_alpaca_steps_per_second": 0.101,
      "eval_python_code_alpaca_token_set_f1": 0.4657104079206335,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005738082887467584,
      "eval_python_code_alpaca_token_set_precision": 0.5243087454582851,
      "eval_python_code_alpaca_token_set_recall": 0.44338081823921743,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 105625
    },
    {
      "epoch": 20.28,
      "eval_wikibio_accuracy": 0.31696875,
      "eval_wikibio_bleu_score": 5.670991468345321,
      "eval_wikibio_bleu_score_sem": 0.2037453600528457,
      "eval_wikibio_emb_cos_sim": 0.745564341545105,
      "eval_wikibio_emb_cos_sim_sem": 0.008491318325500232,
      "eval_wikibio_emb_top1_equal": 0.1796875,
      "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.8296010494232178,
      "eval_wikibio_n_ngrams_match_1": 10.294,
      "eval_wikibio_n_ngrams_match_2": 3.33,
      "eval_wikibio_n_ngrams_match_3": 1.178,
      "eval_wikibio_num_pred_words": 37.974,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 46.04416522322324,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3546732297518005,
      "eval_wikibio_runtime": 10.1032,
      "eval_wikibio_samples_per_second": 49.489,
      "eval_wikibio_steps_per_second": 0.099,
      "eval_wikibio_token_set_f1": 0.3230028201685258,
      "eval_wikibio_token_set_f1_sem": 0.0051683189323545615,
      "eval_wikibio_token_set_precision": 0.3327636165973762,
      "eval_wikibio_token_set_recall": 0.3281494835127084,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 105625
    },
    {
      "epoch": 20.28,
      "eval_nq_accuracy": 0.522875,
      "eval_nq_bleu_score": 11.763845667243404,
      "eval_nq_bleu_score_sem": 0.47547163346351584,
      "eval_nq_emb_cos_sim": 0.826331615447998,
      "eval_nq_emb_cos_sim_sem": 0.007203581955344599,
      "eval_nq_emb_top1_equal": 0.28125,
      "eval_nq_emb_top1_equal_sem": 0.039896367485272234,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.218851327896118,
      "eval_nq_n_ngrams_match_1": 23.094,
      "eval_nq_n_ngrams_match_2": 8.532,
      "eval_nq_n_ngrams_match_3": 3.958,
      "eval_nq_num_pred_words": 49.09,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.196760733735596,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.44519123317655285,
      "eval_nq_runtime": 11.085,
      "eval_nq_samples_per_second": 45.106,
      "eval_nq_steps_per_second": 0.09,
      "eval_nq_token_set_f1": 0.4613269563111501,
      "eval_nq_token_set_f1_sem": 0.004909965814098997,
      "eval_nq_token_set_precision": 0.41814444380315813,
      "eval_nq_token_set_recall": 0.522397844153114,
      "eval_nq_true_num_tokens": 64.0,
      "step": 105625
    },
    {
      "epoch": 20.28,
      "learning_rate": 0.001,
      "loss": 2.5818,
      "step": 105636
    },
    {
      "epoch": 20.29,
      "learning_rate": 0.001,
      "loss": 2.5856,
      "step": 105648
    },
    {
      "epoch": 20.29,
      "learning_rate": 0.001,
      "loss": 2.5941,
      "step": 105660
    },
    {
      "epoch": 20.29,
      "learning_rate": 0.001,
      "loss": 2.5907,
      "step": 105672
    },
    {
      "epoch": 20.29,
      "learning_rate": 0.001,
      "loss": 2.5939,
      "step": 105684
    },
    {
      "epoch": 20.29,
      "learning_rate": 0.001,
      "loss": 2.5896,
      "step": 105696
    },
    {
      "epoch": 20.3,
      "learning_rate": 0.001,
      "loss": 2.5944,
      "step": 105708
    },
    {
      "epoch": 20.3,
      "learning_rate": 0.001,
      "loss": 2.5898,
      "step": 105720
    },
    {
      "epoch": 20.3,
      "learning_rate": 0.001,
      "loss": 2.5856,
      "step": 105732
    },
    {
      "epoch": 20.3,
      "learning_rate": 0.001,
      "loss": 2.6003,
      "step": 105744
    },
    {
      "epoch": 20.31,
      "learning_rate": 0.001,
      "loss": 2.5906,
      "step": 105756
    },
    {
      "epoch": 20.31,
      "learning_rate": 0.001,
      "loss": 2.5973,
      "step": 105768
    },
    {
      "epoch": 20.31,
      "learning_rate": 0.001,
      "loss": 2.595,
      "step": 105780
    },
    {
      "epoch": 20.31,
      "learning_rate": 0.001,
      "loss": 2.587,
      "step": 105792
    },
    {
      "epoch": 20.32,
      "learning_rate": 0.001,
      "loss": 2.5896,
      "step": 105804
    },
    {
      "epoch": 20.32,
      "learning_rate": 0.001,
      "loss": 2.5922,
      "step": 105816
    },
    {
      "epoch": 20.32,
      "learning_rate": 0.001,
      "loss": 2.5965,
      "step": 105828
    },
    {
      "epoch": 20.32,
      "learning_rate": 0.001,
      "loss": 2.5847,
      "step": 105840
    },
    {
      "epoch": 20.32,
      "learning_rate": 0.001,
      "loss": 2.5941,
      "step": 105852
    },
    {
      "epoch": 20.33,
      "learning_rate": 0.001,
      "loss": 2.5966,
      "step": 105864
    },
    {
      "epoch": 20.33,
      "learning_rate": 0.001,
      "loss": 2.5882,
      "step": 105876
    },
    {
      "epoch": 20.33,
      "learning_rate": 0.001,
      "loss": 2.5902,
      "step": 105888
    },
    {
      "epoch": 20.33,
      "learning_rate": 0.001,
      "loss": 2.5898,
      "step": 105900
    },
    {
      "epoch": 20.34,
      "learning_rate": 0.001,
      "loss": 2.5848,
      "step": 105912
    },
    {
      "epoch": 20.34,
      "learning_rate": 0.001,
      "loss": 2.5793,
      "step": 105924
    },
    {
      "epoch": 20.34,
      "learning_rate": 0.001,
      "loss": 2.5832,
      "step": 105936
    },
    {
      "epoch": 20.34,
      "learning_rate": 0.001,
      "loss": 2.5817,
      "step": 105948
    },
    {
      "epoch": 20.35,
      "learning_rate": 0.001,
      "loss": 2.5984,
      "step": 105960
    },
    {
      "epoch": 20.35,
      "learning_rate": 0.001,
      "loss": 2.5899,
      "step": 105972
    },
    {
      "epoch": 20.35,
      "learning_rate": 0.001,
      "loss": 2.5905,
      "step": 105984
    },
    {
      "epoch": 20.35,
      "learning_rate": 0.001,
      "loss": 2.5802,
      "step": 105996
    },
    {
      "epoch": 20.35,
      "learning_rate": 0.001,
      "loss": 2.5768,
      "step": 106008
    },
    {
      "epoch": 20.36,
      "learning_rate": 0.001,
      "loss": 2.5909,
      "step": 106020
    },
    {
      "epoch": 20.36,
      "learning_rate": 0.001,
      "loss": 2.5904,
      "step": 106032
    },
    {
      "epoch": 20.36,
      "learning_rate": 0.001,
      "loss": 2.5936,
      "step": 106044
    },
    {
      "epoch": 20.36,
      "learning_rate": 0.001,
      "loss": 2.5874,
      "step": 106056
    },
    {
      "epoch": 20.37,
      "learning_rate": 0.001,
      "loss": 2.5966,
      "step": 106068
    },
    {
      "epoch": 20.37,
      "learning_rate": 0.001,
      "loss": 2.5919,
      "step": 106080
    },
    {
      "epoch": 20.37,
      "learning_rate": 0.001,
      "loss": 2.5902,
      "step": 106092
    },
    {
      "epoch": 20.37,
      "learning_rate": 0.001,
      "loss": 2.5898,
      "step": 106104
    },
    {
      "epoch": 20.38,
      "learning_rate": 0.001,
      "loss": 2.5805,
      "step": 106116
    },
    {
      "epoch": 20.38,
      "learning_rate": 0.001,
      "loss": 2.5824,
      "step": 106128
    },
    {
      "epoch": 20.38,
      "learning_rate": 0.001,
      "loss": 2.589,
      "step": 106140
    },
    {
      "epoch": 20.38,
      "learning_rate": 0.001,
      "loss": 2.5888,
      "step": 106152
    },
    {
      "epoch": 20.38,
      "learning_rate": 0.001,
      "loss": 2.603,
      "step": 106164
    },
    {
      "epoch": 20.39,
      "learning_rate": 0.001,
      "loss": 2.5964,
      "step": 106176
    },
    {
      "epoch": 20.39,
      "learning_rate": 0.001,
      "loss": 2.5786,
      "step": 106188
    },
    {
      "epoch": 20.39,
      "learning_rate": 0.001,
      "loss": 2.5899,
      "step": 106200
    },
    {
      "epoch": 20.39,
      "learning_rate": 0.001,
      "loss": 2.5817,
      "step": 106212
    },
    {
      "epoch": 20.4,
      "learning_rate": 0.001,
      "loss": 2.5818,
      "step": 106224
    },
    {
      "epoch": 20.4,
      "learning_rate": 0.001,
      "loss": 2.591,
      "step": 106236
    },
    {
      "epoch": 20.4,
      "learning_rate": 0.001,
      "loss": 2.5945,
      "step": 106248
    },
    {
      "epoch": 20.4,
      "eval_ag_news_accuracy": 0.317875,
      "eval_ag_news_bleu_score": 4.736162124506833,
      "eval_ag_news_bleu_score_sem": 0.15106853552264363,
      "eval_ag_news_emb_cos_sim": 0.8027890920639038,
      "eval_ag_news_emb_cos_sim_sem": 0.008061560740669313,
      "eval_ag_news_emb_top1_equal": 0.2109375,
      "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.6067285537719727,
      "eval_ag_news_n_ngrams_match_1": 13.786,
      "eval_ag_news_n_ngrams_match_2": 3.026,
      "eval_ag_news_n_ngrams_match_3": 0.85,
      "eval_ag_news_num_pred_words": 46.112,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 36.84531795725257,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3414733496759852,
      "eval_ag_news_runtime": 10.4907,
      "eval_ag_news_samples_per_second": 47.661,
      "eval_ag_news_steps_per_second": 0.095,
      "eval_ag_news_token_set_f1": 0.3464649564691012,
      "eval_ag_news_token_set_f1_sem": 0.004412410021202581,
      "eval_ag_news_token_set_precision": 0.3279196359557733,
      "eval_ag_news_token_set_recall": 0.38701578998733127,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 106250
    },
    {
      "epoch": 20.4,
      "eval_anthropic_toxic_prompts_accuracy": 0.1120625,
      "eval_anthropic_toxic_prompts_bleu_score": 2.9651580586416264,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1149846847651953,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6625226736068726,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009664726759258509,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.280329704284668,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.05,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.808,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.654,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.72,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.584536290618793,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20726464934653505,
      "eval_anthropic_toxic_prompts_runtime": 9.909,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.459,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.101,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3539139198354194,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006704180090698138,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.42860452362979795,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32741245372358746,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 106250
    },
    {
      "epoch": 20.4,
      "eval_arxiv_accuracy": 0.34215625,
      "eval_arxiv_bleu_score": 4.24142969224101,
      "eval_arxiv_bleu_score_sem": 0.12202159091311132,
      "eval_arxiv_emb_cos_sim": 0.7643013596534729,
      "eval_arxiv_emb_cos_sim_sem": 0.007691076771368395,
      "eval_arxiv_emb_top1_equal": 0.28125,
      "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.461731433868408,
      "eval_arxiv_n_ngrams_match_1": 14.774,
      "eval_arxiv_n_ngrams_match_2": 2.89,
      "eval_arxiv_n_ngrams_match_3": 0.652,
      "eval_arxiv_num_pred_words": 40.804,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 31.87211322440449,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3517843858240757,
      "eval_arxiv_runtime": 10.4051,
      "eval_arxiv_samples_per_second": 48.053,
      "eval_arxiv_steps_per_second": 0.096,
      "eval_arxiv_token_set_f1": 0.34460872298111217,
      "eval_arxiv_token_set_f1_sem": 0.004201232769472353,
      "eval_arxiv_token_set_precision": 0.296133870411427,
      "eval_arxiv_token_set_recall": 0.43056774661485525,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 106250
    },
    {
      "epoch": 20.4,
      "eval_python_code_alpaca_accuracy": 0.1545,
      "eval_python_code_alpaca_bleu_score": 4.290810821574007,
      "eval_python_code_alpaca_bleu_score_sem": 0.1388669003342831,
      "eval_python_code_alpaca_emb_cos_sim": 0.739973247051239,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.011939318416406566,
      "eval_python_code_alpaca_emb_top1_equal": 0.1328125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.9438319206237793,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.598,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.76,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.892,
      "eval_python_code_alpaca_num_pred_words": 44.62,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.988469387921825,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3161371891457895,
      "eval_python_code_alpaca_runtime": 9.7482,
      "eval_python_code_alpaca_samples_per_second": 51.292,
      "eval_python_code_alpaca_steps_per_second": 0.103,
      "eval_python_code_alpaca_token_set_f1": 0.4660896763141984,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0059043821743581665,
      "eval_python_code_alpaca_token_set_precision": 0.5185077422429903,
      "eval_python_code_alpaca_token_set_recall": 0.4496648271016924,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 106250
    },
    {
      "epoch": 20.4,
      "eval_wikibio_accuracy": 0.31709375,
      "eval_wikibio_bleu_score": 5.758761685748959,
      "eval_wikibio_bleu_score_sem": 0.2014654036850103,
      "eval_wikibio_emb_cos_sim": 0.7358646392822266,
      "eval_wikibio_emb_cos_sim_sem": 0.00875286256956811,
      "eval_wikibio_emb_top1_equal": 0.171875,
      "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.8096470832824707,
      "eval_wikibio_n_ngrams_match_1": 9.998,
      "eval_wikibio_n_ngrams_match_2": 3.342,
      "eval_wikibio_n_ngrams_match_3": 1.188,
      "eval_wikibio_num_pred_words": 36.488,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 45.134507333056575,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3507836800002612,
      "eval_wikibio_runtime": 10.5957,
      "eval_wikibio_samples_per_second": 47.189,
      "eval_wikibio_steps_per_second": 0.094,
      "eval_wikibio_token_set_f1": 0.31732368184979565,
      "eval_wikibio_token_set_f1_sem": 0.005412909304453982,
      "eval_wikibio_token_set_precision": 0.3238138680832841,
      "eval_wikibio_token_set_recall": 0.32640364788147386,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 106250
    },
    {
      "epoch": 20.4,
      "eval_nq_accuracy": 0.52278125,
      "eval_nq_bleu_score": 11.533538808764156,
      "eval_nq_bleu_score_sem": 0.45828506881051434,
      "eval_nq_emb_cos_sim": 0.8285806179046631,
      "eval_nq_emb_cos_sim_sem": 0.007730273999028035,
      "eval_nq_emb_top1_equal": 0.203125,
      "eval_nq_emb_top1_equal_sem": 0.03570055125142555,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.2204086780548096,
      "eval_nq_n_ngrams_match_1": 22.95,
      "eval_nq_n_ngrams_match_2": 8.362,
      "eval_nq_n_ngrams_match_3": 3.852,
      "eval_nq_num_pred_words": 49.132,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.211094468949115,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.44288603975113794,
      "eval_nq_runtime": 10.4301,
      "eval_nq_samples_per_second": 47.938,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.4580814147836532,
      "eval_nq_token_set_f1_sem": 0.00479774175121098,
      "eval_nq_token_set_precision": 0.412922008068103,
      "eval_nq_token_set_recall": 0.5225166517959134,
      "eval_nq_true_num_tokens": 64.0,
      "step": 106250
    },
    {
      "epoch": 20.4,
      "learning_rate": 0.001,
      "loss": 2.5854,
      "step": 106260
    },
    {
      "epoch": 20.41,
      "learning_rate": 0.001,
      "loss": 2.5925,
      "step": 106272
    },
    {
      "epoch": 20.41,
      "learning_rate": 0.001,
      "loss": 2.5855,
      "step": 106284
    },
    {
      "epoch": 20.41,
      "learning_rate": 0.001,
      "loss": 2.5898,
      "step": 106296
    },
    {
      "epoch": 20.41,
      "learning_rate": 0.001,
      "loss": 2.5913,
      "step": 106308
    },
    {
      "epoch": 20.41,
      "learning_rate": 0.001,
      "loss": 2.5859,
      "step": 106320
    },
    {
      "epoch": 20.42,
      "learning_rate": 0.001,
      "loss": 2.5956,
      "step": 106332
    },
    {
      "epoch": 20.42,
      "learning_rate": 0.001,
      "loss": 2.5959,
      "step": 106344
    },
    {
      "epoch": 20.42,
      "learning_rate": 0.001,
      "loss": 2.5855,
      "step": 106356
    },
    {
      "epoch": 20.42,
      "learning_rate": 0.001,
      "loss": 2.5942,
      "step": 106368
    },
    {
      "epoch": 20.43,
      "learning_rate": 0.001,
      "loss": 2.5867,
      "step": 106380
    },
    {
      "epoch": 20.43,
      "learning_rate": 0.001,
      "loss": 2.5854,
      "step": 106392
    },
    {
      "epoch": 20.43,
      "learning_rate": 0.001,
      "loss": 2.583,
      "step": 106404
    },
    {
      "epoch": 20.43,
      "learning_rate": 0.001,
      "loss": 2.5894,
      "step": 106416
    },
    {
      "epoch": 20.44,
      "learning_rate": 0.001,
      "loss": 2.592,
      "step": 106428
    },
    {
      "epoch": 20.44,
      "learning_rate": 0.001,
      "loss": 2.5789,
      "step": 106440
    },
    {
      "epoch": 20.44,
      "learning_rate": 0.001,
      "loss": 2.5939,
      "step": 106452
    },
    {
      "epoch": 20.44,
      "learning_rate": 0.001,
      "loss": 2.5895,
      "step": 106464
    },
    {
      "epoch": 20.44,
      "learning_rate": 0.001,
      "loss": 2.5906,
      "step": 106476
    },
    {
      "epoch": 20.45,
      "learning_rate": 0.001,
      "loss": 2.5825,
      "step": 106488
    },
    {
      "epoch": 20.45,
      "learning_rate": 0.001,
      "loss": 2.5841,
      "step": 106500
    },
    {
      "epoch": 20.45,
      "learning_rate": 0.001,
      "loss": 2.5712,
      "step": 106512
    },
    {
      "epoch": 20.45,
      "learning_rate": 0.001,
      "loss": 2.5879,
      "step": 106524
    },
    {
      "epoch": 20.46,
      "learning_rate": 0.001,
      "loss": 2.5967,
      "step": 106536
    },
    {
      "epoch": 20.46,
      "learning_rate": 0.001,
      "loss": 2.5826,
      "step": 106548
    },
    {
      "epoch": 20.46,
      "learning_rate": 0.001,
      "loss": 2.5897,
      "step": 106560
    },
    {
      "epoch": 20.46,
      "learning_rate": 0.001,
      "loss": 2.5877,
      "step": 106572
    },
    {
      "epoch": 20.47,
      "learning_rate": 0.001,
      "loss": 2.5886,
      "step": 106584
    },
    {
      "epoch": 20.47,
      "learning_rate": 0.001,
      "loss": 2.5966,
      "step": 106596
    },
    {
      "epoch": 20.47,
      "learning_rate": 0.001,
      "loss": 2.5851,
      "step": 106608
    },
    {
      "epoch": 20.47,
      "learning_rate": 0.001,
      "loss": 2.5808,
      "step": 106620
    },
    {
      "epoch": 20.47,
      "learning_rate": 0.001,
      "loss": 2.5872,
      "step": 106632
    },
    {
      "epoch": 20.48,
      "learning_rate": 0.001,
      "loss": 2.5821,
      "step": 106644
    },
    {
      "epoch": 20.48,
      "learning_rate": 0.001,
      "loss": 2.578,
      "step": 106656
    },
    {
      "epoch": 20.48,
      "learning_rate": 0.001,
      "loss": 2.5917,
      "step": 106668
    },
    {
      "epoch": 20.48,
      "learning_rate": 0.001,
      "loss": 2.5848,
      "step": 106680
    },
    {
      "epoch": 20.49,
      "learning_rate": 0.001,
      "loss": 2.5872,
      "step": 106692
    },
    {
      "epoch": 20.49,
      "learning_rate": 0.001,
      "loss": 2.5827,
      "step": 106704
    },
    {
      "epoch": 20.49,
      "learning_rate": 0.001,
      "loss": 2.5903,
      "step": 106716
    },
    {
      "epoch": 20.49,
      "learning_rate": 0.001,
      "loss": 2.5829,
      "step": 106728
    },
    {
      "epoch": 20.5,
      "learning_rate": 0.001,
      "loss": 2.5805,
      "step": 106740
    },
    {
      "epoch": 20.5,
      "learning_rate": 0.001,
      "loss": 2.5795,
      "step": 106752
    },
    {
      "epoch": 20.5,
      "learning_rate": 0.001,
      "loss": 2.5953,
      "step": 106764
    },
    {
      "epoch": 20.5,
      "learning_rate": 0.001,
      "loss": 2.5886,
      "step": 106776
    },
    {
      "epoch": 20.5,
      "learning_rate": 0.001,
      "loss": 2.5843,
      "step": 106788
    },
    {
      "epoch": 20.51,
      "learning_rate": 0.001,
      "loss": 2.5922,
      "step": 106800
    },
    {
      "epoch": 20.51,
      "learning_rate": 0.001,
      "loss": 2.5953,
      "step": 106812
    },
    {
      "epoch": 20.51,
      "learning_rate": 0.001,
      "loss": 2.5925,
      "step": 106824
    },
    {
      "epoch": 20.51,
      "learning_rate": 0.001,
      "loss": 2.5862,
      "step": 106836
    },
    {
      "epoch": 20.52,
      "learning_rate": 0.001,
      "loss": 2.5953,
      "step": 106848
    },
    {
      "epoch": 20.52,
      "learning_rate": 0.001,
      "loss": 2.5869,
      "step": 106860
    },
    {
      "epoch": 20.52,
      "learning_rate": 0.001,
      "loss": 2.5853,
      "step": 106872
    },
    {
      "epoch": 20.52,
      "eval_ag_news_accuracy": 0.31903125,
      "eval_ag_news_bleu_score": 4.675808205117197,
      "eval_ag_news_bleu_score_sem": 0.15038055364200864,
      "eval_ag_news_emb_cos_sim": 0.8060142397880554,
      "eval_ag_news_emb_cos_sim_sem": 0.007473165710765929,
      "eval_ag_news_emb_top1_equal": 0.2265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03714537682851538,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.589247465133667,
      "eval_ag_news_n_ngrams_match_1": 13.778,
      "eval_ag_news_n_ngrams_match_2": 2.988,
      "eval_ag_news_n_ngrams_match_3": 0.84,
      "eval_ag_news_num_pred_words": 46.4,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 36.206818778255965,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.34461262568055817,
      "eval_ag_news_runtime": 10.2854,
      "eval_ag_news_samples_per_second": 48.613,
      "eval_ag_news_steps_per_second": 0.097,
      "eval_ag_news_token_set_f1": 0.3460400386277784,
      "eval_ag_news_token_set_f1_sem": 0.00443891591014686,
      "eval_ag_news_token_set_precision": 0.32815925631245246,
      "eval_ag_news_token_set_recall": 0.38706170670097445,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 106875
    },
    {
      "epoch": 20.52,
      "eval_anthropic_toxic_prompts_accuracy": 0.1120625,
      "eval_anthropic_toxic_prompts_bleu_score": 2.8172890296624455,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10069188346220201,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6546214818954468,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010074594324314826,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2812349796295166,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.96,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.748,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.59,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.62,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.60861351249027,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2046918798357004,
      "eval_anthropic_toxic_prompts_runtime": 9.9743,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.129,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.1,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3521623057039035,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006321514142783254,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.42174827340097226,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.33479658803808365,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 106875
    },
    {
      "epoch": 20.52,
      "eval_arxiv_accuracy": 0.344125,
      "eval_arxiv_bleu_score": 4.155494291056252,
      "eval_arxiv_bleu_score_sem": 0.11624197610517291,
      "eval_arxiv_emb_cos_sim": 0.7548593282699585,
      "eval_arxiv_emb_cos_sim_sem": 0.007868334562404072,
      "eval_arxiv_emb_top1_equal": 0.3359375,
      "eval_arxiv_emb_top1_equal_sem": 0.04191137143408563,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4369235038757324,
      "eval_arxiv_n_ngrams_match_1": 14.814,
      "eval_arxiv_n_ngrams_match_2": 2.87,
      "eval_arxiv_n_ngrams_match_3": 0.586,
      "eval_arxiv_num_pred_words": 40.154,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 31.091159050744086,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3534161748848369,
      "eval_arxiv_runtime": 10.6045,
      "eval_arxiv_samples_per_second": 47.15,
      "eval_arxiv_steps_per_second": 0.094,
      "eval_arxiv_token_set_f1": 0.34797830157383214,
      "eval_arxiv_token_set_f1_sem": 0.004419123468664785,
      "eval_arxiv_token_set_precision": 0.29702121770489404,
      "eval_arxiv_token_set_recall": 0.43770806121837064,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 106875
    },
    {
      "epoch": 20.52,
      "eval_python_code_alpaca_accuracy": 0.1551875,
      "eval_python_code_alpaca_bleu_score": 4.343087852374023,
      "eval_python_code_alpaca_bleu_score_sem": 0.1395949619055419,
      "eval_python_code_alpaca_emb_cos_sim": 0.7138265371322632,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.012877403214226806,
      "eval_python_code_alpaca_emb_top1_equal": 0.109375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.942326545715332,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.302,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.702,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.87,
      "eval_python_code_alpaca_num_pred_words": 42.856,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.95990612715752,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3169222548597542,
      "eval_python_code_alpaca_runtime": 9.9616,
      "eval_python_code_alpaca_samples_per_second": 50.193,
      "eval_python_code_alpaca_steps_per_second": 0.1,
      "eval_python_code_alpaca_token_set_f1": 0.4682792194392736,
      "eval_python_code_alpaca_token_set_f1_sem": 0.006167622203396601,
      "eval_python_code_alpaca_token_set_precision": 0.5021414070720427,
      "eval_python_code_alpaca_token_set_recall": 0.46914912025549066,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 106875
    },
    {
      "epoch": 20.52,
      "eval_wikibio_accuracy": 0.322125,
      "eval_wikibio_bleu_score": 6.068244902810261,
      "eval_wikibio_bleu_score_sem": 0.20719131144981617,
      "eval_wikibio_emb_cos_sim": 0.746246337890625,
      "eval_wikibio_emb_cos_sim_sem": 0.00842967526683051,
      "eval_wikibio_emb_top1_equal": 0.1796875,
      "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.750788688659668,
      "eval_wikibio_n_ngrams_match_1": 10.336,
      "eval_wikibio_n_ngrams_match_2": 3.472,
      "eval_wikibio_n_ngrams_match_3": 1.216,
      "eval_wikibio_num_pred_words": 36.718,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 42.554631123400554,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36182565201179606,
      "eval_wikibio_runtime": 10.0603,
      "eval_wikibio_samples_per_second": 49.7,
      "eval_wikibio_steps_per_second": 0.099,
      "eval_wikibio_token_set_f1": 0.3272347824625536,
      "eval_wikibio_token_set_f1_sem": 0.0051785986877144075,
      "eval_wikibio_token_set_precision": 0.3355020336165487,
      "eval_wikibio_token_set_recall": 0.33522756420869765,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 106875
    },
    {
      "epoch": 20.52,
      "eval_nq_accuracy": 0.52378125,
      "eval_nq_bleu_score": 11.616922851142785,
      "eval_nq_bleu_score_sem": 0.4802084653288616,
      "eval_nq_emb_cos_sim": 0.8299338221549988,
      "eval_nq_emb_cos_sim_sem": 0.007294561854755936,
      "eval_nq_emb_top1_equal": 0.2734375,
      "eval_nq_emb_top1_equal_sem": 0.03955156411760461,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.2151927947998047,
      "eval_nq_n_ngrams_match_1": 22.876,
      "eval_nq_n_ngrams_match_2": 8.382,
      "eval_nq_n_ngrams_match_3": 3.87,
      "eval_nq_num_pred_words": 48.586,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.16317555391938,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.44475224986269324,
      "eval_nq_runtime": 10.4687,
      "eval_nq_samples_per_second": 47.761,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.4599874957344267,
      "eval_nq_token_set_f1_sem": 0.004955143824878148,
      "eval_nq_token_set_precision": 0.41495361764274546,
      "eval_nq_token_set_recall": 0.5260386940884074,
      "eval_nq_true_num_tokens": 64.0,
      "step": 106875
    },
    {
      "epoch": 20.52,
      "learning_rate": 0.001,
      "loss": 2.5879,
      "step": 106884
    },
    {
      "epoch": 20.53,
      "learning_rate": 0.001,
      "loss": 2.5934,
      "step": 106896
    },
    {
      "epoch": 20.53,
      "learning_rate": 0.001,
      "loss": 2.5952,
      "step": 106908
    },
    {
      "epoch": 20.53,
      "learning_rate": 0.001,
      "loss": 2.586,
      "step": 106920
    },
    {
      "epoch": 20.53,
      "learning_rate": 0.001,
      "loss": 2.5832,
      "step": 106932
    },
    {
      "epoch": 20.53,
      "learning_rate": 0.001,
      "loss": 2.5819,
      "step": 106944
    },
    {
      "epoch": 20.54,
      "learning_rate": 0.001,
      "loss": 2.5924,
      "step": 106956
    },
    {
      "epoch": 20.54,
      "learning_rate": 0.001,
      "loss": 2.5899,
      "step": 106968
    },
    {
      "epoch": 20.54,
      "learning_rate": 0.001,
      "loss": 2.5951,
      "step": 106980
    },
    {
      "epoch": 20.54,
      "learning_rate": 0.001,
      "loss": 2.5969,
      "step": 106992
    },
    {
      "epoch": 20.55,
      "learning_rate": 0.001,
      "loss": 2.5901,
      "step": 107004
    },
    {
      "epoch": 20.55,
      "learning_rate": 0.001,
      "loss": 2.5886,
      "step": 107016
    },
    {
      "epoch": 20.55,
      "learning_rate": 0.001,
      "loss": 2.5925,
      "step": 107028
    },
    {
      "epoch": 20.55,
      "learning_rate": 0.001,
      "loss": 2.5873,
      "step": 107040
    },
    {
      "epoch": 20.56,
      "learning_rate": 0.001,
      "loss": 2.5922,
      "step": 107052
    },
    {
      "epoch": 20.56,
      "learning_rate": 0.001,
      "loss": 2.5842,
      "step": 107064
    },
    {
      "epoch": 20.56,
      "learning_rate": 0.001,
      "loss": 2.5924,
      "step": 107076
    },
    {
      "epoch": 20.56,
      "learning_rate": 0.001,
      "loss": 2.6001,
      "step": 107088
    },
    {
      "epoch": 20.56,
      "learning_rate": 0.001,
      "loss": 2.5928,
      "step": 107100
    },
    {
      "epoch": 20.57,
      "learning_rate": 0.001,
      "loss": 2.5878,
      "step": 107112
    },
    {
      "epoch": 20.57,
      "learning_rate": 0.001,
      "loss": 2.6031,
      "step": 107124
    },
    {
      "epoch": 20.57,
      "learning_rate": 0.001,
      "loss": 2.601,
      "step": 107136
    },
    {
      "epoch": 20.57,
      "learning_rate": 0.001,
      "loss": 2.5832,
      "step": 107148
    },
    {
      "epoch": 20.58,
      "learning_rate": 0.001,
      "loss": 2.589,
      "step": 107160
    },
    {
      "epoch": 20.58,
      "learning_rate": 0.001,
      "loss": 2.5807,
      "step": 107172
    },
    {
      "epoch": 20.58,
      "learning_rate": 0.001,
      "loss": 2.5893,
      "step": 107184
    },
    {
      "epoch": 20.58,
      "learning_rate": 0.001,
      "loss": 2.5943,
      "step": 107196
    },
    {
      "epoch": 20.59,
      "learning_rate": 0.001,
      "loss": 2.5915,
      "step": 107208
    },
    {
      "epoch": 20.59,
      "learning_rate": 0.001,
      "loss": 2.5866,
      "step": 107220
    },
    {
      "epoch": 20.59,
      "learning_rate": 0.001,
      "loss": 2.5802,
      "step": 107232
    },
    {
      "epoch": 20.59,
      "learning_rate": 0.001,
      "loss": 2.5905,
      "step": 107244
    },
    {
      "epoch": 20.59,
      "learning_rate": 0.001,
      "loss": 2.5936,
      "step": 107256
    },
    {
      "epoch": 20.6,
      "learning_rate": 0.001,
      "loss": 2.5892,
      "step": 107268
    },
    {
      "epoch": 20.6,
      "learning_rate": 0.001,
      "loss": 2.5819,
      "step": 107280
    },
    {
      "epoch": 20.6,
      "learning_rate": 0.001,
      "loss": 2.5936,
      "step": 107292
    },
    {
      "epoch": 20.6,
      "learning_rate": 0.001,
      "loss": 2.5881,
      "step": 107304
    },
    {
      "epoch": 20.61,
      "learning_rate": 0.001,
      "loss": 2.5856,
      "step": 107316
    },
    {
      "epoch": 20.61,
      "learning_rate": 0.001,
      "loss": 2.606,
      "step": 107328
    },
    {
      "epoch": 20.61,
      "learning_rate": 0.001,
      "loss": 2.5822,
      "step": 107340
    },
    {
      "epoch": 20.61,
      "learning_rate": 0.001,
      "loss": 2.5896,
      "step": 107352
    },
    {
      "epoch": 20.62,
      "learning_rate": 0.001,
      "loss": 2.5825,
      "step": 107364
    },
    {
      "epoch": 20.62,
      "learning_rate": 0.001,
      "loss": 2.593,
      "step": 107376
    },
    {
      "epoch": 20.62,
      "learning_rate": 0.001,
      "loss": 2.5874,
      "step": 107388
    },
    {
      "epoch": 20.62,
      "learning_rate": 0.001,
      "loss": 2.5918,
      "step": 107400
    },
    {
      "epoch": 20.62,
      "learning_rate": 0.001,
      "loss": 2.5892,
      "step": 107412
    },
    {
      "epoch": 20.63,
      "learning_rate": 0.001,
      "loss": 2.5901,
      "step": 107424
    },
    {
      "epoch": 20.63,
      "learning_rate": 0.001,
      "loss": 2.5821,
      "step": 107436
    },
    {
      "epoch": 20.63,
      "learning_rate": 0.001,
      "loss": 2.5889,
      "step": 107448
    },
    {
      "epoch": 20.63,
      "learning_rate": 0.001,
      "loss": 2.5887,
      "step": 107460
    },
    {
      "epoch": 20.64,
      "learning_rate": 0.001,
      "loss": 2.5858,
      "step": 107472
    },
    {
      "epoch": 20.64,
      "learning_rate": 0.001,
      "loss": 2.5912,
      "step": 107484
    },
    {
      "epoch": 20.64,
      "learning_rate": 0.001,
      "loss": 2.5882,
      "step": 107496
    },
    {
      "epoch": 20.64,
      "eval_ag_news_accuracy": 0.319375,
      "eval_ag_news_bleu_score": 4.688954603981253,
      "eval_ag_news_bleu_score_sem": 0.15133146469219155,
      "eval_ag_news_emb_cos_sim": 0.8037905097007751,
      "eval_ag_news_emb_cos_sim_sem": 0.007028073258522125,
      "eval_ag_news_emb_top1_equal": 0.28125,
      "eval_ag_news_emb_top1_equal_sem": 0.039896367485272234,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.585221290588379,
      "eval_ag_news_n_ngrams_match_1": 13.93,
      "eval_ag_news_n_ngrams_match_2": 3.008,
      "eval_ag_news_n_ngrams_match_3": 0.83,
      "eval_ag_news_num_pred_words": 46.68,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 36.06133687042489,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.34443414735216493,
      "eval_ag_news_runtime": 10.4247,
      "eval_ag_news_samples_per_second": 47.963,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.3470642592983945,
      "eval_ag_news_token_set_f1_sem": 0.004297789329807443,
      "eval_ag_news_token_set_precision": 0.33060344019138604,
      "eval_ag_news_token_set_recall": 0.38159832091759044,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 107500
    },
    {
      "epoch": 20.64,
      "eval_anthropic_toxic_prompts_accuracy": 0.112875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.091999051385931,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11981511911373215,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6684356927871704,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009511060008132297,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.27815580368042,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.128,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.85,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.69,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.868,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.526806922801747,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2102893138981995,
      "eval_anthropic_toxic_prompts_runtime": 10.4973,
      "eval_anthropic_toxic_prompts_samples_per_second": 47.631,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.095,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3549976083694851,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006362206553091603,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4326036946265551,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32971289703117035,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 107500
    },
    {
      "epoch": 20.64,
      "eval_arxiv_accuracy": 0.34278125,
      "eval_arxiv_bleu_score": 4.419931285345419,
      "eval_arxiv_bleu_score_sem": 0.1269135370474574,
      "eval_arxiv_emb_cos_sim": 0.7733661532402039,
      "eval_arxiv_emb_cos_sim_sem": 0.0072253679247576835,
      "eval_arxiv_emb_top1_equal": 0.328125,
      "eval_arxiv_emb_top1_equal_sem": 0.041664103776406315,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4513282775878906,
      "eval_arxiv_n_ngrams_match_1": 15.244,
      "eval_arxiv_n_ngrams_match_2": 3.008,
      "eval_arxiv_n_ngrams_match_3": 0.686,
      "eval_arxiv_num_pred_words": 41.026,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 31.542261374576412,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36047886469912493,
      "eval_arxiv_runtime": 10.3496,
      "eval_arxiv_samples_per_second": 48.311,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.35798543081434503,
      "eval_arxiv_token_set_f1_sem": 0.004153517692732146,
      "eval_arxiv_token_set_precision": 0.3075819086243378,
      "eval_arxiv_token_set_recall": 0.4432304342214834,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 107500
    },
    {
      "epoch": 20.64,
      "eval_python_code_alpaca_accuracy": 0.155375,
      "eval_python_code_alpaca_bleu_score": 4.291386664688456,
      "eval_python_code_alpaca_bleu_score_sem": 0.13161837014713682,
      "eval_python_code_alpaca_emb_cos_sim": 0.7419006824493408,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010992546757981525,
      "eval_python_code_alpaca_emb_top1_equal": 0.1640625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.9321656227111816,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.58,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.736,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.854,
      "eval_python_code_alpaca_num_pred_words": 43.546,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.768231425882394,
      "eval_python_code_alpaca_pred_num_tokens": 62.9921875,
      "eval_python_code_alpaca_rouge_score": 0.3210950863801495,
      "eval_python_code_alpaca_runtime": 10.7033,
      "eval_python_code_alpaca_samples_per_second": 46.715,
      "eval_python_code_alpaca_steps_per_second": 0.093,
      "eval_python_code_alpaca_token_set_f1": 0.4624423383131976,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005773163030078458,
      "eval_python_code_alpaca_token_set_precision": 0.5200071395801114,
      "eval_python_code_alpaca_token_set_recall": 0.44217830970271715,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 107500
    },
    {
      "epoch": 20.64,
      "eval_wikibio_accuracy": 0.3189375,
      "eval_wikibio_bleu_score": 5.968765219251967,
      "eval_wikibio_bleu_score_sem": 0.21871732595582424,
      "eval_wikibio_emb_cos_sim": 0.7466143369674683,
      "eval_wikibio_emb_cos_sim_sem": 0.0092694782237349,
      "eval_wikibio_emb_top1_equal": 0.1484375,
      "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.8147342205047607,
      "eval_wikibio_n_ngrams_match_1": 10.254,
      "eval_wikibio_n_ngrams_match_2": 3.424,
      "eval_wikibio_n_ngrams_match_3": 1.224,
      "eval_wikibio_num_pred_words": 37.238,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 45.364697774076674,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3580095555832107,
      "eval_wikibio_runtime": 10.1188,
      "eval_wikibio_samples_per_second": 49.413,
      "eval_wikibio_steps_per_second": 0.099,
      "eval_wikibio_token_set_f1": 0.32120994433856687,
      "eval_wikibio_token_set_f1_sem": 0.005277880820177232,
      "eval_wikibio_token_set_precision": 0.33301637481149027,
      "eval_wikibio_token_set_recall": 0.32323062131427566,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 107500
    },
    {
      "epoch": 20.64,
      "eval_nq_accuracy": 0.52453125,
      "eval_nq_bleu_score": 11.616328402222152,
      "eval_nq_bleu_score_sem": 0.47293824110718763,
      "eval_nq_emb_cos_sim": 0.8306170701980591,
      "eval_nq_emb_cos_sim_sem": 0.007003467401525859,
      "eval_nq_emb_top1_equal": 0.2890625,
      "eval_nq_emb_top1_equal_sem": 0.04022626667363519,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.2154154777526855,
      "eval_nq_n_ngrams_match_1": 23.128,
      "eval_nq_n_ngrams_match_2": 8.504,
      "eval_nq_n_ngrams_match_3": 3.892,
      "eval_nq_num_pred_words": 49.308,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.165216264116747,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.44480744966270735,
      "eval_nq_runtime": 10.5489,
      "eval_nq_samples_per_second": 47.398,
      "eval_nq_steps_per_second": 0.095,
      "eval_nq_token_set_f1": 0.46189889954581576,
      "eval_nq_token_set_f1_sem": 0.005182516616622513,
      "eval_nq_token_set_precision": 0.4203626065249666,
      "eval_nq_token_set_recall": 0.5212598925502242,
      "eval_nq_true_num_tokens": 64.0,
      "step": 107500
    },
    {
      "epoch": 20.64,
      "learning_rate": 0.001,
      "loss": 2.5877,
      "step": 107508
    },
    {
      "epoch": 20.65,
      "learning_rate": 0.001,
      "loss": 2.5879,
      "step": 107520
    },
    {
      "epoch": 20.65,
      "learning_rate": 0.001,
      "loss": 2.5912,
      "step": 107532
    },
    {
      "epoch": 20.65,
      "learning_rate": 0.001,
      "loss": 2.595,
      "step": 107544
    },
    {
      "epoch": 20.65,
      "learning_rate": 0.001,
      "loss": 2.5856,
      "step": 107556
    },
    {
      "epoch": 20.65,
      "learning_rate": 0.001,
      "loss": 2.5915,
      "step": 107568
    },
    {
      "epoch": 20.66,
      "learning_rate": 0.001,
      "loss": 2.5841,
      "step": 107580
    },
    {
      "epoch": 20.66,
      "learning_rate": 0.001,
      "loss": 2.5879,
      "step": 107592
    },
    {
      "epoch": 20.66,
      "learning_rate": 0.001,
      "loss": 2.5928,
      "step": 107604
    },
    {
      "epoch": 20.66,
      "learning_rate": 0.001,
      "loss": 2.5936,
      "step": 107616
    },
    {
      "epoch": 20.67,
      "learning_rate": 0.001,
      "loss": 2.5895,
      "step": 107628
    },
    {
      "epoch": 20.67,
      "learning_rate": 0.001,
      "loss": 2.585,
      "step": 107640
    },
    {
      "epoch": 20.67,
      "learning_rate": 0.001,
      "loss": 2.5858,
      "step": 107652
    },
    {
      "epoch": 20.67,
      "learning_rate": 0.001,
      "loss": 2.5929,
      "step": 107664
    },
    {
      "epoch": 20.68,
      "learning_rate": 0.001,
      "loss": 2.5916,
      "step": 107676
    },
    {
      "epoch": 20.68,
      "learning_rate": 0.001,
      "loss": 2.5987,
      "step": 107688
    },
    {
      "epoch": 20.68,
      "learning_rate": 0.001,
      "loss": 2.5939,
      "step": 107700
    },
    {
      "epoch": 20.68,
      "learning_rate": 0.001,
      "loss": 2.5904,
      "step": 107712
    },
    {
      "epoch": 20.68,
      "learning_rate": 0.001,
      "loss": 2.5786,
      "step": 107724
    },
    {
      "epoch": 20.69,
      "learning_rate": 0.001,
      "loss": 2.5896,
      "step": 107736
    },
    {
      "epoch": 20.69,
      "learning_rate": 0.001,
      "loss": 2.5952,
      "step": 107748
    },
    {
      "epoch": 20.69,
      "learning_rate": 0.001,
      "loss": 2.5965,
      "step": 107760
    },
    {
      "epoch": 20.69,
      "learning_rate": 0.001,
      "loss": 2.5948,
      "step": 107772
    },
    {
      "epoch": 20.7,
      "learning_rate": 0.001,
      "loss": 2.5819,
      "step": 107784
    },
    {
      "epoch": 20.7,
      "learning_rate": 0.001,
      "loss": 2.5911,
      "step": 107796
    },
    {
      "epoch": 20.7,
      "learning_rate": 0.001,
      "loss": 2.6014,
      "step": 107808
    },
    {
      "epoch": 20.7,
      "learning_rate": 0.001,
      "loss": 2.5986,
      "step": 107820
    },
    {
      "epoch": 20.71,
      "learning_rate": 0.001,
      "loss": 2.6021,
      "step": 107832
    },
    {
      "epoch": 20.71,
      "learning_rate": 0.001,
      "loss": 2.5943,
      "step": 107844
    },
    {
      "epoch": 20.71,
      "learning_rate": 0.001,
      "loss": 2.5956,
      "step": 107856
    },
    {
      "epoch": 20.71,
      "learning_rate": 0.001,
      "loss": 2.5827,
      "step": 107868
    },
    {
      "epoch": 20.71,
      "learning_rate": 0.001,
      "loss": 2.614,
      "step": 107880
    },
    {
      "epoch": 20.72,
      "learning_rate": 0.001,
      "loss": 2.588,
      "step": 107892
    },
    {
      "epoch": 20.72,
      "learning_rate": 0.001,
      "loss": 2.6008,
      "step": 107904
    },
    {
      "epoch": 20.72,
      "learning_rate": 0.001,
      "loss": 2.5895,
      "step": 107916
    },
    {
      "epoch": 20.72,
      "learning_rate": 0.001,
      "loss": 2.6044,
      "step": 107928
    },
    {
      "epoch": 20.73,
      "learning_rate": 0.001,
      "loss": 2.5922,
      "step": 107940
    },
    {
      "epoch": 20.73,
      "learning_rate": 0.001,
      "loss": 2.5917,
      "step": 107952
    },
    {
      "epoch": 20.73,
      "learning_rate": 0.001,
      "loss": 2.5961,
      "step": 107964
    },
    {
      "epoch": 20.73,
      "learning_rate": 0.001,
      "loss": 2.5929,
      "step": 107976
    },
    {
      "epoch": 20.74,
      "learning_rate": 0.001,
      "loss": 2.5928,
      "step": 107988
    },
    {
      "epoch": 20.74,
      "learning_rate": 0.001,
      "loss": 2.5919,
      "step": 108000
    },
    {
      "epoch": 20.74,
      "learning_rate": 0.001,
      "loss": 2.5973,
      "step": 108012
    },
    {
      "epoch": 20.74,
      "learning_rate": 0.001,
      "loss": 2.6022,
      "step": 108024
    },
    {
      "epoch": 20.74,
      "learning_rate": 0.001,
      "loss": 2.5978,
      "step": 108036
    },
    {
      "epoch": 20.75,
      "learning_rate": 0.001,
      "loss": 2.5983,
      "step": 108048
    },
    {
      "epoch": 20.75,
      "learning_rate": 0.001,
      "loss": 2.5856,
      "step": 108060
    },
    {
      "epoch": 20.75,
      "learning_rate": 0.001,
      "loss": 2.5956,
      "step": 108072
    },
    {
      "epoch": 20.75,
      "learning_rate": 0.001,
      "loss": 2.5829,
      "step": 108084
    },
    {
      "epoch": 20.76,
      "learning_rate": 0.001,
      "loss": 2.5904,
      "step": 108096
    },
    {
      "epoch": 20.76,
      "learning_rate": 0.001,
      "loss": 2.5794,
      "step": 108108
    },
    {
      "epoch": 20.76,
      "learning_rate": 0.001,
      "loss": 2.5955,
      "step": 108120
    },
    {
      "epoch": 20.76,
      "eval_ag_news_accuracy": 0.31875,
      "eval_ag_news_bleu_score": 4.778080978524433,
      "eval_ag_news_bleu_score_sem": 0.15357152358327375,
      "eval_ag_news_emb_cos_sim": 0.8031768798828125,
      "eval_ag_news_emb_cos_sim_sem": 0.007985539021212296,
      "eval_ag_news_emb_top1_equal": 0.2578125,
      "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5966758728027344,
      "eval_ag_news_n_ngrams_match_1": 13.998,
      "eval_ag_news_n_ngrams_match_2": 2.974,
      "eval_ag_news_n_ngrams_match_3": 0.87,
      "eval_ag_news_num_pred_words": 47.01,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 36.47677923531072,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.34382825639216164,
      "eval_ag_news_runtime": 11.2987,
      "eval_ag_news_samples_per_second": 44.253,
      "eval_ag_news_steps_per_second": 0.089,
      "eval_ag_news_token_set_f1": 0.34927534209835465,
      "eval_ag_news_token_set_f1_sem": 0.004317128705614198,
      "eval_ag_news_token_set_precision": 0.33452105991346714,
      "eval_ag_news_token_set_recall": 0.38007046870232786,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 108125
    },
    {
      "epoch": 20.76,
      "eval_anthropic_toxic_prompts_accuracy": 0.1129375,
      "eval_anthropic_toxic_prompts_bleu_score": 2.888986175307003,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12211752060455751,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6607291102409363,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009518338242034522,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2888426780700684,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.996,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.782,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.626,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.776,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.81181579078046,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20619539152934796,
      "eval_anthropic_toxic_prompts_runtime": 10.0288,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.857,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.1,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.34977827459647454,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006591544267040966,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.42263909504860236,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32634371458448963,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 108125
    },
    {
      "epoch": 20.76,
      "eval_arxiv_accuracy": 0.344,
      "eval_arxiv_bleu_score": 4.3282999216840325,
      "eval_arxiv_bleu_score_sem": 0.12465486673948048,
      "eval_arxiv_emb_cos_sim": 0.7621566653251648,
      "eval_arxiv_emb_cos_sim_sem": 0.007000118044158258,
      "eval_arxiv_emb_top1_equal": 0.2890625,
      "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4395155906677246,
      "eval_arxiv_n_ngrams_match_1": 14.954,
      "eval_arxiv_n_ngrams_match_2": 2.938,
      "eval_arxiv_n_ngrams_match_3": 0.652,
      "eval_arxiv_num_pred_words": 40.494,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 31.171854573184007,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.35776408224509265,
      "eval_arxiv_runtime": 10.5554,
      "eval_arxiv_samples_per_second": 47.369,
      "eval_arxiv_steps_per_second": 0.095,
      "eval_arxiv_token_set_f1": 0.3503111790453682,
      "eval_arxiv_token_set_f1_sem": 0.004244257589276047,
      "eval_arxiv_token_set_precision": 0.30012455740087235,
      "eval_arxiv_token_set_recall": 0.43698676108896617,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 108125
    },
    {
      "epoch": 20.76,
      "eval_python_code_alpaca_accuracy": 0.154875,
      "eval_python_code_alpaca_bleu_score": 4.26654464775827,
      "eval_python_code_alpaca_bleu_score_sem": 0.13383782823754412,
      "eval_python_code_alpaca_emb_cos_sim": 0.7454551458358765,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009964508073093386,
      "eval_python_code_alpaca_emb_top1_equal": 0.1640625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.925708532333374,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.53,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.72,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.854,
      "eval_python_code_alpaca_num_pred_words": 43.246,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.647433680026715,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3191635715121748,
      "eval_python_code_alpaca_runtime": 9.9373,
      "eval_python_code_alpaca_samples_per_second": 50.316,
      "eval_python_code_alpaca_steps_per_second": 0.101,
      "eval_python_code_alpaca_token_set_f1": 0.4636949319137289,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0060427583230633726,
      "eval_python_code_alpaca_token_set_precision": 0.5167051788675627,
      "eval_python_code_alpaca_token_set_recall": 0.44488502996542006,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 108125
    },
    {
      "epoch": 20.76,
      "eval_wikibio_accuracy": 0.31890625,
      "eval_wikibio_bleu_score": 6.028696025100964,
      "eval_wikibio_bleu_score_sem": 0.1973861884727308,
      "eval_wikibio_emb_cos_sim": 0.7399263978004456,
      "eval_wikibio_emb_cos_sim_sem": 0.00879182403829389,
      "eval_wikibio_emb_top1_equal": 0.171875,
      "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.8208425045013428,
      "eval_wikibio_n_ngrams_match_1": 10.466,
      "eval_wikibio_n_ngrams_match_2": 3.5,
      "eval_wikibio_n_ngrams_match_3": 1.236,
      "eval_wikibio_num_pred_words": 36.89,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 45.64264626143456,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3669505356921922,
      "eval_wikibio_runtime": 10.1421,
      "eval_wikibio_samples_per_second": 49.299,
      "eval_wikibio_steps_per_second": 0.099,
      "eval_wikibio_token_set_f1": 0.33002352040099997,
      "eval_wikibio_token_set_f1_sem": 0.004958464144499835,
      "eval_wikibio_token_set_precision": 0.3410126613469393,
      "eval_wikibio_token_set_recall": 0.3333282468945071,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 108125
    },
    {
      "epoch": 20.76,
      "eval_nq_accuracy": 0.52240625,
      "eval_nq_bleu_score": 11.666710848272931,
      "eval_nq_bleu_score_sem": 0.47769848330662973,
      "eval_nq_emb_cos_sim": 0.8327779173851013,
      "eval_nq_emb_cos_sim_sem": 0.006809925121896786,
      "eval_nq_emb_top1_equal": 0.21875,
      "eval_nq_emb_top1_equal_sem": 0.03668319712192295,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.215294122695923,
      "eval_nq_n_ngrams_match_1": 23.212,
      "eval_nq_n_ngrams_match_2": 8.394,
      "eval_nq_n_ngrams_match_3": 3.826,
      "eval_nq_num_pred_words": 49.362,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.164104086262341,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4465019938306465,
      "eval_nq_runtime": 10.4987,
      "eval_nq_samples_per_second": 47.625,
      "eval_nq_steps_per_second": 0.095,
      "eval_nq_token_set_f1": 0.462062337946061,
      "eval_nq_token_set_f1_sem": 0.004770450278674321,
      "eval_nq_token_set_precision": 0.4180090124924598,
      "eval_nq_token_set_recall": 0.5230308971480412,
      "eval_nq_true_num_tokens": 64.0,
      "step": 108125
    },
    {
      "epoch": 20.76,
      "learning_rate": 0.001,
      "loss": 2.594,
      "step": 108132
    },
    {
      "epoch": 20.76,
      "learning_rate": 0.001,
      "loss": 2.5848,
      "step": 108144
    },
    {
      "epoch": 20.77,
      "learning_rate": 0.001,
      "loss": 2.5946,
      "step": 108156
    },
    {
      "epoch": 20.77,
      "learning_rate": 0.001,
      "loss": 2.5957,
      "step": 108168
    },
    {
      "epoch": 20.77,
      "learning_rate": 0.001,
      "loss": 2.5875,
      "step": 108180
    },
    {
      "epoch": 20.77,
      "learning_rate": 0.001,
      "loss": 2.6001,
      "step": 108192
    },
    {
      "epoch": 20.78,
      "learning_rate": 0.001,
      "loss": 2.5924,
      "step": 108204
    },
    {
      "epoch": 20.78,
      "learning_rate": 0.001,
      "loss": 2.5955,
      "step": 108216
    },
    {
      "epoch": 20.78,
      "learning_rate": 0.001,
      "loss": 2.5963,
      "step": 108228
    },
    {
      "epoch": 20.78,
      "learning_rate": 0.001,
      "loss": 2.5909,
      "step": 108240
    },
    {
      "epoch": 20.79,
      "learning_rate": 0.001,
      "loss": 2.5954,
      "step": 108252
    },
    {
      "epoch": 20.79,
      "learning_rate": 0.001,
      "loss": 2.5976,
      "step": 108264
    },
    {
      "epoch": 20.79,
      "learning_rate": 0.001,
      "loss": 2.5986,
      "step": 108276
    },
    {
      "epoch": 20.79,
      "learning_rate": 0.001,
      "loss": 2.5904,
      "step": 108288
    },
    {
      "epoch": 20.79,
      "learning_rate": 0.001,
      "loss": 2.5883,
      "step": 108300
    },
    {
      "epoch": 20.8,
      "learning_rate": 0.001,
      "loss": 2.5968,
      "step": 108312
    },
    {
      "epoch": 20.8,
      "learning_rate": 0.001,
      "loss": 2.5805,
      "step": 108324
    },
    {
      "epoch": 20.8,
      "learning_rate": 0.001,
      "loss": 2.5921,
      "step": 108336
    },
    {
      "epoch": 20.8,
      "learning_rate": 0.001,
      "loss": 2.5888,
      "step": 108348
    },
    {
      "epoch": 20.81,
      "learning_rate": 0.001,
      "loss": 2.5838,
      "step": 108360
    },
    {
      "epoch": 20.81,
      "learning_rate": 0.001,
      "loss": 2.5947,
      "step": 108372
    },
    {
      "epoch": 20.81,
      "learning_rate": 0.001,
      "loss": 2.5857,
      "step": 108384
    },
    {
      "epoch": 20.81,
      "learning_rate": 0.001,
      "loss": 2.5777,
      "step": 108396
    },
    {
      "epoch": 20.82,
      "learning_rate": 0.001,
      "loss": 2.5895,
      "step": 108408
    },
    {
      "epoch": 20.82,
      "learning_rate": 0.001,
      "loss": 2.591,
      "step": 108420
    },
    {
      "epoch": 20.82,
      "learning_rate": 0.001,
      "loss": 2.5936,
      "step": 108432
    },
    {
      "epoch": 20.82,
      "learning_rate": 0.001,
      "loss": 2.5923,
      "step": 108444
    },
    {
      "epoch": 20.82,
      "learning_rate": 0.001,
      "loss": 2.5931,
      "step": 108456
    },
    {
      "epoch": 20.83,
      "learning_rate": 0.001,
      "loss": 2.59,
      "step": 108468
    },
    {
      "epoch": 20.83,
      "learning_rate": 0.001,
      "loss": 2.5901,
      "step": 108480
    },
    {
      "epoch": 20.83,
      "learning_rate": 0.001,
      "loss": 2.6029,
      "step": 108492
    },
    {
      "epoch": 20.83,
      "learning_rate": 0.001,
      "loss": 2.5962,
      "step": 108504
    },
    {
      "epoch": 20.84,
      "learning_rate": 0.001,
      "loss": 2.5937,
      "step": 108516
    },
    {
      "epoch": 20.84,
      "learning_rate": 0.001,
      "loss": 2.5955,
      "step": 108528
    },
    {
      "epoch": 20.84,
      "learning_rate": 0.001,
      "loss": 2.5903,
      "step": 108540
    },
    {
      "epoch": 20.84,
      "learning_rate": 0.001,
      "loss": 2.5835,
      "step": 108552
    },
    {
      "epoch": 20.85,
      "learning_rate": 0.001,
      "loss": 2.5881,
      "step": 108564
    },
    {
      "epoch": 20.85,
      "learning_rate": 0.001,
      "loss": 2.5987,
      "step": 108576
    },
    {
      "epoch": 20.85,
      "learning_rate": 0.001,
      "loss": 2.597,
      "step": 108588
    },
    {
      "epoch": 20.85,
      "learning_rate": 0.001,
      "loss": 2.5909,
      "step": 108600
    },
    {
      "epoch": 20.85,
      "learning_rate": 0.001,
      "loss": 2.5888,
      "step": 108612
    },
    {
      "epoch": 20.86,
      "learning_rate": 0.001,
      "loss": 2.5849,
      "step": 108624
    },
    {
      "epoch": 20.86,
      "learning_rate": 0.001,
      "loss": 2.5927,
      "step": 108636
    },
    {
      "epoch": 20.86,
      "learning_rate": 0.001,
      "loss": 2.5907,
      "step": 108648
    },
    {
      "epoch": 20.86,
      "learning_rate": 0.001,
      "loss": 2.5951,
      "step": 108660
    },
    {
      "epoch": 20.87,
      "learning_rate": 0.001,
      "loss": 2.5949,
      "step": 108672
    },
    {
      "epoch": 20.87,
      "learning_rate": 0.001,
      "loss": 2.5944,
      "step": 108684
    },
    {
      "epoch": 20.87,
      "learning_rate": 0.001,
      "loss": 2.5932,
      "step": 108696
    },
    {
      "epoch": 20.87,
      "learning_rate": 0.001,
      "loss": 2.5971,
      "step": 108708
    },
    {
      "epoch": 20.88,
      "learning_rate": 0.001,
      "loss": 2.5941,
      "step": 108720
    },
    {
      "epoch": 20.88,
      "learning_rate": 0.001,
      "loss": 2.5927,
      "step": 108732
    },
    {
      "epoch": 20.88,
      "learning_rate": 0.001,
      "loss": 2.592,
      "step": 108744
    },
    {
      "epoch": 20.88,
      "eval_ag_news_accuracy": 0.31871875,
      "eval_ag_news_bleu_score": 4.873370447428463,
      "eval_ag_news_bleu_score_sem": 0.15098036946663992,
      "eval_ag_news_emb_cos_sim": 0.8129647970199585,
      "eval_ag_news_emb_cos_sim_sem": 0.006730886610048699,
      "eval_ag_news_emb_top1_equal": 0.28125,
      "eval_ag_news_emb_top1_equal_sem": 0.039896367485272234,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5923500061035156,
      "eval_ag_news_n_ngrams_match_1": 14.048,
      "eval_ag_news_n_ngrams_match_2": 3.15,
      "eval_ag_news_n_ngrams_match_3": 0.902,
      "eval_ag_news_num_pred_words": 46.618,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 36.31932635634103,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3484963346129579,
      "eval_ag_news_runtime": 10.5771,
      "eval_ag_news_samples_per_second": 47.272,
      "eval_ag_news_steps_per_second": 0.095,
      "eval_ag_news_token_set_f1": 0.3504828600643719,
      "eval_ag_news_token_set_f1_sem": 0.0043611171316939555,
      "eval_ag_news_token_set_precision": 0.33493982325945754,
      "eval_ag_news_token_set_recall": 0.3827705621636813,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 108750
    },
    {
      "epoch": 20.88,
      "eval_anthropic_toxic_prompts_accuracy": 0.112625,
      "eval_anthropic_toxic_prompts_bleu_score": 2.9707463777623873,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11054395141998996,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6669542193412781,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00893271766014327,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.27938175201416,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.036,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.776,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.634,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.762,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.559347359919986,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2097143809114018,
      "eval_anthropic_toxic_prompts_runtime": 9.9603,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.199,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.1,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35240630743276874,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006275436050904172,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.42992016538312583,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32449227795144736,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 108750
    },
    {
      "epoch": 20.88,
      "eval_arxiv_accuracy": 0.348125,
      "eval_arxiv_bleu_score": 4.400249783797118,
      "eval_arxiv_bleu_score_sem": 0.12567661947030506,
      "eval_arxiv_emb_cos_sim": 0.7580965757369995,
      "eval_arxiv_emb_cos_sim_sem": 0.007996936451011953,
      "eval_arxiv_emb_top1_equal": 0.2734375,
      "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4323618412017822,
      "eval_arxiv_n_ngrams_match_1": 14.994,
      "eval_arxiv_n_ngrams_match_2": 2.956,
      "eval_arxiv_n_ngrams_match_3": 0.692,
      "eval_arxiv_num_pred_words": 40.414,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 30.949654664029552,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.35805403599114105,
      "eval_arxiv_runtime": 10.3656,
      "eval_arxiv_samples_per_second": 48.236,
      "eval_arxiv_steps_per_second": 0.096,
      "eval_arxiv_token_set_f1": 0.349453693463863,
      "eval_arxiv_token_set_f1_sem": 0.0042896941451422265,
      "eval_arxiv_token_set_precision": 0.30031323232159085,
      "eval_arxiv_token_set_recall": 0.43436994637610643,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 108750
    },
    {
      "epoch": 20.88,
      "eval_python_code_alpaca_accuracy": 0.15665625,
      "eval_python_code_alpaca_bleu_score": 4.371294824286917,
      "eval_python_code_alpaca_bleu_score_sem": 0.1365779538529853,
      "eval_python_code_alpaca_emb_cos_sim": 0.7505536079406738,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.00804314981185964,
      "eval_python_code_alpaca_emb_top1_equal": 0.109375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.9335525035858154,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.668,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.73,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.848,
      "eval_python_code_alpaca_num_pred_words": 43.104,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.794278785214857,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.32916010935591244,
      "eval_python_code_alpaca_runtime": 11.1488,
      "eval_python_code_alpaca_samples_per_second": 44.848,
      "eval_python_code_alpaca_steps_per_second": 0.09,
      "eval_python_code_alpaca_token_set_f1": 0.46798239105152967,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005458406793191103,
      "eval_python_code_alpaca_token_set_precision": 0.5250172107024541,
      "eval_python_code_alpaca_token_set_recall": 0.44627157475698254,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 108750
    },
    {
      "epoch": 20.88,
      "eval_wikibio_accuracy": 0.32478125,
      "eval_wikibio_bleu_score": 6.379616694657736,
      "eval_wikibio_bleu_score_sem": 0.23158062316896835,
      "eval_wikibio_emb_cos_sim": 0.7379165887832642,
      "eval_wikibio_emb_cos_sim_sem": 0.008865217506619548,
      "eval_wikibio_emb_top1_equal": 0.203125,
      "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7640633583068848,
      "eval_wikibio_n_ngrams_match_1": 10.296,
      "eval_wikibio_n_ngrams_match_2": 3.59,
      "eval_wikibio_n_ngrams_match_3": 1.354,
      "eval_wikibio_num_pred_words": 36.804,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 43.12329586112143,
      "eval_wikibio_pred_num_tokens": 62.9921875,
      "eval_wikibio_rouge_score": 0.3595443509799587,
      "eval_wikibio_runtime": 10.0145,
      "eval_wikibio_samples_per_second": 49.928,
      "eval_wikibio_steps_per_second": 0.1,
      "eval_wikibio_token_set_f1": 0.32544783434585106,
      "eval_wikibio_token_set_f1_sem": 0.005209346479870393,
      "eval_wikibio_token_set_precision": 0.33508837052885965,
      "eval_wikibio_token_set_recall": 0.3302157900476604,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 108750
    },
    {
      "epoch": 20.88,
      "eval_nq_accuracy": 0.524625,
      "eval_nq_bleu_score": 11.509977937884306,
      "eval_nq_bleu_score_sem": 0.4798335647255485,
      "eval_nq_emb_cos_sim": 0.8304468989372253,
      "eval_nq_emb_cos_sim_sem": 0.007206013960212579,
      "eval_nq_emb_top1_equal": 0.28125,
      "eval_nq_emb_top1_equal_sem": 0.039896367485272234,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.2169437408447266,
      "eval_nq_n_ngrams_match_1": 22.826,
      "eval_nq_n_ngrams_match_2": 8.26,
      "eval_nq_n_ngrams_match_3": 3.77,
      "eval_nq_num_pred_words": 49.15,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.179233834403153,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.44084884462137924,
      "eval_nq_runtime": 11.6632,
      "eval_nq_samples_per_second": 42.87,
      "eval_nq_steps_per_second": 0.086,
      "eval_nq_token_set_f1": 0.4570505234090874,
      "eval_nq_token_set_f1_sem": 0.004903078945843605,
      "eval_nq_token_set_precision": 0.4148243655339545,
      "eval_nq_token_set_recall": 0.5166141597770819,
      "eval_nq_true_num_tokens": 64.0,
      "step": 108750
    },
    {
      "epoch": 20.88,
      "learning_rate": 0.001,
      "loss": 2.5986,
      "step": 108756
    },
    {
      "epoch": 20.88,
      "learning_rate": 0.001,
      "loss": 2.5852,
      "step": 108768
    },
    {
      "epoch": 20.89,
      "learning_rate": 0.001,
      "loss": 2.5871,
      "step": 108780
    },
    {
      "epoch": 20.89,
      "learning_rate": 0.001,
      "loss": 2.6143,
      "step": 108792
    },
    {
      "epoch": 20.89,
      "learning_rate": 0.001,
      "loss": 2.5976,
      "step": 108804
    },
    {
      "epoch": 20.89,
      "learning_rate": 0.001,
      "loss": 2.5864,
      "step": 108816
    },
    {
      "epoch": 20.9,
      "learning_rate": 0.001,
      "loss": 2.5953,
      "step": 108828
    },
    {
      "epoch": 20.9,
      "learning_rate": 0.001,
      "loss": 2.5959,
      "step": 108840
    },
    {
      "epoch": 20.9,
      "learning_rate": 0.001,
      "loss": 2.5949,
      "step": 108852
    },
    {
      "epoch": 20.9,
      "learning_rate": 0.001,
      "loss": 2.5964,
      "step": 108864
    },
    {
      "epoch": 20.91,
      "learning_rate": 0.001,
      "loss": 2.5975,
      "step": 108876
    },
    {
      "epoch": 20.91,
      "learning_rate": 0.001,
      "loss": 2.6015,
      "step": 108888
    },
    {
      "epoch": 20.91,
      "learning_rate": 0.001,
      "loss": 2.5889,
      "step": 108900
    },
    {
      "epoch": 20.91,
      "learning_rate": 0.001,
      "loss": 2.5898,
      "step": 108912
    },
    {
      "epoch": 20.91,
      "learning_rate": 0.001,
      "loss": 2.5955,
      "step": 108924
    },
    {
      "epoch": 20.92,
      "learning_rate": 0.001,
      "loss": 2.5904,
      "step": 108936
    },
    {
      "epoch": 20.92,
      "learning_rate": 0.001,
      "loss": 2.5965,
      "step": 108948
    },
    {
      "epoch": 20.92,
      "learning_rate": 0.001,
      "loss": 2.5903,
      "step": 108960
    },
    {
      "epoch": 20.92,
      "learning_rate": 0.001,
      "loss": 2.5975,
      "step": 108972
    },
    {
      "epoch": 20.93,
      "learning_rate": 0.001,
      "loss": 2.5952,
      "step": 108984
    },
    {
      "epoch": 20.93,
      "learning_rate": 0.001,
      "loss": 2.5972,
      "step": 108996
    },
    {
      "epoch": 20.93,
      "learning_rate": 0.001,
      "loss": 2.5893,
      "step": 109008
    },
    {
      "epoch": 20.93,
      "learning_rate": 0.001,
      "loss": 2.5887,
      "step": 109020
    },
    {
      "epoch": 20.94,
      "learning_rate": 0.001,
      "loss": 2.5864,
      "step": 109032
    },
    {
      "epoch": 20.94,
      "learning_rate": 0.001,
      "loss": 2.5989,
      "step": 109044
    },
    {
      "epoch": 20.94,
      "learning_rate": 0.001,
      "loss": 2.5898,
      "step": 109056
    },
    {
      "epoch": 20.94,
      "learning_rate": 0.001,
      "loss": 2.5829,
      "step": 109068
    },
    {
      "epoch": 20.94,
      "learning_rate": 0.001,
      "loss": 2.5828,
      "step": 109080
    },
    {
      "epoch": 20.95,
      "learning_rate": 0.001,
      "loss": 2.5965,
      "step": 109092
    },
    {
      "epoch": 20.95,
      "learning_rate": 0.001,
      "loss": 2.5896,
      "step": 109104
    },
    {
      "epoch": 20.95,
      "learning_rate": 0.001,
      "loss": 2.5889,
      "step": 109116
    },
    {
      "epoch": 20.95,
      "learning_rate": 0.001,
      "loss": 2.5948,
      "step": 109128
    },
    {
      "epoch": 20.96,
      "learning_rate": 0.001,
      "loss": 2.5994,
      "step": 109140
    },
    {
      "epoch": 20.96,
      "learning_rate": 0.001,
      "loss": 2.5854,
      "step": 109152
    },
    {
      "epoch": 20.96,
      "learning_rate": 0.001,
      "loss": 2.6009,
      "step": 109164
    },
    {
      "epoch": 20.96,
      "learning_rate": 0.001,
      "loss": 2.5927,
      "step": 109176
    },
    {
      "epoch": 20.97,
      "learning_rate": 0.001,
      "loss": 2.5894,
      "step": 109188
    },
    {
      "epoch": 20.97,
      "learning_rate": 0.001,
      "loss": 2.5915,
      "step": 109200
    },
    {
      "epoch": 20.97,
      "learning_rate": 0.001,
      "loss": 2.5844,
      "step": 109212
    },
    {
      "epoch": 20.97,
      "learning_rate": 0.001,
      "loss": 2.5824,
      "step": 109224
    },
    {
      "epoch": 20.97,
      "learning_rate": 0.001,
      "loss": 2.5924,
      "step": 109236
    },
    {
      "epoch": 20.98,
      "learning_rate": 0.001,
      "loss": 2.5927,
      "step": 109248
    },
    {
      "epoch": 20.98,
      "learning_rate": 0.001,
      "loss": 2.5919,
      "step": 109260
    },
    {
      "epoch": 20.98,
      "learning_rate": 0.001,
      "loss": 2.5869,
      "step": 109272
    },
    {
      "epoch": 20.98,
      "learning_rate": 0.001,
      "loss": 2.5921,
      "step": 109284
    },
    {
      "epoch": 20.99,
      "learning_rate": 0.001,
      "loss": 2.5836,
      "step": 109296
    },
    {
      "epoch": 20.99,
      "learning_rate": 0.001,
      "loss": 2.5932,
      "step": 109308
    },
    {
      "epoch": 20.99,
      "learning_rate": 0.001,
      "loss": 2.5817,
      "step": 109320
    },
    {
      "epoch": 20.99,
      "learning_rate": 0.001,
      "loss": 2.5863,
      "step": 109332
    },
    {
      "epoch": 21.0,
      "learning_rate": 0.001,
      "loss": 2.5813,
      "step": 109344
    },
    {
      "epoch": 21.0,
      "learning_rate": 0.001,
      "loss": 2.5905,
      "step": 109356
    },
    {
      "epoch": 21.0,
      "learning_rate": 0.001,
      "loss": 2.6043,
      "step": 109368
    },
    {
      "epoch": 21.0,
      "eval_ag_news_accuracy": 0.3196875,
      "eval_ag_news_bleu_score": 4.730510371239236,
      "eval_ag_news_bleu_score_sem": 0.15493241022512916,
      "eval_ag_news_emb_cos_sim": 0.8005703091621399,
      "eval_ag_news_emb_cos_sim_sem": 0.0073144084105930115,
      "eval_ag_news_emb_top1_equal": 0.234375,
      "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5906450748443604,
      "eval_ag_news_n_ngrams_match_1": 13.804,
      "eval_ag_news_n_ngrams_match_2": 3.012,
      "eval_ag_news_n_ngrams_match_3": 0.822,
      "eval_ag_news_num_pred_words": 45.954,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 36.2574571578766,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.34282707021054426,
      "eval_ag_news_runtime": 10.4718,
      "eval_ag_news_samples_per_second": 47.747,
      "eval_ag_news_steps_per_second": 0.095,
      "eval_ag_news_token_set_f1": 0.34755710795872863,
      "eval_ag_news_token_set_f1_sem": 0.004478178591860787,
      "eval_ag_news_token_set_precision": 0.3275949553581574,
      "eval_ag_news_token_set_recall": 0.39171616518682495,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 109375
    },
    {
      "epoch": 21.0,
      "eval_anthropic_toxic_prompts_accuracy": 0.1121875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.008519808706273,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10933591519156527,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6628186702728271,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00969850482416196,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2805299758911133,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.158,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.876,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.672,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.604,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.589860951579723,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20973456911770016,
      "eval_anthropic_toxic_prompts_runtime": 10.3079,
      "eval_anthropic_toxic_prompts_samples_per_second": 48.506,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.097,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35876685150393867,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00638851690340122,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4328364387192545,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3353883399722931,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 109375
    },
    {
      "epoch": 21.0,
      "eval_arxiv_accuracy": 0.347125,
      "eval_arxiv_bleu_score": 4.19404361346,
      "eval_arxiv_bleu_score_sem": 0.11947950330485461,
      "eval_arxiv_emb_cos_sim": 0.7596526741981506,
      "eval_arxiv_emb_cos_sim_sem": 0.006617105924137477,
      "eval_arxiv_emb_top1_equal": 0.296875,
      "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4264883995056152,
      "eval_arxiv_n_ngrams_match_1": 14.846,
      "eval_arxiv_n_ngrams_match_2": 2.856,
      "eval_arxiv_n_ngrams_match_3": 0.62,
      "eval_arxiv_num_pred_words": 40.36,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 30.768406468246848,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.35357518687409834,
      "eval_arxiv_runtime": 10.4697,
      "eval_arxiv_samples_per_second": 47.757,
      "eval_arxiv_steps_per_second": 0.096,
      "eval_arxiv_token_set_f1": 0.3498007105998447,
      "eval_arxiv_token_set_f1_sem": 0.00410151461643203,
      "eval_arxiv_token_set_precision": 0.2999278934988312,
      "eval_arxiv_token_set_recall": 0.4382757289031304,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 109375
    },
    {
      "epoch": 21.0,
      "eval_python_code_alpaca_accuracy": 0.154625,
      "eval_python_code_alpaca_bleu_score": 4.113769871700874,
      "eval_python_code_alpaca_bleu_score_sem": 0.12810463664688804,
      "eval_python_code_alpaca_emb_cos_sim": 0.7502652406692505,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009955928555243387,
      "eval_python_code_alpaca_emb_top1_equal": 0.171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03347745514062371,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.9653851985931396,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.708,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.726,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.82,
      "eval_python_code_alpaca_num_pred_words": 44.962,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 19.402175492932308,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.32136878047698314,
      "eval_python_code_alpaca_runtime": 10.3949,
      "eval_python_code_alpaca_samples_per_second": 48.101,
      "eval_python_code_alpaca_steps_per_second": 0.096,
      "eval_python_code_alpaca_token_set_f1": 0.4684747450950481,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005246657581335209,
      "eval_python_code_alpaca_token_set_precision": 0.5298615094570119,
      "eval_python_code_alpaca_token_set_recall": 0.4440887552401949,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 109375
    },
    {
      "epoch": 21.0,
      "eval_wikibio_accuracy": 0.32084375,
      "eval_wikibio_bleu_score": 5.700113263717459,
      "eval_wikibio_bleu_score_sem": 0.19907505100010206,
      "eval_wikibio_emb_cos_sim": 0.7308796644210815,
      "eval_wikibio_emb_cos_sim_sem": 0.009892154446545834,
      "eval_wikibio_emb_top1_equal": 0.15625,
      "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7788538932800293,
      "eval_wikibio_n_ngrams_match_1": 9.648,
      "eval_wikibio_n_ngrams_match_2": 3.236,
      "eval_wikibio_n_ngrams_match_3": 1.146,
      "eval_wikibio_num_pred_words": 35.132,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 43.76585264222076,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3387349133127342,
      "eval_wikibio_runtime": 10.5438,
      "eval_wikibio_samples_per_second": 47.421,
      "eval_wikibio_steps_per_second": 0.095,
      "eval_wikibio_token_set_f1": 0.30832923022014436,
      "eval_wikibio_token_set_f1_sem": 0.005774482559036823,
      "eval_wikibio_token_set_precision": 0.31246205377761976,
      "eval_wikibio_token_set_recall": 0.3205897710855152,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 109375
    },
    {
      "epoch": 21.0,
      "eval_nq_accuracy": 0.5234375,
      "eval_nq_bleu_score": 11.772185945630039,
      "eval_nq_bleu_score_sem": 0.4861501353722335,
      "eval_nq_emb_cos_sim": 0.828109622001648,
      "eval_nq_emb_cos_sim_sem": 0.007422867398335226,
      "eval_nq_emb_top1_equal": 0.2890625,
      "eval_nq_emb_top1_equal_sem": 0.04022626667363519,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.216416120529175,
      "eval_nq_n_ngrams_match_1": 22.762,
      "eval_nq_n_ngrams_match_2": 8.378,
      "eval_nq_n_ngrams_match_3": 3.934,
      "eval_nq_num_pred_words": 49.014,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.174391961598474,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4390581071380426,
      "eval_nq_runtime": 10.6081,
      "eval_nq_samples_per_second": 47.134,
      "eval_nq_steps_per_second": 0.094,
      "eval_nq_token_set_f1": 0.45611953282382467,
      "eval_nq_token_set_f1_sem": 0.005094695798645029,
      "eval_nq_token_set_precision": 0.41226724652556246,
      "eval_nq_token_set_recall": 0.5193156242655752,
      "eval_nq_true_num_tokens": 64.0,
      "step": 109375
    },
    {
      "epoch": 21.0,
      "learning_rate": 0.001,
      "loss": 2.5761,
      "step": 109380
    },
    {
      "epoch": 21.0,
      "learning_rate": 0.001,
      "loss": 2.5736,
      "step": 109392
    },
    {
      "epoch": 21.01,
      "learning_rate": 0.001,
      "loss": 2.5761,
      "step": 109404
    },
    {
      "epoch": 21.01,
      "learning_rate": 0.001,
      "loss": 2.5678,
      "step": 109416
    },
    {
      "epoch": 21.01,
      "learning_rate": 0.001,
      "loss": 2.5752,
      "step": 109428
    },
    {
      "epoch": 21.01,
      "learning_rate": 0.001,
      "loss": 2.5707,
      "step": 109440
    },
    {
      "epoch": 21.02,
      "learning_rate": 0.001,
      "loss": 2.5797,
      "step": 109452
    },
    {
      "epoch": 21.02,
      "learning_rate": 0.001,
      "loss": 2.5759,
      "step": 109464
    },
    {
      "epoch": 21.02,
      "learning_rate": 0.001,
      "loss": 2.5779,
      "step": 109476
    },
    {
      "epoch": 21.02,
      "learning_rate": 0.001,
      "loss": 2.5827,
      "step": 109488
    },
    {
      "epoch": 21.03,
      "learning_rate": 0.001,
      "loss": 2.5625,
      "step": 109500
    },
    {
      "epoch": 21.03,
      "learning_rate": 0.001,
      "loss": 2.5727,
      "step": 109512
    },
    {
      "epoch": 21.03,
      "learning_rate": 0.001,
      "loss": 2.5668,
      "step": 109524
    },
    {
      "epoch": 21.03,
      "learning_rate": 0.001,
      "loss": 2.5755,
      "step": 109536
    },
    {
      "epoch": 21.03,
      "learning_rate": 0.001,
      "loss": 2.5776,
      "step": 109548
    },
    {
      "epoch": 21.04,
      "learning_rate": 0.001,
      "loss": 2.5773,
      "step": 109560
    },
    {
      "epoch": 21.04,
      "learning_rate": 0.001,
      "loss": 2.5795,
      "step": 109572
    },
    {
      "epoch": 21.04,
      "learning_rate": 0.001,
      "loss": 2.5734,
      "step": 109584
    },
    {
      "epoch": 21.04,
      "learning_rate": 0.001,
      "loss": 2.5727,
      "step": 109596
    },
    {
      "epoch": 21.05,
      "learning_rate": 0.001,
      "loss": 2.5815,
      "step": 109608
    },
    {
      "epoch": 21.05,
      "learning_rate": 0.001,
      "loss": 2.5819,
      "step": 109620
    },
    {
      "epoch": 21.05,
      "learning_rate": 0.001,
      "loss": 2.5762,
      "step": 109632
    },
    {
      "epoch": 21.05,
      "learning_rate": 0.001,
      "loss": 2.5624,
      "step": 109644
    },
    {
      "epoch": 21.06,
      "learning_rate": 0.001,
      "loss": 2.575,
      "step": 109656
    },
    {
      "epoch": 21.06,
      "learning_rate": 0.001,
      "loss": 2.5547,
      "step": 109668
    },
    {
      "epoch": 21.06,
      "learning_rate": 0.001,
      "loss": 2.5775,
      "step": 109680
    },
    {
      "epoch": 21.06,
      "learning_rate": 0.001,
      "loss": 2.5729,
      "step": 109692
    },
    {
      "epoch": 21.06,
      "learning_rate": 0.001,
      "loss": 2.5743,
      "step": 109704
    },
    {
      "epoch": 21.07,
      "learning_rate": 0.001,
      "loss": 2.5728,
      "step": 109716
    },
    {
      "epoch": 21.07,
      "learning_rate": 0.001,
      "loss": 2.5842,
      "step": 109728
    },
    {
      "epoch": 21.07,
      "learning_rate": 0.001,
      "loss": 2.572,
      "step": 109740
    },
    {
      "epoch": 21.07,
      "learning_rate": 0.001,
      "loss": 2.5823,
      "step": 109752
    },
    {
      "epoch": 21.08,
      "learning_rate": 0.001,
      "loss": 2.576,
      "step": 109764
    },
    {
      "epoch": 21.08,
      "learning_rate": 0.001,
      "loss": 2.57,
      "step": 109776
    },
    {
      "epoch": 21.08,
      "learning_rate": 0.001,
      "loss": 2.5748,
      "step": 109788
    },
    {
      "epoch": 21.08,
      "learning_rate": 0.001,
      "loss": 2.5776,
      "step": 109800
    },
    {
      "epoch": 21.09,
      "learning_rate": 0.001,
      "loss": 2.5847,
      "step": 109812
    },
    {
      "epoch": 21.09,
      "learning_rate": 0.001,
      "loss": 2.5664,
      "step": 109824
    },
    {
      "epoch": 21.09,
      "learning_rate": 0.001,
      "loss": 2.5742,
      "step": 109836
    },
    {
      "epoch": 21.09,
      "learning_rate": 0.001,
      "loss": 2.5701,
      "step": 109848
    },
    {
      "epoch": 21.09,
      "learning_rate": 0.001,
      "loss": 2.5723,
      "step": 109860
    },
    {
      "epoch": 21.1,
      "learning_rate": 0.001,
      "loss": 2.5624,
      "step": 109872
    },
    {
      "epoch": 21.1,
      "learning_rate": 0.001,
      "loss": 2.575,
      "step": 109884
    },
    {
      "epoch": 21.1,
      "learning_rate": 0.001,
      "loss": 2.5749,
      "step": 109896
    },
    {
      "epoch": 21.1,
      "learning_rate": 0.001,
      "loss": 2.5818,
      "step": 109908
    },
    {
      "epoch": 21.11,
      "learning_rate": 0.001,
      "loss": 2.5769,
      "step": 109920
    },
    {
      "epoch": 21.11,
      "learning_rate": 0.001,
      "loss": 2.5793,
      "step": 109932
    },
    {
      "epoch": 21.11,
      "learning_rate": 0.001,
      "loss": 2.5702,
      "step": 109944
    },
    {
      "epoch": 21.11,
      "learning_rate": 0.001,
      "loss": 2.5792,
      "step": 109956
    },
    {
      "epoch": 21.12,
      "learning_rate": 0.001,
      "loss": 2.5654,
      "step": 109968
    },
    {
      "epoch": 21.12,
      "learning_rate": 0.001,
      "loss": 2.5888,
      "step": 109980
    },
    {
      "epoch": 21.12,
      "learning_rate": 0.001,
      "loss": 2.5736,
      "step": 109992
    },
    {
      "epoch": 21.12,
      "eval_ag_news_accuracy": 0.31775,
      "eval_ag_news_bleu_score": 4.702167748990216,
      "eval_ag_news_bleu_score_sem": 0.15081791411931167,
      "eval_ag_news_emb_cos_sim": 0.8027331829071045,
      "eval_ag_news_emb_cos_sim_sem": 0.0073558143965324245,
      "eval_ag_news_emb_top1_equal": 0.25,
      "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.606444835662842,
      "eval_ag_news_n_ngrams_match_1": 13.806,
      "eval_ag_news_n_ngrams_match_2": 2.974,
      "eval_ag_news_n_ngrams_match_3": 0.864,
      "eval_ag_news_num_pred_words": 46.518,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 36.83486575612089,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.34237616129242276,
      "eval_ag_news_runtime": 12.0025,
      "eval_ag_news_samples_per_second": 41.658,
      "eval_ag_news_steps_per_second": 0.083,
      "eval_ag_news_token_set_f1": 0.3466558310744491,
      "eval_ag_news_token_set_f1_sem": 0.00435949853196166,
      "eval_ag_news_token_set_precision": 0.3306004090612312,
      "eval_ag_news_token_set_recall": 0.3809390945975503,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 110000
    },
    {
      "epoch": 21.12,
      "eval_anthropic_toxic_prompts_accuracy": 0.11384375,
      "eval_anthropic_toxic_prompts_bleu_score": 2.9259785094080306,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10997722118238093,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6563278436660767,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010086220269058753,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.265777349472046,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.134,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.826,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.642,
      "eval_anthropic_toxic_prompts_num_pred_words": 48.292,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.20046999828643,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20985560656279834,
      "eval_anthropic_toxic_prompts_runtime": 17.799,
      "eval_anthropic_toxic_prompts_samples_per_second": 28.091,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.056,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35484068968376375,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006543849288808072,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.42912887243858927,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32790463746217585,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 110000
    },
    {
      "epoch": 21.12,
      "eval_arxiv_accuracy": 0.34540625,
      "eval_arxiv_bleu_score": 4.261046904853632,
      "eval_arxiv_bleu_score_sem": 0.1238057453247579,
      "eval_arxiv_emb_cos_sim": 0.7614809274673462,
      "eval_arxiv_emb_cos_sim_sem": 0.006998047580225161,
      "eval_arxiv_emb_top1_equal": 0.2578125,
      "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4448392391204834,
      "eval_arxiv_n_ngrams_match_1": 14.952,
      "eval_arxiv_n_ngrams_match_2": 2.87,
      "eval_arxiv_n_ngrams_match_3": 0.616,
      "eval_arxiv_num_pred_words": 40.082,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 31.338245077852708,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.356331577107159,
      "eval_arxiv_runtime": 17.2601,
      "eval_arxiv_samples_per_second": 28.969,
      "eval_arxiv_steps_per_second": 0.058,
      "eval_arxiv_token_set_f1": 0.3496846274647558,
      "eval_arxiv_token_set_f1_sem": 0.004071850335532273,
      "eval_arxiv_token_set_precision": 0.3018160043666443,
      "eval_arxiv_token_set_recall": 0.4319014158568833,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 110000
    },
    {
      "epoch": 21.12,
      "eval_python_code_alpaca_accuracy": 0.15865625,
      "eval_python_code_alpaca_bleu_score": 4.456097075892867,
      "eval_python_code_alpaca_bleu_score_sem": 0.13793966607153263,
      "eval_python_code_alpaca_emb_cos_sim": 0.7517263889312744,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008530632115537784,
      "eval_python_code_alpaca_emb_top1_equal": 0.171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03347745514062371,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.9022841453552246,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.718,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.858,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.932,
      "eval_python_code_alpaca_num_pred_words": 44.3,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.215705205457805,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3246414445098242,
      "eval_python_code_alpaca_runtime": 12.3718,
      "eval_python_code_alpaca_samples_per_second": 40.415,
      "eval_python_code_alpaca_steps_per_second": 0.081,
      "eval_python_code_alpaca_token_set_f1": 0.4695769726186936,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005498479940050691,
      "eval_python_code_alpaca_token_set_precision": 0.5315452489571844,
      "eval_python_code_alpaca_token_set_recall": 0.4434547686631533,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 110000
    },
    {
      "epoch": 21.12,
      "eval_wikibio_accuracy": 0.32053125,
      "eval_wikibio_bleu_score": 6.233939655874294,
      "eval_wikibio_bleu_score_sem": 0.21826220314593694,
      "eval_wikibio_emb_cos_sim": 0.7423006892204285,
      "eval_wikibio_emb_cos_sim_sem": 0.00883132814769425,
      "eval_wikibio_emb_top1_equal": 0.1875,
      "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.8176321983337402,
      "eval_wikibio_n_ngrams_match_1": 10.396,
      "eval_wikibio_n_ngrams_match_2": 3.524,
      "eval_wikibio_n_ngrams_match_3": 1.282,
      "eval_wikibio_num_pred_words": 36.378,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 45.49635433920703,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3626337127284205,
      "eval_wikibio_runtime": 15.1044,
      "eval_wikibio_samples_per_second": 33.103,
      "eval_wikibio_steps_per_second": 0.066,
      "eval_wikibio_token_set_f1": 0.32924534421193435,
      "eval_wikibio_token_set_f1_sem": 0.005255782938895232,
      "eval_wikibio_token_set_precision": 0.33773896736532916,
      "eval_wikibio_token_set_recall": 0.3390088046755665,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 110000
    },
    {
      "epoch": 21.12,
      "eval_nq_accuracy": 0.525,
      "eval_nq_bleu_score": 11.387482124332884,
      "eval_nq_bleu_score_sem": 0.4816751599444313,
      "eval_nq_emb_cos_sim": 0.826781153678894,
      "eval_nq_emb_cos_sim_sem": 0.007380899015385994,
      "eval_nq_emb_top1_equal": 0.2890625,
      "eval_nq_emb_top1_equal_sem": 0.04022626667363519,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.2170486450195312,
      "eval_nq_n_ngrams_match_1": 22.874,
      "eval_nq_n_ngrams_match_2": 8.34,
      "eval_nq_n_ngrams_match_3": 3.754,
      "eval_nq_num_pred_words": 49.292,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.180196824863867,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.441090070517958,
      "eval_nq_runtime": 11.5755,
      "eval_nq_samples_per_second": 43.195,
      "eval_nq_steps_per_second": 0.086,
      "eval_nq_token_set_f1": 0.4568296963756491,
      "eval_nq_token_set_f1_sem": 0.005050259369300929,
      "eval_nq_token_set_precision": 0.4139612552603517,
      "eval_nq_token_set_recall": 0.5175484476628561,
      "eval_nq_true_num_tokens": 64.0,
      "step": 110000
    },
    {
      "epoch": 21.12,
      "learning_rate": 0.001,
      "loss": 2.57,
      "step": 110004
    },
    {
      "epoch": 21.12,
      "learning_rate": 0.001,
      "loss": 2.5801,
      "step": 110016
    },
    {
      "epoch": 21.13,
      "learning_rate": 0.001,
      "loss": 2.5897,
      "step": 110028
    },
    {
      "epoch": 21.13,
      "learning_rate": 0.001,
      "loss": 2.5614,
      "step": 110040
    },
    {
      "epoch": 21.13,
      "learning_rate": 0.001,
      "loss": 2.5782,
      "step": 110052
    },
    {
      "epoch": 21.13,
      "learning_rate": 0.001,
      "loss": 2.5854,
      "step": 110064
    },
    {
      "epoch": 21.14,
      "learning_rate": 0.001,
      "loss": 2.5781,
      "step": 110076
    },
    {
      "epoch": 21.14,
      "learning_rate": 0.001,
      "loss": 2.5763,
      "step": 110088
    },
    {
      "epoch": 21.14,
      "learning_rate": 0.001,
      "loss": 2.5847,
      "step": 110100
    },
    {
      "epoch": 21.14,
      "learning_rate": 0.001,
      "loss": 2.5859,
      "step": 110112
    },
    {
      "epoch": 21.15,
      "learning_rate": 0.001,
      "loss": 2.5768,
      "step": 110124
    },
    {
      "epoch": 21.15,
      "learning_rate": 0.001,
      "loss": 2.5742,
      "step": 110136
    },
    {
      "epoch": 21.15,
      "learning_rate": 0.001,
      "loss": 2.5826,
      "step": 110148
    },
    {
      "epoch": 21.15,
      "learning_rate": 0.001,
      "loss": 2.5816,
      "step": 110160
    },
    {
      "epoch": 21.15,
      "learning_rate": 0.001,
      "loss": 2.5789,
      "step": 110172
    },
    {
      "epoch": 21.16,
      "learning_rate": 0.001,
      "loss": 2.5779,
      "step": 110184
    },
    {
      "epoch": 21.16,
      "learning_rate": 0.001,
      "loss": 2.5816,
      "step": 110196
    },
    {
      "epoch": 21.16,
      "learning_rate": 0.001,
      "loss": 2.5784,
      "step": 110208
    },
    {
      "epoch": 21.16,
      "learning_rate": 0.001,
      "loss": 2.581,
      "step": 110220
    },
    {
      "epoch": 21.17,
      "learning_rate": 0.001,
      "loss": 2.581,
      "step": 110232
    },
    {
      "epoch": 21.17,
      "learning_rate": 0.001,
      "loss": 2.5775,
      "step": 110244
    },
    {
      "epoch": 21.17,
      "learning_rate": 0.001,
      "loss": 2.5828,
      "step": 110256
    },
    {
      "epoch": 21.17,
      "learning_rate": 0.001,
      "loss": 2.5739,
      "step": 110268
    },
    {
      "epoch": 21.18,
      "learning_rate": 0.001,
      "loss": 2.5747,
      "step": 110280
    },
    {
      "epoch": 21.18,
      "learning_rate": 0.001,
      "loss": 2.5673,
      "step": 110292
    },
    {
      "epoch": 21.18,
      "learning_rate": 0.001,
      "loss": 2.5828,
      "step": 110304
    },
    {
      "epoch": 21.18,
      "learning_rate": 0.001,
      "loss": 2.5812,
      "step": 110316
    },
    {
      "epoch": 21.18,
      "learning_rate": 0.001,
      "loss": 2.5835,
      "step": 110328
    },
    {
      "epoch": 21.19,
      "learning_rate": 0.001,
      "loss": 2.5749,
      "step": 110340
    },
    {
      "epoch": 21.19,
      "learning_rate": 0.001,
      "loss": 2.5905,
      "step": 110352
    },
    {
      "epoch": 21.19,
      "learning_rate": 0.001,
      "loss": 2.5875,
      "step": 110364
    },
    {
      "epoch": 21.19,
      "learning_rate": 0.001,
      "loss": 2.5772,
      "step": 110376
    },
    {
      "epoch": 21.2,
      "learning_rate": 0.001,
      "loss": 2.5816,
      "step": 110388
    },
    {
      "epoch": 21.2,
      "learning_rate": 0.001,
      "loss": 2.5726,
      "step": 110400
    },
    {
      "epoch": 21.2,
      "learning_rate": 0.001,
      "loss": 2.5642,
      "step": 110412
    },
    {
      "epoch": 21.2,
      "learning_rate": 0.001,
      "loss": 2.587,
      "step": 110424
    },
    {
      "epoch": 21.21,
      "learning_rate": 0.001,
      "loss": 2.5754,
      "step": 110436
    },
    {
      "epoch": 21.21,
      "learning_rate": 0.001,
      "loss": 2.5873,
      "step": 110448
    },
    {
      "epoch": 21.21,
      "learning_rate": 0.001,
      "loss": 2.5834,
      "step": 110460
    },
    {
      "epoch": 21.21,
      "learning_rate": 0.001,
      "loss": 2.5799,
      "step": 110472
    },
    {
      "epoch": 21.21,
      "learning_rate": 0.001,
      "loss": 2.5873,
      "step": 110484
    },
    {
      "epoch": 21.22,
      "learning_rate": 0.001,
      "loss": 2.5767,
      "step": 110496
    },
    {
      "epoch": 21.22,
      "learning_rate": 0.001,
      "loss": 2.5701,
      "step": 110508
    },
    {
      "epoch": 21.22,
      "learning_rate": 0.001,
      "loss": 2.5679,
      "step": 110520
    },
    {
      "epoch": 21.22,
      "learning_rate": 0.001,
      "loss": 2.58,
      "step": 110532
    },
    {
      "epoch": 21.23,
      "learning_rate": 0.001,
      "loss": 2.5881,
      "step": 110544
    },
    {
      "epoch": 21.23,
      "learning_rate": 0.001,
      "loss": 2.5787,
      "step": 110556
    },
    {
      "epoch": 21.23,
      "learning_rate": 0.001,
      "loss": 2.5892,
      "step": 110568
    },
    {
      "epoch": 21.23,
      "learning_rate": 0.001,
      "loss": 2.5783,
      "step": 110580
    },
    {
      "epoch": 21.24,
      "learning_rate": 0.001,
      "loss": 2.5766,
      "step": 110592
    },
    {
      "epoch": 21.24,
      "learning_rate": 0.001,
      "loss": 2.5782,
      "step": 110604
    },
    {
      "epoch": 21.24,
      "learning_rate": 0.001,
      "loss": 2.5824,
      "step": 110616
    },
    {
      "epoch": 21.24,
      "eval_ag_news_accuracy": 0.31928125,
      "eval_ag_news_bleu_score": 4.685418248974973,
      "eval_ag_news_bleu_score_sem": 0.15151162648658734,
      "eval_ag_news_emb_cos_sim": 0.7950277328491211,
      "eval_ag_news_emb_cos_sim_sem": 0.008317835311584313,
      "eval_ag_news_emb_top1_equal": 0.203125,
      "eval_ag_news_emb_top1_equal_sem": 0.03570055125142555,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5835213661193848,
      "eval_ag_news_n_ngrams_match_1": 13.664,
      "eval_ag_news_n_ngrams_match_2": 2.99,
      "eval_ag_news_n_ngrams_match_3": 0.86,
      "eval_ag_news_num_pred_words": 46.146,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 36.000087395983975,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3398218563210976,
      "eval_ag_news_runtime": 11.5823,
      "eval_ag_news_samples_per_second": 43.169,
      "eval_ag_news_steps_per_second": 0.086,
      "eval_ag_news_token_set_f1": 0.3415182222972952,
      "eval_ag_news_token_set_f1_sem": 0.004371714021688004,
      "eval_ag_news_token_set_precision": 0.32511685373685956,
      "eval_ag_news_token_set_recall": 0.378285218091242,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 110625
    },
    {
      "epoch": 21.24,
      "eval_anthropic_toxic_prompts_accuracy": 0.11253125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.0056202436140365,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1178465298981663,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6523705720901489,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009578207181799738,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.289741277694702,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.99,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.82,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.67,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.168,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.835919706648728,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20736422846400374,
      "eval_anthropic_toxic_prompts_runtime": 10.1856,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.089,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.098,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.352508551236691,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0065424098014101335,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.42643162192414086,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.330258278791112,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 110625
    },
    {
      "epoch": 21.24,
      "eval_arxiv_accuracy": 0.348,
      "eval_arxiv_bleu_score": 4.053475405061131,
      "eval_arxiv_bleu_score_sem": 0.12518156857898258,
      "eval_arxiv_emb_cos_sim": 0.7488660216331482,
      "eval_arxiv_emb_cos_sim_sem": 0.00953834416564253,
      "eval_arxiv_emb_top1_equal": 0.25,
      "eval_arxiv_emb_top1_equal_sem": 0.03842366440207048,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.408355951309204,
      "eval_arxiv_n_ngrams_match_1": 14.282,
      "eval_arxiv_n_ngrams_match_2": 2.682,
      "eval_arxiv_n_ngrams_match_3": 0.59,
      "eval_arxiv_num_pred_words": 39.0,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 30.2155276036984,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3455712802885348,
      "eval_arxiv_runtime": 10.6547,
      "eval_arxiv_samples_per_second": 46.928,
      "eval_arxiv_steps_per_second": 0.094,
      "eval_arxiv_token_set_f1": 0.3395123837532162,
      "eval_arxiv_token_set_f1_sem": 0.00433835253524641,
      "eval_arxiv_token_set_precision": 0.2898921560301158,
      "eval_arxiv_token_set_recall": 0.4321801776109789,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 110625
    },
    {
      "epoch": 21.24,
      "eval_python_code_alpaca_accuracy": 0.15646875,
      "eval_python_code_alpaca_bleu_score": 4.400363717217963,
      "eval_python_code_alpaca_bleu_score_sem": 0.13434013825950306,
      "eval_python_code_alpaca_emb_cos_sim": 0.741919755935669,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010611431904688242,
      "eval_python_code_alpaca_emb_top1_equal": 0.125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.937800884246826,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.572,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.742,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.85,
      "eval_python_code_alpaca_num_pred_words": 42.246,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.87429388269237,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.32680303249898707,
      "eval_python_code_alpaca_runtime": 11.6033,
      "eval_python_code_alpaca_samples_per_second": 43.091,
      "eval_python_code_alpaca_steps_per_second": 0.086,
      "eval_python_code_alpaca_token_set_f1": 0.46202769887690065,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005539596575597648,
      "eval_python_code_alpaca_token_set_precision": 0.5201054141111333,
      "eval_python_code_alpaca_token_set_recall": 0.43946663686839127,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 110625
    },
    {
      "epoch": 21.24,
      "eval_wikibio_accuracy": 0.32128125,
      "eval_wikibio_bleu_score": 5.852205602286266,
      "eval_wikibio_bleu_score_sem": 0.2096834733090805,
      "eval_wikibio_emb_cos_sim": 0.7330366969108582,
      "eval_wikibio_emb_cos_sim_sem": 0.010082510687845285,
      "eval_wikibio_emb_top1_equal": 0.171875,
      "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.784851551055908,
      "eval_wikibio_n_ngrams_match_1": 9.952,
      "eval_wikibio_n_ngrams_match_2": 3.3,
      "eval_wikibio_n_ngrams_match_3": 1.194,
      "eval_wikibio_num_pred_words": 36.088,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 44.02913399513809,
      "eval_wikibio_pred_num_tokens": 62.9921875,
      "eval_wikibio_rouge_score": 0.34965874710505507,
      "eval_wikibio_runtime": 10.0436,
      "eval_wikibio_samples_per_second": 49.783,
      "eval_wikibio_steps_per_second": 0.1,
      "eval_wikibio_token_set_f1": 0.3158548127246789,
      "eval_wikibio_token_set_f1_sem": 0.005649844857506648,
      "eval_wikibio_token_set_precision": 0.32367832119585815,
      "eval_wikibio_token_set_recall": 0.3247660459846288,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 110625
    },
    {
      "epoch": 21.24,
      "eval_nq_accuracy": 0.5248125,
      "eval_nq_bleu_score": 11.168789323686935,
      "eval_nq_bleu_score_sem": 0.4613882408217578,
      "eval_nq_emb_cos_sim": 0.8255674839019775,
      "eval_nq_emb_cos_sim_sem": 0.0077005044945446756,
      "eval_nq_emb_top1_equal": 0.2421875,
      "eval_nq_emb_top1_equal_sem": 0.038014990119662626,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.212205648422241,
      "eval_nq_n_ngrams_match_1": 22.85,
      "eval_nq_n_ngrams_match_2": 8.21,
      "eval_nq_n_ngrams_match_3": 3.702,
      "eval_nq_num_pred_words": 48.866,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.135844648287291,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4429615894331756,
      "eval_nq_runtime": 11.097,
      "eval_nq_samples_per_second": 45.057,
      "eval_nq_steps_per_second": 0.09,
      "eval_nq_token_set_f1": 0.4566688510209377,
      "eval_nq_token_set_f1_sem": 0.0049535964522480795,
      "eval_nq_token_set_precision": 0.41578913599685846,
      "eval_nq_token_set_recall": 0.51470473217856,
      "eval_nq_true_num_tokens": 64.0,
      "step": 110625
    },
    {
      "epoch": 21.24,
      "learning_rate": 0.001,
      "loss": 2.5765,
      "step": 110628
    },
    {
      "epoch": 21.24,
      "learning_rate": 0.001,
      "loss": 2.5714,
      "step": 110640
    },
    {
      "epoch": 21.25,
      "learning_rate": 0.001,
      "loss": 2.5741,
      "step": 110652
    },
    {
      "epoch": 21.25,
      "learning_rate": 0.001,
      "loss": 2.5745,
      "step": 110664
    },
    {
      "epoch": 21.25,
      "learning_rate": 0.001,
      "loss": 2.5694,
      "step": 110676
    },
    {
      "epoch": 21.25,
      "learning_rate": 0.001,
      "loss": 2.5861,
      "step": 110688
    },
    {
      "epoch": 21.26,
      "learning_rate": 0.001,
      "loss": 2.5782,
      "step": 110700
    },
    {
      "epoch": 21.26,
      "learning_rate": 0.001,
      "loss": 2.5834,
      "step": 110712
    },
    {
      "epoch": 21.26,
      "learning_rate": 0.001,
      "loss": 2.5797,
      "step": 110724
    },
    {
      "epoch": 21.26,
      "learning_rate": 0.001,
      "loss": 2.5811,
      "step": 110736
    },
    {
      "epoch": 21.26,
      "learning_rate": 0.001,
      "loss": 2.5876,
      "step": 110748
    },
    {
      "epoch": 21.27,
      "learning_rate": 0.001,
      "loss": 2.5897,
      "step": 110760
    },
    {
      "epoch": 21.27,
      "learning_rate": 0.001,
      "loss": 2.5867,
      "step": 110772
    },
    {
      "epoch": 21.27,
      "learning_rate": 0.001,
      "loss": 2.5766,
      "step": 110784
    },
    {
      "epoch": 21.27,
      "learning_rate": 0.001,
      "loss": 2.5843,
      "step": 110796
    },
    {
      "epoch": 21.28,
      "learning_rate": 0.001,
      "loss": 2.5802,
      "step": 110808
    },
    {
      "epoch": 21.28,
      "learning_rate": 0.001,
      "loss": 2.5878,
      "step": 110820
    },
    {
      "epoch": 21.28,
      "learning_rate": 0.001,
      "loss": 2.5864,
      "step": 110832
    },
    {
      "epoch": 21.28,
      "learning_rate": 0.001,
      "loss": 2.5873,
      "step": 110844
    },
    {
      "epoch": 21.29,
      "learning_rate": 0.001,
      "loss": 2.5892,
      "step": 110856
    },
    {
      "epoch": 21.29,
      "learning_rate": 0.001,
      "loss": 2.5789,
      "step": 110868
    },
    {
      "epoch": 21.29,
      "learning_rate": 0.001,
      "loss": 2.5909,
      "step": 110880
    },
    {
      "epoch": 21.29,
      "learning_rate": 0.001,
      "loss": 2.5746,
      "step": 110892
    },
    {
      "epoch": 21.29,
      "learning_rate": 0.001,
      "loss": 2.5847,
      "step": 110904
    },
    {
      "epoch": 21.3,
      "learning_rate": 0.001,
      "loss": 2.5885,
      "step": 110916
    },
    {
      "epoch": 21.3,
      "learning_rate": 0.001,
      "loss": 2.5822,
      "step": 110928
    },
    {
      "epoch": 21.3,
      "learning_rate": 0.001,
      "loss": 2.5815,
      "step": 110940
    },
    {
      "epoch": 21.3,
      "learning_rate": 0.001,
      "loss": 2.5913,
      "step": 110952
    },
    {
      "epoch": 21.31,
      "learning_rate": 0.001,
      "loss": 2.5797,
      "step": 110964
    },
    {
      "epoch": 21.31,
      "learning_rate": 0.001,
      "loss": 2.5866,
      "step": 110976
    },
    {
      "epoch": 21.31,
      "learning_rate": 0.001,
      "loss": 2.5775,
      "step": 110988
    },
    {
      "epoch": 21.31,
      "learning_rate": 0.001,
      "loss": 2.5872,
      "step": 111000
    },
    {
      "epoch": 21.32,
      "learning_rate": 0.001,
      "loss": 2.5768,
      "step": 111012
    },
    {
      "epoch": 21.32,
      "learning_rate": 0.001,
      "loss": 2.5792,
      "step": 111024
    },
    {
      "epoch": 21.32,
      "learning_rate": 0.001,
      "loss": 2.5824,
      "step": 111036
    },
    {
      "epoch": 21.32,
      "learning_rate": 0.001,
      "loss": 2.5778,
      "step": 111048
    },
    {
      "epoch": 21.32,
      "learning_rate": 0.001,
      "loss": 2.5777,
      "step": 111060
    },
    {
      "epoch": 21.33,
      "learning_rate": 0.001,
      "loss": 2.5829,
      "step": 111072
    },
    {
      "epoch": 21.33,
      "learning_rate": 0.001,
      "loss": 2.5787,
      "step": 111084
    },
    {
      "epoch": 21.33,
      "learning_rate": 0.001,
      "loss": 2.5757,
      "step": 111096
    },
    {
      "epoch": 21.33,
      "learning_rate": 0.001,
      "loss": 2.5702,
      "step": 111108
    },
    {
      "epoch": 21.34,
      "learning_rate": 0.001,
      "loss": 2.5759,
      "step": 111120
    },
    {
      "epoch": 21.34,
      "learning_rate": 0.001,
      "loss": 2.5758,
      "step": 111132
    },
    {
      "epoch": 21.34,
      "learning_rate": 0.001,
      "loss": 2.5878,
      "step": 111144
    },
    {
      "epoch": 21.34,
      "learning_rate": 0.001,
      "loss": 2.5754,
      "step": 111156
    },
    {
      "epoch": 21.35,
      "learning_rate": 0.001,
      "loss": 2.5835,
      "step": 111168
    },
    {
      "epoch": 21.35,
      "learning_rate": 0.001,
      "loss": 2.5759,
      "step": 111180
    },
    {
      "epoch": 21.35,
      "learning_rate": 0.001,
      "loss": 2.5918,
      "step": 111192
    },
    {
      "epoch": 21.35,
      "learning_rate": 0.001,
      "loss": 2.5711,
      "step": 111204
    },
    {
      "epoch": 21.35,
      "learning_rate": 0.001,
      "loss": 2.5851,
      "step": 111216
    },
    {
      "epoch": 21.36,
      "learning_rate": 0.001,
      "loss": 2.5819,
      "step": 111228
    },
    {
      "epoch": 21.36,
      "learning_rate": 0.001,
      "loss": 2.5836,
      "step": 111240
    },
    {
      "epoch": 21.36,
      "eval_ag_news_accuracy": 0.31834375,
      "eval_ag_news_bleu_score": 4.866329716281025,
      "eval_ag_news_bleu_score_sem": 0.15617882672746772,
      "eval_ag_news_emb_cos_sim": 0.802810788154602,
      "eval_ag_news_emb_cos_sim_sem": 0.00814371021677311,
      "eval_ag_news_emb_top1_equal": 0.2265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03714537682851538,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.584831476211548,
      "eval_ag_news_n_ngrams_match_1": 13.862,
      "eval_ag_news_n_ngrams_match_2": 3.072,
      "eval_ag_news_n_ngrams_match_3": 0.916,
      "eval_ag_news_num_pred_words": 46.536,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 36.047282382363775,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3443373521212875,
      "eval_ag_news_runtime": 13.1096,
      "eval_ag_news_samples_per_second": 38.14,
      "eval_ag_news_steps_per_second": 0.076,
      "eval_ag_news_token_set_f1": 0.3475438675235002,
      "eval_ag_news_token_set_f1_sem": 0.004396316955014515,
      "eval_ag_news_token_set_precision": 0.3325826833379386,
      "eval_ag_news_token_set_recall": 0.38045849206885424,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 111250
    },
    {
      "epoch": 21.36,
      "eval_anthropic_toxic_prompts_accuracy": 0.1129375,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1110791230873875,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11863615150761044,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6728488206863403,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009503023130956877,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0546875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.020175758285348722,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.27882981300354,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.078,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.894,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.694,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.688,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.54469226474684,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21070630427718445,
      "eval_anthropic_toxic_prompts_runtime": 9.8523,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.75,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.101,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3504519479027751,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006408439586069482,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.42777988175727255,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3232340079842992,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 111250
    },
    {
      "epoch": 21.36,
      "eval_arxiv_accuracy": 0.3461875,
      "eval_arxiv_bleu_score": 4.272763842617122,
      "eval_arxiv_bleu_score_sem": 0.12094276008984688,
      "eval_arxiv_emb_cos_sim": 0.7589943408966064,
      "eval_arxiv_emb_cos_sim_sem": 0.007845967756734946,
      "eval_arxiv_emb_top1_equal": 0.296875,
      "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4353740215301514,
      "eval_arxiv_n_ngrams_match_1": 15.04,
      "eval_arxiv_n_ngrams_match_2": 2.916,
      "eval_arxiv_n_ngrams_match_3": 0.63,
      "eval_arxiv_num_pred_words": 40.826,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 31.043021152734035,
      "eval_arxiv_pred_num_tokens": 62.9921875,
      "eval_arxiv_rouge_score": 0.3568400506653353,
      "eval_arxiv_runtime": 10.5306,
      "eval_arxiv_samples_per_second": 47.481,
      "eval_arxiv_steps_per_second": 0.095,
      "eval_arxiv_token_set_f1": 0.3511031462298726,
      "eval_arxiv_token_set_f1_sem": 0.004124125656696252,
      "eval_arxiv_token_set_precision": 0.3039353808198982,
      "eval_arxiv_token_set_recall": 0.43306408840869004,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 111250
    },
    {
      "epoch": 21.36,
      "eval_python_code_alpaca_accuracy": 0.15834375,
      "eval_python_code_alpaca_bleu_score": 4.475306735052449,
      "eval_python_code_alpaca_bleu_score_sem": 0.14627040540837266,
      "eval_python_code_alpaca_emb_cos_sim": 0.7522677183151245,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007894331265401199,
      "eval_python_code_alpaca_emb_top1_equal": 0.1484375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.9292328357696533,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.78,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.85,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.922,
      "eval_python_code_alpaca_num_pred_words": 43.71,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.713268838016493,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3265031362519286,
      "eval_python_code_alpaca_runtime": 10.2115,
      "eval_python_code_alpaca_samples_per_second": 48.964,
      "eval_python_code_alpaca_steps_per_second": 0.098,
      "eval_python_code_alpaca_token_set_f1": 0.46749664919678147,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005420524718216274,
      "eval_python_code_alpaca_token_set_precision": 0.5282148022006583,
      "eval_python_code_alpaca_token_set_recall": 0.4401149781164481,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 111250
    },
    {
      "epoch": 21.36,
      "eval_wikibio_accuracy": 0.31890625,
      "eval_wikibio_bleu_score": 5.774908833709817,
      "eval_wikibio_bleu_score_sem": 0.20948880929675023,
      "eval_wikibio_emb_cos_sim": 0.7310364842414856,
      "eval_wikibio_emb_cos_sim_sem": 0.010684866861317904,
      "eval_wikibio_emb_top1_equal": 0.1015625,
      "eval_wikibio_emb_top1_equal_sem": 0.026804565886848545,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.8123817443847656,
      "eval_wikibio_n_ngrams_match_1": 10.01,
      "eval_wikibio_n_ngrams_match_2": 3.342,
      "eval_wikibio_n_ngrams_match_3": 1.172,
      "eval_wikibio_num_pred_words": 36.324,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 45.258103834879456,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3515645100503063,
      "eval_wikibio_runtime": 10.0767,
      "eval_wikibio_samples_per_second": 49.619,
      "eval_wikibio_steps_per_second": 0.099,
      "eval_wikibio_token_set_f1": 0.3168065314978394,
      "eval_wikibio_token_set_f1_sem": 0.005617337118590732,
      "eval_wikibio_token_set_precision": 0.32493484859713717,
      "eval_wikibio_token_set_recall": 0.32456879125337457,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 111250
    },
    {
      "epoch": 21.36,
      "eval_nq_accuracy": 0.52415625,
      "eval_nq_bleu_score": 11.733117241066784,
      "eval_nq_bleu_score_sem": 0.4855792083687693,
      "eval_nq_emb_cos_sim": 0.8233238458633423,
      "eval_nq_emb_cos_sim_sem": 0.007838283859415804,
      "eval_nq_emb_top1_equal": 0.21875,
      "eval_nq_emb_top1_equal_sem": 0.03668319712192295,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.216200113296509,
      "eval_nq_n_ngrams_match_1": 22.878,
      "eval_nq_n_ngrams_match_2": 8.394,
      "eval_nq_n_ngrams_match_3": 3.946,
      "eval_nq_num_pred_words": 49.062,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.172410440598595,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4420714674237494,
      "eval_nq_runtime": 10.3619,
      "eval_nq_samples_per_second": 48.254,
      "eval_nq_steps_per_second": 0.097,
      "eval_nq_token_set_f1": 0.45572913082868083,
      "eval_nq_token_set_f1_sem": 0.005075308348249063,
      "eval_nq_token_set_precision": 0.4142225872774345,
      "eval_nq_token_set_recall": 0.5157395076313773,
      "eval_nq_true_num_tokens": 64.0,
      "step": 111250
    },
    {
      "epoch": 21.36,
      "learning_rate": 0.001,
      "loss": 2.5865,
      "step": 111252
    },
    {
      "epoch": 21.36,
      "learning_rate": 0.001,
      "loss": 2.5788,
      "step": 111264
    },
    {
      "epoch": 21.37,
      "learning_rate": 0.001,
      "loss": 2.5664,
      "step": 111276
    },
    {
      "epoch": 21.37,
      "learning_rate": 0.001,
      "loss": 2.5769,
      "step": 111288
    },
    {
      "epoch": 21.37,
      "learning_rate": 0.001,
      "loss": 2.5829,
      "step": 111300
    },
    {
      "epoch": 21.37,
      "learning_rate": 0.001,
      "loss": 2.5878,
      "step": 111312
    },
    {
      "epoch": 21.38,
      "learning_rate": 0.001,
      "loss": 2.5852,
      "step": 111324
    },
    {
      "epoch": 21.38,
      "learning_rate": 0.001,
      "loss": 2.5806,
      "step": 111336
    },
    {
      "epoch": 21.38,
      "learning_rate": 0.001,
      "loss": 2.5756,
      "step": 111348
    },
    {
      "epoch": 21.38,
      "learning_rate": 0.001,
      "loss": 2.586,
      "step": 111360
    },
    {
      "epoch": 21.38,
      "learning_rate": 0.001,
      "loss": 2.5797,
      "step": 111372
    },
    {
      "epoch": 21.39,
      "learning_rate": 0.001,
      "loss": 2.5869,
      "step": 111384
    },
    {
      "epoch": 21.39,
      "learning_rate": 0.001,
      "loss": 2.5878,
      "step": 111396
    },
    {
      "epoch": 21.39,
      "learning_rate": 0.001,
      "loss": 2.5771,
      "step": 111408
    },
    {
      "epoch": 21.39,
      "learning_rate": 0.001,
      "loss": 2.5814,
      "step": 111420
    },
    {
      "epoch": 21.4,
      "learning_rate": 0.001,
      "loss": 2.5852,
      "step": 111432
    },
    {
      "epoch": 21.4,
      "learning_rate": 0.001,
      "loss": 2.5805,
      "step": 111444
    },
    {
      "epoch": 21.4,
      "learning_rate": 0.001,
      "loss": 2.5863,
      "step": 111456
    },
    {
      "epoch": 21.4,
      "learning_rate": 0.001,
      "loss": 2.5783,
      "step": 111468
    },
    {
      "epoch": 21.41,
      "learning_rate": 0.001,
      "loss": 2.5803,
      "step": 111480
    },
    {
      "epoch": 21.41,
      "learning_rate": 0.001,
      "loss": 2.5868,
      "step": 111492
    },
    {
      "epoch": 21.41,
      "learning_rate": 0.001,
      "loss": 2.5826,
      "step": 111504
    },
    {
      "epoch": 21.41,
      "learning_rate": 0.001,
      "loss": 2.5856,
      "step": 111516
    },
    {
      "epoch": 21.41,
      "learning_rate": 0.001,
      "loss": 2.5765,
      "step": 111528
    },
    {
      "epoch": 21.42,
      "learning_rate": 0.001,
      "loss": 2.5833,
      "step": 111540
    },
    {
      "epoch": 21.42,
      "learning_rate": 0.001,
      "loss": 2.5737,
      "step": 111552
    },
    {
      "epoch": 21.42,
      "learning_rate": 0.001,
      "loss": 2.5764,
      "step": 111564
    },
    {
      "epoch": 21.42,
      "learning_rate": 0.001,
      "loss": 2.5821,
      "step": 111576
    },
    {
      "epoch": 21.43,
      "learning_rate": 0.001,
      "loss": 2.5835,
      "step": 111588
    },
    {
      "epoch": 21.43,
      "learning_rate": 0.001,
      "loss": 2.5808,
      "step": 111600
    },
    {
      "epoch": 21.43,
      "learning_rate": 0.001,
      "loss": 2.5848,
      "step": 111612
    },
    {
      "epoch": 21.43,
      "learning_rate": 0.001,
      "loss": 2.5826,
      "step": 111624
    },
    {
      "epoch": 21.44,
      "learning_rate": 0.001,
      "loss": 2.5913,
      "step": 111636
    },
    {
      "epoch": 21.44,
      "learning_rate": 0.001,
      "loss": 2.5878,
      "step": 111648
    },
    {
      "epoch": 21.44,
      "learning_rate": 0.001,
      "loss": 2.5825,
      "step": 111660
    },
    {
      "epoch": 21.44,
      "learning_rate": 0.001,
      "loss": 2.5819,
      "step": 111672
    },
    {
      "epoch": 21.44,
      "learning_rate": 0.001,
      "loss": 2.5842,
      "step": 111684
    },
    {
      "epoch": 21.45,
      "learning_rate": 0.001,
      "loss": 2.5928,
      "step": 111696
    },
    {
      "epoch": 21.45,
      "learning_rate": 0.001,
      "loss": 2.5843,
      "step": 111708
    },
    {
      "epoch": 21.45,
      "learning_rate": 0.001,
      "loss": 2.574,
      "step": 111720
    },
    {
      "epoch": 21.45,
      "learning_rate": 0.001,
      "loss": 2.5885,
      "step": 111732
    },
    {
      "epoch": 21.46,
      "learning_rate": 0.001,
      "loss": 2.5913,
      "step": 111744
    },
    {
      "epoch": 21.46,
      "learning_rate": 0.001,
      "loss": 2.5859,
      "step": 111756
    },
    {
      "epoch": 21.46,
      "learning_rate": 0.001,
      "loss": 2.5841,
      "step": 111768
    },
    {
      "epoch": 21.46,
      "learning_rate": 0.001,
      "loss": 2.582,
      "step": 111780
    },
    {
      "epoch": 21.47,
      "learning_rate": 0.001,
      "loss": 2.5822,
      "step": 111792
    },
    {
      "epoch": 21.47,
      "learning_rate": 0.001,
      "loss": 2.5875,
      "step": 111804
    },
    {
      "epoch": 21.47,
      "learning_rate": 0.001,
      "loss": 2.5754,
      "step": 111816
    },
    {
      "epoch": 21.47,
      "learning_rate": 0.001,
      "loss": 2.5811,
      "step": 111828
    },
    {
      "epoch": 21.47,
      "learning_rate": 0.001,
      "loss": 2.5802,
      "step": 111840
    },
    {
      "epoch": 21.48,
      "learning_rate": 0.001,
      "loss": 2.5843,
      "step": 111852
    },
    {
      "epoch": 21.48,
      "learning_rate": 0.001,
      "loss": 2.5703,
      "step": 111864
    },
    {
      "epoch": 21.48,
      "eval_ag_news_accuracy": 0.317875,
      "eval_ag_news_bleu_score": 4.700494953317216,
      "eval_ag_news_bleu_score_sem": 0.15883812852175397,
      "eval_ag_news_emb_cos_sim": 0.8048298954963684,
      "eval_ag_news_emb_cos_sim_sem": 0.0070206251201368405,
      "eval_ag_news_emb_top1_equal": 0.203125,
      "eval_ag_news_emb_top1_equal_sem": 0.03570055125142555,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.590454339981079,
      "eval_ag_news_n_ngrams_match_1": 13.818,
      "eval_ag_news_n_ngrams_match_2": 3.026,
      "eval_ag_news_n_ngrams_match_3": 0.852,
      "eval_ag_news_num_pred_words": 46.816,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 36.25054225622004,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.34322967174665137,
      "eval_ag_news_runtime": 10.4165,
      "eval_ag_news_samples_per_second": 48.001,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.3462083173163345,
      "eval_ag_news_token_set_f1_sem": 0.004433211647136026,
      "eval_ag_news_token_set_precision": 0.33192278867157937,
      "eval_ag_news_token_set_recall": 0.38017136136788854,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 111875
    },
    {
      "epoch": 21.48,
      "eval_anthropic_toxic_prompts_accuracy": 0.11278125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1011487188661198,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1175005576017215,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6694596409797668,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009040785472447741,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1328125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030114394778901498,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.287785530090332,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.074,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.854,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.702,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.102,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.78348671053744,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20937319846717908,
      "eval_anthropic_toxic_prompts_runtime": 10.1695,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.167,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.098,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3501391823374047,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0062402111439853955,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4291342180395337,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32489331562091195,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 111875
    },
    {
      "epoch": 21.48,
      "eval_arxiv_accuracy": 0.3473125,
      "eval_arxiv_bleu_score": 4.2278988098088925,
      "eval_arxiv_bleu_score_sem": 0.11929453568295216,
      "eval_arxiv_emb_cos_sim": 0.7575228214263916,
      "eval_arxiv_emb_cos_sim_sem": 0.007924757338759015,
      "eval_arxiv_emb_top1_equal": 0.2890625,
      "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.435016632080078,
      "eval_arxiv_n_ngrams_match_1": 14.93,
      "eval_arxiv_n_ngrams_match_2": 2.83,
      "eval_arxiv_n_ngrams_match_3": 0.634,
      "eval_arxiv_num_pred_words": 41.004,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 31.03192868675887,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3542914729304386,
      "eval_arxiv_runtime": 10.2452,
      "eval_arxiv_samples_per_second": 48.803,
      "eval_arxiv_steps_per_second": 0.098,
      "eval_arxiv_token_set_f1": 0.3481463723415783,
      "eval_arxiv_token_set_f1_sem": 0.004256780699375363,
      "eval_arxiv_token_set_precision": 0.3014698302217242,
      "eval_arxiv_token_set_recall": 0.4282530571108426,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 111875
    },
    {
      "epoch": 21.48,
      "eval_python_code_alpaca_accuracy": 0.1600625,
      "eval_python_code_alpaca_bleu_score": 4.5413277231968205,
      "eval_python_code_alpaca_bleu_score_sem": 0.14724763818708514,
      "eval_python_code_alpaca_emb_cos_sim": 0.7540905475616455,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.00882398274225221,
      "eval_python_code_alpaca_emb_top1_equal": 0.140625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.900806188583374,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.906,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.944,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.954,
      "eval_python_code_alpaca_num_pred_words": 44.42,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.188803065592246,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.32853446970023503,
      "eval_python_code_alpaca_runtime": 10.4058,
      "eval_python_code_alpaca_samples_per_second": 48.05,
      "eval_python_code_alpaca_steps_per_second": 0.096,
      "eval_python_code_alpaca_token_set_f1": 0.47538907576823586,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005781947445105544,
      "eval_python_code_alpaca_token_set_precision": 0.5416553829993164,
      "eval_python_code_alpaca_token_set_recall": 0.4463131670101364,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 111875
    },
    {
      "epoch": 21.48,
      "eval_wikibio_accuracy": 0.3231875,
      "eval_wikibio_bleu_score": 6.025139888412912,
      "eval_wikibio_bleu_score_sem": 0.21193780008115048,
      "eval_wikibio_emb_cos_sim": 0.7234340906143188,
      "eval_wikibio_emb_cos_sim_sem": 0.010278749707234549,
      "eval_wikibio_emb_top1_equal": 0.1953125,
      "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.770280361175537,
      "eval_wikibio_n_ngrams_match_1": 9.996,
      "eval_wikibio_n_ngrams_match_2": 3.374,
      "eval_wikibio_n_ngrams_match_3": 1.238,
      "eval_wikibio_num_pred_words": 35.894,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 43.39222862687157,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3498012550354983,
      "eval_wikibio_runtime": 10.3669,
      "eval_wikibio_samples_per_second": 48.231,
      "eval_wikibio_steps_per_second": 0.096,
      "eval_wikibio_token_set_f1": 0.32076553825869053,
      "eval_wikibio_token_set_f1_sem": 0.005478533508575749,
      "eval_wikibio_token_set_precision": 0.3264525553093949,
      "eval_wikibio_token_set_recall": 0.3308082398231639,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 111875
    },
    {
      "epoch": 21.48,
      "eval_nq_accuracy": 0.52659375,
      "eval_nq_bleu_score": 11.441439523851317,
      "eval_nq_bleu_score_sem": 0.4981648400823459,
      "eval_nq_emb_cos_sim": 0.8239423036575317,
      "eval_nq_emb_cos_sim_sem": 0.007759101126320471,
      "eval_nq_emb_top1_equal": 0.234375,
      "eval_nq_emb_top1_equal_sem": 0.03758909358128201,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.2113282680511475,
      "eval_nq_n_ngrams_match_1": 22.888,
      "eval_nq_n_ngrams_match_2": 8.378,
      "eval_nq_n_ngrams_match_3": 3.826,
      "eval_nq_num_pred_words": 49.072,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.127832552861125,
      "eval_nq_pred_num_tokens": 62.9921875,
      "eval_nq_rouge_score": 0.44206409795837986,
      "eval_nq_runtime": 10.6945,
      "eval_nq_samples_per_second": 46.753,
      "eval_nq_steps_per_second": 0.094,
      "eval_nq_token_set_f1": 0.45777626861500753,
      "eval_nq_token_set_f1_sem": 0.005109313056666424,
      "eval_nq_token_set_precision": 0.4145768546797821,
      "eval_nq_token_set_recall": 0.5208834102521768,
      "eval_nq_true_num_tokens": 64.0,
      "step": 111875
    },
    {
      "epoch": 21.48,
      "learning_rate": 0.001,
      "loss": 2.5837,
      "step": 111876
    },
    {
      "epoch": 21.48,
      "learning_rate": 0.001,
      "loss": 2.576,
      "step": 111888
    },
    {
      "epoch": 21.49,
      "learning_rate": 0.001,
      "loss": 2.5789,
      "step": 111900
    },
    {
      "epoch": 21.49,
      "learning_rate": 0.001,
      "loss": 2.5881,
      "step": 111912
    },
    {
      "epoch": 21.49,
      "learning_rate": 0.001,
      "loss": 2.5722,
      "step": 111924
    },
    {
      "epoch": 21.49,
      "learning_rate": 0.001,
      "loss": 2.5845,
      "step": 111936
    },
    {
      "epoch": 21.5,
      "learning_rate": 0.001,
      "loss": 2.571,
      "step": 111948
    },
    {
      "epoch": 21.5,
      "learning_rate": 0.001,
      "loss": 2.5748,
      "step": 111960
    },
    {
      "epoch": 21.5,
      "learning_rate": 0.001,
      "loss": 2.5785,
      "step": 111972
    },
    {
      "epoch": 21.5,
      "learning_rate": 0.001,
      "loss": 2.579,
      "step": 111984
    },
    {
      "epoch": 21.5,
      "learning_rate": 0.001,
      "loss": 2.5712,
      "step": 111996
    },
    {
      "epoch": 21.51,
      "learning_rate": 0.001,
      "loss": 2.5877,
      "step": 112008
    },
    {
      "epoch": 21.51,
      "learning_rate": 0.001,
      "loss": 2.5856,
      "step": 112020
    },
    {
      "epoch": 21.51,
      "learning_rate": 0.001,
      "loss": 2.58,
      "step": 112032
    },
    {
      "epoch": 21.51,
      "learning_rate": 0.001,
      "loss": 2.5785,
      "step": 112044
    },
    {
      "epoch": 21.52,
      "learning_rate": 0.001,
      "loss": 2.5707,
      "step": 112056
    },
    {
      "epoch": 21.52,
      "learning_rate": 0.001,
      "loss": 2.5829,
      "step": 112068
    },
    {
      "epoch": 21.52,
      "learning_rate": 0.001,
      "loss": 2.5756,
      "step": 112080
    },
    {
      "epoch": 21.52,
      "learning_rate": 0.001,
      "loss": 2.5809,
      "step": 112092
    },
    {
      "epoch": 21.53,
      "learning_rate": 0.001,
      "loss": 2.573,
      "step": 112104
    },
    {
      "epoch": 21.53,
      "learning_rate": 0.001,
      "loss": 2.5768,
      "step": 112116
    },
    {
      "epoch": 21.53,
      "learning_rate": 0.001,
      "loss": 2.577,
      "step": 112128
    },
    {
      "epoch": 21.53,
      "learning_rate": 0.001,
      "loss": 2.5834,
      "step": 112140
    },
    {
      "epoch": 21.53,
      "learning_rate": 0.001,
      "loss": 2.5806,
      "step": 112152
    },
    {
      "epoch": 21.54,
      "learning_rate": 0.001,
      "loss": 2.5782,
      "step": 112164
    },
    {
      "epoch": 21.54,
      "learning_rate": 0.001,
      "loss": 2.5751,
      "step": 112176
    },
    {
      "epoch": 21.54,
      "learning_rate": 0.001,
      "loss": 2.5813,
      "step": 112188
    },
    {
      "epoch": 21.54,
      "learning_rate": 0.001,
      "loss": 2.5899,
      "step": 112200
    },
    {
      "epoch": 21.55,
      "learning_rate": 0.001,
      "loss": 2.5828,
      "step": 112212
    },
    {
      "epoch": 21.55,
      "learning_rate": 0.001,
      "loss": 2.5876,
      "step": 112224
    },
    {
      "epoch": 21.55,
      "learning_rate": 0.001,
      "loss": 2.5753,
      "step": 112236
    },
    {
      "epoch": 21.55,
      "learning_rate": 0.001,
      "loss": 2.5745,
      "step": 112248
    },
    {
      "epoch": 21.56,
      "learning_rate": 0.001,
      "loss": 2.5688,
      "step": 112260
    },
    {
      "epoch": 21.56,
      "learning_rate": 0.001,
      "loss": 2.5768,
      "step": 112272
    },
    {
      "epoch": 21.56,
      "learning_rate": 0.001,
      "loss": 2.5815,
      "step": 112284
    },
    {
      "epoch": 21.56,
      "learning_rate": 0.001,
      "loss": 2.5783,
      "step": 112296
    },
    {
      "epoch": 21.56,
      "learning_rate": 0.001,
      "loss": 2.5795,
      "step": 112308
    },
    {
      "epoch": 21.57,
      "learning_rate": 0.001,
      "loss": 2.5839,
      "step": 112320
    },
    {
      "epoch": 21.57,
      "learning_rate": 0.001,
      "loss": 2.5815,
      "step": 112332
    },
    {
      "epoch": 21.57,
      "learning_rate": 0.001,
      "loss": 2.5808,
      "step": 112344
    },
    {
      "epoch": 21.57,
      "learning_rate": 0.001,
      "loss": 2.5842,
      "step": 112356
    },
    {
      "epoch": 21.58,
      "learning_rate": 0.001,
      "loss": 2.5867,
      "step": 112368
    },
    {
      "epoch": 21.58,
      "learning_rate": 0.001,
      "loss": 2.5754,
      "step": 112380
    },
    {
      "epoch": 21.58,
      "learning_rate": 0.001,
      "loss": 2.582,
      "step": 112392
    },
    {
      "epoch": 21.58,
      "learning_rate": 0.001,
      "loss": 2.5732,
      "step": 112404
    },
    {
      "epoch": 21.59,
      "learning_rate": 0.001,
      "loss": 2.5794,
      "step": 112416
    },
    {
      "epoch": 21.59,
      "learning_rate": 0.001,
      "loss": 2.575,
      "step": 112428
    },
    {
      "epoch": 21.59,
      "learning_rate": 0.001,
      "loss": 2.5865,
      "step": 112440
    },
    {
      "epoch": 21.59,
      "learning_rate": 0.001,
      "loss": 2.5934,
      "step": 112452
    },
    {
      "epoch": 21.59,
      "learning_rate": 0.001,
      "loss": 2.5797,
      "step": 112464
    },
    {
      "epoch": 21.6,
      "learning_rate": 0.001,
      "loss": 2.5865,
      "step": 112476
    },
    {
      "epoch": 21.6,
      "learning_rate": 0.001,
      "loss": 2.5858,
      "step": 112488
    },
    {
      "epoch": 21.6,
      "learning_rate": 0.001,
      "loss": 2.5847,
      "step": 112500
    },
    {
      "epoch": 21.6,
      "eval_ag_news_accuracy": 0.31659375,
      "eval_ag_news_bleu_score": 4.542593252956022,
      "eval_ag_news_bleu_score_sem": 0.15056853216180663,
      "eval_ag_news_emb_cos_sim": 0.8039220571517944,
      "eval_ag_news_emb_cos_sim_sem": 0.007440447902520332,
      "eval_ag_news_emb_top1_equal": 0.2421875,
      "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5941953659057617,
      "eval_ag_news_n_ngrams_match_1": 13.644,
      "eval_ag_news_n_ngrams_match_2": 2.894,
      "eval_ag_news_n_ngrams_match_3": 0.762,
      "eval_ag_news_num_pred_words": 46.446,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 36.386410459360135,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.34007294930622134,
      "eval_ag_news_runtime": 10.5067,
      "eval_ag_news_samples_per_second": 47.589,
      "eval_ag_news_steps_per_second": 0.095,
      "eval_ag_news_token_set_f1": 0.34305723136402444,
      "eval_ag_news_token_set_f1_sem": 0.004487632643678576,
      "eval_ag_news_token_set_precision": 0.32657992058290375,
      "eval_ag_news_token_set_recall": 0.37782337585284304,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 112500
    },
    {
      "epoch": 21.6,
      "eval_anthropic_toxic_prompts_accuracy": 0.113375,
      "eval_anthropic_toxic_prompts_bleu_score": 2.8859277667501018,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1045962308953717,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6659275889396667,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00853308519383002,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1328125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030114394778901498,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2842369079589844,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.964,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.71,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.61,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.702,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.688610676001964,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20716561635871236,
      "eval_anthropic_toxic_prompts_runtime": 13.8982,
      "eval_anthropic_toxic_prompts_samples_per_second": 35.976,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.072,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.34519600134113004,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0063488696199608145,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4228826040609721,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.31747264310184414,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 112500
    },
    {
      "epoch": 21.6,
      "eval_arxiv_accuracy": 0.3473125,
      "eval_arxiv_bleu_score": 4.129073917253408,
      "eval_arxiv_bleu_score_sem": 0.11209592716180401,
      "eval_arxiv_emb_cos_sim": 0.7514575123786926,
      "eval_arxiv_emb_cos_sim_sem": 0.0076645861599902824,
      "eval_arxiv_emb_top1_equal": 0.2734375,
      "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.429063081741333,
      "eval_arxiv_n_ngrams_match_1": 14.658,
      "eval_arxiv_n_ngrams_match_2": 2.774,
      "eval_arxiv_n_ngrams_match_3": 0.6,
      "eval_arxiv_num_pred_words": 40.286,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 30.84772740708994,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3507662362923506,
      "eval_arxiv_runtime": 10.5315,
      "eval_arxiv_samples_per_second": 47.477,
      "eval_arxiv_steps_per_second": 0.095,
      "eval_arxiv_token_set_f1": 0.34238324366268114,
      "eval_arxiv_token_set_f1_sem": 0.004075691673402702,
      "eval_arxiv_token_set_precision": 0.2943535155839137,
      "eval_arxiv_token_set_recall": 0.4264493475804481,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 112500
    },
    {
      "epoch": 21.6,
      "eval_python_code_alpaca_accuracy": 0.15915625,
      "eval_python_code_alpaca_bleu_score": 4.598021978450534,
      "eval_python_code_alpaca_bleu_score_sem": 0.14576880316475482,
      "eval_python_code_alpaca_emb_cos_sim": 0.7366642951965332,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010637163872793159,
      "eval_python_code_alpaca_emb_top1_equal": 0.140625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.916404962539673,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.61,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.83,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.954,
      "eval_python_code_alpaca_num_pred_words": 42.024,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.474750509842,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3280987002626782,
      "eval_python_code_alpaca_runtime": 9.9969,
      "eval_python_code_alpaca_samples_per_second": 50.016,
      "eval_python_code_alpaca_steps_per_second": 0.1,
      "eval_python_code_alpaca_token_set_f1": 0.4624031093236953,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0060482280178062115,
      "eval_python_code_alpaca_token_set_precision": 0.5211010530049532,
      "eval_python_code_alpaca_token_set_recall": 0.43874641900649025,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 112500
    },
    {
      "epoch": 21.6,
      "eval_wikibio_accuracy": 0.3219375,
      "eval_wikibio_bleu_score": 5.9554492938360895,
      "eval_wikibio_bleu_score_sem": 0.2327490139601978,
      "eval_wikibio_emb_cos_sim": 0.7273236513137817,
      "eval_wikibio_emb_cos_sim_sem": 0.01112296952215398,
      "eval_wikibio_emb_top1_equal": 0.2265625,
      "eval_wikibio_emb_top1_equal_sem": 0.03714537682851538,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7361032962799072,
      "eval_wikibio_n_ngrams_match_1": 9.732,
      "eval_wikibio_n_ngrams_match_2": 3.274,
      "eval_wikibio_n_ngrams_match_3": 1.2,
      "eval_wikibio_num_pred_words": 35.486,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 41.93426596962284,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.34392227813541165,
      "eval_wikibio_runtime": 10.0523,
      "eval_wikibio_samples_per_second": 49.74,
      "eval_wikibio_steps_per_second": 0.099,
      "eval_wikibio_token_set_f1": 0.31246454278590186,
      "eval_wikibio_token_set_f1_sem": 0.005588013104639214,
      "eval_wikibio_token_set_precision": 0.3165813151304886,
      "eval_wikibio_token_set_recall": 0.3276856118047234,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 112500
    },
    {
      "epoch": 21.6,
      "eval_nq_accuracy": 0.52353125,
      "eval_nq_bleu_score": 11.57150796177806,
      "eval_nq_bleu_score_sem": 0.4883282667698482,
      "eval_nq_emb_cos_sim": 0.8306972980499268,
      "eval_nq_emb_cos_sim_sem": 0.007129528625421943,
      "eval_nq_emb_top1_equal": 0.2890625,
      "eval_nq_emb_top1_equal_sem": 0.04022626667363519,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.215700149536133,
      "eval_nq_n_ngrams_match_1": 22.702,
      "eval_nq_n_ngrams_match_2": 8.34,
      "eval_nq_n_ngrams_match_3": 3.852,
      "eval_nq_num_pred_words": 48.866,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.167825713977082,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4385532255405237,
      "eval_nq_runtime": 11.5807,
      "eval_nq_samples_per_second": 43.175,
      "eval_nq_steps_per_second": 0.086,
      "eval_nq_token_set_f1": 0.4535784028235237,
      "eval_nq_token_set_f1_sem": 0.005143847754891399,
      "eval_nq_token_set_precision": 0.41012864452621106,
      "eval_nq_token_set_recall": 0.5164164402228356,
      "eval_nq_true_num_tokens": 64.0,
      "step": 112500
    },
    {
      "epoch": 21.6,
      "learning_rate": 0.001,
      "loss": 2.5782,
      "step": 112512
    },
    {
      "epoch": 21.61,
      "learning_rate": 0.001,
      "loss": 2.5839,
      "step": 112524
    },
    {
      "epoch": 21.61,
      "learning_rate": 0.001,
      "loss": 2.5852,
      "step": 112536
    },
    {
      "epoch": 21.61,
      "learning_rate": 0.001,
      "loss": 2.5787,
      "step": 112548
    },
    {
      "epoch": 21.61,
      "learning_rate": 0.001,
      "loss": 2.5757,
      "step": 112560
    },
    {
      "epoch": 21.62,
      "learning_rate": 0.001,
      "loss": 2.5795,
      "step": 112572
    },
    {
      "epoch": 21.62,
      "learning_rate": 0.001,
      "loss": 2.5719,
      "step": 112584
    },
    {
      "epoch": 21.62,
      "learning_rate": 0.001,
      "loss": 2.5807,
      "step": 112596
    },
    {
      "epoch": 21.62,
      "learning_rate": 0.001,
      "loss": 2.5901,
      "step": 112608
    },
    {
      "epoch": 21.62,
      "learning_rate": 0.001,
      "loss": 2.5776,
      "step": 112620
    },
    {
      "epoch": 21.63,
      "learning_rate": 0.001,
      "loss": 2.5867,
      "step": 112632
    },
    {
      "epoch": 21.63,
      "learning_rate": 0.001,
      "loss": 2.5821,
      "step": 112644
    },
    {
      "epoch": 21.63,
      "learning_rate": 0.001,
      "loss": 2.572,
      "step": 112656
    },
    {
      "epoch": 21.63,
      "learning_rate": 0.001,
      "loss": 2.5736,
      "step": 112668
    },
    {
      "epoch": 21.64,
      "learning_rate": 0.001,
      "loss": 2.58,
      "step": 112680
    },
    {
      "epoch": 21.64,
      "learning_rate": 0.001,
      "loss": 2.5768,
      "step": 112692
    },
    {
      "epoch": 21.64,
      "learning_rate": 0.001,
      "loss": 2.5928,
      "step": 112704
    },
    {
      "epoch": 21.64,
      "learning_rate": 0.001,
      "loss": 2.5799,
      "step": 112716
    },
    {
      "epoch": 21.65,
      "learning_rate": 0.001,
      "loss": 2.5816,
      "step": 112728
    },
    {
      "epoch": 21.65,
      "learning_rate": 0.001,
      "loss": 2.5803,
      "step": 112740
    },
    {
      "epoch": 21.65,
      "learning_rate": 0.001,
      "loss": 2.5876,
      "step": 112752
    },
    {
      "epoch": 21.65,
      "learning_rate": 0.001,
      "loss": 2.5861,
      "step": 112764
    },
    {
      "epoch": 21.65,
      "learning_rate": 0.001,
      "loss": 2.5914,
      "step": 112776
    },
    {
      "epoch": 21.66,
      "learning_rate": 0.001,
      "loss": 2.59,
      "step": 112788
    },
    {
      "epoch": 21.66,
      "learning_rate": 0.001,
      "loss": 2.5785,
      "step": 112800
    },
    {
      "epoch": 21.66,
      "learning_rate": 0.001,
      "loss": 2.5744,
      "step": 112812
    },
    {
      "epoch": 21.66,
      "learning_rate": 0.001,
      "loss": 2.5801,
      "step": 112824
    },
    {
      "epoch": 21.67,
      "learning_rate": 0.001,
      "loss": 2.5859,
      "step": 112836
    },
    {
      "epoch": 21.67,
      "learning_rate": 0.001,
      "loss": 2.5836,
      "step": 112848
    },
    {
      "epoch": 21.67,
      "learning_rate": 0.001,
      "loss": 2.5768,
      "step": 112860
    },
    {
      "epoch": 21.67,
      "learning_rate": 0.001,
      "loss": 2.5874,
      "step": 112872
    },
    {
      "epoch": 21.68,
      "learning_rate": 0.001,
      "loss": 2.5806,
      "step": 112884
    },
    {
      "epoch": 21.68,
      "learning_rate": 0.001,
      "loss": 2.5837,
      "step": 112896
    },
    {
      "epoch": 21.68,
      "learning_rate": 0.001,
      "loss": 2.591,
      "step": 112908
    },
    {
      "epoch": 21.68,
      "learning_rate": 0.001,
      "loss": 2.5713,
      "step": 112920
    },
    {
      "epoch": 21.68,
      "learning_rate": 0.001,
      "loss": 2.5813,
      "step": 112932
    },
    {
      "epoch": 21.69,
      "learning_rate": 0.001,
      "loss": 2.5774,
      "step": 112944
    },
    {
      "epoch": 21.69,
      "learning_rate": 0.001,
      "loss": 2.5837,
      "step": 112956
    },
    {
      "epoch": 21.69,
      "learning_rate": 0.001,
      "loss": 2.5795,
      "step": 112968
    },
    {
      "epoch": 21.69,
      "learning_rate": 0.001,
      "loss": 2.5656,
      "step": 112980
    },
    {
      "epoch": 21.7,
      "learning_rate": 0.001,
      "loss": 2.5881,
      "step": 112992
    },
    {
      "epoch": 21.7,
      "learning_rate": 0.001,
      "loss": 2.5787,
      "step": 113004
    },
    {
      "epoch": 21.7,
      "learning_rate": 0.001,
      "loss": 2.5916,
      "step": 113016
    },
    {
      "epoch": 21.7,
      "learning_rate": 0.001,
      "loss": 2.5933,
      "step": 113028
    },
    {
      "epoch": 21.71,
      "learning_rate": 0.001,
      "loss": 2.5796,
      "step": 113040
    },
    {
      "epoch": 21.71,
      "learning_rate": 0.001,
      "loss": 2.5783,
      "step": 113052
    },
    {
      "epoch": 21.71,
      "learning_rate": 0.001,
      "loss": 2.5743,
      "step": 113064
    },
    {
      "epoch": 21.71,
      "learning_rate": 0.001,
      "loss": 2.576,
      "step": 113076
    },
    {
      "epoch": 21.71,
      "learning_rate": 0.001,
      "loss": 2.5817,
      "step": 113088
    },
    {
      "epoch": 21.72,
      "learning_rate": 0.001,
      "loss": 2.588,
      "step": 113100
    },
    {
      "epoch": 21.72,
      "learning_rate": 0.001,
      "loss": 2.5824,
      "step": 113112
    },
    {
      "epoch": 21.72,
      "learning_rate": 0.001,
      "loss": 2.5866,
      "step": 113124
    },
    {
      "epoch": 21.72,
      "eval_ag_news_accuracy": 0.3189375,
      "eval_ag_news_bleu_score": 4.6172591221544845,
      "eval_ag_news_bleu_score_sem": 0.15087499742524083,
      "eval_ag_news_emb_cos_sim": 0.81067955493927,
      "eval_ag_news_emb_cos_sim_sem": 0.007655286852993058,
      "eval_ag_news_emb_top1_equal": 0.2421875,
      "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.589211940765381,
      "eval_ag_news_n_ngrams_match_1": 14.002,
      "eval_ag_news_n_ngrams_match_2": 2.968,
      "eval_ag_news_n_ngrams_match_3": 0.798,
      "eval_ag_news_num_pred_words": 46.7,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 36.2055325767371,
      "eval_ag_news_pred_num_tokens": 62.8671875,
      "eval_ag_news_rouge_score": 0.3444437879338732,
      "eval_ag_news_runtime": 10.721,
      "eval_ag_news_samples_per_second": 46.637,
      "eval_ag_news_steps_per_second": 0.093,
      "eval_ag_news_token_set_f1": 0.3472914339932452,
      "eval_ag_news_token_set_f1_sem": 0.004401285825883925,
      "eval_ag_news_token_set_precision": 0.3326362956793719,
      "eval_ag_news_token_set_recall": 0.3793297658040768,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 113125
    },
    {
      "epoch": 21.72,
      "eval_anthropic_toxic_prompts_accuracy": 0.1140625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.0357400856539316,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11563018913891956,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.675912618637085,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008846460841943186,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.266468048095703,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.078,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.856,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.696,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.242,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.218572877950766,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2089497499670066,
      "eval_anthropic_toxic_prompts_runtime": 9.9702,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.15,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.1,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.350794366431882,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006364557450579552,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4298825004927714,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3202640670747388,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 113125
    },
    {
      "epoch": 21.72,
      "eval_arxiv_accuracy": 0.3450625,
      "eval_arxiv_bleu_score": 4.427283609380793,
      "eval_arxiv_bleu_score_sem": 0.1201839663356153,
      "eval_arxiv_emb_cos_sim": 0.7643361687660217,
      "eval_arxiv_emb_cos_sim_sem": 0.007089860538439095,
      "eval_arxiv_emb_top1_equal": 0.2265625,
      "eval_arxiv_emb_top1_equal_sem": 0.03714537682851538,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4291634559631348,
      "eval_arxiv_n_ngrams_match_1": 15.162,
      "eval_arxiv_n_ngrams_match_2": 3.006,
      "eval_arxiv_n_ngrams_match_3": 0.678,
      "eval_arxiv_num_pred_words": 40.802,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 30.850823879123165,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36038550233441047,
      "eval_arxiv_runtime": 10.2204,
      "eval_arxiv_samples_per_second": 48.922,
      "eval_arxiv_steps_per_second": 0.098,
      "eval_arxiv_token_set_f1": 0.3540935244536806,
      "eval_arxiv_token_set_f1_sem": 0.004124901185254456,
      "eval_arxiv_token_set_precision": 0.30606504947354496,
      "eval_arxiv_token_set_recall": 0.43679745600451936,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 113125
    },
    {
      "epoch": 21.72,
      "eval_python_code_alpaca_accuracy": 0.15503125,
      "eval_python_code_alpaca_bleu_score": 4.456264498652783,
      "eval_python_code_alpaca_bleu_score_sem": 0.14173129545961294,
      "eval_python_code_alpaca_emb_cos_sim": 0.7401628494262695,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.00912275121463381,
      "eval_python_code_alpaca_emb_top1_equal": 0.1875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.034634623208270626,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.9323348999023438,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.608,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.708,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.868,
      "eval_python_code_alpaca_num_pred_words": 42.276,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.771408728296073,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33434563887115953,
      "eval_python_code_alpaca_runtime": 9.9309,
      "eval_python_code_alpaca_samples_per_second": 50.348,
      "eval_python_code_alpaca_steps_per_second": 0.101,
      "eval_python_code_alpaca_token_set_f1": 0.46630211155272705,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005577378640235889,
      "eval_python_code_alpaca_token_set_precision": 0.524713249797721,
      "eval_python_code_alpaca_token_set_recall": 0.4413909157763783,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 113125
    },
    {
      "epoch": 21.72,
      "eval_wikibio_accuracy": 0.32284375,
      "eval_wikibio_bleu_score": 6.075281154920749,
      "eval_wikibio_bleu_score_sem": 0.20602332021792208,
      "eval_wikibio_emb_cos_sim": 0.7318436503410339,
      "eval_wikibio_emb_cos_sim_sem": 0.009524317167622497,
      "eval_wikibio_emb_top1_equal": 0.1328125,
      "eval_wikibio_emb_top1_equal_sem": 0.030114394778901498,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7428438663482666,
      "eval_wikibio_n_ngrams_match_1": 10.158,
      "eval_wikibio_n_ngrams_match_2": 3.434,
      "eval_wikibio_n_ngrams_match_3": 1.204,
      "eval_wikibio_num_pred_words": 35.708,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 42.217881619390496,
      "eval_wikibio_pred_num_tokens": 62.9609375,
      "eval_wikibio_rouge_score": 0.3573053501457158,
      "eval_wikibio_runtime": 10.1174,
      "eval_wikibio_samples_per_second": 49.42,
      "eval_wikibio_steps_per_second": 0.099,
      "eval_wikibio_token_set_f1": 0.3272909505723658,
      "eval_wikibio_token_set_f1_sem": 0.0051825386297305685,
      "eval_wikibio_token_set_precision": 0.3313673683276995,
      "eval_wikibio_token_set_recall": 0.3390362766352453,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 113125
    },
    {
      "epoch": 21.72,
      "eval_nq_accuracy": 0.5245625,
      "eval_nq_bleu_score": 11.483588269998057,
      "eval_nq_bleu_score_sem": 0.4788229567031315,
      "eval_nq_emb_cos_sim": 0.828566312789917,
      "eval_nq_emb_cos_sim_sem": 0.007296827247863397,
      "eval_nq_emb_top1_equal": 0.3671875,
      "eval_nq_emb_top1_equal_sem": 0.04277397517748991,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.2093870639801025,
      "eval_nq_n_ngrams_match_1": 22.964,
      "eval_nq_n_ngrams_match_2": 8.378,
      "eval_nq_n_ngrams_match_3": 3.866,
      "eval_nq_num_pred_words": 49.022,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.110130754110362,
      "eval_nq_pred_num_tokens": 62.9609375,
      "eval_nq_rouge_score": 0.4429517913784128,
      "eval_nq_runtime": 10.5228,
      "eval_nq_samples_per_second": 47.516,
      "eval_nq_steps_per_second": 0.095,
      "eval_nq_token_set_f1": 0.4580778356921578,
      "eval_nq_token_set_f1_sem": 0.005034516547220299,
      "eval_nq_token_set_precision": 0.4159114277974363,
      "eval_nq_token_set_recall": 0.5185378573581648,
      "eval_nq_true_num_tokens": 64.0,
      "step": 113125
    },
    {
      "epoch": 21.72,
      "learning_rate": 0.001,
      "loss": 2.5793,
      "step": 113136
    },
    {
      "epoch": 21.73,
      "learning_rate": 0.001,
      "loss": 2.5851,
      "step": 113148
    },
    {
      "epoch": 21.73,
      "learning_rate": 0.001,
      "loss": 2.583,
      "step": 113160
    },
    {
      "epoch": 21.73,
      "learning_rate": 0.001,
      "loss": 2.5801,
      "step": 113172
    },
    {
      "epoch": 21.73,
      "learning_rate": 0.001,
      "loss": 2.5904,
      "step": 113184
    },
    {
      "epoch": 21.74,
      "learning_rate": 0.001,
      "loss": 2.5819,
      "step": 113196
    },
    {
      "epoch": 21.74,
      "learning_rate": 0.001,
      "loss": 2.5787,
      "step": 113208
    },
    {
      "epoch": 21.74,
      "learning_rate": 0.001,
      "loss": 2.5799,
      "step": 113220
    },
    {
      "epoch": 21.74,
      "learning_rate": 0.001,
      "loss": 2.5871,
      "step": 113232
    },
    {
      "epoch": 21.74,
      "learning_rate": 0.001,
      "loss": 2.5889,
      "step": 113244
    },
    {
      "epoch": 21.75,
      "learning_rate": 0.001,
      "loss": 2.578,
      "step": 113256
    },
    {
      "epoch": 21.75,
      "learning_rate": 0.001,
      "loss": 2.5901,
      "step": 113268
    },
    {
      "epoch": 21.75,
      "learning_rate": 0.001,
      "loss": 2.5802,
      "step": 113280
    },
    {
      "epoch": 21.75,
      "learning_rate": 0.001,
      "loss": 2.5794,
      "step": 113292
    },
    {
      "epoch": 21.76,
      "learning_rate": 0.001,
      "loss": 2.5797,
      "step": 113304
    },
    {
      "epoch": 21.76,
      "learning_rate": 0.001,
      "loss": 2.5762,
      "step": 113316
    },
    {
      "epoch": 21.76,
      "learning_rate": 0.001,
      "loss": 2.5802,
      "step": 113328
    },
    {
      "epoch": 21.76,
      "learning_rate": 0.001,
      "loss": 2.5804,
      "step": 113340
    },
    {
      "epoch": 21.76,
      "learning_rate": 0.001,
      "loss": 2.5808,
      "step": 113352
    },
    {
      "epoch": 21.77,
      "learning_rate": 0.001,
      "loss": 2.5651,
      "step": 113364
    },
    {
      "epoch": 21.77,
      "learning_rate": 0.001,
      "loss": 2.5766,
      "step": 113376
    },
    {
      "epoch": 21.77,
      "learning_rate": 0.001,
      "loss": 2.5875,
      "step": 113388
    },
    {
      "epoch": 21.77,
      "learning_rate": 0.001,
      "loss": 2.5784,
      "step": 113400
    },
    {
      "epoch": 21.78,
      "learning_rate": 0.001,
      "loss": 2.5804,
      "step": 113412
    },
    {
      "epoch": 21.78,
      "learning_rate": 0.001,
      "loss": 2.5923,
      "step": 113424
    },
    {
      "epoch": 21.78,
      "learning_rate": 0.001,
      "loss": 2.5892,
      "step": 113436
    },
    {
      "epoch": 21.78,
      "learning_rate": 0.001,
      "loss": 2.5819,
      "step": 113448
    },
    {
      "epoch": 21.79,
      "learning_rate": 0.001,
      "loss": 2.575,
      "step": 113460
    },
    {
      "epoch": 21.79,
      "learning_rate": 0.001,
      "loss": 2.5757,
      "step": 113472
    },
    {
      "epoch": 21.79,
      "learning_rate": 0.001,
      "loss": 2.5769,
      "step": 113484
    },
    {
      "epoch": 21.79,
      "learning_rate": 0.001,
      "loss": 2.5841,
      "step": 113496
    },
    {
      "epoch": 21.79,
      "learning_rate": 0.001,
      "loss": 2.5903,
      "step": 113508
    },
    {
      "epoch": 21.8,
      "learning_rate": 0.001,
      "loss": 2.5828,
      "step": 113520
    },
    {
      "epoch": 21.8,
      "learning_rate": 0.001,
      "loss": 2.5777,
      "step": 113532
    },
    {
      "epoch": 21.8,
      "learning_rate": 0.001,
      "loss": 2.5859,
      "step": 113544
    },
    {
      "epoch": 21.8,
      "learning_rate": 0.001,
      "loss": 2.5857,
      "step": 113556
    },
    {
      "epoch": 21.81,
      "learning_rate": 0.001,
      "loss": 2.579,
      "step": 113568
    },
    {
      "epoch": 21.81,
      "learning_rate": 0.001,
      "loss": 2.5841,
      "step": 113580
    },
    {
      "epoch": 21.81,
      "learning_rate": 0.001,
      "loss": 2.5785,
      "step": 113592
    },
    {
      "epoch": 21.81,
      "learning_rate": 0.001,
      "loss": 2.5804,
      "step": 113604
    },
    {
      "epoch": 21.82,
      "learning_rate": 0.001,
      "loss": 2.5844,
      "step": 113616
    },
    {
      "epoch": 21.82,
      "learning_rate": 0.001,
      "loss": 2.5825,
      "step": 113628
    },
    {
      "epoch": 21.82,
      "learning_rate": 0.001,
      "loss": 2.5868,
      "step": 113640
    },
    {
      "epoch": 21.82,
      "learning_rate": 0.001,
      "loss": 2.5841,
      "step": 113652
    },
    {
      "epoch": 21.82,
      "learning_rate": 0.001,
      "loss": 2.5928,
      "step": 113664
    },
    {
      "epoch": 21.83,
      "learning_rate": 0.001,
      "loss": 2.5829,
      "step": 113676
    },
    {
      "epoch": 21.83,
      "learning_rate": 0.001,
      "loss": 2.5827,
      "step": 113688
    },
    {
      "epoch": 21.83,
      "learning_rate": 0.001,
      "loss": 2.5828,
      "step": 113700
    },
    {
      "epoch": 21.83,
      "learning_rate": 0.001,
      "loss": 2.5889,
      "step": 113712
    },
    {
      "epoch": 21.84,
      "learning_rate": 0.001,
      "loss": 2.5815,
      "step": 113724
    },
    {
      "epoch": 21.84,
      "learning_rate": 0.001,
      "loss": 2.5881,
      "step": 113736
    },
    {
      "epoch": 21.84,
      "learning_rate": 0.001,
      "loss": 2.5848,
      "step": 113748
    },
    {
      "epoch": 21.84,
      "eval_ag_news_accuracy": 0.31846875,
      "eval_ag_news_bleu_score": 4.679012024183408,
      "eval_ag_news_bleu_score_sem": 0.15355513120994704,
      "eval_ag_news_emb_cos_sim": 0.8066794276237488,
      "eval_ag_news_emb_cos_sim_sem": 0.00664133114240813,
      "eval_ag_news_emb_top1_equal": 0.3046875,
      "eval_ag_news_emb_top1_equal_sem": 0.04084279867618665,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5819995403289795,
      "eval_ag_news_n_ngrams_match_1": 13.88,
      "eval_ag_news_n_ngrams_match_2": 3.024,
      "eval_ag_news_n_ngrams_match_3": 0.83,
      "eval_ag_news_num_pred_words": 46.786,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 35.945343200657504,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3421566200228286,
      "eval_ag_news_runtime": 10.5682,
      "eval_ag_news_samples_per_second": 47.312,
      "eval_ag_news_steps_per_second": 0.095,
      "eval_ag_news_token_set_f1": 0.34573068272148605,
      "eval_ag_news_token_set_f1_sem": 0.004417048525383739,
      "eval_ag_news_token_set_precision": 0.33024695872557813,
      "eval_ag_news_token_set_recall": 0.37763815965764136,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 113750
    },
    {
      "epoch": 21.84,
      "eval_anthropic_toxic_prompts_accuracy": 0.11309375,
      "eval_anthropic_toxic_prompts_bleu_score": 2.9948367131837115,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1284381535585758,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6698676943778992,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00935611897338314,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.3021464347839355,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.106,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.848,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.67,
      "eval_anthropic_toxic_prompts_num_pred_words": 48.15,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 27.170896933066505,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2081280578046948,
      "eval_anthropic_toxic_prompts_runtime": 10.0333,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.834,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.1,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3511642363875221,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006447759182461809,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4289451355331152,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32736353587670647,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 113750
    },
    {
      "epoch": 21.84,
      "eval_arxiv_accuracy": 0.3448125,
      "eval_arxiv_bleu_score": 4.083958030466865,
      "eval_arxiv_bleu_score_sem": 0.11825863123114955,
      "eval_arxiv_emb_cos_sim": 0.7606527209281921,
      "eval_arxiv_emb_cos_sim_sem": 0.006978115413602048,
      "eval_arxiv_emb_top1_equal": 0.3359375,
      "eval_arxiv_emb_top1_equal_sem": 0.04191137143408563,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4391751289367676,
      "eval_arxiv_n_ngrams_match_1": 14.782,
      "eval_arxiv_n_ngrams_match_2": 2.802,
      "eval_arxiv_n_ngrams_match_3": 0.562,
      "eval_arxiv_num_pred_words": 40.082,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 31.16124355604401,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.35442063763571935,
      "eval_arxiv_runtime": 10.36,
      "eval_arxiv_samples_per_second": 48.262,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.34785914848926547,
      "eval_arxiv_token_set_f1_sem": 0.004248403809319432,
      "eval_arxiv_token_set_precision": 0.29804952986992483,
      "eval_arxiv_token_set_recall": 0.43695701385419244,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 113750
    },
    {
      "epoch": 21.84,
      "eval_python_code_alpaca_accuracy": 0.1583125,
      "eval_python_code_alpaca_bleu_score": 4.42816420078413,
      "eval_python_code_alpaca_bleu_score_sem": 0.14064540216470575,
      "eval_python_code_alpaca_emb_cos_sim": 0.7494914531707764,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008940605706259886,
      "eval_python_code_alpaca_emb_top1_equal": 0.1796875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.034068008879424266,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.9011332988739014,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.548,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.82,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.916,
      "eval_python_code_alpaca_num_pred_words": 43.736,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.194753783464844,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.324992975679671,
      "eval_python_code_alpaca_runtime": 10.4937,
      "eval_python_code_alpaca_samples_per_second": 47.648,
      "eval_python_code_alpaca_steps_per_second": 0.095,
      "eval_python_code_alpaca_token_set_f1": 0.46851271096077945,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005361809575827445,
      "eval_python_code_alpaca_token_set_precision": 0.5237733636941833,
      "eval_python_code_alpaca_token_set_recall": 0.4470571152903056,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 113750
    },
    {
      "epoch": 21.84,
      "eval_wikibio_accuracy": 0.3195625,
      "eval_wikibio_bleu_score": 5.633856736686618,
      "eval_wikibio_bleu_score_sem": 0.1939018278312027,
      "eval_wikibio_emb_cos_sim": 0.7343205809593201,
      "eval_wikibio_emb_cos_sim_sem": 0.009796639809652527,
      "eval_wikibio_emb_top1_equal": 0.1875,
      "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.778465509414673,
      "eval_wikibio_n_ngrams_match_1": 10.0,
      "eval_wikibio_n_ngrams_match_2": 3.276,
      "eval_wikibio_n_ngrams_match_3": 1.128,
      "eval_wikibio_num_pred_words": 36.438,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 43.74885799163862,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.34729071016340013,
      "eval_wikibio_runtime": 10.2246,
      "eval_wikibio_samples_per_second": 48.902,
      "eval_wikibio_steps_per_second": 0.098,
      "eval_wikibio_token_set_f1": 0.31660626453372026,
      "eval_wikibio_token_set_f1_sem": 0.005326242510062092,
      "eval_wikibio_token_set_precision": 0.3257330726863965,
      "eval_wikibio_token_set_recall": 0.3250680312480864,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 113750
    },
    {
      "epoch": 21.84,
      "eval_nq_accuracy": 0.5249375,
      "eval_nq_bleu_score": 11.618509505421393,
      "eval_nq_bleu_score_sem": 0.47727477421093933,
      "eval_nq_emb_cos_sim": 0.8236499428749084,
      "eval_nq_emb_cos_sim_sem": 0.008470734858358725,
      "eval_nq_emb_top1_equal": 0.28125,
      "eval_nq_emb_top1_equal_sem": 0.039896367485272234,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.208742141723633,
      "eval_nq_n_ngrams_match_1": 22.912,
      "eval_nq_n_ngrams_match_2": 8.388,
      "eval_nq_n_ngrams_match_3": 3.882,
      "eval_nq_num_pred_words": 49.054,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.104257322184747,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4416679699544236,
      "eval_nq_runtime": 10.8646,
      "eval_nq_samples_per_second": 46.021,
      "eval_nq_steps_per_second": 0.092,
      "eval_nq_token_set_f1": 0.4579354744572694,
      "eval_nq_token_set_f1_sem": 0.005138784443154923,
      "eval_nq_token_set_precision": 0.41480579463748113,
      "eval_nq_token_set_recall": 0.5218710305350167,
      "eval_nq_true_num_tokens": 64.0,
      "step": 113750
    },
    {
      "epoch": 21.84,
      "learning_rate": 0.001,
      "loss": 2.5823,
      "step": 113760
    },
    {
      "epoch": 21.85,
      "learning_rate": 0.001,
      "loss": 2.5877,
      "step": 113772
    },
    {
      "epoch": 21.85,
      "learning_rate": 0.001,
      "loss": 2.5873,
      "step": 113784
    },
    {
      "epoch": 21.85,
      "learning_rate": 0.001,
      "loss": 2.5923,
      "step": 113796
    },
    {
      "epoch": 21.85,
      "learning_rate": 0.001,
      "loss": 2.5882,
      "step": 113808
    },
    {
      "epoch": 21.85,
      "learning_rate": 0.001,
      "loss": 2.5896,
      "step": 113820
    },
    {
      "epoch": 21.86,
      "learning_rate": 0.001,
      "loss": 2.5859,
      "step": 113832
    },
    {
      "epoch": 21.86,
      "learning_rate": 0.001,
      "loss": 2.5798,
      "step": 113844
    },
    {
      "epoch": 21.86,
      "learning_rate": 0.001,
      "loss": 2.5787,
      "step": 113856
    },
    {
      "epoch": 21.86,
      "learning_rate": 0.001,
      "loss": 2.5871,
      "step": 113868
    },
    {
      "epoch": 21.87,
      "learning_rate": 0.001,
      "loss": 2.5927,
      "step": 113880
    },
    {
      "epoch": 21.87,
      "learning_rate": 0.001,
      "loss": 2.58,
      "step": 113892
    },
    {
      "epoch": 21.87,
      "learning_rate": 0.001,
      "loss": 2.5899,
      "step": 113904
    },
    {
      "epoch": 21.87,
      "learning_rate": 0.001,
      "loss": 2.5821,
      "step": 113916
    },
    {
      "epoch": 21.88,
      "learning_rate": 0.001,
      "loss": 2.587,
      "step": 113928
    },
    {
      "epoch": 21.88,
      "learning_rate": 0.001,
      "loss": 2.5925,
      "step": 113940
    },
    {
      "epoch": 21.88,
      "learning_rate": 0.001,
      "loss": 2.5779,
      "step": 113952
    },
    {
      "epoch": 21.88,
      "learning_rate": 0.001,
      "loss": 2.5882,
      "step": 113964
    },
    {
      "epoch": 21.88,
      "learning_rate": 0.001,
      "loss": 2.5791,
      "step": 113976
    },
    {
      "epoch": 21.89,
      "learning_rate": 0.001,
      "loss": 2.5859,
      "step": 113988
    },
    {
      "epoch": 21.89,
      "learning_rate": 0.001,
      "loss": 2.5756,
      "step": 114000
    },
    {
      "epoch": 21.89,
      "learning_rate": 0.001,
      "loss": 2.5958,
      "step": 114012
    },
    {
      "epoch": 21.89,
      "learning_rate": 0.001,
      "loss": 2.5881,
      "step": 114024
    },
    {
      "epoch": 21.9,
      "learning_rate": 0.001,
      "loss": 2.5822,
      "step": 114036
    },
    {
      "epoch": 21.9,
      "learning_rate": 0.001,
      "loss": 2.5813,
      "step": 114048
    },
    {
      "epoch": 21.9,
      "learning_rate": 0.001,
      "loss": 2.5839,
      "step": 114060
    },
    {
      "epoch": 21.9,
      "learning_rate": 0.001,
      "loss": 2.5888,
      "step": 114072
    },
    {
      "epoch": 21.91,
      "learning_rate": 0.001,
      "loss": 2.5752,
      "step": 114084
    },
    {
      "epoch": 21.91,
      "learning_rate": 0.001,
      "loss": 2.584,
      "step": 114096
    },
    {
      "epoch": 21.91,
      "learning_rate": 0.001,
      "loss": 2.5825,
      "step": 114108
    },
    {
      "epoch": 21.91,
      "learning_rate": 0.001,
      "loss": 2.5895,
      "step": 114120
    },
    {
      "epoch": 21.91,
      "learning_rate": 0.001,
      "loss": 2.5892,
      "step": 114132
    },
    {
      "epoch": 21.92,
      "learning_rate": 0.001,
      "loss": 2.5817,
      "step": 114144
    },
    {
      "epoch": 21.92,
      "learning_rate": 0.001,
      "loss": 2.5888,
      "step": 114156
    },
    {
      "epoch": 21.92,
      "learning_rate": 0.001,
      "loss": 2.5812,
      "step": 114168
    },
    {
      "epoch": 21.92,
      "learning_rate": 0.001,
      "loss": 2.5858,
      "step": 114180
    },
    {
      "epoch": 21.93,
      "learning_rate": 0.001,
      "loss": 2.5889,
      "step": 114192
    },
    {
      "epoch": 21.93,
      "learning_rate": 0.001,
      "loss": 2.5748,
      "step": 114204
    },
    {
      "epoch": 21.93,
      "learning_rate": 0.001,
      "loss": 2.5693,
      "step": 114216
    },
    {
      "epoch": 21.93,
      "learning_rate": 0.001,
      "loss": 2.577,
      "step": 114228
    },
    {
      "epoch": 21.94,
      "learning_rate": 0.001,
      "loss": 2.5798,
      "step": 114240
    },
    {
      "epoch": 21.94,
      "learning_rate": 0.001,
      "loss": 2.5747,
      "step": 114252
    },
    {
      "epoch": 21.94,
      "learning_rate": 0.001,
      "loss": 2.5838,
      "step": 114264
    },
    {
      "epoch": 21.94,
      "learning_rate": 0.001,
      "loss": 2.5734,
      "step": 114276
    },
    {
      "epoch": 21.94,
      "learning_rate": 0.001,
      "loss": 2.5847,
      "step": 114288
    },
    {
      "epoch": 21.95,
      "learning_rate": 0.001,
      "loss": 2.5845,
      "step": 114300
    },
    {
      "epoch": 21.95,
      "learning_rate": 0.001,
      "loss": 2.5797,
      "step": 114312
    },
    {
      "epoch": 21.95,
      "learning_rate": 0.001,
      "loss": 2.5792,
      "step": 114324
    },
    {
      "epoch": 21.95,
      "learning_rate": 0.001,
      "loss": 2.5834,
      "step": 114336
    },
    {
      "epoch": 21.96,
      "learning_rate": 0.001,
      "loss": 2.5787,
      "step": 114348
    },
    {
      "epoch": 21.96,
      "learning_rate": 0.001,
      "loss": 2.5846,
      "step": 114360
    },
    {
      "epoch": 21.96,
      "learning_rate": 0.001,
      "loss": 2.5857,
      "step": 114372
    },
    {
      "epoch": 21.96,
      "eval_ag_news_accuracy": 0.32103125,
      "eval_ag_news_bleu_score": 4.784761900490624,
      "eval_ag_news_bleu_score_sem": 0.15790721125078844,
      "eval_ag_news_emb_cos_sim": 0.8021398782730103,
      "eval_ag_news_emb_cos_sim_sem": 0.007815886760699312,
      "eval_ag_news_emb_top1_equal": 0.265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5692100524902344,
      "eval_ag_news_n_ngrams_match_1": 13.88,
      "eval_ag_news_n_ngrams_match_2": 3.058,
      "eval_ag_news_n_ngrams_match_3": 0.904,
      "eval_ag_news_num_pred_words": 46.168,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 35.4885479858758,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.34493416482914646,
      "eval_ag_news_runtime": 10.9115,
      "eval_ag_news_samples_per_second": 45.823,
      "eval_ag_news_steps_per_second": 0.092,
      "eval_ag_news_token_set_f1": 0.34895214995523965,
      "eval_ag_news_token_set_f1_sem": 0.004623837165741866,
      "eval_ag_news_token_set_precision": 0.3312751277647174,
      "eval_ag_news_token_set_recall": 0.38771738803002936,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 114375
    },
    {
      "epoch": 21.96,
      "eval_anthropic_toxic_prompts_accuracy": 0.113,
      "eval_anthropic_toxic_prompts_bleu_score": 2.906965262187682,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1102732317782224,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.673775851726532,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008469590966548358,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.29018497467041,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.158,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.806,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.622,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.648,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.84782936500722,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21085188577868202,
      "eval_anthropic_toxic_prompts_runtime": 9.8936,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.538,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.101,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.34963139807541843,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006226400424160567,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.43294736046057675,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.31569602391112955,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 114375
    },
    {
      "epoch": 21.96,
      "eval_arxiv_accuracy": 0.344625,
      "eval_arxiv_bleu_score": 4.2328438586134,
      "eval_arxiv_bleu_score_sem": 0.11858662820281565,
      "eval_arxiv_emb_cos_sim": 0.7667374014854431,
      "eval_arxiv_emb_cos_sim_sem": 0.007132719932107627,
      "eval_arxiv_emb_top1_equal": 0.2421875,
      "eval_arxiv_emb_top1_equal_sem": 0.038014990119662626,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.42853045463562,
      "eval_arxiv_n_ngrams_match_1": 14.93,
      "eval_arxiv_n_ngrams_match_2": 2.928,
      "eval_arxiv_n_ngrams_match_3": 0.616,
      "eval_arxiv_num_pred_words": 40.052,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 30.83130144616762,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.35780127197904593,
      "eval_arxiv_runtime": 11.2411,
      "eval_arxiv_samples_per_second": 44.48,
      "eval_arxiv_steps_per_second": 0.089,
      "eval_arxiv_token_set_f1": 0.3496929306406429,
      "eval_arxiv_token_set_f1_sem": 0.004551344344202184,
      "eval_arxiv_token_set_precision": 0.30148368624472965,
      "eval_arxiv_token_set_recall": 0.43757430553172916,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 114375
    },
    {
      "epoch": 21.96,
      "eval_python_code_alpaca_accuracy": 0.15815625,
      "eval_python_code_alpaca_bleu_score": 4.63821649394017,
      "eval_python_code_alpaca_bleu_score_sem": 0.1505187540499886,
      "eval_python_code_alpaca_emb_cos_sim": 0.7470142245292664,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010516779961100394,
      "eval_python_code_alpaca_emb_top1_equal": 0.1640625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.92989182472229,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.732,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.828,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.922,
      "eval_python_code_alpaca_num_pred_words": 42.168,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.72560473961348,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3322823791242405,
      "eval_python_code_alpaca_runtime": 9.9436,
      "eval_python_code_alpaca_samples_per_second": 50.284,
      "eval_python_code_alpaca_steps_per_second": 0.101,
      "eval_python_code_alpaca_token_set_f1": 0.4716686909205951,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005666082812173575,
      "eval_python_code_alpaca_token_set_precision": 0.5285477863338287,
      "eval_python_code_alpaca_token_set_recall": 0.4490487275601845,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 114375
    },
    {
      "epoch": 21.96,
      "eval_wikibio_accuracy": 0.32621875,
      "eval_wikibio_bleu_score": 5.937238016981768,
      "eval_wikibio_bleu_score_sem": 0.2151262452784868,
      "eval_wikibio_emb_cos_sim": 0.7314822673797607,
      "eval_wikibio_emb_cos_sim_sem": 0.01095206866125386,
      "eval_wikibio_emb_top1_equal": 0.140625,
      "eval_wikibio_emb_top1_equal_sem": 0.030847557647994725,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.725485324859619,
      "eval_wikibio_n_ngrams_match_1": 10.116,
      "eval_wikibio_n_ngrams_match_2": 3.346,
      "eval_wikibio_n_ngrams_match_3": 1.168,
      "eval_wikibio_num_pred_words": 36.184,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 41.491364649902714,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3566688101601526,
      "eval_wikibio_runtime": 10.5295,
      "eval_wikibio_samples_per_second": 47.485,
      "eval_wikibio_steps_per_second": 0.095,
      "eval_wikibio_token_set_f1": 0.31979976001231264,
      "eval_wikibio_token_set_f1_sem": 0.005290779582982193,
      "eval_wikibio_token_set_precision": 0.3291476561237887,
      "eval_wikibio_token_set_recall": 0.32477579009700175,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 114375
    },
    {
      "epoch": 21.96,
      "eval_nq_accuracy": 0.52428125,
      "eval_nq_bleu_score": 11.417613425987117,
      "eval_nq_bleu_score_sem": 0.4654381066058178,
      "eval_nq_emb_cos_sim": 0.8305596709251404,
      "eval_nq_emb_cos_sim_sem": 0.007424187780073731,
      "eval_nq_emb_top1_equal": 0.2734375,
      "eval_nq_emb_top1_equal_sem": 0.03955156411760461,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.205254316329956,
      "eval_nq_n_ngrams_match_1": 22.996,
      "eval_nq_n_ngrams_match_2": 8.338,
      "eval_nq_n_ngrams_match_3": 3.748,
      "eval_nq_num_pred_words": 48.95,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.072558574289197,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.444935522803793,
      "eval_nq_runtime": 10.5871,
      "eval_nq_samples_per_second": 47.227,
      "eval_nq_steps_per_second": 0.094,
      "eval_nq_token_set_f1": 0.4591798232474928,
      "eval_nq_token_set_f1_sem": 0.004980554517841983,
      "eval_nq_token_set_precision": 0.4171823812040491,
      "eval_nq_token_set_recall": 0.5191321275823937,
      "eval_nq_true_num_tokens": 64.0,
      "step": 114375
    },
    {
      "epoch": 21.96,
      "learning_rate": 0.001,
      "loss": 2.577,
      "step": 114384
    },
    {
      "epoch": 21.97,
      "learning_rate": 0.001,
      "loss": 2.5815,
      "step": 114396
    },
    {
      "epoch": 21.97,
      "learning_rate": 0.001,
      "loss": 2.5853,
      "step": 114408
    },
    {
      "epoch": 21.97,
      "learning_rate": 0.001,
      "loss": 2.5877,
      "step": 114420
    },
    {
      "epoch": 21.97,
      "learning_rate": 0.001,
      "loss": 2.5869,
      "step": 114432
    },
    {
      "epoch": 21.97,
      "learning_rate": 0.001,
      "loss": 2.5762,
      "step": 114444
    },
    {
      "epoch": 21.98,
      "learning_rate": 0.001,
      "loss": 2.5729,
      "step": 114456
    },
    {
      "epoch": 21.98,
      "learning_rate": 0.001,
      "loss": 2.5837,
      "step": 114468
    },
    {
      "epoch": 21.98,
      "learning_rate": 0.001,
      "loss": 2.5772,
      "step": 114480
    },
    {
      "epoch": 21.98,
      "learning_rate": 0.001,
      "loss": 2.5802,
      "step": 114492
    },
    {
      "epoch": 21.99,
      "learning_rate": 0.001,
      "loss": 2.5837,
      "step": 114504
    },
    {
      "epoch": 21.99,
      "learning_rate": 0.001,
      "loss": 2.5747,
      "step": 114516
    },
    {
      "epoch": 21.99,
      "learning_rate": 0.001,
      "loss": 2.5857,
      "step": 114528
    },
    {
      "epoch": 21.99,
      "learning_rate": 0.001,
      "loss": 2.5865,
      "step": 114540
    },
    {
      "epoch": 22.0,
      "learning_rate": 0.001,
      "loss": 2.5844,
      "step": 114552
    },
    {
      "epoch": 22.0,
      "learning_rate": 0.001,
      "loss": 2.5852,
      "step": 114564
    },
    {
      "epoch": 22.0,
      "learning_rate": 0.001,
      "loss": 2.5845,
      "step": 114576
    },
    {
      "epoch": 22.0,
      "learning_rate": 0.001,
      "loss": 2.5774,
      "step": 114588
    },
    {
      "epoch": 22.0,
      "learning_rate": 0.001,
      "loss": 2.5665,
      "step": 114600
    },
    {
      "epoch": 22.01,
      "learning_rate": 0.001,
      "loss": 2.5669,
      "step": 114612
    },
    {
      "epoch": 22.01,
      "learning_rate": 0.001,
      "loss": 2.5675,
      "step": 114624
    },
    {
      "epoch": 22.01,
      "learning_rate": 0.001,
      "loss": 2.5661,
      "step": 114636
    },
    {
      "epoch": 22.01,
      "learning_rate": 0.001,
      "loss": 2.5628,
      "step": 114648
    },
    {
      "epoch": 22.02,
      "learning_rate": 0.001,
      "loss": 2.5549,
      "step": 114660
    },
    {
      "epoch": 22.02,
      "learning_rate": 0.001,
      "loss": 2.5688,
      "step": 114672
    },
    {
      "epoch": 22.02,
      "learning_rate": 0.001,
      "loss": 2.5604,
      "step": 114684
    },
    {
      "epoch": 22.02,
      "learning_rate": 0.001,
      "loss": 2.5664,
      "step": 114696
    },
    {
      "epoch": 22.03,
      "learning_rate": 0.001,
      "loss": 2.5629,
      "step": 114708
    },
    {
      "epoch": 22.03,
      "learning_rate": 0.001,
      "loss": 2.5669,
      "step": 114720
    },
    {
      "epoch": 22.03,
      "learning_rate": 0.001,
      "loss": 2.567,
      "step": 114732
    },
    {
      "epoch": 22.03,
      "learning_rate": 0.001,
      "loss": 2.5585,
      "step": 114744
    },
    {
      "epoch": 22.03,
      "learning_rate": 0.001,
      "loss": 2.5689,
      "step": 114756
    },
    {
      "epoch": 22.04,
      "learning_rate": 0.001,
      "loss": 2.5636,
      "step": 114768
    },
    {
      "epoch": 22.04,
      "learning_rate": 0.001,
      "loss": 2.566,
      "step": 114780
    },
    {
      "epoch": 22.04,
      "learning_rate": 0.001,
      "loss": 2.5557,
      "step": 114792
    },
    {
      "epoch": 22.04,
      "learning_rate": 0.001,
      "loss": 2.5716,
      "step": 114804
    },
    {
      "epoch": 22.05,
      "learning_rate": 0.001,
      "loss": 2.5717,
      "step": 114816
    },
    {
      "epoch": 22.05,
      "learning_rate": 0.001,
      "loss": 2.575,
      "step": 114828
    },
    {
      "epoch": 22.05,
      "learning_rate": 0.001,
      "loss": 2.5569,
      "step": 114840
    },
    {
      "epoch": 22.05,
      "learning_rate": 0.001,
      "loss": 2.5656,
      "step": 114852
    },
    {
      "epoch": 22.06,
      "learning_rate": 0.001,
      "loss": 2.5566,
      "step": 114864
    },
    {
      "epoch": 22.06,
      "learning_rate": 0.001,
      "loss": 2.5607,
      "step": 114876
    },
    {
      "epoch": 22.06,
      "learning_rate": 0.001,
      "loss": 2.5646,
      "step": 114888
    },
    {
      "epoch": 22.06,
      "learning_rate": 0.001,
      "loss": 2.573,
      "step": 114900
    },
    {
      "epoch": 22.06,
      "learning_rate": 0.001,
      "loss": 2.5651,
      "step": 114912
    },
    {
      "epoch": 22.07,
      "learning_rate": 0.001,
      "loss": 2.5649,
      "step": 114924
    },
    {
      "epoch": 22.07,
      "learning_rate": 0.001,
      "loss": 2.5532,
      "step": 114936
    },
    {
      "epoch": 22.07,
      "learning_rate": 0.001,
      "loss": 2.564,
      "step": 114948
    },
    {
      "epoch": 22.07,
      "learning_rate": 0.001,
      "loss": 2.5804,
      "step": 114960
    },
    {
      "epoch": 22.08,
      "learning_rate": 0.001,
      "loss": 2.5794,
      "step": 114972
    },
    {
      "epoch": 22.08,
      "learning_rate": 0.001,
      "loss": 2.5696,
      "step": 114984
    },
    {
      "epoch": 22.08,
      "learning_rate": 0.001,
      "loss": 2.5688,
      "step": 114996
    },
    {
      "epoch": 22.08,
      "eval_ag_news_accuracy": 0.32159375,
      "eval_ag_news_bleu_score": 4.7238797361331475,
      "eval_ag_news_bleu_score_sem": 0.15886998226372415,
      "eval_ag_news_emb_cos_sim": 0.8122814893722534,
      "eval_ag_news_emb_cos_sim_sem": 0.006870719316695718,
      "eval_ag_news_emb_top1_equal": 0.2890625,
      "eval_ag_news_emb_top1_equal_sem": 0.04022626667363519,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.582092761993408,
      "eval_ag_news_n_ngrams_match_1": 13.96,
      "eval_ag_news_n_ngrams_match_2": 3.05,
      "eval_ag_news_n_ngrams_match_3": 0.864,
      "eval_ag_news_num_pred_words": 46.922,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 35.94869424157151,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.344847234060312,
      "eval_ag_news_runtime": 10.3756,
      "eval_ag_news_samples_per_second": 48.19,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.34868912365276444,
      "eval_ag_news_token_set_f1_sem": 0.0044073167591970296,
      "eval_ag_news_token_set_precision": 0.3332341760416378,
      "eval_ag_news_token_set_recall": 0.38273018711868484,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 115000
    },
    {
      "epoch": 22.08,
      "eval_anthropic_toxic_prompts_accuracy": 0.11371875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.0856486725872965,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11414719573617015,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6693856120109558,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010228048365564959,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1328125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030114394778901498,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.284191370010376,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.168,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.936,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.712,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.408,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.6873953590923,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 62.96875,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21133468214565337,
      "eval_anthropic_toxic_prompts_runtime": 9.9183,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.412,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.101,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3543421457270459,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006548411078583062,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.433028727004398,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32447431356186524,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 115000
    },
    {
      "epoch": 22.08,
      "eval_arxiv_accuracy": 0.345,
      "eval_arxiv_bleu_score": 4.332887674090163,
      "eval_arxiv_bleu_score_sem": 0.1219026245576228,
      "eval_arxiv_emb_cos_sim": 0.7662792205810547,
      "eval_arxiv_emb_cos_sim_sem": 0.006597461541464352,
      "eval_arxiv_emb_top1_equal": 0.2890625,
      "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.428192377090454,
      "eval_arxiv_n_ngrams_match_1": 14.974,
      "eval_arxiv_n_ngrams_match_2": 2.902,
      "eval_arxiv_n_ngrams_match_3": 0.664,
      "eval_arxiv_num_pred_words": 40.288,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 30.820879837215674,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3596958667814193,
      "eval_arxiv_runtime": 10.7978,
      "eval_arxiv_samples_per_second": 46.306,
      "eval_arxiv_steps_per_second": 0.093,
      "eval_arxiv_token_set_f1": 0.35176024393843747,
      "eval_arxiv_token_set_f1_sem": 0.004080474562135315,
      "eval_arxiv_token_set_precision": 0.3026004170903108,
      "eval_arxiv_token_set_recall": 0.4365537162085265,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 115000
    },
    {
      "epoch": 22.08,
      "eval_python_code_alpaca_accuracy": 0.15759375,
      "eval_python_code_alpaca_bleu_score": 4.592611588898136,
      "eval_python_code_alpaca_bleu_score_sem": 0.1515017279756423,
      "eval_python_code_alpaca_emb_cos_sim": 0.7440839409828186,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010540835274887988,
      "eval_python_code_alpaca_emb_top1_equal": 0.1328125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.9287099838256836,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.696,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.876,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.98,
      "eval_python_code_alpaca_num_pred_words": 43.064,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.703487126442802,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3278541879311717,
      "eval_python_code_alpaca_runtime": 9.8815,
      "eval_python_code_alpaca_samples_per_second": 50.6,
      "eval_python_code_alpaca_steps_per_second": 0.101,
      "eval_python_code_alpaca_token_set_f1": 0.46579912369083176,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005772838048274028,
      "eval_python_code_alpaca_token_set_precision": 0.5238379719629166,
      "eval_python_code_alpaca_token_set_recall": 0.4414236591984351,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 115000
    },
    {
      "epoch": 22.08,
      "eval_wikibio_accuracy": 0.32103125,
      "eval_wikibio_bleu_score": 5.988426808877446,
      "eval_wikibio_bleu_score_sem": 0.20765197765749627,
      "eval_wikibio_emb_cos_sim": 0.7449048757553101,
      "eval_wikibio_emb_cos_sim_sem": 0.008843595514788952,
      "eval_wikibio_emb_top1_equal": 0.1640625,
      "eval_wikibio_emb_top1_equal_sem": 0.03286167651298939,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7470011711120605,
      "eval_wikibio_n_ngrams_match_1": 10.144,
      "eval_wikibio_n_ngrams_match_2": 3.364,
      "eval_wikibio_n_ngrams_match_3": 1.208,
      "eval_wikibio_num_pred_words": 35.814,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 42.393759555544186,
      "eval_wikibio_pred_num_tokens": 62.9921875,
      "eval_wikibio_rouge_score": 0.35676634076127417,
      "eval_wikibio_runtime": 10.2449,
      "eval_wikibio_samples_per_second": 48.805,
      "eval_wikibio_steps_per_second": 0.098,
      "eval_wikibio_token_set_f1": 0.3230679948872902,
      "eval_wikibio_token_set_f1_sem": 0.005287042962957077,
      "eval_wikibio_token_set_precision": 0.32933225668108,
      "eval_wikibio_token_set_recall": 0.33355628885650834,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 115000
    },
    {
      "epoch": 22.08,
      "eval_nq_accuracy": 0.526,
      "eval_nq_bleu_score": 11.709103363297908,
      "eval_nq_bleu_score_sem": 0.4913395272078558,
      "eval_nq_emb_cos_sim": 0.8308370113372803,
      "eval_nq_emb_cos_sim_sem": 0.007315120955750648,
      "eval_nq_emb_top1_equal": 0.3125,
      "eval_nq_emb_top1_equal_sem": 0.041130074229814934,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.2016963958740234,
      "eval_nq_n_ngrams_match_1": 22.848,
      "eval_nq_n_ngrams_match_2": 8.432,
      "eval_nq_n_ngrams_match_3": 3.956,
      "eval_nq_num_pred_words": 49.092,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.0403364883505,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.44107554008541505,
      "eval_nq_runtime": 11.058,
      "eval_nq_samples_per_second": 45.216,
      "eval_nq_steps_per_second": 0.09,
      "eval_nq_token_set_f1": 0.45775410017236257,
      "eval_nq_token_set_f1_sem": 0.0050027542273605105,
      "eval_nq_token_set_precision": 0.4144863007077681,
      "eval_nq_token_set_recall": 0.519926324232164,
      "eval_nq_true_num_tokens": 64.0,
      "step": 115000
    },
    {
      "epoch": 22.08,
      "learning_rate": 0.001,
      "loss": 2.5619,
      "step": 115008
    },
    {
      "epoch": 22.09,
      "learning_rate": 0.001,
      "loss": 2.5885,
      "step": 115020
    },
    {
      "epoch": 22.09,
      "learning_rate": 0.001,
      "loss": 2.5661,
      "step": 115032
    },
    {
      "epoch": 22.09,
      "learning_rate": 0.001,
      "loss": 2.5653,
      "step": 115044
    },
    {
      "epoch": 22.09,
      "learning_rate": 0.001,
      "loss": 2.5769,
      "step": 115056
    },
    {
      "epoch": 22.09,
      "learning_rate": 0.001,
      "loss": 2.5717,
      "step": 115068
    },
    {
      "epoch": 22.1,
      "learning_rate": 0.001,
      "loss": 2.5765,
      "step": 115080
    },
    {
      "epoch": 22.1,
      "learning_rate": 0.001,
      "loss": 2.5641,
      "step": 115092
    },
    {
      "epoch": 22.1,
      "learning_rate": 0.001,
      "loss": 2.5689,
      "step": 115104
    },
    {
      "epoch": 22.1,
      "learning_rate": 0.001,
      "loss": 2.5675,
      "step": 115116
    },
    {
      "epoch": 22.11,
      "learning_rate": 0.001,
      "loss": 2.5652,
      "step": 115128
    },
    {
      "epoch": 22.11,
      "learning_rate": 0.001,
      "loss": 2.5565,
      "step": 115140
    },
    {
      "epoch": 22.11,
      "learning_rate": 0.001,
      "loss": 2.5763,
      "step": 115152
    },
    {
      "epoch": 22.11,
      "learning_rate": 0.001,
      "loss": 2.565,
      "step": 115164
    },
    {
      "epoch": 22.12,
      "learning_rate": 0.001,
      "loss": 2.5714,
      "step": 115176
    },
    {
      "epoch": 22.12,
      "learning_rate": 0.001,
      "loss": 2.57,
      "step": 115188
    },
    {
      "epoch": 22.12,
      "learning_rate": 0.001,
      "loss": 2.5684,
      "step": 115200
    },
    {
      "epoch": 22.12,
      "learning_rate": 0.001,
      "loss": 2.5679,
      "step": 115212
    },
    {
      "epoch": 22.12,
      "learning_rate": 0.001,
      "loss": 2.5705,
      "step": 115224
    },
    {
      "epoch": 22.13,
      "learning_rate": 0.001,
      "loss": 2.5732,
      "step": 115236
    },
    {
      "epoch": 22.13,
      "learning_rate": 0.001,
      "loss": 2.5709,
      "step": 115248
    },
    {
      "epoch": 22.13,
      "learning_rate": 0.001,
      "loss": 2.572,
      "step": 115260
    },
    {
      "epoch": 22.13,
      "learning_rate": 0.001,
      "loss": 2.5621,
      "step": 115272
    },
    {
      "epoch": 22.14,
      "learning_rate": 0.001,
      "loss": 2.5707,
      "step": 115284
    },
    {
      "epoch": 22.14,
      "learning_rate": 0.001,
      "loss": 2.5704,
      "step": 115296
    },
    {
      "epoch": 22.14,
      "learning_rate": 0.001,
      "loss": 2.5687,
      "step": 115308
    },
    {
      "epoch": 22.14,
      "learning_rate": 0.001,
      "loss": 2.5854,
      "step": 115320
    },
    {
      "epoch": 22.15,
      "learning_rate": 0.001,
      "loss": 2.5709,
      "step": 115332
    },
    {
      "epoch": 22.15,
      "learning_rate": 0.001,
      "loss": 2.582,
      "step": 115344
    },
    {
      "epoch": 22.15,
      "learning_rate": 0.001,
      "loss": 2.5622,
      "step": 115356
    },
    {
      "epoch": 22.15,
      "learning_rate": 0.001,
      "loss": 2.5746,
      "step": 115368
    },
    {
      "epoch": 22.15,
      "learning_rate": 0.001,
      "loss": 2.5702,
      "step": 115380
    },
    {
      "epoch": 22.16,
      "learning_rate": 0.001,
      "loss": 2.5684,
      "step": 115392
    },
    {
      "epoch": 22.16,
      "learning_rate": 0.001,
      "loss": 2.5657,
      "step": 115404
    },
    {
      "epoch": 22.16,
      "learning_rate": 0.001,
      "loss": 2.5677,
      "step": 115416
    },
    {
      "epoch": 22.16,
      "learning_rate": 0.001,
      "loss": 2.5675,
      "step": 115428
    },
    {
      "epoch": 22.17,
      "learning_rate": 0.001,
      "loss": 2.5735,
      "step": 115440
    },
    {
      "epoch": 22.17,
      "learning_rate": 0.001,
      "loss": 2.5652,
      "step": 115452
    },
    {
      "epoch": 22.17,
      "learning_rate": 0.001,
      "loss": 2.5661,
      "step": 115464
    },
    {
      "epoch": 22.17,
      "learning_rate": 0.001,
      "loss": 2.5771,
      "step": 115476
    },
    {
      "epoch": 22.18,
      "learning_rate": 0.001,
      "loss": 2.5651,
      "step": 115488
    },
    {
      "epoch": 22.18,
      "learning_rate": 0.001,
      "loss": 2.5718,
      "step": 115500
    },
    {
      "epoch": 22.18,
      "learning_rate": 0.001,
      "loss": 2.5596,
      "step": 115512
    },
    {
      "epoch": 22.18,
      "learning_rate": 0.001,
      "loss": 2.5677,
      "step": 115524
    },
    {
      "epoch": 22.18,
      "learning_rate": 0.001,
      "loss": 2.5818,
      "step": 115536
    },
    {
      "epoch": 22.19,
      "learning_rate": 0.001,
      "loss": 2.5791,
      "step": 115548
    },
    {
      "epoch": 22.19,
      "learning_rate": 0.001,
      "loss": 2.5753,
      "step": 115560
    },
    {
      "epoch": 22.19,
      "learning_rate": 0.001,
      "loss": 2.566,
      "step": 115572
    },
    {
      "epoch": 22.19,
      "learning_rate": 0.001,
      "loss": 2.5741,
      "step": 115584
    },
    {
      "epoch": 22.2,
      "learning_rate": 0.001,
      "loss": 2.5733,
      "step": 115596
    },
    {
      "epoch": 22.2,
      "learning_rate": 0.001,
      "loss": 2.5716,
      "step": 115608
    },
    {
      "epoch": 22.2,
      "learning_rate": 0.001,
      "loss": 2.5712,
      "step": 115620
    },
    {
      "epoch": 22.2,
      "eval_ag_news_accuracy": 0.3208125,
      "eval_ag_news_bleu_score": 4.8383610674653665,
      "eval_ag_news_bleu_score_sem": 0.15287793482284692,
      "eval_ag_news_emb_cos_sim": 0.8092503547668457,
      "eval_ag_news_emb_cos_sim_sem": 0.007799535100566101,
      "eval_ag_news_emb_top1_equal": 0.2578125,
      "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.58366060256958,
      "eval_ag_news_n_ngrams_match_1": 13.95,
      "eval_ag_news_n_ngrams_match_2": 3.148,
      "eval_ag_news_n_ngrams_match_3": 0.882,
      "eval_ag_news_num_pred_words": 46.736,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 36.00510026933896,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3456168995319642,
      "eval_ag_news_runtime": 10.4292,
      "eval_ag_news_samples_per_second": 47.942,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.34918508997128606,
      "eval_ag_news_token_set_f1_sem": 0.004444407429988588,
      "eval_ag_news_token_set_precision": 0.33353074725350246,
      "eval_ag_news_token_set_recall": 0.38303365382193366,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 115625
    },
    {
      "epoch": 22.2,
      "eval_anthropic_toxic_prompts_accuracy": 0.112625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.119223642899152,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12109346857277077,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6737870573997498,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009638637860030266,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2874598503112793,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.124,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.888,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.708,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.138,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.77476529077541,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21176920879582883,
      "eval_anthropic_toxic_prompts_runtime": 9.9911,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.044,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.1,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3504143829702812,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006300040865617722,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4360810895301237,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.31946068739308475,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 115625
    },
    {
      "epoch": 22.2,
      "eval_arxiv_accuracy": 0.34375,
      "eval_arxiv_bleu_score": 4.309186753701695,
      "eval_arxiv_bleu_score_sem": 0.12135217228988915,
      "eval_arxiv_emb_cos_sim": 0.771766185760498,
      "eval_arxiv_emb_cos_sim_sem": 0.006912421647663724,
      "eval_arxiv_emb_top1_equal": 0.28125,
      "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4498324394226074,
      "eval_arxiv_n_ngrams_match_1": 15.26,
      "eval_arxiv_n_ngrams_match_2": 2.978,
      "eval_arxiv_n_ngrams_match_3": 0.656,
      "eval_arxiv_num_pred_words": 41.552,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 31.49511452701101,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.35855996161871206,
      "eval_arxiv_runtime": 10.2964,
      "eval_arxiv_samples_per_second": 48.561,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.35263050234596155,
      "eval_arxiv_token_set_f1_sem": 0.004315855523778874,
      "eval_arxiv_token_set_precision": 0.3070109782381153,
      "eval_arxiv_token_set_recall": 0.43150787570777377,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 115625
    },
    {
      "epoch": 22.2,
      "eval_python_code_alpaca_accuracy": 0.15634375,
      "eval_python_code_alpaca_bleu_score": 4.648977876646551,
      "eval_python_code_alpaca_bleu_score_sem": 0.15743110360638743,
      "eval_python_code_alpaca_emb_cos_sim": 0.7463724613189697,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.00996857787815272,
      "eval_python_code_alpaca_emb_top1_equal": 0.125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.9322311878204346,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.614,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.872,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.986,
      "eval_python_code_alpaca_num_pred_words": 42.91,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.76946200736748,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.327085223619867,
      "eval_python_code_alpaca_runtime": 10.07,
      "eval_python_code_alpaca_samples_per_second": 49.652,
      "eval_python_code_alpaca_steps_per_second": 0.099,
      "eval_python_code_alpaca_token_set_f1": 0.4624767135484843,
      "eval_python_code_alpaca_token_set_f1_sem": 0.00585234104111037,
      "eval_python_code_alpaca_token_set_precision": 0.5209808810214367,
      "eval_python_code_alpaca_token_set_recall": 0.4415241516758691,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 115625
    },
    {
      "epoch": 22.2,
      "eval_wikibio_accuracy": 0.31828125,
      "eval_wikibio_bleu_score": 5.894394280844617,
      "eval_wikibio_bleu_score_sem": 0.21338569848021935,
      "eval_wikibio_emb_cos_sim": 0.7446073293685913,
      "eval_wikibio_emb_cos_sim_sem": 0.00900049176818231,
      "eval_wikibio_emb_top1_equal": 0.15625,
      "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.77712345123291,
      "eval_wikibio_n_ngrams_match_1": 10.312,
      "eval_wikibio_n_ngrams_match_2": 3.436,
      "eval_wikibio_n_ngrams_match_3": 1.212,
      "eval_wikibio_num_pred_words": 37.434,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 43.69018385968419,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.35711947377654774,
      "eval_wikibio_runtime": 10.0381,
      "eval_wikibio_samples_per_second": 49.81,
      "eval_wikibio_steps_per_second": 0.1,
      "eval_wikibio_token_set_f1": 0.3225553033294478,
      "eval_wikibio_token_set_f1_sem": 0.005162530664144254,
      "eval_wikibio_token_set_precision": 0.3326185636482031,
      "eval_wikibio_token_set_recall": 0.32612116799793156,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 115625
    },
    {
      "epoch": 22.2,
      "eval_nq_accuracy": 0.5248125,
      "eval_nq_bleu_score": 11.671781350251909,
      "eval_nq_bleu_score_sem": 0.5040887017446044,
      "eval_nq_emb_cos_sim": 0.8326694369316101,
      "eval_nq_emb_cos_sim_sem": 0.006603992985894525,
      "eval_nq_emb_top1_equal": 0.25,
      "eval_nq_emb_top1_equal_sem": 0.03842366440207048,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.204833984375,
      "eval_nq_n_ngrams_match_1": 22.932,
      "eval_nq_n_ngrams_match_2": 8.464,
      "eval_nq_n_ngrams_match_3": 3.904,
      "eval_nq_num_pred_words": 48.994,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.068745889360002,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.44350902934874165,
      "eval_nq_runtime": 10.3744,
      "eval_nq_samples_per_second": 48.196,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.45723941450951905,
      "eval_nq_token_set_f1_sem": 0.005266574943958842,
      "eval_nq_token_set_precision": 0.4147737792573533,
      "eval_nq_token_set_recall": 0.5175651205144508,
      "eval_nq_true_num_tokens": 64.0,
      "step": 115625
    },
    {
      "epoch": 22.2,
      "learning_rate": 0.001,
      "loss": 2.572,
      "step": 115632
    },
    {
      "epoch": 22.21,
      "learning_rate": 0.001,
      "loss": 2.5713,
      "step": 115644
    },
    {
      "epoch": 22.21,
      "learning_rate": 0.001,
      "loss": 2.5732,
      "step": 115656
    },
    {
      "epoch": 22.21,
      "learning_rate": 0.001,
      "loss": 2.5675,
      "step": 115668
    },
    {
      "epoch": 22.21,
      "learning_rate": 0.001,
      "loss": 2.5721,
      "step": 115680
    },
    {
      "epoch": 22.21,
      "learning_rate": 0.001,
      "loss": 2.5739,
      "step": 115692
    },
    {
      "epoch": 22.22,
      "learning_rate": 0.001,
      "loss": 2.5784,
      "step": 115704
    },
    {
      "epoch": 22.22,
      "learning_rate": 0.001,
      "loss": 2.5696,
      "step": 115716
    },
    {
      "epoch": 22.22,
      "learning_rate": 0.001,
      "loss": 2.5825,
      "step": 115728
    },
    {
      "epoch": 22.22,
      "learning_rate": 0.001,
      "loss": 2.5727,
      "step": 115740
    },
    {
      "epoch": 22.23,
      "learning_rate": 0.001,
      "loss": 2.5766,
      "step": 115752
    },
    {
      "epoch": 22.23,
      "learning_rate": 0.001,
      "loss": 2.5828,
      "step": 115764
    },
    {
      "epoch": 22.23,
      "learning_rate": 0.001,
      "loss": 2.5728,
      "step": 115776
    },
    {
      "epoch": 22.23,
      "learning_rate": 0.001,
      "loss": 2.578,
      "step": 115788
    },
    {
      "epoch": 22.24,
      "learning_rate": 0.001,
      "loss": 2.5744,
      "step": 115800
    },
    {
      "epoch": 22.24,
      "learning_rate": 0.001,
      "loss": 2.5778,
      "step": 115812
    },
    {
      "epoch": 22.24,
      "learning_rate": 0.001,
      "loss": 2.5717,
      "step": 115824
    },
    {
      "epoch": 22.24,
      "learning_rate": 0.001,
      "loss": 2.5735,
      "step": 115836
    },
    {
      "epoch": 22.24,
      "learning_rate": 0.001,
      "loss": 2.5716,
      "step": 115848
    },
    {
      "epoch": 22.25,
      "learning_rate": 0.001,
      "loss": 2.5655,
      "step": 115860
    },
    {
      "epoch": 22.25,
      "learning_rate": 0.001,
      "loss": 2.5875,
      "step": 115872
    },
    {
      "epoch": 22.25,
      "learning_rate": 0.001,
      "loss": 2.5812,
      "step": 115884
    },
    {
      "epoch": 22.25,
      "learning_rate": 0.001,
      "loss": 2.5792,
      "step": 115896
    },
    {
      "epoch": 22.26,
      "learning_rate": 0.001,
      "loss": 2.5745,
      "step": 115908
    },
    {
      "epoch": 22.26,
      "learning_rate": 0.001,
      "loss": 2.5741,
      "step": 115920
    },
    {
      "epoch": 22.26,
      "learning_rate": 0.001,
      "loss": 2.579,
      "step": 115932
    },
    {
      "epoch": 22.26,
      "learning_rate": 0.001,
      "loss": 2.569,
      "step": 115944
    },
    {
      "epoch": 22.26,
      "learning_rate": 0.001,
      "loss": 2.5792,
      "step": 115956
    },
    {
      "epoch": 22.27,
      "learning_rate": 0.001,
      "loss": 2.5788,
      "step": 115968
    },
    {
      "epoch": 22.27,
      "learning_rate": 0.001,
      "loss": 2.5793,
      "step": 115980
    },
    {
      "epoch": 22.27,
      "learning_rate": 0.001,
      "loss": 2.5835,
      "step": 115992
    },
    {
      "epoch": 22.27,
      "learning_rate": 0.001,
      "loss": 2.5798,
      "step": 116004
    },
    {
      "epoch": 22.28,
      "learning_rate": 0.001,
      "loss": 2.5644,
      "step": 116016
    },
    {
      "epoch": 22.28,
      "learning_rate": 0.001,
      "loss": 2.5875,
      "step": 116028
    },
    {
      "epoch": 22.28,
      "learning_rate": 0.001,
      "loss": 2.572,
      "step": 116040
    },
    {
      "epoch": 22.28,
      "learning_rate": 0.001,
      "loss": 2.5772,
      "step": 116052
    },
    {
      "epoch": 22.29,
      "learning_rate": 0.001,
      "loss": 2.5715,
      "step": 116064
    },
    {
      "epoch": 22.29,
      "learning_rate": 0.001,
      "loss": 2.5716,
      "step": 116076
    },
    {
      "epoch": 22.29,
      "learning_rate": 0.001,
      "loss": 2.5705,
      "step": 116088
    },
    {
      "epoch": 22.29,
      "learning_rate": 0.001,
      "loss": 2.5782,
      "step": 116100
    },
    {
      "epoch": 22.29,
      "learning_rate": 0.001,
      "loss": 2.5703,
      "step": 116112
    },
    {
      "epoch": 22.3,
      "learning_rate": 0.001,
      "loss": 2.5742,
      "step": 116124
    },
    {
      "epoch": 22.3,
      "learning_rate": 0.001,
      "loss": 2.5743,
      "step": 116136
    },
    {
      "epoch": 22.3,
      "learning_rate": 0.001,
      "loss": 2.5815,
      "step": 116148
    },
    {
      "epoch": 22.3,
      "learning_rate": 0.001,
      "loss": 2.571,
      "step": 116160
    },
    {
      "epoch": 22.31,
      "learning_rate": 0.001,
      "loss": 2.577,
      "step": 116172
    },
    {
      "epoch": 22.31,
      "learning_rate": 0.001,
      "loss": 2.5721,
      "step": 116184
    },
    {
      "epoch": 22.31,
      "learning_rate": 0.001,
      "loss": 2.5739,
      "step": 116196
    },
    {
      "epoch": 22.31,
      "learning_rate": 0.001,
      "loss": 2.5846,
      "step": 116208
    },
    {
      "epoch": 22.32,
      "learning_rate": 0.001,
      "loss": 2.5596,
      "step": 116220
    },
    {
      "epoch": 22.32,
      "learning_rate": 0.001,
      "loss": 2.585,
      "step": 116232
    },
    {
      "epoch": 22.32,
      "learning_rate": 0.001,
      "loss": 2.5813,
      "step": 116244
    },
    {
      "epoch": 22.32,
      "eval_ag_news_accuracy": 0.32046875,
      "eval_ag_news_bleu_score": 4.820447644939628,
      "eval_ag_news_bleu_score_sem": 0.15686168539058712,
      "eval_ag_news_emb_cos_sim": 0.806586503982544,
      "eval_ag_news_emb_cos_sim_sem": 0.0072461252476492995,
      "eval_ag_news_emb_top1_equal": 0.21875,
      "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5799827575683594,
      "eval_ag_news_n_ngrams_match_1": 13.988,
      "eval_ag_news_n_ngrams_match_2": 3.094,
      "eval_ag_news_n_ngrams_match_3": 0.878,
      "eval_ag_news_num_pred_words": 47.312,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 35.87292230531959,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.34703636655467196,
      "eval_ag_news_runtime": 11.0111,
      "eval_ag_news_samples_per_second": 45.409,
      "eval_ag_news_steps_per_second": 0.091,
      "eval_ag_news_token_set_f1": 0.3476490374831776,
      "eval_ag_news_token_set_f1_sem": 0.004285788192314468,
      "eval_ag_news_token_set_precision": 0.33428771966129356,
      "eval_ag_news_token_set_recall": 0.3771505388639559,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 116250
    },
    {
      "epoch": 22.32,
      "eval_anthropic_toxic_prompts_accuracy": 0.11371875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.016444840418683,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11264137508637179,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.659735918045044,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009070119679011324,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2803163528442383,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.08,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.83,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.666,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.496,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.584181351135637,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21058227510647462,
      "eval_anthropic_toxic_prompts_runtime": 9.968,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.16,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.1,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3537007728632725,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0064541196798620975,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.43178570591401677,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32573036917127296,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 116250
    },
    {
      "epoch": 22.32,
      "eval_arxiv_accuracy": 0.34390625,
      "eval_arxiv_bleu_score": 4.185872758914649,
      "eval_arxiv_bleu_score_sem": 0.11666933817858623,
      "eval_arxiv_emb_cos_sim": 0.7674313187599182,
      "eval_arxiv_emb_cos_sim_sem": 0.006895747959266109,
      "eval_arxiv_emb_top1_equal": 0.3046875,
      "eval_arxiv_emb_top1_equal_sem": 0.04084279867618665,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4447033405303955,
      "eval_arxiv_n_ngrams_match_1": 14.88,
      "eval_arxiv_n_ngrams_match_2": 2.884,
      "eval_arxiv_n_ngrams_match_3": 0.598,
      "eval_arxiv_num_pred_words": 40.786,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 31.333986543901734,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.35431787802701264,
      "eval_arxiv_runtime": 10.2408,
      "eval_arxiv_samples_per_second": 48.824,
      "eval_arxiv_steps_per_second": 0.098,
      "eval_arxiv_token_set_f1": 0.349699944822291,
      "eval_arxiv_token_set_f1_sem": 0.004142739999432971,
      "eval_arxiv_token_set_precision": 0.3010176361794418,
      "eval_arxiv_token_set_recall": 0.4346348579163179,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 116250
    },
    {
      "epoch": 22.32,
      "eval_python_code_alpaca_accuracy": 0.1589375,
      "eval_python_code_alpaca_bleu_score": 4.361097722848697,
      "eval_python_code_alpaca_bleu_score_sem": 0.139940458071686,
      "eval_python_code_alpaca_emb_cos_sim": 0.7433726787567139,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009855297686067565,
      "eval_python_code_alpaca_emb_top1_equal": 0.1484375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.9113898277282715,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.498,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.722,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.878,
      "eval_python_code_alpaca_num_pred_words": 43.142,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.382329092074873,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3251099019183774,
      "eval_python_code_alpaca_runtime": 10.0262,
      "eval_python_code_alpaca_samples_per_second": 49.869,
      "eval_python_code_alpaca_steps_per_second": 0.1,
      "eval_python_code_alpaca_token_set_f1": 0.47102202508762725,
      "eval_python_code_alpaca_token_set_f1_sem": 0.006076332489951335,
      "eval_python_code_alpaca_token_set_precision": 0.5173190498229226,
      "eval_python_code_alpaca_token_set_recall": 0.45752735963416313,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 116250
    },
    {
      "epoch": 22.32,
      "eval_wikibio_accuracy": 0.32015625,
      "eval_wikibio_bleu_score": 5.82156297278923,
      "eval_wikibio_bleu_score_sem": 0.19800734201388231,
      "eval_wikibio_emb_cos_sim": 0.737400472164154,
      "eval_wikibio_emb_cos_sim_sem": 0.009803398451937954,
      "eval_wikibio_emb_top1_equal": 0.1875,
      "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7829058170318604,
      "eval_wikibio_n_ngrams_match_1": 10.12,
      "eval_wikibio_n_ngrams_match_2": 3.38,
      "eval_wikibio_n_ngrams_match_3": 1.214,
      "eval_wikibio_num_pred_words": 36.704,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 43.94354830157379,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3495130925947355,
      "eval_wikibio_runtime": 10.5039,
      "eval_wikibio_samples_per_second": 47.601,
      "eval_wikibio_steps_per_second": 0.095,
      "eval_wikibio_token_set_f1": 0.31841487571440225,
      "eval_wikibio_token_set_f1_sem": 0.005544290379750777,
      "eval_wikibio_token_set_precision": 0.32771621765992043,
      "eval_wikibio_token_set_recall": 0.3259368214359848,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 116250
    },
    {
      "epoch": 22.32,
      "eval_nq_accuracy": 0.52646875,
      "eval_nq_bleu_score": 11.665484202363233,
      "eval_nq_bleu_score_sem": 0.47570836770547587,
      "eval_nq_emb_cos_sim": 0.8337132930755615,
      "eval_nq_emb_cos_sim_sem": 0.00710044269013787,
      "eval_nq_emb_top1_equal": 0.28125,
      "eval_nq_emb_top1_equal_sem": 0.039896367485272234,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.207057476043701,
      "eval_nq_n_ngrams_match_1": 22.818,
      "eval_nq_n_ngrams_match_2": 8.358,
      "eval_nq_n_ngrams_match_3": 3.894,
      "eval_nq_num_pred_words": 49.104,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.088932604470212,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4389140926684925,
      "eval_nq_runtime": 10.596,
      "eval_nq_samples_per_second": 47.188,
      "eval_nq_steps_per_second": 0.094,
      "eval_nq_token_set_f1": 0.4552582167522271,
      "eval_nq_token_set_f1_sem": 0.005076647761822282,
      "eval_nq_token_set_precision": 0.41312373732761054,
      "eval_nq_token_set_recall": 0.5167785659627977,
      "eval_nq_true_num_tokens": 64.0,
      "step": 116250
    },
    {
      "epoch": 22.32,
      "learning_rate": 0.001,
      "loss": 2.5794,
      "step": 116256
    },
    {
      "epoch": 22.32,
      "learning_rate": 0.001,
      "loss": 2.581,
      "step": 116268
    },
    {
      "epoch": 22.33,
      "learning_rate": 0.001,
      "loss": 2.5765,
      "step": 116280
    },
    {
      "epoch": 22.33,
      "learning_rate": 0.001,
      "loss": 2.5763,
      "step": 116292
    },
    {
      "epoch": 22.33,
      "learning_rate": 0.001,
      "loss": 2.5699,
      "step": 116304
    },
    {
      "epoch": 22.33,
      "learning_rate": 0.001,
      "loss": 2.5696,
      "step": 116316
    },
    {
      "epoch": 22.34,
      "learning_rate": 0.001,
      "loss": 2.5773,
      "step": 116328
    },
    {
      "epoch": 22.34,
      "learning_rate": 0.001,
      "loss": 2.5768,
      "step": 116340
    },
    {
      "epoch": 22.34,
      "learning_rate": 0.001,
      "loss": 2.5749,
      "step": 116352
    },
    {
      "epoch": 22.34,
      "learning_rate": 0.001,
      "loss": 2.5717,
      "step": 116364
    },
    {
      "epoch": 22.35,
      "learning_rate": 0.001,
      "loss": 2.5635,
      "step": 116376
    },
    {
      "epoch": 22.35,
      "learning_rate": 0.001,
      "loss": 2.5787,
      "step": 116388
    },
    {
      "epoch": 22.35,
      "learning_rate": 0.001,
      "loss": 2.5808,
      "step": 116400
    },
    {
      "epoch": 22.35,
      "learning_rate": 0.001,
      "loss": 2.5753,
      "step": 116412
    },
    {
      "epoch": 22.35,
      "learning_rate": 0.001,
      "loss": 2.5804,
      "step": 116424
    },
    {
      "epoch": 22.36,
      "learning_rate": 0.001,
      "loss": 2.5772,
      "step": 116436
    },
    {
      "epoch": 22.36,
      "learning_rate": 0.001,
      "loss": 2.5725,
      "step": 116448
    },
    {
      "epoch": 22.36,
      "learning_rate": 0.001,
      "loss": 2.5729,
      "step": 116460
    },
    {
      "epoch": 22.36,
      "learning_rate": 0.001,
      "loss": 2.5789,
      "step": 116472
    },
    {
      "epoch": 22.37,
      "learning_rate": 0.001,
      "loss": 2.5738,
      "step": 116484
    },
    {
      "epoch": 22.37,
      "learning_rate": 0.001,
      "loss": 2.5786,
      "step": 116496
    },
    {
      "epoch": 22.37,
      "learning_rate": 0.001,
      "loss": 2.5698,
      "step": 116508
    },
    {
      "epoch": 22.37,
      "learning_rate": 0.001,
      "loss": 2.5788,
      "step": 116520
    },
    {
      "epoch": 22.38,
      "learning_rate": 0.001,
      "loss": 2.5683,
      "step": 116532
    },
    {
      "epoch": 22.38,
      "learning_rate": 0.001,
      "loss": 2.5652,
      "step": 116544
    },
    {
      "epoch": 22.38,
      "learning_rate": 0.001,
      "loss": 2.5766,
      "step": 116556
    },
    {
      "epoch": 22.38,
      "learning_rate": 0.001,
      "loss": 2.5675,
      "step": 116568
    },
    {
      "epoch": 22.38,
      "learning_rate": 0.001,
      "loss": 2.5803,
      "step": 116580
    },
    {
      "epoch": 22.39,
      "learning_rate": 0.001,
      "loss": 2.5794,
      "step": 116592
    },
    {
      "epoch": 22.39,
      "learning_rate": 0.001,
      "loss": 2.5782,
      "step": 116604
    },
    {
      "epoch": 22.39,
      "learning_rate": 0.001,
      "loss": 2.576,
      "step": 116616
    },
    {
      "epoch": 22.39,
      "learning_rate": 0.001,
      "loss": 2.5761,
      "step": 116628
    },
    {
      "epoch": 22.4,
      "learning_rate": 0.001,
      "loss": 2.5839,
      "step": 116640
    },
    {
      "epoch": 22.4,
      "learning_rate": 0.001,
      "loss": 2.5761,
      "step": 116652
    },
    {
      "epoch": 22.4,
      "learning_rate": 0.001,
      "loss": 2.5694,
      "step": 116664
    },
    {
      "epoch": 22.4,
      "learning_rate": 0.001,
      "loss": 2.5778,
      "step": 116676
    },
    {
      "epoch": 22.41,
      "learning_rate": 0.001,
      "loss": 2.572,
      "step": 116688
    },
    {
      "epoch": 22.41,
      "learning_rate": 0.001,
      "loss": 2.5723,
      "step": 116700
    },
    {
      "epoch": 22.41,
      "learning_rate": 0.001,
      "loss": 2.567,
      "step": 116712
    },
    {
      "epoch": 22.41,
      "learning_rate": 0.001,
      "loss": 2.5848,
      "step": 116724
    },
    {
      "epoch": 22.41,
      "learning_rate": 0.001,
      "loss": 2.5751,
      "step": 116736
    },
    {
      "epoch": 22.42,
      "learning_rate": 0.001,
      "loss": 2.5731,
      "step": 116748
    },
    {
      "epoch": 22.42,
      "learning_rate": 0.001,
      "loss": 2.5731,
      "step": 116760
    },
    {
      "epoch": 22.42,
      "learning_rate": 0.001,
      "loss": 2.5788,
      "step": 116772
    },
    {
      "epoch": 22.42,
      "learning_rate": 0.001,
      "loss": 2.5796,
      "step": 116784
    },
    {
      "epoch": 22.43,
      "learning_rate": 0.001,
      "loss": 2.5725,
      "step": 116796
    },
    {
      "epoch": 22.43,
      "learning_rate": 0.001,
      "loss": 2.5704,
      "step": 116808
    },
    {
      "epoch": 22.43,
      "learning_rate": 0.001,
      "loss": 2.5763,
      "step": 116820
    },
    {
      "epoch": 22.43,
      "learning_rate": 0.001,
      "loss": 2.5799,
      "step": 116832
    },
    {
      "epoch": 22.44,
      "learning_rate": 0.001,
      "loss": 2.5776,
      "step": 116844
    },
    {
      "epoch": 22.44,
      "learning_rate": 0.001,
      "loss": 2.5648,
      "step": 116856
    },
    {
      "epoch": 22.44,
      "learning_rate": 0.001,
      "loss": 2.5747,
      "step": 116868
    },
    {
      "epoch": 22.44,
      "eval_ag_news_accuracy": 0.32215625,
      "eval_ag_news_bleu_score": 4.856594314520586,
      "eval_ag_news_bleu_score_sem": 0.1598958726882593,
      "eval_ag_news_emb_cos_sim": 0.8080046772956848,
      "eval_ag_news_emb_cos_sim_sem": 0.007112505908435098,
      "eval_ag_news_emb_top1_equal": 0.234375,
      "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.582242250442505,
      "eval_ag_news_n_ngrams_match_1": 14.19,
      "eval_ag_news_n_ngrams_match_2": 3.144,
      "eval_ag_news_n_ngrams_match_3": 0.898,
      "eval_ag_news_num_pred_words": 46.968,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 35.954068557809826,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3498322419113007,
      "eval_ag_news_runtime": 10.5398,
      "eval_ag_news_samples_per_second": 47.439,
      "eval_ag_news_steps_per_second": 0.095,
      "eval_ag_news_token_set_f1": 0.3531902275364227,
      "eval_ag_news_token_set_f1_sem": 0.004257954253477402,
      "eval_ag_news_token_set_precision": 0.33842815531173076,
      "eval_ag_news_token_set_recall": 0.38570623803257226,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 116875
    },
    {
      "epoch": 22.44,
      "eval_anthropic_toxic_prompts_accuracy": 0.11390625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.108279336497537,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11760437814455241,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6740961670875549,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008986192132735974,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.275770425796509,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.162,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.904,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.726,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.352,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.463605873398034,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21285200780460556,
      "eval_anthropic_toxic_prompts_runtime": 11.5867,
      "eval_anthropic_toxic_prompts_samples_per_second": 43.153,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.086,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3545118763706072,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0066613576403408,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.43568408752204074,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3227649481386179,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 116875
    },
    {
      "epoch": 22.44,
      "eval_arxiv_accuracy": 0.34609375,
      "eval_arxiv_bleu_score": 4.264890275315649,
      "eval_arxiv_bleu_score_sem": 0.11801758505991256,
      "eval_arxiv_emb_cos_sim": 0.7583524584770203,
      "eval_arxiv_emb_cos_sim_sem": 0.008303041767756989,
      "eval_arxiv_emb_top1_equal": 0.3203125,
      "eval_arxiv_emb_top1_equal_sem": 0.041403754790620424,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4266624450683594,
      "eval_arxiv_n_ngrams_match_1": 14.852,
      "eval_arxiv_n_ngrams_match_2": 2.94,
      "eval_arxiv_n_ngrams_match_3": 0.682,
      "eval_arxiv_num_pred_words": 40.196,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 30.77376203890849,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3538376492635257,
      "eval_arxiv_runtime": 10.2673,
      "eval_arxiv_samples_per_second": 48.698,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.3502531526681534,
      "eval_arxiv_token_set_f1_sem": 0.004261380431489897,
      "eval_arxiv_token_set_precision": 0.29970747966491473,
      "eval_arxiv_token_set_recall": 0.44259492888542024,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 116875
    },
    {
      "epoch": 22.44,
      "eval_python_code_alpaca_accuracy": 0.16034375,
      "eval_python_code_alpaca_bleu_score": 4.434277339316382,
      "eval_python_code_alpaca_bleu_score_sem": 0.133387632881832,
      "eval_python_code_alpaca_emb_cos_sim": 0.7473664283752441,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010136398726389004,
      "eval_python_code_alpaca_emb_top1_equal": 0.1171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.890942096710205,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.644,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.768,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.86,
      "eval_python_code_alpaca_num_pred_words": 42.488,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.01026902678664,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3293255267957039,
      "eval_python_code_alpaca_runtime": 10.0451,
      "eval_python_code_alpaca_samples_per_second": 49.776,
      "eval_python_code_alpaca_steps_per_second": 0.1,
      "eval_python_code_alpaca_token_set_f1": 0.469584089644997,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005865255611461517,
      "eval_python_code_alpaca_token_set_precision": 0.5238862778191908,
      "eval_python_code_alpaca_token_set_recall": 0.453314890228134,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 116875
    },
    {
      "epoch": 22.44,
      "eval_wikibio_accuracy": 0.321375,
      "eval_wikibio_bleu_score": 5.818789102742222,
      "eval_wikibio_bleu_score_sem": 0.20686515782275233,
      "eval_wikibio_emb_cos_sim": 0.7244291305541992,
      "eval_wikibio_emb_cos_sim_sem": 0.009842413262768883,
      "eval_wikibio_emb_top1_equal": 0.1953125,
      "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7834208011627197,
      "eval_wikibio_n_ngrams_match_1": 10.086,
      "eval_wikibio_n_ngrams_match_2": 3.376,
      "eval_wikibio_n_ngrams_match_3": 1.19,
      "eval_wikibio_num_pred_words": 36.738,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 43.966184359707846,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3511563437729036,
      "eval_wikibio_runtime": 10.0888,
      "eval_wikibio_samples_per_second": 49.56,
      "eval_wikibio_steps_per_second": 0.099,
      "eval_wikibio_token_set_f1": 0.31948827103476674,
      "eval_wikibio_token_set_f1_sem": 0.005409714146322703,
      "eval_wikibio_token_set_precision": 0.32987331494848554,
      "eval_wikibio_token_set_recall": 0.3282077246133953,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 116875
    },
    {
      "epoch": 22.44,
      "eval_nq_accuracy": 0.52575,
      "eval_nq_bleu_score": 11.58256673813752,
      "eval_nq_bleu_score_sem": 0.464927684853318,
      "eval_nq_emb_cos_sim": 0.8292089700698853,
      "eval_nq_emb_cos_sim_sem": 0.007278743615674191,
      "eval_nq_emb_top1_equal": 0.2578125,
      "eval_nq_emb_top1_equal_sem": 0.038815656435002115,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.205331563949585,
      "eval_nq_n_ngrams_match_1": 22.92,
      "eval_nq_n_ngrams_match_2": 8.388,
      "eval_nq_n_ngrams_match_3": 3.848,
      "eval_nq_num_pred_words": 49.1,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.073259434912563,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.44242775204923657,
      "eval_nq_runtime": 10.5388,
      "eval_nq_samples_per_second": 47.444,
      "eval_nq_steps_per_second": 0.095,
      "eval_nq_token_set_f1": 0.4578089833272444,
      "eval_nq_token_set_f1_sem": 0.004977922166256775,
      "eval_nq_token_set_precision": 0.41615849446692316,
      "eval_nq_token_set_recall": 0.5172310825982276,
      "eval_nq_true_num_tokens": 64.0,
      "step": 116875
    },
    {
      "epoch": 22.44,
      "learning_rate": 0.001,
      "loss": 2.587,
      "step": 116880
    },
    {
      "epoch": 22.44,
      "learning_rate": 0.001,
      "loss": 2.5801,
      "step": 116892
    },
    {
      "epoch": 22.45,
      "learning_rate": 0.001,
      "loss": 2.5726,
      "step": 116904
    },
    {
      "epoch": 22.45,
      "learning_rate": 0.001,
      "loss": 2.5827,
      "step": 116916
    },
    {
      "epoch": 22.45,
      "learning_rate": 0.001,
      "loss": 2.5747,
      "step": 116928
    },
    {
      "epoch": 22.45,
      "learning_rate": 0.001,
      "loss": 2.5661,
      "step": 116940
    },
    {
      "epoch": 22.46,
      "learning_rate": 0.001,
      "loss": 2.5701,
      "step": 116952
    },
    {
      "epoch": 22.46,
      "learning_rate": 0.001,
      "loss": 2.5714,
      "step": 116964
    },
    {
      "epoch": 22.46,
      "learning_rate": 0.001,
      "loss": 2.5804,
      "step": 116976
    },
    {
      "epoch": 22.46,
      "learning_rate": 0.001,
      "loss": 2.5831,
      "step": 116988
    },
    {
      "epoch": 22.47,
      "learning_rate": 0.001,
      "loss": 2.5924,
      "step": 117000
    },
    {
      "epoch": 22.47,
      "learning_rate": 0.001,
      "loss": 2.5755,
      "step": 117012
    },
    {
      "epoch": 22.47,
      "learning_rate": 0.001,
      "loss": 2.5724,
      "step": 117024
    },
    {
      "epoch": 22.47,
      "learning_rate": 0.001,
      "loss": 2.5715,
      "step": 117036
    },
    {
      "epoch": 22.47,
      "learning_rate": 0.001,
      "loss": 2.5769,
      "step": 117048
    },
    {
      "epoch": 22.48,
      "learning_rate": 0.001,
      "loss": 2.5776,
      "step": 117060
    },
    {
      "epoch": 22.48,
      "learning_rate": 0.001,
      "loss": 2.5742,
      "step": 117072
    },
    {
      "epoch": 22.48,
      "learning_rate": 0.001,
      "loss": 2.5741,
      "step": 117084
    },
    {
      "epoch": 22.48,
      "learning_rate": 0.001,
      "loss": 2.5765,
      "step": 117096
    },
    {
      "epoch": 22.49,
      "learning_rate": 0.001,
      "loss": 2.5719,
      "step": 117108
    },
    {
      "epoch": 22.49,
      "learning_rate": 0.001,
      "loss": 2.5707,
      "step": 117120
    },
    {
      "epoch": 22.49,
      "learning_rate": 0.001,
      "loss": 2.5737,
      "step": 117132
    },
    {
      "epoch": 22.49,
      "learning_rate": 0.001,
      "loss": 2.5713,
      "step": 117144
    },
    {
      "epoch": 22.5,
      "learning_rate": 0.001,
      "loss": 2.5804,
      "step": 117156
    },
    {
      "epoch": 22.5,
      "learning_rate": 0.001,
      "loss": 2.5727,
      "step": 117168
    },
    {
      "epoch": 22.5,
      "learning_rate": 0.001,
      "loss": 2.5753,
      "step": 117180
    },
    {
      "epoch": 22.5,
      "learning_rate": 0.001,
      "loss": 2.5686,
      "step": 117192
    },
    {
      "epoch": 22.5,
      "learning_rate": 0.001,
      "loss": 2.5804,
      "step": 117204
    },
    {
      "epoch": 22.51,
      "learning_rate": 0.001,
      "loss": 2.5776,
      "step": 117216
    },
    {
      "epoch": 22.51,
      "learning_rate": 0.001,
      "loss": 2.5714,
      "step": 117228
    },
    {
      "epoch": 22.51,
      "learning_rate": 0.001,
      "loss": 2.5648,
      "step": 117240
    },
    {
      "epoch": 22.51,
      "learning_rate": 0.001,
      "loss": 2.5706,
      "step": 117252
    },
    {
      "epoch": 22.52,
      "learning_rate": 0.001,
      "loss": 2.5773,
      "step": 117264
    },
    {
      "epoch": 22.52,
      "learning_rate": 0.001,
      "loss": 2.5811,
      "step": 117276
    },
    {
      "epoch": 22.52,
      "learning_rate": 0.001,
      "loss": 2.5717,
      "step": 117288
    },
    {
      "epoch": 22.52,
      "learning_rate": 0.001,
      "loss": 2.5746,
      "step": 117300
    },
    {
      "epoch": 22.53,
      "learning_rate": 0.001,
      "loss": 2.5784,
      "step": 117312
    },
    {
      "epoch": 22.53,
      "learning_rate": 0.001,
      "loss": 2.5681,
      "step": 117324
    },
    {
      "epoch": 22.53,
      "learning_rate": 0.001,
      "loss": 2.577,
      "step": 117336
    },
    {
      "epoch": 22.53,
      "learning_rate": 0.001,
      "loss": 2.5697,
      "step": 117348
    },
    {
      "epoch": 22.53,
      "learning_rate": 0.001,
      "loss": 2.5727,
      "step": 117360
    },
    {
      "epoch": 22.54,
      "learning_rate": 0.001,
      "loss": 2.5821,
      "step": 117372
    },
    {
      "epoch": 22.54,
      "learning_rate": 0.001,
      "loss": 2.5609,
      "step": 117384
    },
    {
      "epoch": 22.54,
      "learning_rate": 0.001,
      "loss": 2.5816,
      "step": 117396
    },
    {
      "epoch": 22.54,
      "learning_rate": 0.001,
      "loss": 2.5865,
      "step": 117408
    },
    {
      "epoch": 22.55,
      "learning_rate": 0.001,
      "loss": 2.5806,
      "step": 117420
    },
    {
      "epoch": 22.55,
      "learning_rate": 0.001,
      "loss": 2.5813,
      "step": 117432
    },
    {
      "epoch": 22.55,
      "learning_rate": 0.001,
      "loss": 2.5662,
      "step": 117444
    },
    {
      "epoch": 22.55,
      "learning_rate": 0.001,
      "loss": 2.5839,
      "step": 117456
    },
    {
      "epoch": 22.56,
      "learning_rate": 0.001,
      "loss": 2.5809,
      "step": 117468
    },
    {
      "epoch": 22.56,
      "learning_rate": 0.001,
      "loss": 2.5772,
      "step": 117480
    },
    {
      "epoch": 22.56,
      "learning_rate": 0.001,
      "loss": 2.5753,
      "step": 117492
    },
    {
      "epoch": 22.56,
      "eval_ag_news_accuracy": 0.3210625,
      "eval_ag_news_bleu_score": 4.751313847520546,
      "eval_ag_news_bleu_score_sem": 0.15357630148775284,
      "eval_ag_news_emb_cos_sim": 0.8032387495040894,
      "eval_ag_news_emb_cos_sim_sem": 0.007929164977150944,
      "eval_ag_news_emb_top1_equal": 0.21875,
      "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5686678886413574,
      "eval_ag_news_n_ngrams_match_1": 14.074,
      "eval_ag_news_n_ngrams_match_2": 3.066,
      "eval_ag_news_n_ngrams_match_3": 0.866,
      "eval_ag_news_num_pred_words": 46.748,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 35.469312592947226,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3459832340936717,
      "eval_ag_news_runtime": 10.5136,
      "eval_ag_news_samples_per_second": 47.557,
      "eval_ag_news_steps_per_second": 0.095,
      "eval_ag_news_token_set_f1": 0.3496529264097084,
      "eval_ag_news_token_set_f1_sem": 0.004607486911821557,
      "eval_ag_news_token_set_precision": 0.33522189722840523,
      "eval_ag_news_token_set_recall": 0.37955676522116066,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 117500
    },
    {
      "epoch": 22.56,
      "eval_anthropic_toxic_prompts_accuracy": 0.111875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.0575816694187474,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11457866858044319,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.656312108039856,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009500901300552155,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2598910331726074,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.138,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.884,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.706,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.722,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.04669876174836,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21074573188862467,
      "eval_anthropic_toxic_prompts_runtime": 9.8163,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.936,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.102,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3523485744391414,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006537204923483806,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.43178132247169376,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.324154576788585,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 117500
    },
    {
      "epoch": 22.56,
      "eval_arxiv_accuracy": 0.34596875,
      "eval_arxiv_bleu_score": 4.404601112288306,
      "eval_arxiv_bleu_score_sem": 0.1209762459064129,
      "eval_arxiv_emb_cos_sim": 0.757699728012085,
      "eval_arxiv_emb_cos_sim_sem": 0.007478353065855309,
      "eval_arxiv_emb_top1_equal": 0.296875,
      "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4282429218292236,
      "eval_arxiv_n_ngrams_match_1": 15.358,
      "eval_arxiv_n_ngrams_match_2": 3.054,
      "eval_arxiv_n_ngrams_match_3": 0.682,
      "eval_arxiv_num_pred_words": 40.7,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 30.822437709906495,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36524552461244275,
      "eval_arxiv_runtime": 12.4637,
      "eval_arxiv_samples_per_second": 40.116,
      "eval_arxiv_steps_per_second": 0.08,
      "eval_arxiv_token_set_f1": 0.3607402806172713,
      "eval_arxiv_token_set_f1_sem": 0.00425810251188602,
      "eval_arxiv_token_set_precision": 0.3110062180736336,
      "eval_arxiv_token_set_recall": 0.4480569332203351,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 117500
    },
    {
      "epoch": 22.56,
      "eval_python_code_alpaca_accuracy": 0.15896875,
      "eval_python_code_alpaca_bleu_score": 4.48379496275208,
      "eval_python_code_alpaca_bleu_score_sem": 0.1473345045875255,
      "eval_python_code_alpaca_emb_cos_sim": 0.7411110401153564,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010823657750553294,
      "eval_python_code_alpaca_emb_top1_equal": 0.140625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8746705055236816,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.514,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.762,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.904,
      "eval_python_code_alpaca_num_pred_words": 42.514,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.719584654212152,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.32422284766209397,
      "eval_python_code_alpaca_runtime": 10.0745,
      "eval_python_code_alpaca_samples_per_second": 49.63,
      "eval_python_code_alpaca_steps_per_second": 0.099,
      "eval_python_code_alpaca_token_set_f1": 0.46409111519362245,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005765989168419287,
      "eval_python_code_alpaca_token_set_precision": 0.5161061677503002,
      "eval_python_code_alpaca_token_set_recall": 0.4461122420485951,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 117500
    },
    {
      "epoch": 22.56,
      "eval_wikibio_accuracy": 0.322625,
      "eval_wikibio_bleu_score": 5.890929405674165,
      "eval_wikibio_bleu_score_sem": 0.21081142732845903,
      "eval_wikibio_emb_cos_sim": 0.7339615821838379,
      "eval_wikibio_emb_cos_sim_sem": 0.010007291075598992,
      "eval_wikibio_emb_top1_equal": 0.1796875,
      "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7456119060516357,
      "eval_wikibio_n_ngrams_match_1": 9.922,
      "eval_wikibio_n_ngrams_match_2": 3.346,
      "eval_wikibio_n_ngrams_match_3": 1.226,
      "eval_wikibio_num_pred_words": 35.99,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 42.334904278869544,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.34904511274310346,
      "eval_wikibio_runtime": 10.1967,
      "eval_wikibio_samples_per_second": 49.036,
      "eval_wikibio_steps_per_second": 0.098,
      "eval_wikibio_token_set_f1": 0.31552256679354507,
      "eval_wikibio_token_set_f1_sem": 0.005655064206334394,
      "eval_wikibio_token_set_precision": 0.32342106186795944,
      "eval_wikibio_token_set_recall": 0.3260212041482342,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 117500
    },
    {
      "epoch": 22.56,
      "eval_nq_accuracy": 0.52715625,
      "eval_nq_bleu_score": 11.389046873894975,
      "eval_nq_bleu_score_sem": 0.47600020163262186,
      "eval_nq_emb_cos_sim": 0.8283698558807373,
      "eval_nq_emb_cos_sim_sem": 0.007455267788127975,
      "eval_nq_emb_top1_equal": 0.2890625,
      "eval_nq_emb_top1_equal_sem": 0.04022626667363519,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1999282836914062,
      "eval_nq_n_ngrams_match_1": 22.87,
      "eval_nq_n_ngrams_match_2": 8.262,
      "eval_nq_n_ngrams_match_3": 3.766,
      "eval_nq_num_pred_words": 48.834,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.024366281989233,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.44442499810861613,
      "eval_nq_runtime": 10.5316,
      "eval_nq_samples_per_second": 47.476,
      "eval_nq_steps_per_second": 0.095,
      "eval_nq_token_set_f1": 0.4579732896661942,
      "eval_nq_token_set_f1_sem": 0.005053132762283063,
      "eval_nq_token_set_precision": 0.41472750289136123,
      "eval_nq_token_set_recall": 0.5204610951023767,
      "eval_nq_true_num_tokens": 64.0,
      "step": 117500
    },
    {
      "epoch": 22.56,
      "learning_rate": 0.001,
      "loss": 2.5692,
      "step": 117504
    },
    {
      "epoch": 22.56,
      "learning_rate": 0.001,
      "loss": 2.5689,
      "step": 117516
    },
    {
      "epoch": 22.57,
      "learning_rate": 0.001,
      "loss": 2.5768,
      "step": 117528
    },
    {
      "epoch": 22.57,
      "learning_rate": 0.001,
      "loss": 2.5948,
      "step": 117540
    },
    {
      "epoch": 22.57,
      "learning_rate": 0.001,
      "loss": 2.5832,
      "step": 117552
    },
    {
      "epoch": 22.57,
      "learning_rate": 0.001,
      "loss": 2.5831,
      "step": 117564
    },
    {
      "epoch": 22.58,
      "learning_rate": 0.001,
      "loss": 2.5871,
      "step": 117576
    },
    {
      "epoch": 22.58,
      "learning_rate": 0.001,
      "loss": 2.5762,
      "step": 117588
    },
    {
      "epoch": 22.58,
      "learning_rate": 0.001,
      "loss": 2.5775,
      "step": 117600
    },
    {
      "epoch": 22.58,
      "learning_rate": 0.001,
      "loss": 2.595,
      "step": 117612
    },
    {
      "epoch": 22.59,
      "learning_rate": 0.001,
      "loss": 2.5849,
      "step": 117624
    },
    {
      "epoch": 22.59,
      "learning_rate": 0.001,
      "loss": 2.5846,
      "step": 117636
    },
    {
      "epoch": 22.59,
      "learning_rate": 0.001,
      "loss": 2.5769,
      "step": 117648
    },
    {
      "epoch": 22.59,
      "learning_rate": 0.001,
      "loss": 2.5681,
      "step": 117660
    },
    {
      "epoch": 22.59,
      "learning_rate": 0.001,
      "loss": 2.5813,
      "step": 117672
    },
    {
      "epoch": 22.6,
      "learning_rate": 0.001,
      "loss": 2.5852,
      "step": 117684
    },
    {
      "epoch": 22.6,
      "learning_rate": 0.001,
      "loss": 2.5845,
      "step": 117696
    },
    {
      "epoch": 22.6,
      "learning_rate": 0.001,
      "loss": 2.5787,
      "step": 117708
    },
    {
      "epoch": 22.6,
      "learning_rate": 0.001,
      "loss": 2.5801,
      "step": 117720
    },
    {
      "epoch": 22.61,
      "learning_rate": 0.001,
      "loss": 2.5731,
      "step": 117732
    },
    {
      "epoch": 22.61,
      "learning_rate": 0.001,
      "loss": 2.5777,
      "step": 117744
    },
    {
      "epoch": 22.61,
      "learning_rate": 0.001,
      "loss": 2.5701,
      "step": 117756
    },
    {
      "epoch": 22.61,
      "learning_rate": 0.001,
      "loss": 2.5764,
      "step": 117768
    },
    {
      "epoch": 22.62,
      "learning_rate": 0.001,
      "loss": 2.5819,
      "step": 117780
    },
    {
      "epoch": 22.62,
      "learning_rate": 0.001,
      "loss": 2.5736,
      "step": 117792
    },
    {
      "epoch": 22.62,
      "learning_rate": 0.001,
      "loss": 2.5746,
      "step": 117804
    },
    {
      "epoch": 22.62,
      "learning_rate": 0.001,
      "loss": 2.5852,
      "step": 117816
    },
    {
      "epoch": 22.62,
      "learning_rate": 0.001,
      "loss": 2.582,
      "step": 117828
    },
    {
      "epoch": 22.63,
      "learning_rate": 0.001,
      "loss": 2.5686,
      "step": 117840
    },
    {
      "epoch": 22.63,
      "learning_rate": 0.001,
      "loss": 2.5661,
      "step": 117852
    },
    {
      "epoch": 22.63,
      "learning_rate": 0.001,
      "loss": 2.5786,
      "step": 117864
    },
    {
      "epoch": 22.63,
      "learning_rate": 0.001,
      "loss": 2.5764,
      "step": 117876
    },
    {
      "epoch": 22.64,
      "learning_rate": 0.001,
      "loss": 2.5814,
      "step": 117888
    },
    {
      "epoch": 22.64,
      "learning_rate": 0.001,
      "loss": 2.5788,
      "step": 117900
    },
    {
      "epoch": 22.64,
      "learning_rate": 0.001,
      "loss": 2.5834,
      "step": 117912
    },
    {
      "epoch": 22.64,
      "learning_rate": 0.001,
      "loss": 2.5875,
      "step": 117924
    },
    {
      "epoch": 22.65,
      "learning_rate": 0.001,
      "loss": 2.5765,
      "step": 117936
    },
    {
      "epoch": 22.65,
      "learning_rate": 0.001,
      "loss": 2.5888,
      "step": 117948
    },
    {
      "epoch": 22.65,
      "learning_rate": 0.001,
      "loss": 2.5736,
      "step": 117960
    },
    {
      "epoch": 22.65,
      "learning_rate": 0.001,
      "loss": 2.5772,
      "step": 117972
    },
    {
      "epoch": 22.65,
      "learning_rate": 0.001,
      "loss": 2.5726,
      "step": 117984
    },
    {
      "epoch": 22.66,
      "learning_rate": 0.001,
      "loss": 2.5873,
      "step": 117996
    },
    {
      "epoch": 22.66,
      "learning_rate": 0.001,
      "loss": 2.5891,
      "step": 118008
    },
    {
      "epoch": 22.66,
      "learning_rate": 0.001,
      "loss": 2.5699,
      "step": 118020
    },
    {
      "epoch": 22.66,
      "learning_rate": 0.001,
      "loss": 2.5778,
      "step": 118032
    },
    {
      "epoch": 22.67,
      "learning_rate": 0.001,
      "loss": 2.5731,
      "step": 118044
    },
    {
      "epoch": 22.67,
      "learning_rate": 0.001,
      "loss": 2.5779,
      "step": 118056
    },
    {
      "epoch": 22.67,
      "learning_rate": 0.001,
      "loss": 2.5681,
      "step": 118068
    },
    {
      "epoch": 22.67,
      "learning_rate": 0.001,
      "loss": 2.581,
      "step": 118080
    },
    {
      "epoch": 22.68,
      "learning_rate": 0.001,
      "loss": 2.5785,
      "step": 118092
    },
    {
      "epoch": 22.68,
      "learning_rate": 0.001,
      "loss": 2.5797,
      "step": 118104
    },
    {
      "epoch": 22.68,
      "learning_rate": 0.001,
      "loss": 2.5667,
      "step": 118116
    },
    {
      "epoch": 22.68,
      "eval_ag_news_accuracy": 0.3206875,
      "eval_ag_news_bleu_score": 4.840546160324322,
      "eval_ag_news_bleu_score_sem": 0.15717995941611823,
      "eval_ag_news_emb_cos_sim": 0.8073430061340332,
      "eval_ag_news_emb_cos_sim_sem": 0.007231543755153546,
      "eval_ag_news_emb_top1_equal": 0.25,
      "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.572967767715454,
      "eval_ag_news_n_ngrams_match_1": 13.846,
      "eval_ag_news_n_ngrams_match_2": 3.076,
      "eval_ag_news_n_ngrams_match_3": 0.88,
      "eval_ag_news_num_pred_words": 46.692,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 35.622154713765795,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3436576714142795,
      "eval_ag_news_runtime": 11.5141,
      "eval_ag_news_samples_per_second": 43.425,
      "eval_ag_news_steps_per_second": 0.087,
      "eval_ag_news_token_set_f1": 0.34528038605404987,
      "eval_ag_news_token_set_f1_sem": 0.004380625888641731,
      "eval_ag_news_token_set_precision": 0.32926774330513486,
      "eval_ag_news_token_set_recall": 0.37960968660890493,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 118125
    },
    {
      "epoch": 22.68,
      "eval_anthropic_toxic_prompts_accuracy": 0.11371875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1456554674371224,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1219128571149683,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6706522107124329,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009408666873853113,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02147948148198014,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2667617797851562,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.28,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.956,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.734,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.594,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.226275234815024,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2149039943129452,
      "eval_anthropic_toxic_prompts_runtime": 9.9279,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.363,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.101,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35698278637877773,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006647900886517928,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4450859525827544,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3228706847834766,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 118125
    },
    {
      "epoch": 22.68,
      "eval_arxiv_accuracy": 0.3433125,
      "eval_arxiv_bleu_score": 4.323463857586938,
      "eval_arxiv_bleu_score_sem": 0.12050510206508024,
      "eval_arxiv_emb_cos_sim": 0.7659394145011902,
      "eval_arxiv_emb_cos_sim_sem": 0.006534922862263774,
      "eval_arxiv_emb_top1_equal": 0.2890625,
      "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.44219708442688,
      "eval_arxiv_n_ngrams_match_1": 14.92,
      "eval_arxiv_n_ngrams_match_2": 2.932,
      "eval_arxiv_n_ngrams_match_3": 0.666,
      "eval_arxiv_num_pred_words": 39.906,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 31.25555387610984,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3572795954756781,
      "eval_arxiv_runtime": 10.3216,
      "eval_arxiv_samples_per_second": 48.442,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.34989747195975485,
      "eval_arxiv_token_set_f1_sem": 0.004190585429222164,
      "eval_arxiv_token_set_precision": 0.30072492311625165,
      "eval_arxiv_token_set_recall": 0.4362196950083837,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 118125
    },
    {
      "epoch": 22.68,
      "eval_python_code_alpaca_accuracy": 0.15675,
      "eval_python_code_alpaca_bleu_score": 4.395425837996771,
      "eval_python_code_alpaca_bleu_score_sem": 0.14135301310053985,
      "eval_python_code_alpaca_emb_cos_sim": 0.7417300343513489,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009967324667954029,
      "eval_python_code_alpaca_emb_top1_equal": 0.1484375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8914570808410645,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.434,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.678,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.878,
      "eval_python_code_alpaca_num_pred_words": 42.928,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.01954641817758,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.32670841440180876,
      "eval_python_code_alpaca_runtime": 10.7363,
      "eval_python_code_alpaca_samples_per_second": 46.571,
      "eval_python_code_alpaca_steps_per_second": 0.093,
      "eval_python_code_alpaca_token_set_f1": 0.46292960382541193,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005693270406237749,
      "eval_python_code_alpaca_token_set_precision": 0.5117938139124939,
      "eval_python_code_alpaca_token_set_recall": 0.44617442796793894,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 118125
    },
    {
      "epoch": 22.68,
      "eval_wikibio_accuracy": 0.31978125,
      "eval_wikibio_bleu_score": 5.703654430766345,
      "eval_wikibio_bleu_score_sem": 0.2065429911401896,
      "eval_wikibio_emb_cos_sim": 0.7414698600769043,
      "eval_wikibio_emb_cos_sim_sem": 0.008842983726941036,
      "eval_wikibio_emb_top1_equal": 0.1953125,
      "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.762153387069702,
      "eval_wikibio_n_ngrams_match_1": 10.058,
      "eval_wikibio_n_ngrams_match_2": 3.268,
      "eval_wikibio_n_ngrams_match_3": 1.158,
      "eval_wikibio_num_pred_words": 36.9,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 43.04101021299951,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3478530142638878,
      "eval_wikibio_runtime": 10.0028,
      "eval_wikibio_samples_per_second": 49.986,
      "eval_wikibio_steps_per_second": 0.1,
      "eval_wikibio_token_set_f1": 0.3132170005221746,
      "eval_wikibio_token_set_f1_sem": 0.005376814575488786,
      "eval_wikibio_token_set_precision": 0.32415168566383196,
      "eval_wikibio_token_set_recall": 0.3170845095153825,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 118125
    },
    {
      "epoch": 22.68,
      "eval_nq_accuracy": 0.52753125,
      "eval_nq_bleu_score": 11.697604780616196,
      "eval_nq_bleu_score_sem": 0.4794663338808472,
      "eval_nq_emb_cos_sim": 0.8264201879501343,
      "eval_nq_emb_cos_sim_sem": 0.006818167464958697,
      "eval_nq_emb_top1_equal": 0.296875,
      "eval_nq_emb_top1_equal_sem": 0.04054163310179599,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.19657564163208,
      "eval_nq_n_ngrams_match_1": 23.102,
      "eval_nq_n_ngrams_match_2": 8.454,
      "eval_nq_n_ngrams_match_3": 3.878,
      "eval_nq_num_pred_words": 49.11,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.994161473281862,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45084280812714295,
      "eval_nq_runtime": 10.4674,
      "eval_nq_samples_per_second": 47.767,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.46362520961188725,
      "eval_nq_token_set_f1_sem": 0.004846504152292297,
      "eval_nq_token_set_precision": 0.42061919822566746,
      "eval_nq_token_set_recall": 0.5258279988193315,
      "eval_nq_true_num_tokens": 64.0,
      "step": 118125
    },
    {
      "epoch": 22.68,
      "learning_rate": 0.001,
      "loss": 2.5705,
      "step": 118128
    },
    {
      "epoch": 22.68,
      "learning_rate": 0.001,
      "loss": 2.5819,
      "step": 118140
    },
    {
      "epoch": 22.69,
      "learning_rate": 0.001,
      "loss": 2.576,
      "step": 118152
    },
    {
      "epoch": 22.69,
      "learning_rate": 0.001,
      "loss": 2.5664,
      "step": 118164
    },
    {
      "epoch": 22.69,
      "learning_rate": 0.001,
      "loss": 2.5682,
      "step": 118176
    },
    {
      "epoch": 22.69,
      "learning_rate": 0.001,
      "loss": 2.5805,
      "step": 118188
    },
    {
      "epoch": 22.7,
      "learning_rate": 0.001,
      "loss": 2.5669,
      "step": 118200
    },
    {
      "epoch": 22.7,
      "learning_rate": 0.001,
      "loss": 2.5746,
      "step": 118212
    },
    {
      "epoch": 22.7,
      "learning_rate": 0.001,
      "loss": 2.5832,
      "step": 118224
    },
    {
      "epoch": 22.7,
      "learning_rate": 0.001,
      "loss": 2.5787,
      "step": 118236
    },
    {
      "epoch": 22.71,
      "learning_rate": 0.001,
      "loss": 2.5704,
      "step": 118248
    },
    {
      "epoch": 22.71,
      "learning_rate": 0.001,
      "loss": 2.5865,
      "step": 118260
    },
    {
      "epoch": 22.71,
      "learning_rate": 0.001,
      "loss": 2.5731,
      "step": 118272
    },
    {
      "epoch": 22.71,
      "learning_rate": 0.001,
      "loss": 2.5729,
      "step": 118284
    },
    {
      "epoch": 22.71,
      "learning_rate": 0.001,
      "loss": 2.5694,
      "step": 118296
    },
    {
      "epoch": 22.72,
      "learning_rate": 0.001,
      "loss": 2.5623,
      "step": 118308
    },
    {
      "epoch": 22.72,
      "learning_rate": 0.001,
      "loss": 2.5731,
      "step": 118320
    },
    {
      "epoch": 22.72,
      "learning_rate": 0.001,
      "loss": 2.5768,
      "step": 118332
    },
    {
      "epoch": 22.72,
      "learning_rate": 0.001,
      "loss": 2.582,
      "step": 118344
    },
    {
      "epoch": 22.73,
      "learning_rate": 0.001,
      "loss": 2.5817,
      "step": 118356
    },
    {
      "epoch": 22.73,
      "learning_rate": 0.001,
      "loss": 2.5828,
      "step": 118368
    },
    {
      "epoch": 22.73,
      "learning_rate": 0.001,
      "loss": 2.5793,
      "step": 118380
    },
    {
      "epoch": 22.73,
      "learning_rate": 0.001,
      "loss": 2.5846,
      "step": 118392
    },
    {
      "epoch": 22.74,
      "learning_rate": 0.001,
      "loss": 2.5848,
      "step": 118404
    },
    {
      "epoch": 22.74,
      "learning_rate": 0.001,
      "loss": 2.5921,
      "step": 118416
    },
    {
      "epoch": 22.74,
      "learning_rate": 0.001,
      "loss": 2.5953,
      "step": 118428
    },
    {
      "epoch": 22.74,
      "learning_rate": 0.001,
      "loss": 2.5871,
      "step": 118440
    },
    {
      "epoch": 22.74,
      "learning_rate": 0.001,
      "loss": 2.5764,
      "step": 118452
    },
    {
      "epoch": 22.75,
      "learning_rate": 0.001,
      "loss": 2.5792,
      "step": 118464
    },
    {
      "epoch": 22.75,
      "learning_rate": 0.001,
      "loss": 2.574,
      "step": 118476
    },
    {
      "epoch": 22.75,
      "learning_rate": 0.001,
      "loss": 2.5756,
      "step": 118488
    },
    {
      "epoch": 22.75,
      "learning_rate": 0.001,
      "loss": 2.5909,
      "step": 118500
    },
    {
      "epoch": 22.76,
      "learning_rate": 0.001,
      "loss": 2.5847,
      "step": 118512
    },
    {
      "epoch": 22.76,
      "learning_rate": 0.001,
      "loss": 2.5727,
      "step": 118524
    },
    {
      "epoch": 22.76,
      "learning_rate": 0.001,
      "loss": 2.5742,
      "step": 118536
    },
    {
      "epoch": 22.76,
      "learning_rate": 0.001,
      "loss": 2.5797,
      "step": 118548
    },
    {
      "epoch": 22.76,
      "learning_rate": 0.001,
      "loss": 2.5793,
      "step": 118560
    },
    {
      "epoch": 22.77,
      "learning_rate": 0.001,
      "loss": 2.5854,
      "step": 118572
    },
    {
      "epoch": 22.77,
      "learning_rate": 0.001,
      "loss": 2.5686,
      "step": 118584
    },
    {
      "epoch": 22.77,
      "learning_rate": 0.001,
      "loss": 2.5709,
      "step": 118596
    },
    {
      "epoch": 22.77,
      "learning_rate": 0.001,
      "loss": 2.5798,
      "step": 118608
    },
    {
      "epoch": 22.78,
      "learning_rate": 0.001,
      "loss": 2.5761,
      "step": 118620
    },
    {
      "epoch": 22.78,
      "learning_rate": 0.001,
      "loss": 2.5614,
      "step": 118632
    },
    {
      "epoch": 22.78,
      "learning_rate": 0.001,
      "loss": 2.578,
      "step": 118644
    },
    {
      "epoch": 22.78,
      "learning_rate": 0.001,
      "loss": 2.5774,
      "step": 118656
    },
    {
      "epoch": 22.79,
      "learning_rate": 0.001,
      "loss": 2.5811,
      "step": 118668
    },
    {
      "epoch": 22.79,
      "learning_rate": 0.001,
      "loss": 2.5817,
      "step": 118680
    },
    {
      "epoch": 22.79,
      "learning_rate": 0.001,
      "loss": 2.565,
      "step": 118692
    },
    {
      "epoch": 22.79,
      "learning_rate": 0.001,
      "loss": 2.5874,
      "step": 118704
    },
    {
      "epoch": 22.79,
      "learning_rate": 0.001,
      "loss": 2.5805,
      "step": 118716
    },
    {
      "epoch": 22.8,
      "learning_rate": 0.001,
      "loss": 2.5766,
      "step": 118728
    },
    {
      "epoch": 22.8,
      "learning_rate": 0.001,
      "loss": 2.5735,
      "step": 118740
    },
    {
      "epoch": 22.8,
      "eval_ag_news_accuracy": 0.32065625,
      "eval_ag_news_bleu_score": 4.728675119766207,
      "eval_ag_news_bleu_score_sem": 0.16108176959669468,
      "eval_ag_news_emb_cos_sim": 0.8163679838180542,
      "eval_ag_news_emb_cos_sim_sem": 0.006003713861800085,
      "eval_ag_news_emb_top1_equal": 0.25,
      "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.574566125869751,
      "eval_ag_news_n_ngrams_match_1": 13.892,
      "eval_ag_news_n_ngrams_match_2": 2.978,
      "eval_ag_news_n_ngrams_match_3": 0.864,
      "eval_ag_news_num_pred_words": 46.704,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 35.679137202307444,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.34591344792147,
      "eval_ag_news_runtime": 10.4948,
      "eval_ag_news_samples_per_second": 47.643,
      "eval_ag_news_steps_per_second": 0.095,
      "eval_ag_news_token_set_f1": 0.3456728662053108,
      "eval_ag_news_token_set_f1_sem": 0.004355684349898715,
      "eval_ag_news_token_set_precision": 0.3312698500081311,
      "eval_ag_news_token_set_recall": 0.37825539516319207,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 118750
    },
    {
      "epoch": 22.8,
      "eval_anthropic_toxic_prompts_accuracy": 0.11421875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.131417167315212,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12109858112678264,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6736284494400024,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008596035463348087,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0546875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.020175758285348722,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2645761966705322,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.202,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.894,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.702,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.016,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.16901812338232,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21344392776445575,
      "eval_anthropic_toxic_prompts_runtime": 10.1688,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.17,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.098,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3597367554367178,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00646303489294724,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.43975985437102044,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.33009692371145005,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 118750
    },
    {
      "epoch": 22.8,
      "eval_arxiv_accuracy": 0.345625,
      "eval_arxiv_bleu_score": 4.224076713499098,
      "eval_arxiv_bleu_score_sem": 0.11849153849914,
      "eval_arxiv_emb_cos_sim": 0.7639448642730713,
      "eval_arxiv_emb_cos_sim_sem": 0.0067609057029005834,
      "eval_arxiv_emb_top1_equal": 0.2890625,
      "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4284024238586426,
      "eval_arxiv_n_ngrams_match_1": 14.902,
      "eval_arxiv_n_ngrams_match_2": 2.866,
      "eval_arxiv_n_ngrams_match_3": 0.624,
      "eval_arxiv_num_pred_words": 40.04,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 30.82735434336895,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.35825075103107074,
      "eval_arxiv_runtime": 10.2552,
      "eval_arxiv_samples_per_second": 48.756,
      "eval_arxiv_steps_per_second": 0.098,
      "eval_arxiv_token_set_f1": 0.34873992789830865,
      "eval_arxiv_token_set_f1_sem": 0.004124133935509216,
      "eval_arxiv_token_set_precision": 0.30042025713647386,
      "eval_arxiv_token_set_recall": 0.43556948019261876,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 118750
    },
    {
      "epoch": 22.8,
      "eval_python_code_alpaca_accuracy": 0.15934375,
      "eval_python_code_alpaca_bleu_score": 4.528718327545159,
      "eval_python_code_alpaca_bleu_score_sem": 0.14783233518395464,
      "eval_python_code_alpaca_emb_cos_sim": 0.7302837371826172,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010019767859847232,
      "eval_python_code_alpaca_emb_top1_equal": 0.1171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.881324529647827,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.378,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.678,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.828,
      "eval_python_code_alpaca_num_pred_words": 40.482,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.83788434598612,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3297159932945782,
      "eval_python_code_alpaca_runtime": 9.7742,
      "eval_python_code_alpaca_samples_per_second": 51.155,
      "eval_python_code_alpaca_steps_per_second": 0.102,
      "eval_python_code_alpaca_token_set_f1": 0.4643577157387954,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005818559465057156,
      "eval_python_code_alpaca_token_set_precision": 0.5103981416177292,
      "eval_python_code_alpaca_token_set_recall": 0.4486524542190428,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 118750
    },
    {
      "epoch": 22.8,
      "eval_wikibio_accuracy": 0.32240625,
      "eval_wikibio_bleu_score": 6.114362821110541,
      "eval_wikibio_bleu_score_sem": 0.21814553975399284,
      "eval_wikibio_emb_cos_sim": 0.744574785232544,
      "eval_wikibio_emb_cos_sim_sem": 0.010106713515602666,
      "eval_wikibio_emb_top1_equal": 0.2109375,
      "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.728107213973999,
      "eval_wikibio_n_ngrams_match_1": 10.178,
      "eval_wikibio_n_ngrams_match_2": 3.412,
      "eval_wikibio_n_ngrams_match_3": 1.284,
      "eval_wikibio_num_pred_words": 36.718,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 41.60029314403496,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3616905131558259,
      "eval_wikibio_runtime": 10.0604,
      "eval_wikibio_samples_per_second": 49.7,
      "eval_wikibio_steps_per_second": 0.099,
      "eval_wikibio_token_set_f1": 0.32173470242274055,
      "eval_wikibio_token_set_f1_sem": 0.005348885462502014,
      "eval_wikibio_token_set_precision": 0.3317864882124454,
      "eval_wikibio_token_set_recall": 0.3267995582635393,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 118750
    },
    {
      "epoch": 22.8,
      "eval_nq_accuracy": 0.526625,
      "eval_nq_bleu_score": 11.24020788009808,
      "eval_nq_bleu_score_sem": 0.45684858157587976,
      "eval_nq_emb_cos_sim": 0.8305935263633728,
      "eval_nq_emb_cos_sim_sem": 0.006763945544349432,
      "eval_nq_emb_top1_equal": 0.265625,
      "eval_nq_emb_top1_equal_sem": 0.03919146934646163,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.2016873359680176,
      "eval_nq_n_ngrams_match_1": 22.814,
      "eval_nq_n_ngrams_match_2": 8.208,
      "eval_nq_n_ngrams_match_3": 3.658,
      "eval_nq_num_pred_words": 48.924,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.040254584122676,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4419139402057335,
      "eval_nq_runtime": 11.0143,
      "eval_nq_samples_per_second": 45.396,
      "eval_nq_steps_per_second": 0.091,
      "eval_nq_token_set_f1": 0.45243742822093835,
      "eval_nq_token_set_f1_sem": 0.004836848923290419,
      "eval_nq_token_set_precision": 0.41248264634775594,
      "eval_nq_token_set_recall": 0.5081416178754598,
      "eval_nq_true_num_tokens": 64.0,
      "step": 118750
    },
    {
      "epoch": 22.8,
      "learning_rate": 0.001,
      "loss": 2.5823,
      "step": 118752
    },
    {
      "epoch": 22.8,
      "learning_rate": 0.001,
      "loss": 2.5781,
      "step": 118764
    },
    {
      "epoch": 22.81,
      "learning_rate": 0.001,
      "loss": 2.5796,
      "step": 118776
    },
    {
      "epoch": 22.81,
      "learning_rate": 0.001,
      "loss": 2.5777,
      "step": 118788
    },
    {
      "epoch": 22.81,
      "learning_rate": 0.001,
      "loss": 2.5743,
      "step": 118800
    },
    {
      "epoch": 22.81,
      "learning_rate": 0.001,
      "loss": 2.5755,
      "step": 118812
    },
    {
      "epoch": 22.82,
      "learning_rate": 0.001,
      "loss": 2.5896,
      "step": 118824
    },
    {
      "epoch": 22.82,
      "learning_rate": 0.001,
      "loss": 2.5788,
      "step": 118836
    },
    {
      "epoch": 22.82,
      "learning_rate": 0.001,
      "loss": 2.5772,
      "step": 118848
    },
    {
      "epoch": 22.82,
      "learning_rate": 0.001,
      "loss": 2.5739,
      "step": 118860
    },
    {
      "epoch": 22.82,
      "learning_rate": 0.001,
      "loss": 2.595,
      "step": 118872
    },
    {
      "epoch": 22.83,
      "learning_rate": 0.001,
      "loss": 2.5723,
      "step": 118884
    },
    {
      "epoch": 22.83,
      "learning_rate": 0.001,
      "loss": 2.5808,
      "step": 118896
    },
    {
      "epoch": 22.83,
      "learning_rate": 0.001,
      "loss": 2.5746,
      "step": 118908
    },
    {
      "epoch": 22.83,
      "learning_rate": 0.001,
      "loss": 2.587,
      "step": 118920
    },
    {
      "epoch": 22.84,
      "learning_rate": 0.001,
      "loss": 2.5827,
      "step": 118932
    },
    {
      "epoch": 22.84,
      "learning_rate": 0.001,
      "loss": 2.5806,
      "step": 118944
    },
    {
      "epoch": 22.84,
      "learning_rate": 0.001,
      "loss": 2.5825,
      "step": 118956
    },
    {
      "epoch": 22.84,
      "learning_rate": 0.001,
      "loss": 2.5759,
      "step": 118968
    },
    {
      "epoch": 22.85,
      "learning_rate": 0.001,
      "loss": 2.5761,
      "step": 118980
    },
    {
      "epoch": 22.85,
      "learning_rate": 0.001,
      "loss": 2.5824,
      "step": 118992
    },
    {
      "epoch": 22.85,
      "learning_rate": 0.001,
      "loss": 2.5812,
      "step": 119004
    },
    {
      "epoch": 22.85,
      "learning_rate": 0.001,
      "loss": 2.5834,
      "step": 119016
    },
    {
      "epoch": 22.85,
      "learning_rate": 0.001,
      "loss": 2.5884,
      "step": 119028
    },
    {
      "epoch": 22.86,
      "learning_rate": 0.001,
      "loss": 2.5742,
      "step": 119040
    },
    {
      "epoch": 22.86,
      "learning_rate": 0.001,
      "loss": 2.575,
      "step": 119052
    },
    {
      "epoch": 22.86,
      "learning_rate": 0.001,
      "loss": 2.5901,
      "step": 119064
    },
    {
      "epoch": 22.86,
      "learning_rate": 0.001,
      "loss": 2.5791,
      "step": 119076
    },
    {
      "epoch": 22.87,
      "learning_rate": 0.001,
      "loss": 2.581,
      "step": 119088
    },
    {
      "epoch": 22.87,
      "learning_rate": 0.001,
      "loss": 2.5897,
      "step": 119100
    },
    {
      "epoch": 22.87,
      "learning_rate": 0.001,
      "loss": 2.586,
      "step": 119112
    },
    {
      "epoch": 22.87,
      "learning_rate": 0.001,
      "loss": 2.575,
      "step": 119124
    },
    {
      "epoch": 22.88,
      "learning_rate": 0.001,
      "loss": 2.582,
      "step": 119136
    },
    {
      "epoch": 22.88,
      "learning_rate": 0.001,
      "loss": 2.5738,
      "step": 119148
    },
    {
      "epoch": 22.88,
      "learning_rate": 0.001,
      "loss": 2.5726,
      "step": 119160
    },
    {
      "epoch": 22.88,
      "learning_rate": 0.001,
      "loss": 2.5768,
      "step": 119172
    },
    {
      "epoch": 22.88,
      "learning_rate": 0.001,
      "loss": 2.5766,
      "step": 119184
    },
    {
      "epoch": 22.89,
      "learning_rate": 0.001,
      "loss": 2.5749,
      "step": 119196
    },
    {
      "epoch": 22.89,
      "learning_rate": 0.001,
      "loss": 2.5836,
      "step": 119208
    },
    {
      "epoch": 22.89,
      "learning_rate": 0.001,
      "loss": 2.575,
      "step": 119220
    },
    {
      "epoch": 22.89,
      "learning_rate": 0.001,
      "loss": 2.5735,
      "step": 119232
    },
    {
      "epoch": 22.9,
      "learning_rate": 0.001,
      "loss": 2.5829,
      "step": 119244
    },
    {
      "epoch": 22.9,
      "learning_rate": 0.001,
      "loss": 2.5734,
      "step": 119256
    },
    {
      "epoch": 22.9,
      "learning_rate": 0.001,
      "loss": 2.5789,
      "step": 119268
    },
    {
      "epoch": 22.9,
      "learning_rate": 0.001,
      "loss": 2.5763,
      "step": 119280
    },
    {
      "epoch": 22.91,
      "learning_rate": 0.001,
      "loss": 2.5862,
      "step": 119292
    },
    {
      "epoch": 22.91,
      "learning_rate": 0.001,
      "loss": 2.5742,
      "step": 119304
    },
    {
      "epoch": 22.91,
      "learning_rate": 0.001,
      "loss": 2.5789,
      "step": 119316
    },
    {
      "epoch": 22.91,
      "learning_rate": 0.001,
      "loss": 2.5865,
      "step": 119328
    },
    {
      "epoch": 22.91,
      "learning_rate": 0.001,
      "loss": 2.5805,
      "step": 119340
    },
    {
      "epoch": 22.92,
      "learning_rate": 0.001,
      "loss": 2.5752,
      "step": 119352
    },
    {
      "epoch": 22.92,
      "learning_rate": 0.001,
      "loss": 2.5752,
      "step": 119364
    },
    {
      "epoch": 22.92,
      "eval_ag_news_accuracy": 0.321375,
      "eval_ag_news_bleu_score": 4.771542609334364,
      "eval_ag_news_bleu_score_sem": 0.1470501125581788,
      "eval_ag_news_emb_cos_sim": 0.8067134022712708,
      "eval_ag_news_emb_cos_sim_sem": 0.0069122174988662495,
      "eval_ag_news_emb_top1_equal": 0.2578125,
      "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5727336406707764,
      "eval_ag_news_n_ngrams_match_1": 14.04,
      "eval_ag_news_n_ngrams_match_2": 3.122,
      "eval_ag_news_n_ngrams_match_3": 0.88,
      "eval_ag_news_num_pred_words": 46.576,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 35.61381558020405,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.350032673989199,
      "eval_ag_news_runtime": 10.4763,
      "eval_ag_news_samples_per_second": 47.727,
      "eval_ag_news_steps_per_second": 0.095,
      "eval_ag_news_token_set_f1": 0.35110158428638666,
      "eval_ag_news_token_set_f1_sem": 0.004342113085911917,
      "eval_ag_news_token_set_precision": 0.3353990444657584,
      "eval_ag_news_token_set_recall": 0.3850773854745541,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 119375
    },
    {
      "epoch": 22.92,
      "eval_anthropic_toxic_prompts_accuracy": 0.113875,
      "eval_anthropic_toxic_prompts_bleu_score": 2.9140668966136736,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10368118999703983,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6709904670715332,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008316619638422576,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.250877618789673,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.148,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.86,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.64,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.954,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.81298393905257,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21121607654629718,
      "eval_anthropic_toxic_prompts_runtime": 10.0005,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.998,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.1,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3553615145707035,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006553119473347054,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4315260440881228,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.33003881224782194,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 119375
    },
    {
      "epoch": 22.92,
      "eval_arxiv_accuracy": 0.346,
      "eval_arxiv_bleu_score": 4.169312491926533,
      "eval_arxiv_bleu_score_sem": 0.12186592289341049,
      "eval_arxiv_emb_cos_sim": 0.7528259754180908,
      "eval_arxiv_emb_cos_sim_sem": 0.008360341725396564,
      "eval_arxiv_emb_top1_equal": 0.2265625,
      "eval_arxiv_emb_top1_equal_sem": 0.03714537682851538,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4216580390930176,
      "eval_arxiv_n_ngrams_match_1": 14.818,
      "eval_arxiv_n_ngrams_match_2": 2.87,
      "eval_arxiv_n_ngrams_match_3": 0.6,
      "eval_arxiv_num_pred_words": 39.986,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 30.62014234853063,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3585396308505666,
      "eval_arxiv_runtime": 10.2628,
      "eval_arxiv_samples_per_second": 48.719,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.34880182526485354,
      "eval_arxiv_token_set_f1_sem": 0.004492715906354398,
      "eval_arxiv_token_set_precision": 0.30025934747677413,
      "eval_arxiv_token_set_recall": 0.4359924377589185,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 119375
    },
    {
      "epoch": 22.92,
      "eval_python_code_alpaca_accuracy": 0.1598125,
      "eval_python_code_alpaca_bleu_score": 4.579480170502158,
      "eval_python_code_alpaca_bleu_score_sem": 0.1406896219823957,
      "eval_python_code_alpaca_emb_cos_sim": 0.7514224648475647,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008423041089457092,
      "eval_python_code_alpaca_emb_top1_equal": 0.15625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.883908748626709,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.574,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.806,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.932,
      "eval_python_code_alpaca_num_pred_words": 42.76,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.88404095896687,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3308781466127945,
      "eval_python_code_alpaca_runtime": 10.1648,
      "eval_python_code_alpaca_samples_per_second": 49.189,
      "eval_python_code_alpaca_steps_per_second": 0.098,
      "eval_python_code_alpaca_token_set_f1": 0.4759927518667449,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005466086633814408,
      "eval_python_code_alpaca_token_set_precision": 0.5201817886890249,
      "eval_python_code_alpaca_token_set_recall": 0.4620434358846748,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 119375
    },
    {
      "epoch": 22.92,
      "eval_wikibio_accuracy": 0.32278125,
      "eval_wikibio_bleu_score": 5.836553740050067,
      "eval_wikibio_bleu_score_sem": 0.20921846966863697,
      "eval_wikibio_emb_cos_sim": 0.7484660148620605,
      "eval_wikibio_emb_cos_sim_sem": 0.009859827155193469,
      "eval_wikibio_emb_top1_equal": 0.1640625,
      "eval_wikibio_emb_top1_equal_sem": 0.03286167651298939,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7701869010925293,
      "eval_wikibio_n_ngrams_match_1": 10.068,
      "eval_wikibio_n_ngrams_match_2": 3.32,
      "eval_wikibio_n_ngrams_match_3": 1.218,
      "eval_wikibio_num_pred_words": 36.876,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 43.38817337508725,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3521909885771981,
      "eval_wikibio_runtime": 10.2816,
      "eval_wikibio_samples_per_second": 48.631,
      "eval_wikibio_steps_per_second": 0.097,
      "eval_wikibio_token_set_f1": 0.3162605130662639,
      "eval_wikibio_token_set_f1_sem": 0.005442885509959944,
      "eval_wikibio_token_set_precision": 0.32671022151088314,
      "eval_wikibio_token_set_recall": 0.3207715336894786,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 119375
    },
    {
      "epoch": 22.92,
      "eval_nq_accuracy": 0.5265,
      "eval_nq_bleu_score": 11.763675240452047,
      "eval_nq_bleu_score_sem": 0.4788024395816035,
      "eval_nq_emb_cos_sim": 0.832360029220581,
      "eval_nq_emb_cos_sim_sem": 0.007437167033781562,
      "eval_nq_emb_top1_equal": 0.28125,
      "eval_nq_emb_top1_equal_sem": 0.039896367485272234,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1988203525543213,
      "eval_nq_n_ngrams_match_1": 23.04,
      "eval_nq_n_ngrams_match_2": 8.436,
      "eval_nq_n_ngrams_match_3": 3.916,
      "eval_nq_num_pred_words": 49.28,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.014373442304269,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.44567249350959226,
      "eval_nq_runtime": 10.8519,
      "eval_nq_samples_per_second": 46.075,
      "eval_nq_steps_per_second": 0.092,
      "eval_nq_token_set_f1": 0.4603255991639862,
      "eval_nq_token_set_f1_sem": 0.0047959763576717876,
      "eval_nq_token_set_precision": 0.41883655406372927,
      "eval_nq_token_set_recall": 0.5179694841072207,
      "eval_nq_true_num_tokens": 64.0,
      "step": 119375
    },
    {
      "epoch": 22.92,
      "learning_rate": 0.001,
      "loss": 2.5796,
      "step": 119376
    },
    {
      "epoch": 22.92,
      "learning_rate": 0.001,
      "loss": 2.5771,
      "step": 119388
    },
    {
      "epoch": 22.93,
      "learning_rate": 0.001,
      "loss": 2.5732,
      "step": 119400
    },
    {
      "epoch": 22.93,
      "learning_rate": 0.001,
      "loss": 2.5744,
      "step": 119412
    },
    {
      "epoch": 22.93,
      "learning_rate": 0.001,
      "loss": 2.572,
      "step": 119424
    },
    {
      "epoch": 22.93,
      "learning_rate": 0.001,
      "loss": 2.5666,
      "step": 119436
    },
    {
      "epoch": 22.94,
      "learning_rate": 0.001,
      "loss": 2.5765,
      "step": 119448
    },
    {
      "epoch": 22.94,
      "learning_rate": 0.001,
      "loss": 2.5787,
      "step": 119460
    },
    {
      "epoch": 22.94,
      "learning_rate": 0.001,
      "loss": 2.5852,
      "step": 119472
    },
    {
      "epoch": 22.94,
      "learning_rate": 0.001,
      "loss": 2.5824,
      "step": 119484
    },
    {
      "epoch": 22.94,
      "learning_rate": 0.001,
      "loss": 2.5933,
      "step": 119496
    },
    {
      "epoch": 22.95,
      "learning_rate": 0.001,
      "loss": 2.5745,
      "step": 119508
    },
    {
      "epoch": 22.95,
      "learning_rate": 0.001,
      "loss": 2.5734,
      "step": 119520
    },
    {
      "epoch": 22.95,
      "learning_rate": 0.001,
      "loss": 2.5817,
      "step": 119532
    },
    {
      "epoch": 22.95,
      "learning_rate": 0.001,
      "loss": 2.5809,
      "step": 119544
    },
    {
      "epoch": 22.96,
      "learning_rate": 0.001,
      "loss": 2.5706,
      "step": 119556
    },
    {
      "epoch": 22.96,
      "learning_rate": 0.001,
      "loss": 2.5852,
      "step": 119568
    },
    {
      "epoch": 22.96,
      "learning_rate": 0.001,
      "loss": 2.5814,
      "step": 119580
    },
    {
      "epoch": 22.96,
      "learning_rate": 0.001,
      "loss": 2.5785,
      "step": 119592
    },
    {
      "epoch": 22.97,
      "learning_rate": 0.001,
      "loss": 2.5801,
      "step": 119604
    },
    {
      "epoch": 22.97,
      "learning_rate": 0.001,
      "loss": 2.5882,
      "step": 119616
    },
    {
      "epoch": 22.97,
      "learning_rate": 0.001,
      "loss": 2.5963,
      "step": 119628
    },
    {
      "epoch": 22.97,
      "learning_rate": 0.001,
      "loss": 2.5927,
      "step": 119640
    },
    {
      "epoch": 22.97,
      "learning_rate": 0.001,
      "loss": 2.582,
      "step": 119652
    },
    {
      "epoch": 22.98,
      "learning_rate": 0.001,
      "loss": 2.5758,
      "step": 119664
    },
    {
      "epoch": 22.98,
      "learning_rate": 0.001,
      "loss": 2.5813,
      "step": 119676
    },
    {
      "epoch": 22.98,
      "learning_rate": 0.001,
      "loss": 2.5811,
      "step": 119688
    },
    {
      "epoch": 22.98,
      "learning_rate": 0.001,
      "loss": 2.5777,
      "step": 119700
    },
    {
      "epoch": 22.99,
      "learning_rate": 0.001,
      "loss": 2.5873,
      "step": 119712
    },
    {
      "epoch": 22.99,
      "learning_rate": 0.001,
      "loss": 2.5855,
      "step": 119724
    },
    {
      "epoch": 22.99,
      "learning_rate": 0.001,
      "loss": 2.5834,
      "step": 119736
    },
    {
      "epoch": 22.99,
      "learning_rate": 0.001,
      "loss": 2.5833,
      "step": 119748
    },
    {
      "epoch": 23.0,
      "learning_rate": 0.001,
      "loss": 2.5716,
      "step": 119760
    },
    {
      "epoch": 23.0,
      "learning_rate": 0.001,
      "loss": 2.5811,
      "step": 119772
    },
    {
      "epoch": 23.0,
      "learning_rate": 0.001,
      "loss": 2.5834,
      "step": 119784
    },
    {
      "epoch": 23.0,
      "learning_rate": 0.001,
      "loss": 2.5674,
      "step": 119796
    },
    {
      "epoch": 23.0,
      "learning_rate": 0.001,
      "loss": 2.5731,
      "step": 119808
    },
    {
      "epoch": 23.01,
      "learning_rate": 0.001,
      "loss": 2.5821,
      "step": 119820
    },
    {
      "epoch": 23.01,
      "learning_rate": 0.001,
      "loss": 2.5688,
      "step": 119832
    },
    {
      "epoch": 23.01,
      "learning_rate": 0.001,
      "loss": 2.5624,
      "step": 119844
    },
    {
      "epoch": 23.01,
      "learning_rate": 0.001,
      "loss": 2.561,
      "step": 119856
    },
    {
      "epoch": 23.02,
      "learning_rate": 0.001,
      "loss": 2.5653,
      "step": 119868
    },
    {
      "epoch": 23.02,
      "learning_rate": 0.001,
      "loss": 2.5612,
      "step": 119880
    },
    {
      "epoch": 23.02,
      "learning_rate": 0.001,
      "loss": 2.558,
      "step": 119892
    },
    {
      "epoch": 23.02,
      "learning_rate": 0.001,
      "loss": 2.5653,
      "step": 119904
    },
    {
      "epoch": 23.03,
      "learning_rate": 0.001,
      "loss": 2.5666,
      "step": 119916
    },
    {
      "epoch": 23.03,
      "learning_rate": 0.001,
      "loss": 2.5713,
      "step": 119928
    },
    {
      "epoch": 23.03,
      "learning_rate": 0.001,
      "loss": 2.5655,
      "step": 119940
    },
    {
      "epoch": 23.03,
      "learning_rate": 0.001,
      "loss": 2.565,
      "step": 119952
    },
    {
      "epoch": 23.03,
      "learning_rate": 0.001,
      "loss": 2.5557,
      "step": 119964
    },
    {
      "epoch": 23.04,
      "learning_rate": 0.001,
      "loss": 2.5703,
      "step": 119976
    },
    {
      "epoch": 23.04,
      "learning_rate": 0.001,
      "loss": 2.5661,
      "step": 119988
    },
    {
      "epoch": 23.04,
      "learning_rate": 0.001,
      "loss": 2.5767,
      "step": 120000
    },
    {
      "epoch": 23.04,
      "eval_ag_news_accuracy": 0.32053125,
      "eval_ag_news_bleu_score": 4.7507281973693924,
      "eval_ag_news_bleu_score_sem": 0.15242972130870316,
      "eval_ag_news_emb_cos_sim": 0.80669105052948,
      "eval_ag_news_emb_cos_sim_sem": 0.007559986243064847,
      "eval_ag_news_emb_top1_equal": 0.2734375,
      "eval_ag_news_emb_top1_equal_sem": 0.03955156411760461,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.565901517868042,
      "eval_ag_news_n_ngrams_match_1": 13.914,
      "eval_ag_news_n_ngrams_match_2": 3.038,
      "eval_ag_news_n_ngrams_match_3": 0.852,
      "eval_ag_news_num_pred_words": 46.496,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 35.371326918082865,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3452257342075885,
      "eval_ag_news_runtime": 10.4969,
      "eval_ag_news_samples_per_second": 47.633,
      "eval_ag_news_steps_per_second": 0.095,
      "eval_ag_news_token_set_f1": 0.348363302530887,
      "eval_ag_news_token_set_f1_sem": 0.004266929603596635,
      "eval_ag_news_token_set_precision": 0.3315074464636364,
      "eval_ag_news_token_set_recall": 0.3834548713420894,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 120000
    },
    {
      "epoch": 23.04,
      "eval_anthropic_toxic_prompts_accuracy": 0.11303125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.113714452848769,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1162551914196674,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6651376485824585,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009246945464849764,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2804033756256104,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.256,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.98,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.716,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.78,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.586494881200764,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21111165987552732,
      "eval_anthropic_toxic_prompts_runtime": 10.5815,
      "eval_anthropic_toxic_prompts_samples_per_second": 47.252,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.095,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35751448452746515,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00674997818020014,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.43784531949414346,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3280085808062943,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 120000
    },
    {
      "epoch": 23.04,
      "eval_arxiv_accuracy": 0.344625,
      "eval_arxiv_bleu_score": 4.261845359047497,
      "eval_arxiv_bleu_score_sem": 0.12152648390963264,
      "eval_arxiv_emb_cos_sim": 0.7591594457626343,
      "eval_arxiv_emb_cos_sim_sem": 0.007763274322867557,
      "eval_arxiv_emb_top1_equal": 0.265625,
      "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.435774564743042,
      "eval_arxiv_n_ngrams_match_1": 14.84,
      "eval_arxiv_n_ngrams_match_2": 2.964,
      "eval_arxiv_n_ngrams_match_3": 0.65,
      "eval_arxiv_num_pred_words": 40.468,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 31.05545771468835,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.352156605920514,
      "eval_arxiv_runtime": 10.3125,
      "eval_arxiv_samples_per_second": 48.485,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.34750629453071064,
      "eval_arxiv_token_set_f1_sem": 0.00433831106840747,
      "eval_arxiv_token_set_precision": 0.299259449773176,
      "eval_arxiv_token_set_recall": 0.4366267198668744,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 120000
    },
    {
      "epoch": 23.04,
      "eval_python_code_alpaca_accuracy": 0.159,
      "eval_python_code_alpaca_bleu_score": 4.6542100413563645,
      "eval_python_code_alpaca_bleu_score_sem": 0.14633888693296002,
      "eval_python_code_alpaca_emb_cos_sim": 0.7513779401779175,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009774954597549165,
      "eval_python_code_alpaca_emb_top1_equal": 0.171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03347745514062371,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.913936138153076,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.72,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.964,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.012,
      "eval_python_code_alpaca_num_pred_words": 43.806,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.429195851610917,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.327765670838694,
      "eval_python_code_alpaca_runtime": 9.9441,
      "eval_python_code_alpaca_samples_per_second": 50.281,
      "eval_python_code_alpaca_steps_per_second": 0.101,
      "eval_python_code_alpaca_token_set_f1": 0.47418106912384855,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005703871103965279,
      "eval_python_code_alpaca_token_set_precision": 0.5300462592300176,
      "eval_python_code_alpaca_token_set_recall": 0.4539878431310463,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 120000
    },
    {
      "epoch": 23.04,
      "eval_wikibio_accuracy": 0.31715625,
      "eval_wikibio_bleu_score": 5.991048425709009,
      "eval_wikibio_bleu_score_sem": 0.2059768237378832,
      "eval_wikibio_emb_cos_sim": 0.7435378432273865,
      "eval_wikibio_emb_cos_sim_sem": 0.008606466149810027,
      "eval_wikibio_emb_top1_equal": 0.1953125,
      "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.795231580734253,
      "eval_wikibio_n_ngrams_match_1": 10.554,
      "eval_wikibio_n_ngrams_match_2": 3.528,
      "eval_wikibio_n_ngrams_match_3": 1.27,
      "eval_wikibio_num_pred_words": 37.76,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 44.4885379009417,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3675820865797001,
      "eval_wikibio_runtime": 10.3052,
      "eval_wikibio_samples_per_second": 48.519,
      "eval_wikibio_steps_per_second": 0.097,
      "eval_wikibio_token_set_f1": 0.3280854906650871,
      "eval_wikibio_token_set_f1_sem": 0.005002807537994431,
      "eval_wikibio_token_set_precision": 0.3409086432007072,
      "eval_wikibio_token_set_recall": 0.3285461415335112,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 120000
    },
    {
      "epoch": 23.04,
      "eval_nq_accuracy": 0.52459375,
      "eval_nq_bleu_score": 11.678230486295876,
      "eval_nq_bleu_score_sem": 0.4793581572895105,
      "eval_nq_emb_cos_sim": 0.8322374820709229,
      "eval_nq_emb_cos_sim_sem": 0.006763545149288578,
      "eval_nq_emb_top1_equal": 0.25,
      "eval_nq_emb_top1_equal_sem": 0.03842366440207048,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.202526569366455,
      "eval_nq_n_ngrams_match_1": 23.18,
      "eval_nq_n_ngrams_match_2": 8.478,
      "eval_nq_n_ngrams_match_3": 3.898,
      "eval_nq_num_pred_words": 49.23,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.04784465217387,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.44818501565287217,
      "eval_nq_runtime": 10.6443,
      "eval_nq_samples_per_second": 46.974,
      "eval_nq_steps_per_second": 0.094,
      "eval_nq_token_set_f1": 0.46243034750089573,
      "eval_nq_token_set_f1_sem": 0.004945985124560686,
      "eval_nq_token_set_precision": 0.42029379092839253,
      "eval_nq_token_set_recall": 0.5201694917058951,
      "eval_nq_true_num_tokens": 64.0,
      "step": 120000
    },
    {
      "epoch": 23.04,
      "learning_rate": 0.001,
      "loss": 2.5671,
      "step": 120012
    },
    {
      "epoch": 23.05,
      "learning_rate": 0.001,
      "loss": 2.5756,
      "step": 120024
    },
    {
      "epoch": 23.05,
      "learning_rate": 0.001,
      "loss": 2.5749,
      "step": 120036
    },
    {
      "epoch": 23.05,
      "learning_rate": 0.001,
      "loss": 2.5689,
      "step": 120048
    },
    {
      "epoch": 23.05,
      "learning_rate": 0.001,
      "loss": 2.5666,
      "step": 120060
    },
    {
      "epoch": 23.06,
      "learning_rate": 0.001,
      "loss": 2.5685,
      "step": 120072
    },
    {
      "epoch": 23.06,
      "learning_rate": 0.001,
      "loss": 2.5659,
      "step": 120084
    },
    {
      "epoch": 23.06,
      "learning_rate": 0.001,
      "loss": 2.5561,
      "step": 120096
    },
    {
      "epoch": 23.06,
      "learning_rate": 0.001,
      "loss": 2.5586,
      "step": 120108
    },
    {
      "epoch": 23.06,
      "learning_rate": 0.001,
      "loss": 2.564,
      "step": 120120
    },
    {
      "epoch": 23.07,
      "learning_rate": 0.001,
      "loss": 2.5722,
      "step": 120132
    },
    {
      "epoch": 23.07,
      "learning_rate": 0.001,
      "loss": 2.5543,
      "step": 120144
    },
    {
      "epoch": 23.07,
      "learning_rate": 0.001,
      "loss": 2.5677,
      "step": 120156
    },
    {
      "epoch": 23.07,
      "learning_rate": 0.001,
      "loss": 2.5642,
      "step": 120168
    },
    {
      "epoch": 23.08,
      "learning_rate": 0.001,
      "loss": 2.5627,
      "step": 120180
    },
    {
      "epoch": 23.08,
      "learning_rate": 0.001,
      "loss": 2.5589,
      "step": 120192
    },
    {
      "epoch": 23.08,
      "learning_rate": 0.001,
      "loss": 2.5596,
      "step": 120204
    },
    {
      "epoch": 23.08,
      "learning_rate": 0.001,
      "loss": 2.5606,
      "step": 120216
    },
    {
      "epoch": 23.09,
      "learning_rate": 0.001,
      "loss": 2.5619,
      "step": 120228
    },
    {
      "epoch": 23.09,
      "learning_rate": 0.001,
      "loss": 2.5598,
      "step": 120240
    },
    {
      "epoch": 23.09,
      "learning_rate": 0.001,
      "loss": 2.5602,
      "step": 120252
    },
    {
      "epoch": 23.09,
      "learning_rate": 0.001,
      "loss": 2.5619,
      "step": 120264
    },
    {
      "epoch": 23.09,
      "learning_rate": 0.001,
      "loss": 2.565,
      "step": 120276
    },
    {
      "epoch": 23.1,
      "learning_rate": 0.001,
      "loss": 2.5646,
      "step": 120288
    },
    {
      "epoch": 23.1,
      "learning_rate": 0.001,
      "loss": 2.5607,
      "step": 120300
    },
    {
      "epoch": 23.1,
      "learning_rate": 0.001,
      "loss": 2.5649,
      "step": 120312
    },
    {
      "epoch": 23.1,
      "learning_rate": 0.001,
      "loss": 2.5681,
      "step": 120324
    },
    {
      "epoch": 23.11,
      "learning_rate": 0.001,
      "loss": 2.5767,
      "step": 120336
    },
    {
      "epoch": 23.11,
      "learning_rate": 0.001,
      "loss": 2.5597,
      "step": 120348
    },
    {
      "epoch": 23.11,
      "learning_rate": 0.001,
      "loss": 2.5638,
      "step": 120360
    },
    {
      "epoch": 23.11,
      "learning_rate": 0.001,
      "loss": 2.562,
      "step": 120372
    },
    {
      "epoch": 23.12,
      "learning_rate": 0.001,
      "loss": 2.5537,
      "step": 120384
    },
    {
      "epoch": 23.12,
      "learning_rate": 0.001,
      "loss": 2.5656,
      "step": 120396
    },
    {
      "epoch": 23.12,
      "learning_rate": 0.001,
      "loss": 2.5665,
      "step": 120408
    },
    {
      "epoch": 23.12,
      "learning_rate": 0.001,
      "loss": 2.5719,
      "step": 120420
    },
    {
      "epoch": 23.12,
      "learning_rate": 0.001,
      "loss": 2.5597,
      "step": 120432
    },
    {
      "epoch": 23.13,
      "learning_rate": 0.001,
      "loss": 2.5744,
      "step": 120444
    },
    {
      "epoch": 23.13,
      "learning_rate": 0.001,
      "loss": 2.5757,
      "step": 120456
    },
    {
      "epoch": 23.13,
      "learning_rate": 0.001,
      "loss": 2.5696,
      "step": 120468
    },
    {
      "epoch": 23.13,
      "learning_rate": 0.001,
      "loss": 2.5744,
      "step": 120480
    },
    {
      "epoch": 23.14,
      "learning_rate": 0.001,
      "loss": 2.5655,
      "step": 120492
    },
    {
      "epoch": 23.14,
      "learning_rate": 0.001,
      "loss": 2.5745,
      "step": 120504
    },
    {
      "epoch": 23.14,
      "learning_rate": 0.001,
      "loss": 2.5791,
      "step": 120516
    },
    {
      "epoch": 23.14,
      "learning_rate": 0.001,
      "loss": 2.5749,
      "step": 120528
    },
    {
      "epoch": 23.15,
      "learning_rate": 0.001,
      "loss": 2.5625,
      "step": 120540
    },
    {
      "epoch": 23.15,
      "learning_rate": 0.001,
      "loss": 2.5671,
      "step": 120552
    },
    {
      "epoch": 23.15,
      "learning_rate": 0.001,
      "loss": 2.5619,
      "step": 120564
    },
    {
      "epoch": 23.15,
      "learning_rate": 0.001,
      "loss": 2.5704,
      "step": 120576
    },
    {
      "epoch": 23.15,
      "learning_rate": 0.001,
      "loss": 2.5576,
      "step": 120588
    },
    {
      "epoch": 23.16,
      "learning_rate": 0.001,
      "loss": 2.5642,
      "step": 120600
    },
    {
      "epoch": 23.16,
      "learning_rate": 0.001,
      "loss": 2.5652,
      "step": 120612
    },
    {
      "epoch": 23.16,
      "learning_rate": 0.001,
      "loss": 2.5587,
      "step": 120624
    },
    {
      "epoch": 23.16,
      "eval_ag_news_accuracy": 0.32090625,
      "eval_ag_news_bleu_score": 4.875310752354433,
      "eval_ag_news_bleu_score_sem": 0.15870330089120316,
      "eval_ag_news_emb_cos_sim": 0.8108322620391846,
      "eval_ag_news_emb_cos_sim_sem": 0.006963277747292689,
      "eval_ag_news_emb_top1_equal": 0.1875,
      "eval_ag_news_emb_top1_equal_sem": 0.034634623208270626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.574097156524658,
      "eval_ag_news_n_ngrams_match_1": 13.874,
      "eval_ag_news_n_ngrams_match_2": 3.092,
      "eval_ag_news_n_ngrams_match_3": 0.908,
      "eval_ag_news_num_pred_words": 46.214,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 35.662408703583345,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3481154534190848,
      "eval_ag_news_runtime": 10.3417,
      "eval_ag_news_samples_per_second": 48.348,
      "eval_ag_news_steps_per_second": 0.097,
      "eval_ag_news_token_set_f1": 0.3509548927974077,
      "eval_ag_news_token_set_f1_sem": 0.004224702097278793,
      "eval_ag_news_token_set_precision": 0.3324007832530405,
      "eval_ag_news_token_set_recall": 0.38889750745929447,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 120625
    },
    {
      "epoch": 23.16,
      "eval_anthropic_toxic_prompts_accuracy": 0.1135625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1167111312432585,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12071792740159332,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6717588901519775,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00930365141534314,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2766811847686768,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.134,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.878,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.714,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.46,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.487718818757042,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2133445207598026,
      "eval_anthropic_toxic_prompts_runtime": 10.3808,
      "eval_anthropic_toxic_prompts_samples_per_second": 48.166,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.096,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.354542771347837,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006912398442934433,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4321732971310753,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3273676779963287,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 120625
    },
    {
      "epoch": 23.16,
      "eval_arxiv_accuracy": 0.3468125,
      "eval_arxiv_bleu_score": 4.2025908255961895,
      "eval_arxiv_bleu_score_sem": 0.12251858305805556,
      "eval_arxiv_emb_cos_sim": 0.7643671631813049,
      "eval_arxiv_emb_cos_sim_sem": 0.00761582159771867,
      "eval_arxiv_emb_top1_equal": 0.234375,
      "eval_arxiv_emb_top1_equal_sem": 0.03758909358128201,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.42813777923584,
      "eval_arxiv_n_ngrams_match_1": 14.838,
      "eval_arxiv_n_ngrams_match_2": 2.868,
      "eval_arxiv_n_ngrams_match_3": 0.606,
      "eval_arxiv_num_pred_words": 39.864,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 30.81919712923578,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.35313480072736525,
      "eval_arxiv_runtime": 10.2601,
      "eval_arxiv_samples_per_second": 48.732,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.3484620285783303,
      "eval_arxiv_token_set_f1_sem": 0.004427070345154373,
      "eval_arxiv_token_set_precision": 0.2986761384584464,
      "eval_arxiv_token_set_recall": 0.4373637670702221,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 120625
    },
    {
      "epoch": 23.16,
      "eval_python_code_alpaca_accuracy": 0.16203125,
      "eval_python_code_alpaca_bleu_score": 4.638753400852363,
      "eval_python_code_alpaca_bleu_score_sem": 0.13829019574635984,
      "eval_python_code_alpaca_emb_cos_sim": 0.7511048316955566,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009468016222001531,
      "eval_python_code_alpaca_emb_top1_equal": 0.1484375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.892890214920044,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.924,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.908,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.962,
      "eval_python_code_alpaca_num_pred_words": 43.19,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.045389358012933,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3369124215961945,
      "eval_python_code_alpaca_runtime": 9.7862,
      "eval_python_code_alpaca_samples_per_second": 51.092,
      "eval_python_code_alpaca_steps_per_second": 0.102,
      "eval_python_code_alpaca_token_set_f1": 0.4799662341383265,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0054365720163412545,
      "eval_python_code_alpaca_token_set_precision": 0.5410514231032056,
      "eval_python_code_alpaca_token_set_recall": 0.454211479875216,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 120625
    },
    {
      "epoch": 23.16,
      "eval_wikibio_accuracy": 0.32409375,
      "eval_wikibio_bleu_score": 5.569470526262075,
      "eval_wikibio_bleu_score_sem": 0.20195718563972753,
      "eval_wikibio_emb_cos_sim": 0.7310576438903809,
      "eval_wikibio_emb_cos_sim_sem": 0.009878558136634023,
      "eval_wikibio_emb_top1_equal": 0.1875,
      "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7472598552703857,
      "eval_wikibio_n_ngrams_match_1": 9.926,
      "eval_wikibio_n_ngrams_match_2": 3.158,
      "eval_wikibio_n_ngrams_match_3": 1.088,
      "eval_wikibio_num_pred_words": 35.772,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 42.40472756811744,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3441917984004594,
      "eval_wikibio_runtime": 10.0326,
      "eval_wikibio_samples_per_second": 49.838,
      "eval_wikibio_steps_per_second": 0.1,
      "eval_wikibio_token_set_f1": 0.3155579849330876,
      "eval_wikibio_token_set_f1_sem": 0.005554274563882959,
      "eval_wikibio_token_set_precision": 0.3203731255075236,
      "eval_wikibio_token_set_recall": 0.32894732292271944,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 120625
    },
    {
      "epoch": 23.16,
      "eval_nq_accuracy": 0.52390625,
      "eval_nq_bleu_score": 11.227191944524384,
      "eval_nq_bleu_score_sem": 0.47124730409144033,
      "eval_nq_emb_cos_sim": 0.8317947387695312,
      "eval_nq_emb_cos_sim_sem": 0.006556944590610716,
      "eval_nq_emb_top1_equal": 0.296875,
      "eval_nq_emb_top1_equal_sem": 0.04054163310179599,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.2020928859710693,
      "eval_nq_n_ngrams_match_1": 22.656,
      "eval_nq_n_ngrams_match_2": 8.128,
      "eval_nq_n_ngrams_match_3": 3.706,
      "eval_nq_num_pred_words": 48.528,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 9.04392160292634,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.44103413952042203,
      "eval_nq_runtime": 10.6069,
      "eval_nq_samples_per_second": 47.139,
      "eval_nq_steps_per_second": 0.094,
      "eval_nq_token_set_f1": 0.45492710419806515,
      "eval_nq_token_set_f1_sem": 0.00474131462453347,
      "eval_nq_token_set_precision": 0.41196937239271425,
      "eval_nq_token_set_recall": 0.5173019347620817,
      "eval_nq_true_num_tokens": 64.0,
      "step": 120625
    },
    {
      "epoch": 23.16,
      "learning_rate": 0.001,
      "loss": 2.5675,
      "step": 120636
    },
    {
      "epoch": 23.17,
      "learning_rate": 0.001,
      "loss": 2.5613,
      "step": 120648
    },
    {
      "epoch": 23.17,
      "learning_rate": 0.001,
      "loss": 2.5778,
      "step": 120660
    },
    {
      "epoch": 23.17,
      "learning_rate": 0.001,
      "loss": 2.576,
      "step": 120672
    },
    {
      "epoch": 23.17,
      "learning_rate": 0.001,
      "loss": 2.5641,
      "step": 120684
    },
    {
      "epoch": 23.18,
      "learning_rate": 0.001,
      "loss": 2.5651,
      "step": 120696
    },
    {
      "epoch": 23.18,
      "learning_rate": 0.001,
      "loss": 2.5758,
      "step": 120708
    },
    {
      "epoch": 23.18,
      "learning_rate": 0.001,
      "loss": 2.5608,
      "step": 120720
    },
    {
      "epoch": 23.18,
      "learning_rate": 0.001,
      "loss": 2.5698,
      "step": 120732
    },
    {
      "epoch": 23.18,
      "learning_rate": 0.001,
      "loss": 2.5683,
      "step": 120744
    },
    {
      "epoch": 23.19,
      "learning_rate": 0.001,
      "loss": 2.5695,
      "step": 120756
    },
    {
      "epoch": 23.19,
      "learning_rate": 0.001,
      "loss": 2.5659,
      "step": 120768
    },
    {
      "epoch": 23.19,
      "learning_rate": 0.001,
      "loss": 2.5724,
      "step": 120780
    },
    {
      "epoch": 23.19,
      "learning_rate": 0.001,
      "loss": 2.5586,
      "step": 120792
    },
    {
      "epoch": 23.2,
      "learning_rate": 0.001,
      "loss": 2.5641,
      "step": 120804
    },
    {
      "epoch": 23.2,
      "learning_rate": 0.001,
      "loss": 2.5628,
      "step": 120816
    },
    {
      "epoch": 23.2,
      "learning_rate": 0.001,
      "loss": 2.5653,
      "step": 120828
    },
    {
      "epoch": 23.2,
      "learning_rate": 0.001,
      "loss": 2.5511,
      "step": 120840
    },
    {
      "epoch": 23.21,
      "learning_rate": 0.001,
      "loss": 2.5661,
      "step": 120852
    },
    {
      "epoch": 23.21,
      "learning_rate": 0.001,
      "loss": 2.5727,
      "step": 120864
    },
    {
      "epoch": 23.21,
      "learning_rate": 0.001,
      "loss": 2.5671,
      "step": 120876
    },
    {
      "epoch": 23.21,
      "learning_rate": 0.001,
      "loss": 2.5669,
      "step": 120888
    },
    {
      "epoch": 23.21,
      "learning_rate": 0.001,
      "loss": 2.5628,
      "step": 120900
    },
    {
      "epoch": 23.22,
      "learning_rate": 0.001,
      "loss": 2.5627,
      "step": 120912
    },
    {
      "epoch": 23.22,
      "learning_rate": 0.001,
      "loss": 2.5655,
      "step": 120924
    },
    {
      "epoch": 23.22,
      "learning_rate": 0.001,
      "loss": 2.5648,
      "step": 120936
    },
    {
      "epoch": 23.22,
      "learning_rate": 0.001,
      "loss": 2.5616,
      "step": 120948
    },
    {
      "epoch": 23.23,
      "learning_rate": 0.001,
      "loss": 2.5678,
      "step": 120960
    },
    {
      "epoch": 23.23,
      "learning_rate": 0.001,
      "loss": 2.5638,
      "step": 120972
    },
    {
      "epoch": 23.23,
      "learning_rate": 0.001,
      "loss": 2.5736,
      "step": 120984
    },
    {
      "epoch": 23.23,
      "learning_rate": 0.001,
      "loss": 2.5688,
      "step": 120996
    },
    {
      "epoch": 23.24,
      "learning_rate": 0.001,
      "loss": 2.578,
      "step": 121008
    },
    {
      "epoch": 23.24,
      "learning_rate": 0.001,
      "loss": 2.5703,
      "step": 121020
    },
    {
      "epoch": 23.24,
      "learning_rate": 0.001,
      "loss": 2.5617,
      "step": 121032
    },
    {
      "epoch": 23.24,
      "learning_rate": 0.001,
      "loss": 2.5669,
      "step": 121044
    },
    {
      "epoch": 23.24,
      "learning_rate": 0.001,
      "loss": 2.5687,
      "step": 121056
    },
    {
      "epoch": 23.25,
      "learning_rate": 0.001,
      "loss": 2.573,
      "step": 121068
    },
    {
      "epoch": 23.25,
      "learning_rate": 0.001,
      "loss": 2.5774,
      "step": 121080
    },
    {
      "epoch": 23.25,
      "learning_rate": 0.001,
      "loss": 2.5608,
      "step": 121092
    },
    {
      "epoch": 23.25,
      "learning_rate": 0.001,
      "loss": 2.558,
      "step": 121104
    },
    {
      "epoch": 23.26,
      "learning_rate": 0.001,
      "loss": 2.5554,
      "step": 121116
    },
    {
      "epoch": 23.26,
      "learning_rate": 0.001,
      "loss": 2.559,
      "step": 121128
    },
    {
      "epoch": 23.26,
      "learning_rate": 0.001,
      "loss": 2.5653,
      "step": 121140
    },
    {
      "epoch": 23.26,
      "learning_rate": 0.001,
      "loss": 2.5792,
      "step": 121152
    },
    {
      "epoch": 23.26,
      "learning_rate": 0.001,
      "loss": 2.5722,
      "step": 121164
    },
    {
      "epoch": 23.27,
      "learning_rate": 0.001,
      "loss": 2.5624,
      "step": 121176
    },
    {
      "epoch": 23.27,
      "learning_rate": 0.001,
      "loss": 2.5718,
      "step": 121188
    },
    {
      "epoch": 23.27,
      "learning_rate": 0.001,
      "loss": 2.5699,
      "step": 121200
    },
    {
      "epoch": 23.27,
      "learning_rate": 0.001,
      "loss": 2.5543,
      "step": 121212
    },
    {
      "epoch": 23.28,
      "learning_rate": 0.001,
      "loss": 2.5606,
      "step": 121224
    },
    {
      "epoch": 23.28,
      "learning_rate": 0.001,
      "loss": 2.5585,
      "step": 121236
    },
    {
      "epoch": 23.28,
      "learning_rate": 0.001,
      "loss": 2.5577,
      "step": 121248
    },
    {
      "epoch": 23.28,
      "eval_ag_news_accuracy": 0.32103125,
      "eval_ag_news_bleu_score": 4.874165419045654,
      "eval_ag_news_bleu_score_sem": 0.16579289932605318,
      "eval_ag_news_emb_cos_sim": 0.8063075542449951,
      "eval_ag_news_emb_cos_sim_sem": 0.0073833902892596935,
      "eval_ag_news_emb_top1_equal": 0.2578125,
      "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5741777420043945,
      "eval_ag_news_n_ngrams_match_1": 13.988,
      "eval_ag_news_n_ngrams_match_2": 3.042,
      "eval_ag_news_n_ngrams_match_3": 0.898,
      "eval_ag_news_num_pred_words": 46.472,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 35.665282691696575,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3462235945244524,
      "eval_ag_news_runtime": 10.5762,
      "eval_ag_news_samples_per_second": 47.276,
      "eval_ag_news_steps_per_second": 0.095,
      "eval_ag_news_token_set_f1": 0.3477193815814541,
      "eval_ag_news_token_set_f1_sem": 0.004257447670961566,
      "eval_ag_news_token_set_precision": 0.33329777494106055,
      "eval_ag_news_token_set_recall": 0.3792672712927153,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 121250
    },
    {
      "epoch": 23.28,
      "eval_anthropic_toxic_prompts_accuracy": 0.1154375,
      "eval_anthropic_toxic_prompts_bleu_score": 3.080957011351216,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11632108037906756,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6728988885879517,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008611370330760974,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.140625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030847557647994725,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2483327388763428,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.112,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.866,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.66,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.816,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.747376511643456,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21199609188909496,
      "eval_anthropic_toxic_prompts_runtime": 11.388,
      "eval_anthropic_toxic_prompts_samples_per_second": 43.906,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.088,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3494584830314751,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006463487341986004,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4294979965929602,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32281078115399486,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 121250
    },
    {
      "epoch": 23.28,
      "eval_arxiv_accuracy": 0.34340625,
      "eval_arxiv_bleu_score": 4.359020347480952,
      "eval_arxiv_bleu_score_sem": 0.1150764754668123,
      "eval_arxiv_emb_cos_sim": 0.7614631056785583,
      "eval_arxiv_emb_cos_sim_sem": 0.007802540039156027,
      "eval_arxiv_emb_top1_equal": 0.265625,
      "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.431478977203369,
      "eval_arxiv_n_ngrams_match_1": 15.258,
      "eval_arxiv_n_ngrams_match_2": 2.94,
      "eval_arxiv_n_ngrams_match_3": 0.7,
      "eval_arxiv_num_pred_words": 40.914,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 30.9223423864507,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.362370790399975,
      "eval_arxiv_runtime": 10.3257,
      "eval_arxiv_samples_per_second": 48.423,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.3565735073656416,
      "eval_arxiv_token_set_f1_sem": 0.004100316361615648,
      "eval_arxiv_token_set_precision": 0.3087606879210232,
      "eval_arxiv_token_set_recall": 0.44003770890626887,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 121250
    },
    {
      "epoch": 23.28,
      "eval_python_code_alpaca_accuracy": 0.15809375,
      "eval_python_code_alpaca_bleu_score": 4.582500605666104,
      "eval_python_code_alpaca_bleu_score_sem": 0.1499009498163755,
      "eval_python_code_alpaca_emb_cos_sim": 0.7578281164169312,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008202336483051763,
      "eval_python_code_alpaca_emb_top1_equal": 0.1484375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.906625270843506,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.756,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.848,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.948,
      "eval_python_code_alpaca_num_pred_words": 43.654,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.294953757110324,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3298523791182037,
      "eval_python_code_alpaca_runtime": 10.1033,
      "eval_python_code_alpaca_samples_per_second": 49.489,
      "eval_python_code_alpaca_steps_per_second": 0.099,
      "eval_python_code_alpaca_token_set_f1": 0.4767302242072028,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005600595670791987,
      "eval_python_code_alpaca_token_set_precision": 0.5336928393342181,
      "eval_python_code_alpaca_token_set_recall": 0.4521975005902411,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 121250
    },
    {
      "epoch": 23.28,
      "eval_wikibio_accuracy": 0.320875,
      "eval_wikibio_bleu_score": 5.741545255088878,
      "eval_wikibio_bleu_score_sem": 0.2083526776120858,
      "eval_wikibio_emb_cos_sim": 0.7482487559318542,
      "eval_wikibio_emb_cos_sim_sem": 0.008251312438110439,
      "eval_wikibio_emb_top1_equal": 0.2265625,
      "eval_wikibio_emb_top1_equal_sem": 0.03714537682851538,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7810096740722656,
      "eval_wikibio_n_ngrams_match_1": 10.218,
      "eval_wikibio_n_ngrams_match_2": 3.324,
      "eval_wikibio_n_ngrams_match_3": 1.166,
      "eval_wikibio_num_pred_words": 37.406,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 43.860303998332995,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.35141834315454623,
      "eval_wikibio_runtime": 10.3546,
      "eval_wikibio_samples_per_second": 48.288,
      "eval_wikibio_steps_per_second": 0.097,
      "eval_wikibio_token_set_f1": 0.321003867132244,
      "eval_wikibio_token_set_f1_sem": 0.005373908280963026,
      "eval_wikibio_token_set_precision": 0.3313451305113394,
      "eval_wikibio_token_set_recall": 0.324060558482982,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 121250
    },
    {
      "epoch": 23.28,
      "eval_nq_accuracy": 0.526125,
      "eval_nq_bleu_score": 11.430813609193399,
      "eval_nq_bleu_score_sem": 0.4650151959951594,
      "eval_nq_emb_cos_sim": 0.8315334320068359,
      "eval_nq_emb_cos_sim_sem": 0.006737074952790213,
      "eval_nq_emb_top1_equal": 0.265625,
      "eval_nq_emb_top1_equal_sem": 0.03919146934646163,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.192634344100952,
      "eval_nq_n_ngrams_match_1": 23.026,
      "eval_nq_n_ngrams_match_2": 8.414,
      "eval_nq_n_ngrams_match_3": 3.802,
      "eval_nq_num_pred_words": 48.648,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.958782572058137,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.44587342125133683,
      "eval_nq_runtime": 11.7181,
      "eval_nq_samples_per_second": 42.669,
      "eval_nq_steps_per_second": 0.085,
      "eval_nq_token_set_f1": 0.4608285823908761,
      "eval_nq_token_set_f1_sem": 0.004752347148498197,
      "eval_nq_token_set_precision": 0.4192158216426424,
      "eval_nq_token_set_recall": 0.5196408978945384,
      "eval_nq_true_num_tokens": 64.0,
      "step": 121250
    },
    {
      "epoch": 23.28,
      "learning_rate": 0.001,
      "loss": 2.5655,
      "step": 121260
    },
    {
      "epoch": 23.29,
      "learning_rate": 0.001,
      "loss": 2.5754,
      "step": 121272
    },
    {
      "epoch": 23.29,
      "learning_rate": 0.001,
      "loss": 2.5687,
      "step": 121284
    },
    {
      "epoch": 23.29,
      "learning_rate": 0.001,
      "loss": 2.5704,
      "step": 121296
    },
    {
      "epoch": 23.29,
      "learning_rate": 0.001,
      "loss": 2.5732,
      "step": 121308
    },
    {
      "epoch": 23.29,
      "learning_rate": 0.001,
      "loss": 2.5705,
      "step": 121320
    },
    {
      "epoch": 23.3,
      "learning_rate": 0.001,
      "loss": 2.5746,
      "step": 121332
    },
    {
      "epoch": 23.3,
      "learning_rate": 0.001,
      "loss": 2.5717,
      "step": 121344
    },
    {
      "epoch": 23.3,
      "learning_rate": 0.001,
      "loss": 2.5609,
      "step": 121356
    },
    {
      "epoch": 23.3,
      "learning_rate": 0.001,
      "loss": 2.5697,
      "step": 121368
    },
    {
      "epoch": 23.31,
      "learning_rate": 0.001,
      "loss": 2.572,
      "step": 121380
    },
    {
      "epoch": 23.31,
      "learning_rate": 0.001,
      "loss": 2.5605,
      "step": 121392
    },
    {
      "epoch": 23.31,
      "learning_rate": 0.001,
      "loss": 2.5694,
      "step": 121404
    },
    {
      "epoch": 23.31,
      "learning_rate": 0.001,
      "loss": 2.5755,
      "step": 121416
    },
    {
      "epoch": 23.32,
      "learning_rate": 0.001,
      "loss": 2.5704,
      "step": 121428
    },
    {
      "epoch": 23.32,
      "learning_rate": 0.001,
      "loss": 2.5567,
      "step": 121440
    },
    {
      "epoch": 23.32,
      "learning_rate": 0.001,
      "loss": 2.566,
      "step": 121452
    },
    {
      "epoch": 23.32,
      "learning_rate": 0.001,
      "loss": 2.5668,
      "step": 121464
    },
    {
      "epoch": 23.32,
      "learning_rate": 0.001,
      "loss": 2.5637,
      "step": 121476
    },
    {
      "epoch": 23.33,
      "learning_rate": 0.001,
      "loss": 2.5668,
      "step": 121488
    },
    {
      "epoch": 23.33,
      "learning_rate": 0.001,
      "loss": 2.5697,
      "step": 121500
    },
    {
      "epoch": 23.33,
      "learning_rate": 0.001,
      "loss": 2.5559,
      "step": 121512
    },
    {
      "epoch": 23.33,
      "learning_rate": 0.001,
      "loss": 2.5673,
      "step": 121524
    },
    {
      "epoch": 23.34,
      "learning_rate": 0.001,
      "loss": 2.5755,
      "step": 121536
    },
    {
      "epoch": 23.34,
      "learning_rate": 0.001,
      "loss": 2.5661,
      "step": 121548
    },
    {
      "epoch": 23.34,
      "learning_rate": 0.001,
      "loss": 2.5586,
      "step": 121560
    },
    {
      "epoch": 23.34,
      "learning_rate": 0.001,
      "loss": 2.5684,
      "step": 121572
    },
    {
      "epoch": 23.35,
      "learning_rate": 0.001,
      "loss": 2.5758,
      "step": 121584
    },
    {
      "epoch": 23.35,
      "learning_rate": 0.001,
      "loss": 2.5697,
      "step": 121596
    },
    {
      "epoch": 23.35,
      "learning_rate": 0.001,
      "loss": 2.5658,
      "step": 121608
    },
    {
      "epoch": 23.35,
      "learning_rate": 0.001,
      "loss": 2.5653,
      "step": 121620
    },
    {
      "epoch": 23.35,
      "learning_rate": 0.001,
      "loss": 2.5694,
      "step": 121632
    },
    {
      "epoch": 23.36,
      "learning_rate": 0.001,
      "loss": 2.5652,
      "step": 121644
    },
    {
      "epoch": 23.36,
      "learning_rate": 0.001,
      "loss": 2.5707,
      "step": 121656
    },
    {
      "epoch": 23.36,
      "learning_rate": 0.001,
      "loss": 2.564,
      "step": 121668
    },
    {
      "epoch": 23.36,
      "learning_rate": 0.001,
      "loss": 2.5677,
      "step": 121680
    },
    {
      "epoch": 23.37,
      "learning_rate": 0.001,
      "loss": 2.5742,
      "step": 121692
    },
    {
      "epoch": 23.37,
      "learning_rate": 0.001,
      "loss": 2.5502,
      "step": 121704
    },
    {
      "epoch": 23.37,
      "learning_rate": 0.001,
      "loss": 2.558,
      "step": 121716
    },
    {
      "epoch": 23.37,
      "learning_rate": 0.001,
      "loss": 2.5613,
      "step": 121728
    },
    {
      "epoch": 23.38,
      "learning_rate": 0.001,
      "loss": 2.5584,
      "step": 121740
    },
    {
      "epoch": 23.38,
      "learning_rate": 0.001,
      "loss": 2.5678,
      "step": 121752
    },
    {
      "epoch": 23.38,
      "learning_rate": 0.001,
      "loss": 2.5728,
      "step": 121764
    },
    {
      "epoch": 23.38,
      "learning_rate": 0.001,
      "loss": 2.5637,
      "step": 121776
    },
    {
      "epoch": 23.38,
      "learning_rate": 0.001,
      "loss": 2.5793,
      "step": 121788
    },
    {
      "epoch": 23.39,
      "learning_rate": 0.001,
      "loss": 2.5786,
      "step": 121800
    },
    {
      "epoch": 23.39,
      "learning_rate": 0.001,
      "loss": 2.5618,
      "step": 121812
    },
    {
      "epoch": 23.39,
      "learning_rate": 0.001,
      "loss": 2.5686,
      "step": 121824
    },
    {
      "epoch": 23.39,
      "learning_rate": 0.001,
      "loss": 2.5706,
      "step": 121836
    },
    {
      "epoch": 23.4,
      "learning_rate": 0.001,
      "loss": 2.5766,
      "step": 121848
    },
    {
      "epoch": 23.4,
      "learning_rate": 0.001,
      "loss": 2.5661,
      "step": 121860
    },
    {
      "epoch": 23.4,
      "learning_rate": 0.001,
      "loss": 2.558,
      "step": 121872
    },
    {
      "epoch": 23.4,
      "eval_ag_news_accuracy": 0.3189375,
      "eval_ag_news_bleu_score": 4.642633663633252,
      "eval_ag_news_bleu_score_sem": 0.14945202633130197,
      "eval_ag_news_emb_cos_sim": 0.8092349767684937,
      "eval_ag_news_emb_cos_sim_sem": 0.007305152567533314,
      "eval_ag_news_emb_top1_equal": 0.21875,
      "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.57832670211792,
      "eval_ag_news_n_ngrams_match_1": 14.132,
      "eval_ag_news_n_ngrams_match_2": 3.068,
      "eval_ag_news_n_ngrams_match_3": 0.834,
      "eval_ag_news_num_pred_words": 47.356,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 35.813563920766775,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.34903784453151987,
      "eval_ag_news_runtime": 10.6779,
      "eval_ag_news_samples_per_second": 46.826,
      "eval_ag_news_steps_per_second": 0.094,
      "eval_ag_news_token_set_f1": 0.35103969289557596,
      "eval_ag_news_token_set_f1_sem": 0.004404011331696912,
      "eval_ag_news_token_set_precision": 0.33822467424658614,
      "eval_ag_news_token_set_recall": 0.38167712725139696,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 121875
    },
    {
      "epoch": 23.4,
      "eval_anthropic_toxic_prompts_accuracy": 0.11371875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1588372411166685,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12305615999424686,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.676432728767395,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009129692932292457,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.26751971244812,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.186,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.924,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.722,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.768,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.246160520345136,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21396210678798863,
      "eval_anthropic_toxic_prompts_runtime": 11.7323,
      "eval_anthropic_toxic_prompts_samples_per_second": 42.618,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.085,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35061784063451074,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006703209074744799,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4321739548990694,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3203556620142261,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 121875
    },
    {
      "epoch": 23.4,
      "eval_arxiv_accuracy": 0.34728125,
      "eval_arxiv_bleu_score": 4.294976414606507,
      "eval_arxiv_bleu_score_sem": 0.1203371534208929,
      "eval_arxiv_emb_cos_sim": 0.7646492719650269,
      "eval_arxiv_emb_cos_sim_sem": 0.0066429834305786585,
      "eval_arxiv_emb_top1_equal": 0.2421875,
      "eval_arxiv_emb_top1_equal_sem": 0.038014990119662626,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.412419319152832,
      "eval_arxiv_n_ngrams_match_1": 15.07,
      "eval_arxiv_n_ngrams_match_2": 2.896,
      "eval_arxiv_n_ngrams_match_3": 0.646,
      "eval_arxiv_num_pred_words": 40.938,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 30.33855418880258,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.35841171502222924,
      "eval_arxiv_runtime": 10.2995,
      "eval_arxiv_samples_per_second": 48.546,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.3524439937035807,
      "eval_arxiv_token_set_f1_sem": 0.004223507030155764,
      "eval_arxiv_token_set_precision": 0.30447165293398804,
      "eval_arxiv_token_set_recall": 0.4330600411975926,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 121875
    },
    {
      "epoch": 23.4,
      "eval_python_code_alpaca_accuracy": 0.15903125,
      "eval_python_code_alpaca_bleu_score": 4.596467146569537,
      "eval_python_code_alpaca_bleu_score_sem": 0.14537870616202808,
      "eval_python_code_alpaca_emb_cos_sim": 0.7548423409461975,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008688273182769672,
      "eval_python_code_alpaca_emb_top1_equal": 0.171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03347745514062371,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.910658359527588,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.848,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.936,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.99,
      "eval_python_code_alpaca_num_pred_words": 43.812,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.368887919384022,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3309596211758795,
      "eval_python_code_alpaca_runtime": 10.0746,
      "eval_python_code_alpaca_samples_per_second": 49.63,
      "eval_python_code_alpaca_steps_per_second": 0.099,
      "eval_python_code_alpaca_token_set_f1": 0.47304201350047886,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005512177902522732,
      "eval_python_code_alpaca_token_set_precision": 0.5375731385328762,
      "eval_python_code_alpaca_token_set_recall": 0.4422129079418682,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 121875
    },
    {
      "epoch": 23.4,
      "eval_wikibio_accuracy": 0.32171875,
      "eval_wikibio_bleu_score": 5.802899531108984,
      "eval_wikibio_bleu_score_sem": 0.21133635337679493,
      "eval_wikibio_emb_cos_sim": 0.73765629529953,
      "eval_wikibio_emb_cos_sim_sem": 0.010046967723660443,
      "eval_wikibio_emb_top1_equal": 0.1875,
      "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7330172061920166,
      "eval_wikibio_n_ngrams_match_1": 10.124,
      "eval_wikibio_n_ngrams_match_2": 3.332,
      "eval_wikibio_n_ngrams_match_3": 1.216,
      "eval_wikibio_num_pred_words": 36.964,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 41.80505253177776,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.34796721431657673,
      "eval_wikibio_runtime": 10.1114,
      "eval_wikibio_samples_per_second": 49.449,
      "eval_wikibio_steps_per_second": 0.099,
      "eval_wikibio_token_set_f1": 0.31746722919500114,
      "eval_wikibio_token_set_f1_sem": 0.005486546495886831,
      "eval_wikibio_token_set_precision": 0.32854201146966994,
      "eval_wikibio_token_set_recall": 0.3231797425642653,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 121875
    },
    {
      "epoch": 23.4,
      "eval_nq_accuracy": 0.52665625,
      "eval_nq_bleu_score": 11.756796526191959,
      "eval_nq_bleu_score_sem": 0.4660388153848274,
      "eval_nq_emb_cos_sim": 0.8276510834693909,
      "eval_nq_emb_cos_sim_sem": 0.0071159738038141344,
      "eval_nq_emb_top1_equal": 0.265625,
      "eval_nq_emb_top1_equal_sem": 0.03919146934646163,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1940035820007324,
      "eval_nq_n_ngrams_match_1": 23.012,
      "eval_nq_n_ngrams_match_2": 8.534,
      "eval_nq_n_ngrams_match_3": 3.946,
      "eval_nq_num_pred_words": 49.302,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.971057678544408,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4433672046503808,
      "eval_nq_runtime": 10.4057,
      "eval_nq_samples_per_second": 48.051,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.4572500635910518,
      "eval_nq_token_set_f1_sem": 0.004937699731814166,
      "eval_nq_token_set_precision": 0.416285840345419,
      "eval_nq_token_set_recall": 0.5161096093213172,
      "eval_nq_true_num_tokens": 64.0,
      "step": 121875
    },
    {
      "epoch": 23.4,
      "learning_rate": 0.001,
      "loss": 2.5653,
      "step": 121884
    },
    {
      "epoch": 23.41,
      "learning_rate": 0.001,
      "loss": 2.5632,
      "step": 121896
    },
    {
      "epoch": 23.41,
      "learning_rate": 0.001,
      "loss": 2.5515,
      "step": 121908
    },
    {
      "epoch": 23.41,
      "learning_rate": 0.001,
      "loss": 2.5783,
      "step": 121920
    },
    {
      "epoch": 23.41,
      "learning_rate": 0.001,
      "loss": 2.5752,
      "step": 121932
    },
    {
      "epoch": 23.41,
      "learning_rate": 0.001,
      "loss": 2.57,
      "step": 121944
    },
    {
      "epoch": 23.42,
      "learning_rate": 0.001,
      "loss": 2.5663,
      "step": 121956
    },
    {
      "epoch": 23.42,
      "learning_rate": 0.001,
      "loss": 2.571,
      "step": 121968
    },
    {
      "epoch": 23.42,
      "learning_rate": 0.001,
      "loss": 2.5697,
      "step": 121980
    },
    {
      "epoch": 23.42,
      "learning_rate": 0.001,
      "loss": 2.565,
      "step": 121992
    },
    {
      "epoch": 23.43,
      "learning_rate": 0.001,
      "loss": 2.5656,
      "step": 122004
    },
    {
      "epoch": 23.43,
      "learning_rate": 0.001,
      "loss": 2.573,
      "step": 122016
    },
    {
      "epoch": 23.43,
      "learning_rate": 0.001,
      "loss": 2.5609,
      "step": 122028
    },
    {
      "epoch": 23.43,
      "learning_rate": 0.001,
      "loss": 2.5709,
      "step": 122040
    },
    {
      "epoch": 23.44,
      "learning_rate": 0.001,
      "loss": 2.5578,
      "step": 122052
    },
    {
      "epoch": 23.44,
      "learning_rate": 0.001,
      "loss": 2.5665,
      "step": 122064
    },
    {
      "epoch": 23.44,
      "learning_rate": 0.001,
      "loss": 2.5652,
      "step": 122076
    },
    {
      "epoch": 23.44,
      "learning_rate": 0.001,
      "loss": 2.5662,
      "step": 122088
    },
    {
      "epoch": 23.44,
      "learning_rate": 0.001,
      "loss": 2.5716,
      "step": 122100
    },
    {
      "epoch": 23.45,
      "learning_rate": 0.001,
      "loss": 2.5626,
      "step": 122112
    },
    {
      "epoch": 23.45,
      "learning_rate": 0.001,
      "loss": 2.5694,
      "step": 122124
    },
    {
      "epoch": 23.45,
      "learning_rate": 0.001,
      "loss": 2.5634,
      "step": 122136
    },
    {
      "epoch": 23.45,
      "learning_rate": 0.001,
      "loss": 2.562,
      "step": 122148
    },
    {
      "epoch": 23.46,
      "learning_rate": 0.001,
      "loss": 2.5662,
      "step": 122160
    },
    {
      "epoch": 23.46,
      "learning_rate": 0.001,
      "loss": 2.5712,
      "step": 122172
    },
    {
      "epoch": 23.46,
      "learning_rate": 0.001,
      "loss": 2.5738,
      "step": 122184
    },
    {
      "epoch": 23.46,
      "learning_rate": 0.001,
      "loss": 2.5748,
      "step": 122196
    },
    {
      "epoch": 23.47,
      "learning_rate": 0.001,
      "loss": 2.5705,
      "step": 122208
    },
    {
      "epoch": 23.47,
      "learning_rate": 0.001,
      "loss": 2.5589,
      "step": 122220
    },
    {
      "epoch": 23.47,
      "learning_rate": 0.001,
      "loss": 2.5607,
      "step": 122232
    },
    {
      "epoch": 23.47,
      "learning_rate": 0.001,
      "loss": 2.5661,
      "step": 122244
    },
    {
      "epoch": 23.47,
      "learning_rate": 0.001,
      "loss": 2.5759,
      "step": 122256
    },
    {
      "epoch": 23.48,
      "learning_rate": 0.001,
      "loss": 2.5815,
      "step": 122268
    },
    {
      "epoch": 23.48,
      "learning_rate": 0.001,
      "loss": 2.5632,
      "step": 122280
    },
    {
      "epoch": 23.48,
      "learning_rate": 0.001,
      "loss": 2.58,
      "step": 122292
    },
    {
      "epoch": 23.48,
      "learning_rate": 0.001,
      "loss": 2.5718,
      "step": 122304
    },
    {
      "epoch": 23.49,
      "learning_rate": 0.001,
      "loss": 2.561,
      "step": 122316
    },
    {
      "epoch": 23.49,
      "learning_rate": 0.001,
      "loss": 2.5761,
      "step": 122328
    },
    {
      "epoch": 23.49,
      "learning_rate": 0.001,
      "loss": 2.5772,
      "step": 122340
    },
    {
      "epoch": 23.49,
      "learning_rate": 0.001,
      "loss": 2.565,
      "step": 122352
    },
    {
      "epoch": 23.5,
      "learning_rate": 0.001,
      "loss": 2.5659,
      "step": 122364
    },
    {
      "epoch": 23.5,
      "learning_rate": 0.001,
      "loss": 2.5697,
      "step": 122376
    },
    {
      "epoch": 23.5,
      "learning_rate": 0.001,
      "loss": 2.5577,
      "step": 122388
    },
    {
      "epoch": 23.5,
      "learning_rate": 0.001,
      "loss": 2.5647,
      "step": 122400
    },
    {
      "epoch": 23.5,
      "learning_rate": 0.001,
      "loss": 2.5657,
      "step": 122412
    },
    {
      "epoch": 23.51,
      "learning_rate": 0.001,
      "loss": 2.575,
      "step": 122424
    },
    {
      "epoch": 23.51,
      "learning_rate": 0.001,
      "loss": 2.5589,
      "step": 122436
    },
    {
      "epoch": 23.51,
      "learning_rate": 0.001,
      "loss": 2.5652,
      "step": 122448
    },
    {
      "epoch": 23.51,
      "learning_rate": 0.001,
      "loss": 2.565,
      "step": 122460
    },
    {
      "epoch": 23.52,
      "learning_rate": 0.001,
      "loss": 2.5777,
      "step": 122472
    },
    {
      "epoch": 23.52,
      "learning_rate": 0.001,
      "loss": 2.5693,
      "step": 122484
    },
    {
      "epoch": 23.52,
      "learning_rate": 0.001,
      "loss": 2.5762,
      "step": 122496
    },
    {
      "epoch": 23.52,
      "eval_ag_news_accuracy": 0.3204375,
      "eval_ag_news_bleu_score": 4.8349354317944595,
      "eval_ag_news_bleu_score_sem": 0.15351510065098548,
      "eval_ag_news_emb_cos_sim": 0.8132928609848022,
      "eval_ag_news_emb_cos_sim_sem": 0.006314245256183273,
      "eval_ag_news_emb_top1_equal": 0.21875,
      "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5733981132507324,
      "eval_ag_news_n_ngrams_match_1": 14.014,
      "eval_ag_news_n_ngrams_match_2": 3.078,
      "eval_ag_news_n_ngrams_match_3": 0.898,
      "eval_ag_news_num_pred_words": 46.854,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 35.63748784804016,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3475771176116358,
      "eval_ag_news_runtime": 10.7206,
      "eval_ag_news_samples_per_second": 46.639,
      "eval_ag_news_steps_per_second": 0.093,
      "eval_ag_news_token_set_f1": 0.35014314843507793,
      "eval_ag_news_token_set_f1_sem": 0.004463124716538842,
      "eval_ag_news_token_set_precision": 0.3359607803155184,
      "eval_ag_news_token_set_recall": 0.3821531054917202,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 122500
    },
    {
      "epoch": 23.52,
      "eval_anthropic_toxic_prompts_accuracy": 0.11328125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1850523961142883,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11906180560670668,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.681453287601471,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008138155393848276,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2276039123535156,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.278,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.968,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.754,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.624,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.21915720750061,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21616496563371101,
      "eval_anthropic_toxic_prompts_runtime": 9.7901,
      "eval_anthropic_toxic_prompts_samples_per_second": 51.072,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.102,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36249006687763446,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00663312401101885,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44388364175908684,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.33359599349921426,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 122500
    },
    {
      "epoch": 23.52,
      "eval_arxiv_accuracy": 0.34746875,
      "eval_arxiv_bleu_score": 4.362487455202465,
      "eval_arxiv_bleu_score_sem": 0.13176923200403984,
      "eval_arxiv_emb_cos_sim": 0.7627640962600708,
      "eval_arxiv_emb_cos_sim_sem": 0.007787284855628606,
      "eval_arxiv_emb_top1_equal": 0.28125,
      "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.426496744155884,
      "eval_arxiv_n_ngrams_match_1": 15.17,
      "eval_arxiv_n_ngrams_match_2": 3.056,
      "eval_arxiv_n_ngrams_match_3": 0.676,
      "eval_arxiv_num_pred_words": 40.82,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 30.768663220909403,
      "eval_arxiv_pred_num_tokens": 62.9765625,
      "eval_arxiv_rouge_score": 0.3599718993054766,
      "eval_arxiv_runtime": 10.253,
      "eval_arxiv_samples_per_second": 48.766,
      "eval_arxiv_steps_per_second": 0.098,
      "eval_arxiv_token_set_f1": 0.3546398122566314,
      "eval_arxiv_token_set_f1_sem": 0.004311476017662968,
      "eval_arxiv_token_set_precision": 0.30654908122137453,
      "eval_arxiv_token_set_recall": 0.44160247311300843,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 122500
    },
    {
      "epoch": 23.52,
      "eval_python_code_alpaca_accuracy": 0.16140625,
      "eval_python_code_alpaca_bleu_score": 4.511677052583327,
      "eval_python_code_alpaca_bleu_score_sem": 0.13317167967921642,
      "eval_python_code_alpaca_emb_cos_sim": 0.758931577205658,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007892412925249444,
      "eval_python_code_alpaca_emb_top1_equal": 0.171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03347745514062371,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.876638889312744,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.92,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.0,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.002,
      "eval_python_code_alpaca_num_pred_words": 45.226,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.75449794750196,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.32749275849401843,
      "eval_python_code_alpaca_runtime": 13.2672,
      "eval_python_code_alpaca_samples_per_second": 37.687,
      "eval_python_code_alpaca_steps_per_second": 0.075,
      "eval_python_code_alpaca_token_set_f1": 0.48482474430756134,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005587493634600956,
      "eval_python_code_alpaca_token_set_precision": 0.5450426199491449,
      "eval_python_code_alpaca_token_set_recall": 0.4568639943124936,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 122500
    },
    {
      "epoch": 23.52,
      "eval_wikibio_accuracy": 0.32209375,
      "eval_wikibio_bleu_score": 6.204530654614644,
      "eval_wikibio_bleu_score_sem": 0.20980407647689625,
      "eval_wikibio_emb_cos_sim": 0.7364906668663025,
      "eval_wikibio_emb_cos_sim_sem": 0.010211961440323956,
      "eval_wikibio_emb_top1_equal": 0.15625,
      "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.744091272354126,
      "eval_wikibio_n_ngrams_match_1": 10.388,
      "eval_wikibio_n_ngrams_match_2": 3.546,
      "eval_wikibio_n_ngrams_match_3": 1.314,
      "eval_wikibio_num_pred_words": 36.864,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 42.27057731810975,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.35843491332223465,
      "eval_wikibio_runtime": 10.0817,
      "eval_wikibio_samples_per_second": 49.595,
      "eval_wikibio_steps_per_second": 0.099,
      "eval_wikibio_token_set_f1": 0.32614690938554636,
      "eval_wikibio_token_set_f1_sem": 0.0051561823668839135,
      "eval_wikibio_token_set_precision": 0.33595573731791706,
      "eval_wikibio_token_set_recall": 0.33118152361646486,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 122500
    },
    {
      "epoch": 23.52,
      "eval_nq_accuracy": 0.52534375,
      "eval_nq_bleu_score": 11.67496100472067,
      "eval_nq_bleu_score_sem": 0.47042914886520376,
      "eval_nq_emb_cos_sim": 0.8323712348937988,
      "eval_nq_emb_cos_sim_sem": 0.007359014264296713,
      "eval_nq_emb_top1_equal": 0.296875,
      "eval_nq_emb_top1_equal_sem": 0.04054163310179599,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1965255737304688,
      "eval_nq_n_ngrams_match_1": 23.072,
      "eval_nq_n_ngrams_match_2": 8.49,
      "eval_nq_n_ngrams_match_3": 3.906,
      "eval_nq_num_pred_words": 49.244,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.993711165763212,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.44471343700422467,
      "eval_nq_runtime": 11.1069,
      "eval_nq_samples_per_second": 45.017,
      "eval_nq_steps_per_second": 0.09,
      "eval_nq_token_set_f1": 0.4604667802683219,
      "eval_nq_token_set_f1_sem": 0.004934073570951141,
      "eval_nq_token_set_precision": 0.4176394182970839,
      "eval_nq_token_set_recall": 0.5210676704936616,
      "eval_nq_true_num_tokens": 64.0,
      "step": 122500
    },
    {
      "epoch": 23.52,
      "learning_rate": 0.001,
      "loss": 2.568,
      "step": 122508
    },
    {
      "epoch": 23.53,
      "learning_rate": 0.001,
      "loss": 2.57,
      "step": 122520
    },
    {
      "epoch": 23.53,
      "learning_rate": 0.001,
      "loss": 2.5625,
      "step": 122532
    },
    {
      "epoch": 23.53,
      "learning_rate": 0.001,
      "loss": 2.5689,
      "step": 122544
    },
    {
      "epoch": 23.53,
      "learning_rate": 0.001,
      "loss": 2.5823,
      "step": 122556
    },
    {
      "epoch": 23.53,
      "learning_rate": 0.001,
      "loss": 2.5677,
      "step": 122568
    },
    {
      "epoch": 23.54,
      "learning_rate": 0.001,
      "loss": 2.5654,
      "step": 122580
    },
    {
      "epoch": 23.54,
      "learning_rate": 0.001,
      "loss": 2.5704,
      "step": 122592
    },
    {
      "epoch": 23.54,
      "learning_rate": 0.001,
      "loss": 2.5589,
      "step": 122604
    },
    {
      "epoch": 23.54,
      "learning_rate": 0.001,
      "loss": 2.5692,
      "step": 122616
    },
    {
      "epoch": 23.55,
      "learning_rate": 0.001,
      "loss": 2.5647,
      "step": 122628
    },
    {
      "epoch": 23.55,
      "learning_rate": 0.001,
      "loss": 2.5752,
      "step": 122640
    },
    {
      "epoch": 23.55,
      "learning_rate": 0.001,
      "loss": 2.5806,
      "step": 122652
    },
    {
      "epoch": 23.55,
      "learning_rate": 0.001,
      "loss": 2.5698,
      "step": 122664
    },
    {
      "epoch": 23.56,
      "learning_rate": 0.001,
      "loss": 2.5703,
      "step": 122676
    },
    {
      "epoch": 23.56,
      "learning_rate": 0.001,
      "loss": 2.5732,
      "step": 122688
    },
    {
      "epoch": 23.56,
      "learning_rate": 0.001,
      "loss": 2.5745,
      "step": 122700
    },
    {
      "epoch": 23.56,
      "learning_rate": 0.001,
      "loss": 2.5723,
      "step": 122712
    },
    {
      "epoch": 23.56,
      "learning_rate": 0.001,
      "loss": 2.5597,
      "step": 122724
    },
    {
      "epoch": 23.57,
      "learning_rate": 0.001,
      "loss": 2.5713,
      "step": 122736
    },
    {
      "epoch": 23.57,
      "learning_rate": 0.001,
      "loss": 2.5765,
      "step": 122748
    },
    {
      "epoch": 23.57,
      "learning_rate": 0.001,
      "loss": 2.5762,
      "step": 122760
    },
    {
      "epoch": 23.57,
      "learning_rate": 0.001,
      "loss": 2.5772,
      "step": 122772
    },
    {
      "epoch": 23.58,
      "learning_rate": 0.001,
      "loss": 2.5867,
      "step": 122784
    },
    {
      "epoch": 23.58,
      "learning_rate": 0.001,
      "loss": 2.5579,
      "step": 122796
    },
    {
      "epoch": 23.58,
      "learning_rate": 0.001,
      "loss": 2.573,
      "step": 122808
    },
    {
      "epoch": 23.58,
      "learning_rate": 0.001,
      "loss": 2.5622,
      "step": 122820
    },
    {
      "epoch": 23.59,
      "learning_rate": 0.001,
      "loss": 2.5716,
      "step": 122832
    },
    {
      "epoch": 23.59,
      "learning_rate": 0.001,
      "loss": 2.5748,
      "step": 122844
    },
    {
      "epoch": 23.59,
      "learning_rate": 0.001,
      "loss": 2.5699,
      "step": 122856
    },
    {
      "epoch": 23.59,
      "learning_rate": 0.001,
      "loss": 2.5571,
      "step": 122868
    },
    {
      "epoch": 23.59,
      "learning_rate": 0.001,
      "loss": 2.5729,
      "step": 122880
    },
    {
      "epoch": 23.6,
      "learning_rate": 0.001,
      "loss": 2.5652,
      "step": 122892
    },
    {
      "epoch": 23.6,
      "learning_rate": 0.001,
      "loss": 2.5665,
      "step": 122904
    },
    {
      "epoch": 23.6,
      "learning_rate": 0.001,
      "loss": 2.5704,
      "step": 122916
    },
    {
      "epoch": 23.6,
      "learning_rate": 0.001,
      "loss": 2.56,
      "step": 122928
    },
    {
      "epoch": 23.61,
      "learning_rate": 0.001,
      "loss": 2.5726,
      "step": 122940
    },
    {
      "epoch": 23.61,
      "learning_rate": 0.001,
      "loss": 2.5578,
      "step": 122952
    },
    {
      "epoch": 23.61,
      "learning_rate": 0.001,
      "loss": 2.5705,
      "step": 122964
    },
    {
      "epoch": 23.61,
      "learning_rate": 0.001,
      "loss": 2.5657,
      "step": 122976
    },
    {
      "epoch": 23.62,
      "learning_rate": 0.001,
      "loss": 2.5717,
      "step": 122988
    },
    {
      "epoch": 23.62,
      "learning_rate": 0.001,
      "loss": 2.5759,
      "step": 123000
    },
    {
      "epoch": 23.62,
      "learning_rate": 0.001,
      "loss": 2.5643,
      "step": 123012
    },
    {
      "epoch": 23.62,
      "learning_rate": 0.001,
      "loss": 2.5615,
      "step": 123024
    },
    {
      "epoch": 23.62,
      "learning_rate": 0.001,
      "loss": 2.5697,
      "step": 123036
    },
    {
      "epoch": 23.63,
      "learning_rate": 0.001,
      "loss": 2.5629,
      "step": 123048
    },
    {
      "epoch": 23.63,
      "learning_rate": 0.001,
      "loss": 2.5687,
      "step": 123060
    },
    {
      "epoch": 23.63,
      "learning_rate": 0.001,
      "loss": 2.5693,
      "step": 123072
    },
    {
      "epoch": 23.63,
      "learning_rate": 0.001,
      "loss": 2.5585,
      "step": 123084
    },
    {
      "epoch": 23.64,
      "learning_rate": 0.001,
      "loss": 2.5586,
      "step": 123096
    },
    {
      "epoch": 23.64,
      "learning_rate": 0.001,
      "loss": 2.5693,
      "step": 123108
    },
    {
      "epoch": 23.64,
      "learning_rate": 0.001,
      "loss": 2.5683,
      "step": 123120
    },
    {
      "epoch": 23.64,
      "eval_ag_news_accuracy": 0.32153125,
      "eval_ag_news_bleu_score": 4.7287873492146195,
      "eval_ag_news_bleu_score_sem": 0.14790544014679205,
      "eval_ag_news_emb_cos_sim": 0.809762716293335,
      "eval_ag_news_emb_cos_sim_sem": 0.006641599170022878,
      "eval_ag_news_emb_top1_equal": 0.1875,
      "eval_ag_news_emb_top1_equal_sem": 0.034634623208270626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.560532808303833,
      "eval_ag_news_n_ngrams_match_1": 13.782,
      "eval_ag_news_n_ngrams_match_2": 3.006,
      "eval_ag_news_n_ngrams_match_3": 0.828,
      "eval_ag_news_num_pred_words": 45.906,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 35.18193738056804,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3446016510997211,
      "eval_ag_news_runtime": 10.4482,
      "eval_ag_news_samples_per_second": 47.855,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.3472491714521868,
      "eval_ag_news_token_set_f1_sem": 0.004338237047170675,
      "eval_ag_news_token_set_precision": 0.3303722738410303,
      "eval_ag_news_token_set_recall": 0.38102018685853756,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 123125
    },
    {
      "epoch": 23.64,
      "eval_anthropic_toxic_prompts_accuracy": 0.1146875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1873214004287562,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.13109050829733274,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6590409278869629,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010291505714354174,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.140625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030847557647994725,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.222557306289673,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.192,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.886,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.706,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.23,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.092206659947507,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21532330665313854,
      "eval_anthropic_toxic_prompts_runtime": 9.9067,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.471,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.101,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3518883630743679,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0063797285877650505,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.43790742145145567,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.319679910672875,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 123125
    },
    {
      "epoch": 23.64,
      "eval_arxiv_accuracy": 0.34678125,
      "eval_arxiv_bleu_score": 4.255156246553696,
      "eval_arxiv_bleu_score_sem": 0.12239349455577901,
      "eval_arxiv_emb_cos_sim": 0.7600793242454529,
      "eval_arxiv_emb_cos_sim_sem": 0.007320094283874934,
      "eval_arxiv_emb_top1_equal": 0.34375,
      "eval_arxiv_emb_top1_equal_sem": 0.04214578430296913,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4352352619171143,
      "eval_arxiv_n_ngrams_match_1": 15.038,
      "eval_arxiv_n_ngrams_match_2": 2.934,
      "eval_arxiv_n_ngrams_match_3": 0.622,
      "eval_arxiv_num_pred_words": 40.334,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 31.038713933972293,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36074641046381883,
      "eval_arxiv_runtime": 10.7035,
      "eval_arxiv_samples_per_second": 46.714,
      "eval_arxiv_steps_per_second": 0.093,
      "eval_arxiv_token_set_f1": 0.35192481147901955,
      "eval_arxiv_token_set_f1_sem": 0.004068077998896871,
      "eval_arxiv_token_set_precision": 0.30415173156387404,
      "eval_arxiv_token_set_recall": 0.4370254780126012,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 123125
    },
    {
      "epoch": 23.64,
      "eval_python_code_alpaca_accuracy": 0.15871875,
      "eval_python_code_alpaca_bleu_score": 4.427311801675563,
      "eval_python_code_alpaca_bleu_score_sem": 0.1414208501910256,
      "eval_python_code_alpaca_emb_cos_sim": 0.7463688254356384,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009315223359437411,
      "eval_python_code_alpaca_emb_top1_equal": 0.125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.9006664752960205,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.67,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.802,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.914,
      "eval_python_code_alpaca_num_pred_words": 43.166,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.186262025635585,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3284907309805889,
      "eval_python_code_alpaca_runtime": 10.0174,
      "eval_python_code_alpaca_samples_per_second": 49.913,
      "eval_python_code_alpaca_steps_per_second": 0.1,
      "eval_python_code_alpaca_token_set_f1": 0.4642666563319816,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005637965100670752,
      "eval_python_code_alpaca_token_set_precision": 0.5255050098361054,
      "eval_python_code_alpaca_token_set_recall": 0.4426584371830135,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 123125
    },
    {
      "epoch": 23.64,
      "eval_wikibio_accuracy": 0.320125,
      "eval_wikibio_bleu_score": 5.9093723264471825,
      "eval_wikibio_bleu_score_sem": 0.2161196680840932,
      "eval_wikibio_emb_cos_sim": 0.7459827065467834,
      "eval_wikibio_emb_cos_sim_sem": 0.008466136242059481,
      "eval_wikibio_emb_top1_equal": 0.1015625,
      "eval_wikibio_emb_top1_equal_sem": 0.026804565886848545,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7835049629211426,
      "eval_wikibio_n_ngrams_match_1": 10.186,
      "eval_wikibio_n_ngrams_match_2": 3.388,
      "eval_wikibio_n_ngrams_match_3": 1.228,
      "eval_wikibio_num_pred_words": 36.43,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 43.96988478680974,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3542634594162949,
      "eval_wikibio_runtime": 10.1012,
      "eval_wikibio_samples_per_second": 49.499,
      "eval_wikibio_steps_per_second": 0.099,
      "eval_wikibio_token_set_f1": 0.32083178432983245,
      "eval_wikibio_token_set_f1_sem": 0.005317237659460588,
      "eval_wikibio_token_set_precision": 0.33244088826435114,
      "eval_wikibio_token_set_recall": 0.3245069040968402,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 123125
    },
    {
      "epoch": 23.64,
      "eval_nq_accuracy": 0.52409375,
      "eval_nq_bleu_score": 11.675484314510431,
      "eval_nq_bleu_score_sem": 0.47187692937762726,
      "eval_nq_emb_cos_sim": 0.835478663444519,
      "eval_nq_emb_cos_sim_sem": 0.006884316943696545,
      "eval_nq_emb_top1_equal": 0.265625,
      "eval_nq_emb_top1_equal_sem": 0.03919146934646163,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1956288814544678,
      "eval_nq_n_ngrams_match_1": 22.874,
      "eval_nq_n_ngrams_match_2": 8.4,
      "eval_nq_n_ngrams_match_3": 3.918,
      "eval_nq_num_pred_words": 48.88,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.985650189076178,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.44354355937317347,
      "eval_nq_runtime": 10.5147,
      "eval_nq_samples_per_second": 47.553,
      "eval_nq_steps_per_second": 0.095,
      "eval_nq_token_set_f1": 0.4577481427591343,
      "eval_nq_token_set_f1_sem": 0.005029344854526479,
      "eval_nq_token_set_precision": 0.4164451514692618,
      "eval_nq_token_set_recall": 0.51582719441119,
      "eval_nq_true_num_tokens": 64.0,
      "step": 123125
    },
    {
      "epoch": 23.64,
      "learning_rate": 0.001,
      "loss": 2.568,
      "step": 123132
    },
    {
      "epoch": 23.65,
      "learning_rate": 0.001,
      "loss": 2.5756,
      "step": 123144
    },
    {
      "epoch": 23.65,
      "learning_rate": 0.001,
      "loss": 2.5696,
      "step": 123156
    },
    {
      "epoch": 23.65,
      "learning_rate": 0.001,
      "loss": 2.5622,
      "step": 123168
    },
    {
      "epoch": 23.65,
      "learning_rate": 0.001,
      "loss": 2.5646,
      "step": 123180
    },
    {
      "epoch": 23.65,
      "learning_rate": 0.001,
      "loss": 2.5682,
      "step": 123192
    },
    {
      "epoch": 23.66,
      "learning_rate": 0.001,
      "loss": 2.5642,
      "step": 123204
    },
    {
      "epoch": 23.66,
      "learning_rate": 0.001,
      "loss": 2.5674,
      "step": 123216
    },
    {
      "epoch": 23.66,
      "learning_rate": 0.001,
      "loss": 2.5673,
      "step": 123228
    },
    {
      "epoch": 23.66,
      "learning_rate": 0.001,
      "loss": 2.5609,
      "step": 123240
    },
    {
      "epoch": 23.67,
      "learning_rate": 0.001,
      "loss": 2.5662,
      "step": 123252
    },
    {
      "epoch": 23.67,
      "learning_rate": 0.001,
      "loss": 2.5659,
      "step": 123264
    },
    {
      "epoch": 23.67,
      "learning_rate": 0.001,
      "loss": 2.5743,
      "step": 123276
    },
    {
      "epoch": 23.67,
      "learning_rate": 0.001,
      "loss": 2.5591,
      "step": 123288
    },
    {
      "epoch": 23.68,
      "learning_rate": 0.001,
      "loss": 2.5695,
      "step": 123300
    },
    {
      "epoch": 23.68,
      "learning_rate": 0.001,
      "loss": 2.5607,
      "step": 123312
    },
    {
      "epoch": 23.68,
      "learning_rate": 0.001,
      "loss": 2.5819,
      "step": 123324
    },
    {
      "epoch": 23.68,
      "learning_rate": 0.001,
      "loss": 2.5752,
      "step": 123336
    },
    {
      "epoch": 23.68,
      "learning_rate": 0.001,
      "loss": 2.5687,
      "step": 123348
    },
    {
      "epoch": 23.69,
      "learning_rate": 0.001,
      "loss": 2.5663,
      "step": 123360
    },
    {
      "epoch": 23.69,
      "learning_rate": 0.001,
      "loss": 2.5846,
      "step": 123372
    },
    {
      "epoch": 23.69,
      "learning_rate": 0.001,
      "loss": 2.5851,
      "step": 123384
    },
    {
      "epoch": 23.69,
      "learning_rate": 0.001,
      "loss": 2.5678,
      "step": 123396
    },
    {
      "epoch": 23.7,
      "learning_rate": 0.001,
      "loss": 2.5721,
      "step": 123408
    },
    {
      "epoch": 23.7,
      "learning_rate": 0.001,
      "loss": 2.5692,
      "step": 123420
    },
    {
      "epoch": 23.7,
      "learning_rate": 0.001,
      "loss": 2.5655,
      "step": 123432
    },
    {
      "epoch": 23.7,
      "learning_rate": 0.001,
      "loss": 2.5676,
      "step": 123444
    },
    {
      "epoch": 23.71,
      "learning_rate": 0.001,
      "loss": 2.5739,
      "step": 123456
    },
    {
      "epoch": 23.71,
      "learning_rate": 0.001,
      "loss": 2.5666,
      "step": 123468
    },
    {
      "epoch": 23.71,
      "learning_rate": 0.001,
      "loss": 2.5643,
      "step": 123480
    },
    {
      "epoch": 23.71,
      "learning_rate": 0.001,
      "loss": 2.5635,
      "step": 123492
    },
    {
      "epoch": 23.71,
      "learning_rate": 0.001,
      "loss": 2.5627,
      "step": 123504
    },
    {
      "epoch": 23.72,
      "learning_rate": 0.001,
      "loss": 2.576,
      "step": 123516
    },
    {
      "epoch": 23.72,
      "learning_rate": 0.001,
      "loss": 2.5699,
      "step": 123528
    },
    {
      "epoch": 23.72,
      "learning_rate": 0.001,
      "loss": 2.5556,
      "step": 123540
    },
    {
      "epoch": 23.72,
      "learning_rate": 0.001,
      "loss": 2.5746,
      "step": 123552
    },
    {
      "epoch": 23.73,
      "learning_rate": 0.001,
      "loss": 2.5663,
      "step": 123564
    },
    {
      "epoch": 23.73,
      "learning_rate": 0.001,
      "loss": 2.5679,
      "step": 123576
    },
    {
      "epoch": 23.73,
      "learning_rate": 0.001,
      "loss": 2.5601,
      "step": 123588
    },
    {
      "epoch": 23.73,
      "learning_rate": 0.001,
      "loss": 2.5626,
      "step": 123600
    },
    {
      "epoch": 23.74,
      "learning_rate": 0.001,
      "loss": 2.5697,
      "step": 123612
    },
    {
      "epoch": 23.74,
      "learning_rate": 0.001,
      "loss": 2.5734,
      "step": 123624
    },
    {
      "epoch": 23.74,
      "learning_rate": 0.001,
      "loss": 2.5623,
      "step": 123636
    },
    {
      "epoch": 23.74,
      "learning_rate": 0.001,
      "loss": 2.5698,
      "step": 123648
    },
    {
      "epoch": 23.74,
      "learning_rate": 0.001,
      "loss": 2.5628,
      "step": 123660
    },
    {
      "epoch": 23.75,
      "learning_rate": 0.001,
      "loss": 2.5712,
      "step": 123672
    },
    {
      "epoch": 23.75,
      "learning_rate": 0.001,
      "loss": 2.5729,
      "step": 123684
    },
    {
      "epoch": 23.75,
      "learning_rate": 0.001,
      "loss": 2.5662,
      "step": 123696
    },
    {
      "epoch": 23.75,
      "learning_rate": 0.001,
      "loss": 2.5706,
      "step": 123708
    },
    {
      "epoch": 23.76,
      "learning_rate": 0.001,
      "loss": 2.5614,
      "step": 123720
    },
    {
      "epoch": 23.76,
      "learning_rate": 0.001,
      "loss": 2.5663,
      "step": 123732
    },
    {
      "epoch": 23.76,
      "learning_rate": 0.001,
      "loss": 2.5601,
      "step": 123744
    },
    {
      "epoch": 23.76,
      "eval_ag_news_accuracy": 0.3225625,
      "eval_ag_news_bleu_score": 4.80911523662197,
      "eval_ag_news_bleu_score_sem": 0.1505158013521019,
      "eval_ag_news_emb_cos_sim": 0.8057507276535034,
      "eval_ag_news_emb_cos_sim_sem": 0.007469187443393206,
      "eval_ag_news_emb_top1_equal": 0.2109375,
      "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5592167377471924,
      "eval_ag_news_n_ngrams_match_1": 13.954,
      "eval_ag_news_n_ngrams_match_2": 3.088,
      "eval_ag_news_n_ngrams_match_3": 0.886,
      "eval_ag_news_num_pred_words": 46.306,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 35.13566592358565,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3499271773864675,
      "eval_ag_news_runtime": 10.5718,
      "eval_ag_news_samples_per_second": 47.295,
      "eval_ag_news_steps_per_second": 0.095,
      "eval_ag_news_token_set_f1": 0.3505775578588199,
      "eval_ag_news_token_set_f1_sem": 0.00446683082499616,
      "eval_ag_news_token_set_precision": 0.3341955923310428,
      "eval_ag_news_token_set_recall": 0.3857662798047629,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 123750
    },
    {
      "epoch": 23.76,
      "eval_anthropic_toxic_prompts_accuracy": 0.1138125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.0856771276902344,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12505592710514374,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6681952476501465,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009632829497469858,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2448768615722656,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.13,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.884,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.686,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.93,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.65855031214567,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21177156658064994,
      "eval_anthropic_toxic_prompts_runtime": 9.8606,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.707,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.101,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3542165080733708,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006705805135088895,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.43358594079213353,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32403467838846783,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 123750
    },
    {
      "epoch": 23.76,
      "eval_arxiv_accuracy": 0.34634375,
      "eval_arxiv_bleu_score": 4.323527819434282,
      "eval_arxiv_bleu_score_sem": 0.12690310676739336,
      "eval_arxiv_emb_cos_sim": 0.7569823265075684,
      "eval_arxiv_emb_cos_sim_sem": 0.007879461330410948,
      "eval_arxiv_emb_top1_equal": 0.3046875,
      "eval_arxiv_emb_top1_equal_sem": 0.04084279867618665,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.416527032852173,
      "eval_arxiv_n_ngrams_match_1": 14.87,
      "eval_arxiv_n_ngrams_match_2": 2.876,
      "eval_arxiv_n_ngrams_match_3": 0.66,
      "eval_arxiv_num_pred_words": 40.04,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 30.46343259022963,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.35730537813127394,
      "eval_arxiv_runtime": 10.7288,
      "eval_arxiv_samples_per_second": 46.604,
      "eval_arxiv_steps_per_second": 0.093,
      "eval_arxiv_token_set_f1": 0.35056932094447046,
      "eval_arxiv_token_set_f1_sem": 0.004136622701862446,
      "eval_arxiv_token_set_precision": 0.3013940086194369,
      "eval_arxiv_token_set_recall": 0.4352885692743516,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 123750
    },
    {
      "epoch": 23.76,
      "eval_python_code_alpaca_accuracy": 0.16028125,
      "eval_python_code_alpaca_bleu_score": 4.6366772816008766,
      "eval_python_code_alpaca_bleu_score_sem": 0.14784878135350046,
      "eval_python_code_alpaca_emb_cos_sim": 0.7635290622711182,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007488291819569785,
      "eval_python_code_alpaca_emb_top1_equal": 0.1640625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8861355781555176,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.85,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.9,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.932,
      "eval_python_code_alpaca_num_pred_words": 43.502,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.92391004382131,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3353624760878188,
      "eval_python_code_alpaca_runtime": 10.0937,
      "eval_python_code_alpaca_samples_per_second": 49.536,
      "eval_python_code_alpaca_steps_per_second": 0.099,
      "eval_python_code_alpaca_token_set_f1": 0.4776360116933921,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005389846905568517,
      "eval_python_code_alpaca_token_set_precision": 0.5378696106686256,
      "eval_python_code_alpaca_token_set_recall": 0.4511554207872903,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 123750
    },
    {
      "epoch": 23.76,
      "eval_wikibio_accuracy": 0.32315625,
      "eval_wikibio_bleu_score": 5.888987682259996,
      "eval_wikibio_bleu_score_sem": 0.2005888805823427,
      "eval_wikibio_emb_cos_sim": 0.7536141276359558,
      "eval_wikibio_emb_cos_sim_sem": 0.008231543590525305,
      "eval_wikibio_emb_top1_equal": 0.140625,
      "eval_wikibio_emb_top1_equal_sem": 0.030847557647994725,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.739074945449829,
      "eval_wikibio_n_ngrams_match_1": 10.212,
      "eval_wikibio_n_ngrams_match_2": 3.414,
      "eval_wikibio_n_ngrams_match_3": 1.21,
      "eval_wikibio_num_pred_words": 37.138,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 42.05906523425962,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.357356145874208,
      "eval_wikibio_runtime": 10.0528,
      "eval_wikibio_samples_per_second": 49.737,
      "eval_wikibio_steps_per_second": 0.099,
      "eval_wikibio_token_set_f1": 0.32031816975269517,
      "eval_wikibio_token_set_f1_sem": 0.005141235283817227,
      "eval_wikibio_token_set_precision": 0.331091781388553,
      "eval_wikibio_token_set_recall": 0.326432029902187,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 123750
    },
    {
      "epoch": 23.76,
      "eval_nq_accuracy": 0.52696875,
      "eval_nq_bleu_score": 11.512729350208728,
      "eval_nq_bleu_score_sem": 0.4686555999273394,
      "eval_nq_emb_cos_sim": 0.826134204864502,
      "eval_nq_emb_cos_sim_sem": 0.0073975634841604975,
      "eval_nq_emb_top1_equal": 0.296875,
      "eval_nq_emb_top1_equal_sem": 0.04054163310179599,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1931984424591064,
      "eval_nq_n_ngrams_match_1": 22.952,
      "eval_nq_n_ngrams_match_2": 8.41,
      "eval_nq_n_ngrams_match_3": 3.804,
      "eval_nq_num_pred_words": 48.844,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.963837632239624,
      "eval_nq_pred_num_tokens": 62.9921875,
      "eval_nq_rouge_score": 0.44703954207752905,
      "eval_nq_runtime": 10.7547,
      "eval_nq_samples_per_second": 46.491,
      "eval_nq_steps_per_second": 0.093,
      "eval_nq_token_set_f1": 0.4609720776791791,
      "eval_nq_token_set_f1_sem": 0.004923906721364817,
      "eval_nq_token_set_precision": 0.4177849225975492,
      "eval_nq_token_set_recall": 0.5238332287599875,
      "eval_nq_true_num_tokens": 64.0,
      "step": 123750
    },
    {
      "epoch": 23.76,
      "learning_rate": 0.001,
      "loss": 2.5682,
      "step": 123756
    },
    {
      "epoch": 23.76,
      "learning_rate": 0.001,
      "loss": 2.5746,
      "step": 123768
    },
    {
      "epoch": 23.77,
      "learning_rate": 0.001,
      "loss": 2.5595,
      "step": 123780
    },
    {
      "epoch": 23.77,
      "learning_rate": 0.001,
      "loss": 2.5702,
      "step": 123792
    },
    {
      "epoch": 23.77,
      "learning_rate": 0.001,
      "loss": 2.5694,
      "step": 123804
    },
    {
      "epoch": 23.77,
      "learning_rate": 0.001,
      "loss": 2.5686,
      "step": 123816
    },
    {
      "epoch": 23.78,
      "learning_rate": 0.001,
      "loss": 2.5638,
      "step": 123828
    },
    {
      "epoch": 23.78,
      "learning_rate": 0.001,
      "loss": 2.5677,
      "step": 123840
    },
    {
      "epoch": 23.78,
      "learning_rate": 0.001,
      "loss": 2.5593,
      "step": 123852
    },
    {
      "epoch": 23.78,
      "learning_rate": 0.001,
      "loss": 2.5647,
      "step": 123864
    },
    {
      "epoch": 23.79,
      "learning_rate": 0.001,
      "loss": 2.5577,
      "step": 123876
    },
    {
      "epoch": 23.79,
      "learning_rate": 0.001,
      "loss": 2.5725,
      "step": 123888
    },
    {
      "epoch": 23.79,
      "learning_rate": 0.001,
      "loss": 2.5622,
      "step": 123900
    },
    {
      "epoch": 23.79,
      "learning_rate": 0.001,
      "loss": 2.5652,
      "step": 123912
    },
    {
      "epoch": 23.79,
      "learning_rate": 0.001,
      "loss": 2.5678,
      "step": 123924
    },
    {
      "epoch": 23.8,
      "learning_rate": 0.001,
      "loss": 2.5684,
      "step": 123936
    },
    {
      "epoch": 23.8,
      "learning_rate": 0.001,
      "loss": 2.5557,
      "step": 123948
    },
    {
      "epoch": 23.8,
      "learning_rate": 0.001,
      "loss": 2.5616,
      "step": 123960
    },
    {
      "epoch": 23.8,
      "learning_rate": 0.001,
      "loss": 2.569,
      "step": 123972
    },
    {
      "epoch": 23.81,
      "learning_rate": 0.001,
      "loss": 2.558,
      "step": 123984
    },
    {
      "epoch": 23.81,
      "learning_rate": 0.001,
      "loss": 2.5703,
      "step": 123996
    },
    {
      "epoch": 23.81,
      "learning_rate": 0.001,
      "loss": 2.5633,
      "step": 124008
    },
    {
      "epoch": 23.81,
      "learning_rate": 0.001,
      "loss": 2.5713,
      "step": 124020
    },
    {
      "epoch": 23.82,
      "learning_rate": 0.001,
      "loss": 2.5782,
      "step": 124032
    },
    {
      "epoch": 23.82,
      "learning_rate": 0.001,
      "loss": 2.5581,
      "step": 124044
    },
    {
      "epoch": 23.82,
      "learning_rate": 0.001,
      "loss": 2.5647,
      "step": 124056
    },
    {
      "epoch": 23.82,
      "learning_rate": 0.001,
      "loss": 2.5773,
      "step": 124068
    },
    {
      "epoch": 23.82,
      "learning_rate": 0.001,
      "loss": 2.5589,
      "step": 124080
    },
    {
      "epoch": 23.83,
      "learning_rate": 0.001,
      "loss": 2.5624,
      "step": 124092
    },
    {
      "epoch": 23.83,
      "learning_rate": 0.001,
      "loss": 2.5612,
      "step": 124104
    },
    {
      "epoch": 23.83,
      "learning_rate": 0.001,
      "loss": 2.5672,
      "step": 124116
    },
    {
      "epoch": 23.83,
      "learning_rate": 0.001,
      "loss": 2.5567,
      "step": 124128
    },
    {
      "epoch": 23.84,
      "learning_rate": 0.001,
      "loss": 2.5658,
      "step": 124140
    },
    {
      "epoch": 23.84,
      "learning_rate": 0.001,
      "loss": 2.5625,
      "step": 124152
    },
    {
      "epoch": 23.84,
      "learning_rate": 0.001,
      "loss": 2.5605,
      "step": 124164
    },
    {
      "epoch": 23.84,
      "learning_rate": 0.001,
      "loss": 2.5666,
      "step": 124176
    },
    {
      "epoch": 23.85,
      "learning_rate": 0.001,
      "loss": 2.5646,
      "step": 124188
    },
    {
      "epoch": 23.85,
      "learning_rate": 0.001,
      "loss": 2.5505,
      "step": 124200
    },
    {
      "epoch": 23.85,
      "learning_rate": 0.001,
      "loss": 2.5662,
      "step": 124212
    },
    {
      "epoch": 23.85,
      "learning_rate": 0.001,
      "loss": 2.5705,
      "step": 124224
    },
    {
      "epoch": 23.85,
      "learning_rate": 0.001,
      "loss": 2.5672,
      "step": 124236
    },
    {
      "epoch": 23.86,
      "learning_rate": 0.001,
      "loss": 2.5709,
      "step": 124248
    },
    {
      "epoch": 23.86,
      "learning_rate": 0.001,
      "loss": 2.5598,
      "step": 124260
    },
    {
      "epoch": 23.86,
      "learning_rate": 0.001,
      "loss": 2.5641,
      "step": 124272
    },
    {
      "epoch": 23.86,
      "learning_rate": 0.001,
      "loss": 2.5646,
      "step": 124284
    },
    {
      "epoch": 23.87,
      "learning_rate": 0.001,
      "loss": 2.5621,
      "step": 124296
    },
    {
      "epoch": 23.87,
      "learning_rate": 0.001,
      "loss": 2.5594,
      "step": 124308
    },
    {
      "epoch": 23.87,
      "learning_rate": 0.001,
      "loss": 2.5746,
      "step": 124320
    },
    {
      "epoch": 23.87,
      "learning_rate": 0.001,
      "loss": 2.5699,
      "step": 124332
    },
    {
      "epoch": 23.88,
      "learning_rate": 0.001,
      "loss": 2.5613,
      "step": 124344
    },
    {
      "epoch": 23.88,
      "learning_rate": 0.001,
      "loss": 2.5679,
      "step": 124356
    },
    {
      "epoch": 23.88,
      "learning_rate": 0.001,
      "loss": 2.574,
      "step": 124368
    },
    {
      "epoch": 23.88,
      "eval_ag_news_accuracy": 0.32234375,
      "eval_ag_news_bleu_score": 4.616771394251265,
      "eval_ag_news_bleu_score_sem": 0.14110397244622383,
      "eval_ag_news_emb_cos_sim": 0.8145469427108765,
      "eval_ag_news_emb_cos_sim_sem": 0.0076602694007404555,
      "eval_ag_news_emb_top1_equal": 0.2578125,
      "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5534090995788574,
      "eval_ag_news_n_ngrams_match_1": 14.004,
      "eval_ag_news_n_ngrams_match_2": 3.006,
      "eval_ag_news_n_ngrams_match_3": 0.79,
      "eval_ag_news_num_pred_words": 46.486,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 34.93220208316065,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3487744780063735,
      "eval_ag_news_runtime": 11.7613,
      "eval_ag_news_samples_per_second": 42.512,
      "eval_ag_news_steps_per_second": 0.085,
      "eval_ag_news_token_set_f1": 0.34724649744661784,
      "eval_ag_news_token_set_f1_sem": 0.004423211016564372,
      "eval_ag_news_token_set_precision": 0.3315374911371905,
      "eval_ag_news_token_set_recall": 0.3822301581506783,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 124375
    },
    {
      "epoch": 23.88,
      "eval_anthropic_toxic_prompts_accuracy": 0.1145,
      "eval_anthropic_toxic_prompts_bleu_score": 3.022738651020741,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11094835640131087,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6775563955307007,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009782226246006309,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.211892604827881,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.188,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.908,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.688,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.204,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.82602764957878,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21259204565192613,
      "eval_anthropic_toxic_prompts_runtime": 9.7955,
      "eval_anthropic_toxic_prompts_samples_per_second": 51.044,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.102,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3537173268028458,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006601718683755981,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4370176657236675,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32217223297445924,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 124375
    },
    {
      "epoch": 23.88,
      "eval_arxiv_accuracy": 0.3485625,
      "eval_arxiv_bleu_score": 4.317926980658027,
      "eval_arxiv_bleu_score_sem": 0.13168929178770322,
      "eval_arxiv_emb_cos_sim": 0.7690600752830505,
      "eval_arxiv_emb_cos_sim_sem": 0.006692548782971978,
      "eval_arxiv_emb_top1_equal": 0.28125,
      "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.393745183944702,
      "eval_arxiv_n_ngrams_match_1": 15.034,
      "eval_arxiv_n_ngrams_match_2": 2.898,
      "eval_arxiv_n_ngrams_match_3": 0.66,
      "eval_arxiv_num_pred_words": 40.74,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.777265031789824,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3554842152864838,
      "eval_arxiv_runtime": 10.212,
      "eval_arxiv_samples_per_second": 48.962,
      "eval_arxiv_steps_per_second": 0.098,
      "eval_arxiv_token_set_f1": 0.35387775035000996,
      "eval_arxiv_token_set_f1_sem": 0.0042370892249581765,
      "eval_arxiv_token_set_precision": 0.3028047192261118,
      "eval_arxiv_token_set_recall": 0.4465013605988915,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 124375
    },
    {
      "epoch": 23.88,
      "eval_python_code_alpaca_accuracy": 0.1601875,
      "eval_python_code_alpaca_bleu_score": 4.572525873858984,
      "eval_python_code_alpaca_bleu_score_sem": 0.14368911937320733,
      "eval_python_code_alpaca_emb_cos_sim": 0.7642950415611267,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007458241116581517,
      "eval_python_code_alpaca_emb_top1_equal": 0.1328125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8737761974334717,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.884,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.918,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.972,
      "eval_python_code_alpaca_num_pred_words": 44.026,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.703744970135237,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33411647224285534,
      "eval_python_code_alpaca_runtime": 9.9246,
      "eval_python_code_alpaca_samples_per_second": 50.38,
      "eval_python_code_alpaca_steps_per_second": 0.101,
      "eval_python_code_alpaca_token_set_f1": 0.4754079026373771,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005461751353869362,
      "eval_python_code_alpaca_token_set_precision": 0.5384331032914961,
      "eval_python_code_alpaca_token_set_recall": 0.4463353592644423,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 124375
    },
    {
      "epoch": 23.88,
      "eval_wikibio_accuracy": 0.32553125,
      "eval_wikibio_bleu_score": 6.029396731199813,
      "eval_wikibio_bleu_score_sem": 0.22558600661844924,
      "eval_wikibio_emb_cos_sim": 0.745415210723877,
      "eval_wikibio_emb_cos_sim_sem": 0.00891533669494408,
      "eval_wikibio_emb_top1_equal": 0.171875,
      "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.705982208251953,
      "eval_wikibio_n_ngrams_match_1": 10.118,
      "eval_wikibio_n_ngrams_match_2": 3.346,
      "eval_wikibio_n_ngrams_match_3": 1.212,
      "eval_wikibio_num_pred_words": 36.508,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 40.689993743439125,
      "eval_wikibio_pred_num_tokens": 62.9765625,
      "eval_wikibio_rouge_score": 0.35712811986607584,
      "eval_wikibio_runtime": 9.8765,
      "eval_wikibio_samples_per_second": 50.625,
      "eval_wikibio_steps_per_second": 0.101,
      "eval_wikibio_token_set_f1": 0.32024821143729626,
      "eval_wikibio_token_set_f1_sem": 0.005385370485516734,
      "eval_wikibio_token_set_precision": 0.3303363399386787,
      "eval_wikibio_token_set_recall": 0.32362681929192155,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 124375
    },
    {
      "epoch": 23.88,
      "eval_nq_accuracy": 0.52834375,
      "eval_nq_bleu_score": 11.565532244966526,
      "eval_nq_bleu_score_sem": 0.477540407621757,
      "eval_nq_emb_cos_sim": 0.8256431221961975,
      "eval_nq_emb_cos_sim_sem": 0.0073839790280498295,
      "eval_nq_emb_top1_equal": 0.3203125,
      "eval_nq_emb_top1_equal_sem": 0.041403754790620424,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1905767917633057,
      "eval_nq_n_ngrams_match_1": 22.94,
      "eval_nq_n_ngrams_match_2": 8.388,
      "eval_nq_n_ngrams_match_3": 3.882,
      "eval_nq_num_pred_words": 49.17,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.940368358634883,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4452944172525074,
      "eval_nq_runtime": 10.3096,
      "eval_nq_samples_per_second": 48.498,
      "eval_nq_steps_per_second": 0.097,
      "eval_nq_token_set_f1": 0.4601880192052505,
      "eval_nq_token_set_f1_sem": 0.00493523955955324,
      "eval_nq_token_set_precision": 0.4178062410098839,
      "eval_nq_token_set_recall": 0.5191908223833593,
      "eval_nq_true_num_tokens": 64.0,
      "step": 124375
    },
    {
      "epoch": 23.88,
      "learning_rate": 0.001,
      "loss": 2.572,
      "step": 124380
    },
    {
      "epoch": 23.88,
      "learning_rate": 0.001,
      "loss": 2.5604,
      "step": 124392
    },
    {
      "epoch": 23.89,
      "learning_rate": 0.001,
      "loss": 2.5591,
      "step": 124404
    },
    {
      "epoch": 23.89,
      "learning_rate": 0.001,
      "loss": 2.5682,
      "step": 124416
    },
    {
      "epoch": 23.89,
      "learning_rate": 0.001,
      "loss": 2.5727,
      "step": 124428
    },
    {
      "epoch": 23.89,
      "learning_rate": 0.001,
      "loss": 2.5738,
      "step": 124440
    },
    {
      "epoch": 23.9,
      "learning_rate": 0.001,
      "loss": 2.5661,
      "step": 124452
    },
    {
      "epoch": 23.9,
      "learning_rate": 0.001,
      "loss": 2.5638,
      "step": 124464
    },
    {
      "epoch": 23.9,
      "learning_rate": 0.001,
      "loss": 2.563,
      "step": 124476
    },
    {
      "epoch": 23.9,
      "learning_rate": 0.001,
      "loss": 2.5683,
      "step": 124488
    },
    {
      "epoch": 23.91,
      "learning_rate": 0.001,
      "loss": 2.569,
      "step": 124500
    },
    {
      "epoch": 23.91,
      "learning_rate": 0.001,
      "loss": 2.5746,
      "step": 124512
    },
    {
      "epoch": 23.91,
      "learning_rate": 0.001,
      "loss": 2.5711,
      "step": 124524
    },
    {
      "epoch": 23.91,
      "learning_rate": 0.001,
      "loss": 2.5689,
      "step": 124536
    },
    {
      "epoch": 23.91,
      "learning_rate": 0.001,
      "loss": 2.5742,
      "step": 124548
    },
    {
      "epoch": 23.92,
      "learning_rate": 0.001,
      "loss": 2.5704,
      "step": 124560
    },
    {
      "epoch": 23.92,
      "learning_rate": 0.001,
      "loss": 2.5727,
      "step": 124572
    },
    {
      "epoch": 23.92,
      "learning_rate": 0.001,
      "loss": 2.5565,
      "step": 124584
    },
    {
      "epoch": 23.92,
      "learning_rate": 0.001,
      "loss": 2.5661,
      "step": 124596
    },
    {
      "epoch": 23.93,
      "learning_rate": 0.001,
      "loss": 2.5704,
      "step": 124608
    },
    {
      "epoch": 23.93,
      "learning_rate": 0.001,
      "loss": 2.5619,
      "step": 124620
    },
    {
      "epoch": 23.93,
      "learning_rate": 0.001,
      "loss": 2.5582,
      "step": 124632
    },
    {
      "epoch": 23.93,
      "learning_rate": 0.001,
      "loss": 2.567,
      "step": 124644
    },
    {
      "epoch": 23.94,
      "learning_rate": 0.001,
      "loss": 2.5537,
      "step": 124656
    },
    {
      "epoch": 23.94,
      "learning_rate": 0.001,
      "loss": 2.5605,
      "step": 124668
    },
    {
      "epoch": 23.94,
      "learning_rate": 0.001,
      "loss": 2.5627,
      "step": 124680
    },
    {
      "epoch": 23.94,
      "learning_rate": 0.001,
      "loss": 2.5539,
      "step": 124692
    },
    {
      "epoch": 23.94,
      "learning_rate": 0.001,
      "loss": 2.5644,
      "step": 124704
    },
    {
      "epoch": 23.95,
      "learning_rate": 0.001,
      "loss": 2.5704,
      "step": 124716
    },
    {
      "epoch": 23.95,
      "learning_rate": 0.001,
      "loss": 2.5654,
      "step": 124728
    },
    {
      "epoch": 23.95,
      "learning_rate": 0.001,
      "loss": 2.5584,
      "step": 124740
    },
    {
      "epoch": 23.95,
      "learning_rate": 0.001,
      "loss": 2.5628,
      "step": 124752
    },
    {
      "epoch": 23.96,
      "learning_rate": 0.001,
      "loss": 2.5751,
      "step": 124764
    },
    {
      "epoch": 23.96,
      "learning_rate": 0.001,
      "loss": 2.5751,
      "step": 124776
    },
    {
      "epoch": 23.96,
      "learning_rate": 0.001,
      "loss": 2.5658,
      "step": 124788
    },
    {
      "epoch": 23.96,
      "learning_rate": 0.001,
      "loss": 2.5625,
      "step": 124800
    },
    {
      "epoch": 23.97,
      "learning_rate": 0.001,
      "loss": 2.575,
      "step": 124812
    },
    {
      "epoch": 23.97,
      "learning_rate": 0.001,
      "loss": 2.5581,
      "step": 124824
    },
    {
      "epoch": 23.97,
      "learning_rate": 0.001,
      "loss": 2.5606,
      "step": 124836
    },
    {
      "epoch": 23.97,
      "learning_rate": 0.001,
      "loss": 2.5639,
      "step": 124848
    },
    {
      "epoch": 23.97,
      "learning_rate": 0.001,
      "loss": 2.5748,
      "step": 124860
    },
    {
      "epoch": 23.98,
      "learning_rate": 0.001,
      "loss": 2.5665,
      "step": 124872
    },
    {
      "epoch": 23.98,
      "learning_rate": 0.001,
      "loss": 2.5652,
      "step": 124884
    },
    {
      "epoch": 23.98,
      "learning_rate": 0.001,
      "loss": 2.5679,
      "step": 124896
    },
    {
      "epoch": 23.98,
      "learning_rate": 0.001,
      "loss": 2.5649,
      "step": 124908
    },
    {
      "epoch": 23.99,
      "learning_rate": 0.001,
      "loss": 2.5701,
      "step": 124920
    },
    {
      "epoch": 23.99,
      "learning_rate": 0.001,
      "loss": 2.5732,
      "step": 124932
    },
    {
      "epoch": 23.99,
      "learning_rate": 0.001,
      "loss": 2.5717,
      "step": 124944
    },
    {
      "epoch": 23.99,
      "learning_rate": 0.001,
      "loss": 2.5672,
      "step": 124956
    },
    {
      "epoch": 24.0,
      "learning_rate": 0.001,
      "loss": 2.5645,
      "step": 124968
    },
    {
      "epoch": 24.0,
      "learning_rate": 0.001,
      "loss": 2.5644,
      "step": 124980
    },
    {
      "epoch": 24.0,
      "learning_rate": 0.001,
      "loss": 2.5666,
      "step": 124992
    },
    {
      "epoch": 24.0,
      "eval_ag_news_accuracy": 0.32259375,
      "eval_ag_news_bleu_score": 4.842368892698552,
      "eval_ag_news_bleu_score_sem": 0.1488780661338448,
      "eval_ag_news_emb_cos_sim": 0.8147686719894409,
      "eval_ag_news_emb_cos_sim_sem": 0.006900436796162947,
      "eval_ag_news_emb_top1_equal": 0.2421875,
      "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.559001922607422,
      "eval_ag_news_n_ngrams_match_1": 14.002,
      "eval_ag_news_n_ngrams_match_2": 3.146,
      "eval_ag_news_n_ngrams_match_3": 0.862,
      "eval_ag_news_num_pred_words": 46.428,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 35.128119061218506,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3476293266622733,
      "eval_ag_news_runtime": 10.414,
      "eval_ag_news_samples_per_second": 48.012,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.35248336170187944,
      "eval_ag_news_token_set_f1_sem": 0.004469338978748432,
      "eval_ag_news_token_set_precision": 0.3357517603114475,
      "eval_ag_news_token_set_recall": 0.38874662374982394,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 125000
    },
    {
      "epoch": 24.0,
      "eval_anthropic_toxic_prompts_accuracy": 0.1151875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.0859499989740744,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10905266666044433,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6744695901870728,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009016824989069298,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2217483520507812,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.196,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.93,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.69,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.306,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.071916421050688,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21347986523836338,
      "eval_anthropic_toxic_prompts_runtime": 10.1658,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.184,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.098,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3619602511688941,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006730535266363549,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.436739302416979,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3369953895690069,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 125000
    },
    {
      "epoch": 24.0,
      "eval_arxiv_accuracy": 0.348,
      "eval_arxiv_bleu_score": 4.24697386896977,
      "eval_arxiv_bleu_score_sem": 0.12353276395941543,
      "eval_arxiv_emb_cos_sim": 0.7643498182296753,
      "eval_arxiv_emb_cos_sim_sem": 0.006785813831826014,
      "eval_arxiv_emb_top1_equal": 0.2734375,
      "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4126248359680176,
      "eval_arxiv_n_ngrams_match_1": 14.73,
      "eval_arxiv_n_ngrams_match_2": 2.87,
      "eval_arxiv_n_ngrams_match_3": 0.644,
      "eval_arxiv_num_pred_words": 39.556,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 30.344789912587892,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3547536572828171,
      "eval_arxiv_runtime": 10.7939,
      "eval_arxiv_samples_per_second": 46.322,
      "eval_arxiv_steps_per_second": 0.093,
      "eval_arxiv_token_set_f1": 0.34956129651660545,
      "eval_arxiv_token_set_f1_sem": 0.004269246417330083,
      "eval_arxiv_token_set_precision": 0.29812225878968807,
      "eval_arxiv_token_set_recall": 0.4441866052858707,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 125000
    },
    {
      "epoch": 24.0,
      "eval_python_code_alpaca_accuracy": 0.1606875,
      "eval_python_code_alpaca_bleu_score": 4.505426017777299,
      "eval_python_code_alpaca_bleu_score_sem": 0.14411098878649153,
      "eval_python_code_alpaca_emb_cos_sim": 0.7475378513336182,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.0115773441592378,
      "eval_python_code_alpaca_emb_top1_equal": 0.1484375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8584771156311035,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.712,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.864,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.956,
      "eval_python_code_alpaca_num_pred_words": 43.454,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.434955288060664,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3282754389076918,
      "eval_python_code_alpaca_runtime": 10.0518,
      "eval_python_code_alpaca_samples_per_second": 49.742,
      "eval_python_code_alpaca_steps_per_second": 0.099,
      "eval_python_code_alpaca_token_set_f1": 0.47499117136433716,
      "eval_python_code_alpaca_token_set_f1_sem": 0.006080313457766385,
      "eval_python_code_alpaca_token_set_precision": 0.5285453966285859,
      "eval_python_code_alpaca_token_set_recall": 0.4507578398211699,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 125000
    },
    {
      "epoch": 24.0,
      "eval_wikibio_accuracy": 0.319125,
      "eval_wikibio_bleu_score": 5.8327788162543035,
      "eval_wikibio_bleu_score_sem": 0.21357066509556064,
      "eval_wikibio_emb_cos_sim": 0.7230526208877563,
      "eval_wikibio_emb_cos_sim_sem": 0.01157876134701898,
      "eval_wikibio_emb_top1_equal": 0.171875,
      "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.780616521835327,
      "eval_wikibio_n_ngrams_match_1": 10.08,
      "eval_wikibio_n_ngrams_match_2": 3.404,
      "eval_wikibio_n_ngrams_match_3": 1.224,
      "eval_wikibio_num_pred_words": 36.83,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 43.84306361097375,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.35287446014234436,
      "eval_wikibio_runtime": 9.9376,
      "eval_wikibio_samples_per_second": 50.314,
      "eval_wikibio_steps_per_second": 0.101,
      "eval_wikibio_token_set_f1": 0.3160235209487705,
      "eval_wikibio_token_set_f1_sem": 0.005651608281351785,
      "eval_wikibio_token_set_precision": 0.326063145200404,
      "eval_wikibio_token_set_recall": 0.3222274321290726,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 125000
    },
    {
      "epoch": 24.0,
      "eval_nq_accuracy": 0.526,
      "eval_nq_bleu_score": 11.934141509486224,
      "eval_nq_bleu_score_sem": 0.48057588393319084,
      "eval_nq_emb_cos_sim": 0.8391439914703369,
      "eval_nq_emb_cos_sim_sem": 0.006317563661958997,
      "eval_nq_emb_top1_equal": 0.296875,
      "eval_nq_emb_top1_equal_sem": 0.04054163310179599,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1919448375701904,
      "eval_nq_n_ngrams_match_1": 23.008,
      "eval_nq_n_ngrams_match_2": 8.51,
      "eval_nq_n_ngrams_match_3": 4.036,
      "eval_nq_num_pred_words": 48.806,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.95260756206653,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4488428481016429,
      "eval_nq_runtime": 10.3918,
      "eval_nq_samples_per_second": 48.115,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.46076825900826884,
      "eval_nq_token_set_f1_sem": 0.004848203609598437,
      "eval_nq_token_set_precision": 0.4170604278959615,
      "eval_nq_token_set_recall": 0.5216247249282614,
      "eval_nq_true_num_tokens": 64.0,
      "step": 125000
    },
    {
      "epoch": 24.0,
      "learning_rate": 0.001,
      "loss": 2.5511,
      "step": 125004
    },
    {
      "epoch": 24.0,
      "learning_rate": 0.001,
      "loss": 2.5528,
      "step": 125016
    },
    {
      "epoch": 24.01,
      "learning_rate": 0.001,
      "loss": 2.5507,
      "step": 125028
    },
    {
      "epoch": 24.01,
      "learning_rate": 0.001,
      "loss": 2.5555,
      "step": 125040
    },
    {
      "epoch": 24.01,
      "learning_rate": 0.001,
      "loss": 2.5484,
      "step": 125052
    },
    {
      "epoch": 24.01,
      "learning_rate": 0.001,
      "loss": 2.5503,
      "step": 125064
    },
    {
      "epoch": 24.02,
      "learning_rate": 0.001,
      "loss": 2.5504,
      "step": 125076
    },
    {
      "epoch": 24.02,
      "learning_rate": 0.001,
      "loss": 2.5529,
      "step": 125088
    },
    {
      "epoch": 24.02,
      "learning_rate": 0.001,
      "loss": 2.5475,
      "step": 125100
    },
    {
      "epoch": 24.02,
      "learning_rate": 0.001,
      "loss": 2.554,
      "step": 125112
    },
    {
      "epoch": 24.03,
      "learning_rate": 0.001,
      "loss": 2.5523,
      "step": 125124
    },
    {
      "epoch": 24.03,
      "learning_rate": 0.001,
      "loss": 2.5418,
      "step": 125136
    },
    {
      "epoch": 24.03,
      "learning_rate": 0.001,
      "loss": 2.5473,
      "step": 125148
    },
    {
      "epoch": 24.03,
      "learning_rate": 0.001,
      "loss": 2.5567,
      "step": 125160
    },
    {
      "epoch": 24.03,
      "learning_rate": 0.001,
      "loss": 2.5555,
      "step": 125172
    },
    {
      "epoch": 24.04,
      "learning_rate": 0.001,
      "loss": 2.5669,
      "step": 125184
    },
    {
      "epoch": 24.04,
      "learning_rate": 0.001,
      "loss": 2.5535,
      "step": 125196
    },
    {
      "epoch": 24.04,
      "learning_rate": 0.001,
      "loss": 2.5501,
      "step": 125208
    },
    {
      "epoch": 24.04,
      "learning_rate": 0.001,
      "loss": 2.5535,
      "step": 125220
    },
    {
      "epoch": 24.05,
      "learning_rate": 0.001,
      "loss": 2.5534,
      "step": 125232
    },
    {
      "epoch": 24.05,
      "learning_rate": 0.001,
      "loss": 2.5501,
      "step": 125244
    },
    {
      "epoch": 24.05,
      "learning_rate": 0.001,
      "loss": 2.5645,
      "step": 125256
    },
    {
      "epoch": 24.05,
      "learning_rate": 0.001,
      "loss": 2.5464,
      "step": 125268
    },
    {
      "epoch": 24.06,
      "learning_rate": 0.001,
      "loss": 2.5487,
      "step": 125280
    },
    {
      "epoch": 24.06,
      "learning_rate": 0.001,
      "loss": 2.5486,
      "step": 125292
    },
    {
      "epoch": 24.06,
      "learning_rate": 0.001,
      "loss": 2.5476,
      "step": 125304
    },
    {
      "epoch": 24.06,
      "learning_rate": 0.001,
      "loss": 2.5545,
      "step": 125316
    },
    {
      "epoch": 24.06,
      "learning_rate": 0.001,
      "loss": 2.5599,
      "step": 125328
    },
    {
      "epoch": 24.07,
      "learning_rate": 0.001,
      "loss": 2.5502,
      "step": 125340
    },
    {
      "epoch": 24.07,
      "learning_rate": 0.001,
      "loss": 2.5468,
      "step": 125352
    },
    {
      "epoch": 24.07,
      "learning_rate": 0.001,
      "loss": 2.5612,
      "step": 125364
    },
    {
      "epoch": 24.07,
      "learning_rate": 0.001,
      "loss": 2.5554,
      "step": 125376
    },
    {
      "epoch": 24.08,
      "learning_rate": 0.001,
      "loss": 2.5581,
      "step": 125388
    },
    {
      "epoch": 24.08,
      "learning_rate": 0.001,
      "loss": 2.5493,
      "step": 125400
    },
    {
      "epoch": 24.08,
      "learning_rate": 0.001,
      "loss": 2.5549,
      "step": 125412
    },
    {
      "epoch": 24.08,
      "learning_rate": 0.001,
      "loss": 2.5563,
      "step": 125424
    },
    {
      "epoch": 24.09,
      "learning_rate": 0.001,
      "loss": 2.5495,
      "step": 125436
    },
    {
      "epoch": 24.09,
      "learning_rate": 0.001,
      "loss": 2.5482,
      "step": 125448
    },
    {
      "epoch": 24.09,
      "learning_rate": 0.001,
      "loss": 2.5608,
      "step": 125460
    },
    {
      "epoch": 24.09,
      "learning_rate": 0.001,
      "loss": 2.5598,
      "step": 125472
    },
    {
      "epoch": 24.09,
      "learning_rate": 0.001,
      "loss": 2.545,
      "step": 125484
    },
    {
      "epoch": 24.1,
      "learning_rate": 0.001,
      "loss": 2.5632,
      "step": 125496
    },
    {
      "epoch": 24.1,
      "learning_rate": 0.001,
      "loss": 2.5478,
      "step": 125508
    },
    {
      "epoch": 24.1,
      "learning_rate": 0.001,
      "loss": 2.557,
      "step": 125520
    },
    {
      "epoch": 24.1,
      "learning_rate": 0.001,
      "loss": 2.5501,
      "step": 125532
    },
    {
      "epoch": 24.11,
      "learning_rate": 0.001,
      "loss": 2.5575,
      "step": 125544
    },
    {
      "epoch": 24.11,
      "learning_rate": 0.001,
      "loss": 2.5653,
      "step": 125556
    },
    {
      "epoch": 24.11,
      "learning_rate": 0.001,
      "loss": 2.5526,
      "step": 125568
    },
    {
      "epoch": 24.11,
      "learning_rate": 0.001,
      "loss": 2.5608,
      "step": 125580
    },
    {
      "epoch": 24.12,
      "learning_rate": 0.001,
      "loss": 2.5504,
      "step": 125592
    },
    {
      "epoch": 24.12,
      "learning_rate": 0.001,
      "loss": 2.5412,
      "step": 125604
    },
    {
      "epoch": 24.12,
      "learning_rate": 0.001,
      "loss": 2.5538,
      "step": 125616
    },
    {
      "epoch": 24.12,
      "eval_ag_news_accuracy": 0.32103125,
      "eval_ag_news_bleu_score": 4.784798295141622,
      "eval_ag_news_bleu_score_sem": 0.15427868540256148,
      "eval_ag_news_emb_cos_sim": 0.809669017791748,
      "eval_ag_news_emb_cos_sim_sem": 0.006527537944151255,
      "eval_ag_news_emb_top1_equal": 0.2734375,
      "eval_ag_news_emb_top1_equal_sem": 0.03955156411760461,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.568237543106079,
      "eval_ag_news_n_ngrams_match_1": 13.996,
      "eval_ag_news_n_ngrams_match_2": 3.092,
      "eval_ag_news_n_ngrams_match_3": 0.882,
      "eval_ag_news_num_pred_words": 46.874,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 35.45405181657247,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3442876372138963,
      "eval_ag_news_runtime": 10.5068,
      "eval_ag_news_samples_per_second": 47.588,
      "eval_ag_news_steps_per_second": 0.095,
      "eval_ag_news_token_set_f1": 0.34793921692200586,
      "eval_ag_news_token_set_f1_sem": 0.004441752454298163,
      "eval_ag_news_token_set_precision": 0.33362705228095707,
      "eval_ag_news_token_set_recall": 0.38219241731187903,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 125625
    },
    {
      "epoch": 24.12,
      "eval_anthropic_toxic_prompts_accuracy": 0.11428125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1322644439317546,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11562268752709737,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6842227578163147,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008113772124895707,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2619502544403076,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.25,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.95,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.702,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.448,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.10038993982719,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2141068707903946,
      "eval_anthropic_toxic_prompts_runtime": 10.0407,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.798,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.1,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3561577536101551,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006522907534362738,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4431010390258805,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3242378106576544,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 125625
    },
    {
      "epoch": 24.12,
      "eval_arxiv_accuracy": 0.3483125,
      "eval_arxiv_bleu_score": 4.498381604918457,
      "eval_arxiv_bleu_score_sem": 0.13068081532478143,
      "eval_arxiv_emb_cos_sim": 0.768142580986023,
      "eval_arxiv_emb_cos_sim_sem": 0.007198875996290555,
      "eval_arxiv_emb_top1_equal": 0.28125,
      "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4143869876861572,
      "eval_arxiv_n_ngrams_match_1": 15.354,
      "eval_arxiv_n_ngrams_match_2": 3.072,
      "eval_arxiv_n_ngrams_match_3": 0.724,
      "eval_arxiv_num_pred_words": 41.068,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 30.398309176951866,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36447736348630155,
      "eval_arxiv_runtime": 10.26,
      "eval_arxiv_samples_per_second": 48.733,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.36006858998921565,
      "eval_arxiv_token_set_f1_sem": 0.0041938916836464805,
      "eval_arxiv_token_set_precision": 0.3108529444811535,
      "eval_arxiv_token_set_recall": 0.44454270881214014,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 125625
    },
    {
      "epoch": 24.12,
      "eval_python_code_alpaca_accuracy": 0.16021875,
      "eval_python_code_alpaca_bleu_score": 4.505963201119526,
      "eval_python_code_alpaca_bleu_score_sem": 0.13186304346315086,
      "eval_python_code_alpaca_emb_cos_sim": 0.7627312541007996,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.00802290022678368,
      "eval_python_code_alpaca_emb_top1_equal": 0.109375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.9129910469055176,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.9,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.914,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.912,
      "eval_python_code_alpaca_num_pred_words": 42.998,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.411786807775307,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3383525506797753,
      "eval_python_code_alpaca_runtime": 9.655,
      "eval_python_code_alpaca_samples_per_second": 51.787,
      "eval_python_code_alpaca_steps_per_second": 0.104,
      "eval_python_code_alpaca_token_set_f1": 0.4830843485849761,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005552455657271146,
      "eval_python_code_alpaca_token_set_precision": 0.5403298221735762,
      "eval_python_code_alpaca_token_set_recall": 0.4588261703302595,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 125625
    },
    {
      "epoch": 24.12,
      "eval_wikibio_accuracy": 0.32375,
      "eval_wikibio_bleu_score": 6.29947775891366,
      "eval_wikibio_bleu_score_sem": 0.23459476458042533,
      "eval_wikibio_emb_cos_sim": 0.7467377185821533,
      "eval_wikibio_emb_cos_sim_sem": 0.008959534909597342,
      "eval_wikibio_emb_top1_equal": 0.2265625,
      "eval_wikibio_emb_top1_equal_sem": 0.03714537682851538,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7882604598999023,
      "eval_wikibio_n_ngrams_match_1": 10.436,
      "eval_wikibio_n_ngrams_match_2": 3.57,
      "eval_wikibio_n_ngrams_match_3": 1.36,
      "eval_wikibio_num_pred_words": 37.06,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 44.17948141413601,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.363930063425463,
      "eval_wikibio_runtime": 10.4021,
      "eval_wikibio_samples_per_second": 48.067,
      "eval_wikibio_steps_per_second": 0.096,
      "eval_wikibio_token_set_f1": 0.3277827524552823,
      "eval_wikibio_token_set_f1_sem": 0.004963359708103471,
      "eval_wikibio_token_set_precision": 0.3390005311193062,
      "eval_wikibio_token_set_recall": 0.3305002920753566,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 125625
    },
    {
      "epoch": 24.12,
      "eval_nq_accuracy": 0.5286875,
      "eval_nq_bleu_score": 11.848439929969501,
      "eval_nq_bleu_score_sem": 0.482556589758612,
      "eval_nq_emb_cos_sim": 0.8347615003585815,
      "eval_nq_emb_cos_sim_sem": 0.00660855538224583,
      "eval_nq_emb_top1_equal": 0.296875,
      "eval_nq_emb_top1_equal_sem": 0.04054163310179599,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1916778087615967,
      "eval_nq_n_ngrams_match_1": 23.158,
      "eval_nq_n_ngrams_match_2": 8.594,
      "eval_nq_n_ngrams_match_3": 3.97,
      "eval_nq_num_pred_words": 49.124,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.950217277087102,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.44821869646121165,
      "eval_nq_runtime": 11.3716,
      "eval_nq_samples_per_second": 43.969,
      "eval_nq_steps_per_second": 0.088,
      "eval_nq_token_set_f1": 0.462282850068921,
      "eval_nq_token_set_f1_sem": 0.004955139364647121,
      "eval_nq_token_set_precision": 0.42033641053801074,
      "eval_nq_token_set_recall": 0.5212534612496856,
      "eval_nq_true_num_tokens": 64.0,
      "step": 125625
    },
    {
      "epoch": 24.12,
      "learning_rate": 0.001,
      "loss": 2.5563,
      "step": 125628
    },
    {
      "epoch": 24.12,
      "learning_rate": 0.001,
      "loss": 2.5562,
      "step": 125640
    },
    {
      "epoch": 24.13,
      "learning_rate": 0.001,
      "loss": 2.5577,
      "step": 125652
    },
    {
      "epoch": 24.13,
      "learning_rate": 0.001,
      "loss": 2.5601,
      "step": 125664
    },
    {
      "epoch": 24.13,
      "learning_rate": 0.001,
      "loss": 2.547,
      "step": 125676
    },
    {
      "epoch": 24.13,
      "learning_rate": 0.001,
      "loss": 2.559,
      "step": 125688
    },
    {
      "epoch": 24.14,
      "learning_rate": 0.001,
      "loss": 2.5496,
      "step": 125700
    },
    {
      "epoch": 24.14,
      "learning_rate": 0.001,
      "loss": 2.5583,
      "step": 125712
    },
    {
      "epoch": 24.14,
      "learning_rate": 0.001,
      "loss": 2.5508,
      "step": 125724
    },
    {
      "epoch": 24.14,
      "learning_rate": 0.001,
      "loss": 2.5603,
      "step": 125736
    },
    {
      "epoch": 24.15,
      "learning_rate": 0.001,
      "loss": 2.5611,
      "step": 125748
    },
    {
      "epoch": 24.15,
      "learning_rate": 0.001,
      "loss": 2.5584,
      "step": 125760
    },
    {
      "epoch": 24.15,
      "learning_rate": 0.001,
      "loss": 2.5513,
      "step": 125772
    },
    {
      "epoch": 24.15,
      "learning_rate": 0.001,
      "loss": 2.5561,
      "step": 125784
    },
    {
      "epoch": 24.15,
      "learning_rate": 0.001,
      "loss": 2.5501,
      "step": 125796
    },
    {
      "epoch": 24.16,
      "learning_rate": 0.001,
      "loss": 2.553,
      "step": 125808
    },
    {
      "epoch": 24.16,
      "learning_rate": 0.001,
      "loss": 2.5583,
      "step": 125820
    },
    {
      "epoch": 24.16,
      "learning_rate": 0.001,
      "loss": 2.5551,
      "step": 125832
    },
    {
      "epoch": 24.16,
      "learning_rate": 0.001,
      "loss": 2.5508,
      "step": 125844
    },
    {
      "epoch": 24.17,
      "learning_rate": 0.001,
      "loss": 2.5529,
      "step": 125856
    },
    {
      "epoch": 24.17,
      "learning_rate": 0.001,
      "loss": 2.561,
      "step": 125868
    },
    {
      "epoch": 24.17,
      "learning_rate": 0.001,
      "loss": 2.5617,
      "step": 125880
    },
    {
      "epoch": 24.17,
      "learning_rate": 0.001,
      "loss": 2.5544,
      "step": 125892
    },
    {
      "epoch": 24.18,
      "learning_rate": 0.001,
      "loss": 2.5477,
      "step": 125904
    },
    {
      "epoch": 24.18,
      "learning_rate": 0.001,
      "loss": 2.5626,
      "step": 125916
    },
    {
      "epoch": 24.18,
      "learning_rate": 0.001,
      "loss": 2.5392,
      "step": 125928
    },
    {
      "epoch": 24.18,
      "learning_rate": 0.001,
      "loss": 2.5496,
      "step": 125940
    },
    {
      "epoch": 24.18,
      "learning_rate": 0.001,
      "loss": 2.5596,
      "step": 125952
    },
    {
      "epoch": 24.19,
      "learning_rate": 0.001,
      "loss": 2.554,
      "step": 125964
    },
    {
      "epoch": 24.19,
      "learning_rate": 0.001,
      "loss": 2.5482,
      "step": 125976
    },
    {
      "epoch": 24.19,
      "learning_rate": 0.001,
      "loss": 2.5463,
      "step": 125988
    },
    {
      "epoch": 24.19,
      "learning_rate": 0.001,
      "loss": 2.551,
      "step": 126000
    },
    {
      "epoch": 24.2,
      "learning_rate": 0.001,
      "loss": 2.5644,
      "step": 126012
    },
    {
      "epoch": 24.2,
      "learning_rate": 0.001,
      "loss": 2.5487,
      "step": 126024
    },
    {
      "epoch": 24.2,
      "learning_rate": 0.001,
      "loss": 2.548,
      "step": 126036
    },
    {
      "epoch": 24.2,
      "learning_rate": 0.001,
      "loss": 2.5485,
      "step": 126048
    },
    {
      "epoch": 24.21,
      "learning_rate": 0.001,
      "loss": 2.5527,
      "step": 126060
    },
    {
      "epoch": 24.21,
      "learning_rate": 0.001,
      "loss": 2.5403,
      "step": 126072
    },
    {
      "epoch": 24.21,
      "learning_rate": 0.001,
      "loss": 2.5546,
      "step": 126084
    },
    {
      "epoch": 24.21,
      "learning_rate": 0.001,
      "loss": 2.5591,
      "step": 126096
    },
    {
      "epoch": 24.21,
      "learning_rate": 0.001,
      "loss": 2.5497,
      "step": 126108
    },
    {
      "epoch": 24.22,
      "learning_rate": 0.001,
      "loss": 2.5585,
      "step": 126120
    },
    {
      "epoch": 24.22,
      "learning_rate": 0.001,
      "loss": 2.5528,
      "step": 126132
    },
    {
      "epoch": 24.22,
      "learning_rate": 0.001,
      "loss": 2.5662,
      "step": 126144
    },
    {
      "epoch": 24.22,
      "learning_rate": 0.001,
      "loss": 2.5569,
      "step": 126156
    },
    {
      "epoch": 24.23,
      "learning_rate": 0.001,
      "loss": 2.5605,
      "step": 126168
    },
    {
      "epoch": 24.23,
      "learning_rate": 0.001,
      "loss": 2.556,
      "step": 126180
    },
    {
      "epoch": 24.23,
      "learning_rate": 0.001,
      "loss": 2.5561,
      "step": 126192
    },
    {
      "epoch": 24.23,
      "learning_rate": 0.001,
      "loss": 2.5465,
      "step": 126204
    },
    {
      "epoch": 24.24,
      "learning_rate": 0.001,
      "loss": 2.56,
      "step": 126216
    },
    {
      "epoch": 24.24,
      "learning_rate": 0.001,
      "loss": 2.5483,
      "step": 126228
    },
    {
      "epoch": 24.24,
      "learning_rate": 0.001,
      "loss": 2.556,
      "step": 126240
    },
    {
      "epoch": 24.24,
      "eval_ag_news_accuracy": 0.320375,
      "eval_ag_news_bleu_score": 4.775344895117515,
      "eval_ag_news_bleu_score_sem": 0.1503726080535789,
      "eval_ag_news_emb_cos_sim": 0.813957929611206,
      "eval_ag_news_emb_cos_sim_sem": 0.006901063071990038,
      "eval_ag_news_emb_top1_equal": 0.2421875,
      "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.56335186958313,
      "eval_ag_news_n_ngrams_match_1": 14.102,
      "eval_ag_news_n_ngrams_match_2": 3.164,
      "eval_ag_news_n_ngrams_match_3": 0.908,
      "eval_ag_news_num_pred_words": 47.192,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 35.281257346728154,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.34860669216545304,
      "eval_ag_news_runtime": 10.2215,
      "eval_ag_news_samples_per_second": 48.917,
      "eval_ag_news_steps_per_second": 0.098,
      "eval_ag_news_token_set_f1": 0.3508809194480405,
      "eval_ag_news_token_set_f1_sem": 0.004608580168012796,
      "eval_ag_news_token_set_precision": 0.3370039889105722,
      "eval_ag_news_token_set_recall": 0.38289193839935337,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 126250
    },
    {
      "epoch": 24.24,
      "eval_anthropic_toxic_prompts_accuracy": 0.1155625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.183296997527721,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11751172369579264,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6759467720985413,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009322279663840447,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2364230155944824,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.258,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.938,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.722,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.458,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.442551177908204,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21737512716610657,
      "eval_anthropic_toxic_prompts_runtime": 10.5639,
      "eval_anthropic_toxic_prompts_samples_per_second": 47.331,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.095,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35706090076941505,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006248248098035702,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44598438796709156,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.322629626383217,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 126250
    },
    {
      "epoch": 24.24,
      "eval_arxiv_accuracy": 0.34590625,
      "eval_arxiv_bleu_score": 4.370220563565557,
      "eval_arxiv_bleu_score_sem": 0.12244450527849082,
      "eval_arxiv_emb_cos_sim": 0.7648048400878906,
      "eval_arxiv_emb_cos_sim_sem": 0.007851811680698786,
      "eval_arxiv_emb_top1_equal": 0.21875,
      "eval_arxiv_emb_top1_equal_sem": 0.03668319712192295,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.40950608253479,
      "eval_arxiv_n_ngrams_match_1": 15.444,
      "eval_arxiv_n_ngrams_match_2": 3.028,
      "eval_arxiv_n_ngrams_match_3": 0.646,
      "eval_arxiv_num_pred_words": 40.934,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 30.250299417734496,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36743317306610207,
      "eval_arxiv_runtime": 10.3059,
      "eval_arxiv_samples_per_second": 48.516,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.3612732390786656,
      "eval_arxiv_token_set_f1_sem": 0.0041916572078898855,
      "eval_arxiv_token_set_precision": 0.31239754699987654,
      "eval_arxiv_token_set_recall": 0.4463468093924475,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 126250
    },
    {
      "epoch": 24.24,
      "eval_python_code_alpaca_accuracy": 0.1619375,
      "eval_python_code_alpaca_bleu_score": 4.515389946211663,
      "eval_python_code_alpaca_bleu_score_sem": 0.14363464753390234,
      "eval_python_code_alpaca_emb_cos_sim": 0.7711158990859985,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007478067257538845,
      "eval_python_code_alpaca_emb_top1_equal": 0.1171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8851685523986816,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.818,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.838,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.942,
      "eval_python_code_alpaca_num_pred_words": 44.356,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.90658553911693,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33062087935042067,
      "eval_python_code_alpaca_runtime": 10.0448,
      "eval_python_code_alpaca_samples_per_second": 49.777,
      "eval_python_code_alpaca_steps_per_second": 0.1,
      "eval_python_code_alpaca_token_set_f1": 0.4738106631009328,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005270960318232269,
      "eval_python_code_alpaca_token_set_precision": 0.5344894127527231,
      "eval_python_code_alpaca_token_set_recall": 0.4441386550305618,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 126250
    },
    {
      "epoch": 24.24,
      "eval_wikibio_accuracy": 0.31928125,
      "eval_wikibio_bleu_score": 6.161294640055921,
      "eval_wikibio_bleu_score_sem": 0.22319173794094715,
      "eval_wikibio_emb_cos_sim": 0.7410797476768494,
      "eval_wikibio_emb_cos_sim_sem": 0.00849212174979997,
      "eval_wikibio_emb_top1_equal": 0.1484375,
      "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7522311210632324,
      "eval_wikibio_n_ngrams_match_1": 10.232,
      "eval_wikibio_n_ngrams_match_2": 3.546,
      "eval_wikibio_n_ngrams_match_3": 1.31,
      "eval_wikibio_num_pred_words": 36.764,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 42.61605759336966,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3602616735370881,
      "eval_wikibio_runtime": 10.0047,
      "eval_wikibio_samples_per_second": 49.976,
      "eval_wikibio_steps_per_second": 0.1,
      "eval_wikibio_token_set_f1": 0.32419194600123336,
      "eval_wikibio_token_set_f1_sem": 0.005199649131062626,
      "eval_wikibio_token_set_precision": 0.3324473577542092,
      "eval_wikibio_token_set_recall": 0.33253550773248336,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 126250
    },
    {
      "epoch": 24.24,
      "eval_nq_accuracy": 0.52671875,
      "eval_nq_bleu_score": 11.72408680291379,
      "eval_nq_bleu_score_sem": 0.48437731077461954,
      "eval_nq_emb_cos_sim": 0.8301180005073547,
      "eval_nq_emb_cos_sim_sem": 0.006678577102690616,
      "eval_nq_emb_top1_equal": 0.296875,
      "eval_nq_emb_top1_equal_sem": 0.04054163310179599,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.191635847091675,
      "eval_nq_n_ngrams_match_1": 23.17,
      "eval_nq_n_ngrams_match_2": 8.48,
      "eval_nq_n_ngrams_match_3": 3.878,
      "eval_nq_num_pred_words": 49.504,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.949841718903572,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4478242322044885,
      "eval_nq_runtime": 11.5305,
      "eval_nq_samples_per_second": 43.363,
      "eval_nq_steps_per_second": 0.087,
      "eval_nq_token_set_f1": 0.4605346009442573,
      "eval_nq_token_set_f1_sem": 0.004921763449679784,
      "eval_nq_token_set_precision": 0.42052989951206965,
      "eval_nq_token_set_recall": 0.5158344925083073,
      "eval_nq_true_num_tokens": 64.0,
      "step": 126250
    },
    {
      "epoch": 24.24,
      "learning_rate": 0.001,
      "loss": 2.5555,
      "step": 126252
    },
    {
      "epoch": 24.24,
      "learning_rate": 0.001,
      "loss": 2.5494,
      "step": 126264
    },
    {
      "epoch": 24.25,
      "learning_rate": 0.001,
      "loss": 2.5587,
      "step": 126276
    },
    {
      "epoch": 24.25,
      "learning_rate": 0.001,
      "loss": 2.563,
      "step": 126288
    },
    {
      "epoch": 24.25,
      "learning_rate": 0.001,
      "loss": 2.5574,
      "step": 126300
    },
    {
      "epoch": 24.25,
      "learning_rate": 0.001,
      "loss": 2.5598,
      "step": 126312
    },
    {
      "epoch": 24.26,
      "learning_rate": 0.001,
      "loss": 2.557,
      "step": 126324
    },
    {
      "epoch": 24.26,
      "learning_rate": 0.001,
      "loss": 2.5709,
      "step": 126336
    },
    {
      "epoch": 24.26,
      "learning_rate": 0.001,
      "loss": 2.5587,
      "step": 126348
    },
    {
      "epoch": 24.26,
      "learning_rate": 0.001,
      "loss": 2.5574,
      "step": 126360
    },
    {
      "epoch": 24.26,
      "learning_rate": 0.001,
      "loss": 2.5513,
      "step": 126372
    },
    {
      "epoch": 24.27,
      "learning_rate": 0.001,
      "loss": 2.5608,
      "step": 126384
    },
    {
      "epoch": 24.27,
      "learning_rate": 0.001,
      "loss": 2.5503,
      "step": 126396
    },
    {
      "epoch": 24.27,
      "learning_rate": 0.001,
      "loss": 2.5539,
      "step": 126408
    },
    {
      "epoch": 24.27,
      "learning_rate": 0.001,
      "loss": 2.5437,
      "step": 126420
    },
    {
      "epoch": 24.28,
      "learning_rate": 0.001,
      "loss": 2.5554,
      "step": 126432
    },
    {
      "epoch": 24.28,
      "learning_rate": 0.001,
      "loss": 2.5532,
      "step": 126444
    },
    {
      "epoch": 24.28,
      "learning_rate": 0.001,
      "loss": 2.5624,
      "step": 126456
    },
    {
      "epoch": 24.28,
      "learning_rate": 0.001,
      "loss": 2.5596,
      "step": 126468
    },
    {
      "epoch": 24.29,
      "learning_rate": 0.001,
      "loss": 2.5547,
      "step": 126480
    },
    {
      "epoch": 24.29,
      "learning_rate": 0.001,
      "loss": 2.5608,
      "step": 126492
    },
    {
      "epoch": 24.29,
      "learning_rate": 0.001,
      "loss": 2.5542,
      "step": 126504
    },
    {
      "epoch": 24.29,
      "learning_rate": 0.001,
      "loss": 2.567,
      "step": 126516
    },
    {
      "epoch": 24.29,
      "learning_rate": 0.001,
      "loss": 2.5639,
      "step": 126528
    },
    {
      "epoch": 24.3,
      "learning_rate": 0.001,
      "loss": 2.567,
      "step": 126540
    },
    {
      "epoch": 24.3,
      "learning_rate": 0.001,
      "loss": 2.5605,
      "step": 126552
    },
    {
      "epoch": 24.3,
      "learning_rate": 0.001,
      "loss": 2.557,
      "step": 126564
    },
    {
      "epoch": 24.3,
      "learning_rate": 0.001,
      "loss": 2.5639,
      "step": 126576
    },
    {
      "epoch": 24.31,
      "learning_rate": 0.001,
      "loss": 2.5517,
      "step": 126588
    },
    {
      "epoch": 24.31,
      "learning_rate": 0.001,
      "loss": 2.5523,
      "step": 126600
    },
    {
      "epoch": 24.31,
      "learning_rate": 0.001,
      "loss": 2.5569,
      "step": 126612
    },
    {
      "epoch": 24.31,
      "learning_rate": 0.001,
      "loss": 2.5504,
      "step": 126624
    },
    {
      "epoch": 24.32,
      "learning_rate": 0.001,
      "loss": 2.5577,
      "step": 126636
    },
    {
      "epoch": 24.32,
      "learning_rate": 0.001,
      "loss": 2.549,
      "step": 126648
    },
    {
      "epoch": 24.32,
      "learning_rate": 0.001,
      "loss": 2.5628,
      "step": 126660
    },
    {
      "epoch": 24.32,
      "learning_rate": 0.001,
      "loss": 2.553,
      "step": 126672
    },
    {
      "epoch": 24.32,
      "learning_rate": 0.001,
      "loss": 2.5443,
      "step": 126684
    },
    {
      "epoch": 24.33,
      "learning_rate": 0.001,
      "loss": 2.5611,
      "step": 126696
    },
    {
      "epoch": 24.33,
      "learning_rate": 0.001,
      "loss": 2.5607,
      "step": 126708
    },
    {
      "epoch": 24.33,
      "learning_rate": 0.001,
      "loss": 2.5643,
      "step": 126720
    },
    {
      "epoch": 24.33,
      "learning_rate": 0.001,
      "loss": 2.5524,
      "step": 126732
    },
    {
      "epoch": 24.34,
      "learning_rate": 0.001,
      "loss": 2.5363,
      "step": 126744
    },
    {
      "epoch": 24.34,
      "learning_rate": 0.001,
      "loss": 2.5578,
      "step": 126756
    },
    {
      "epoch": 24.34,
      "learning_rate": 0.001,
      "loss": 2.5579,
      "step": 126768
    },
    {
      "epoch": 24.34,
      "learning_rate": 0.001,
      "loss": 2.5561,
      "step": 126780
    },
    {
      "epoch": 24.35,
      "learning_rate": 0.001,
      "loss": 2.561,
      "step": 126792
    },
    {
      "epoch": 24.35,
      "learning_rate": 0.001,
      "loss": 2.5596,
      "step": 126804
    },
    {
      "epoch": 24.35,
      "learning_rate": 0.001,
      "loss": 2.5511,
      "step": 126816
    },
    {
      "epoch": 24.35,
      "learning_rate": 0.001,
      "loss": 2.5632,
      "step": 126828
    },
    {
      "epoch": 24.35,
      "learning_rate": 0.001,
      "loss": 2.5554,
      "step": 126840
    },
    {
      "epoch": 24.36,
      "learning_rate": 0.001,
      "loss": 2.5511,
      "step": 126852
    },
    {
      "epoch": 24.36,
      "learning_rate": 0.001,
      "loss": 2.5623,
      "step": 126864
    },
    {
      "epoch": 24.36,
      "eval_ag_news_accuracy": 0.322625,
      "eval_ag_news_bleu_score": 4.8517890698492945,
      "eval_ag_news_bleu_score_sem": 0.1522933907373371,
      "eval_ag_news_emb_cos_sim": 0.8152967095375061,
      "eval_ag_news_emb_cos_sim_sem": 0.0068136669717911436,
      "eval_ag_news_emb_top1_equal": 0.21875,
      "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5554556846618652,
      "eval_ag_news_n_ngrams_match_1": 14.124,
      "eval_ag_news_n_ngrams_match_2": 3.106,
      "eval_ag_news_n_ngrams_match_3": 0.896,
      "eval_ag_news_num_pred_words": 46.47,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 35.003767013741154,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35211771820722176,
      "eval_ag_news_runtime": 10.42,
      "eval_ag_news_samples_per_second": 47.985,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.35229496268430777,
      "eval_ag_news_token_set_f1_sem": 0.004466539590259143,
      "eval_ag_news_token_set_precision": 0.3383977395442717,
      "eval_ag_news_token_set_recall": 0.3810278056279813,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 126875
    },
    {
      "epoch": 24.36,
      "eval_anthropic_toxic_prompts_accuracy": 0.1158125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.149471515981467,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1190929971631158,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6771558523178101,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008211656863473242,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02147948148198014,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.233881950378418,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.258,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.936,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.72,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.226,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.37798206800531,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21729543515758332,
      "eval_anthropic_toxic_prompts_runtime": 10.1479,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.271,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.099,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35704811484097193,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006284680811508397,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4458421497684926,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3231182740517771,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 126875
    },
    {
      "epoch": 24.36,
      "eval_arxiv_accuracy": 0.346875,
      "eval_arxiv_bleu_score": 4.3466297534459635,
      "eval_arxiv_bleu_score_sem": 0.12563718015316847,
      "eval_arxiv_emb_cos_sim": 0.7638865113258362,
      "eval_arxiv_emb_cos_sim_sem": 0.007166739682845506,
      "eval_arxiv_emb_top1_equal": 0.234375,
      "eval_arxiv_emb_top1_equal_sem": 0.03758909358128201,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4086461067199707,
      "eval_arxiv_n_ngrams_match_1": 15.086,
      "eval_arxiv_n_ngrams_match_2": 2.978,
      "eval_arxiv_n_ngrams_match_3": 0.642,
      "eval_arxiv_num_pred_words": 40.306,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 30.224296074569896,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36123106928975923,
      "eval_arxiv_runtime": 10.4356,
      "eval_arxiv_samples_per_second": 47.913,
      "eval_arxiv_steps_per_second": 0.096,
      "eval_arxiv_token_set_f1": 0.35605520173792715,
      "eval_arxiv_token_set_f1_sem": 0.004142081506871111,
      "eval_arxiv_token_set_precision": 0.3057803339486993,
      "eval_arxiv_token_set_recall": 0.44311153278851845,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 126875
    },
    {
      "epoch": 24.36,
      "eval_python_code_alpaca_accuracy": 0.160625,
      "eval_python_code_alpaca_bleu_score": 4.30087033516261,
      "eval_python_code_alpaca_bleu_score_sem": 0.13042133780117723,
      "eval_python_code_alpaca_emb_cos_sim": 0.7505527138710022,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.00803445307308722,
      "eval_python_code_alpaca_emb_top1_equal": 0.1171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8764841556549072,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.74,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.774,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.866,
      "eval_python_code_alpaca_num_pred_words": 44.03,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.751750941624103,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3260038877090582,
      "eval_python_code_alpaca_runtime": 10.7463,
      "eval_python_code_alpaca_samples_per_second": 46.528,
      "eval_python_code_alpaca_steps_per_second": 0.093,
      "eval_python_code_alpaca_token_set_f1": 0.47522250597941507,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005458049414288956,
      "eval_python_code_alpaca_token_set_precision": 0.5245003194265568,
      "eval_python_code_alpaca_token_set_recall": 0.46036931571758516,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 126875
    },
    {
      "epoch": 24.36,
      "eval_wikibio_accuracy": 0.3223125,
      "eval_wikibio_bleu_score": 5.974463342667476,
      "eval_wikibio_bleu_score_sem": 0.2150361480818903,
      "eval_wikibio_emb_cos_sim": 0.7507862448692322,
      "eval_wikibio_emb_cos_sim_sem": 0.008929076567558633,
      "eval_wikibio_emb_top1_equal": 0.1953125,
      "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7396738529205322,
      "eval_wikibio_n_ngrams_match_1": 9.938,
      "eval_wikibio_n_ngrams_match_2": 3.368,
      "eval_wikibio_n_ngrams_match_3": 1.23,
      "eval_wikibio_num_pred_words": 35.982,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 42.08426226723169,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3550537204340967,
      "eval_wikibio_runtime": 10.2427,
      "eval_wikibio_samples_per_second": 48.815,
      "eval_wikibio_steps_per_second": 0.098,
      "eval_wikibio_token_set_f1": 0.3207254466103453,
      "eval_wikibio_token_set_f1_sem": 0.0054076916010678076,
      "eval_wikibio_token_set_precision": 0.32816329139207967,
      "eval_wikibio_token_set_recall": 0.32734481181794106,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 126875
    },
    {
      "epoch": 24.36,
      "eval_nq_accuracy": 0.52746875,
      "eval_nq_bleu_score": 11.540913833744037,
      "eval_nq_bleu_score_sem": 0.45454573049351493,
      "eval_nq_emb_cos_sim": 0.8343319892883301,
      "eval_nq_emb_cos_sim_sem": 0.006692799029885011,
      "eval_nq_emb_top1_equal": 0.296875,
      "eval_nq_emb_top1_equal_sem": 0.04054163310179599,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1918694972991943,
      "eval_nq_n_ngrams_match_1": 23.134,
      "eval_nq_n_ngrams_match_2": 8.408,
      "eval_nq_n_ngrams_match_3": 3.828,
      "eval_nq_num_pred_words": 48.936,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.951933095594246,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4484575503789313,
      "eval_nq_runtime": 10.5333,
      "eval_nq_samples_per_second": 47.469,
      "eval_nq_steps_per_second": 0.095,
      "eval_nq_token_set_f1": 0.4613356012270699,
      "eval_nq_token_set_f1_sem": 0.004893589728379912,
      "eval_nq_token_set_precision": 0.4203849298189492,
      "eval_nq_token_set_recall": 0.5178627023143768,
      "eval_nq_true_num_tokens": 64.0,
      "step": 126875
    },
    {
      "epoch": 24.36,
      "learning_rate": 0.001,
      "loss": 2.5562,
      "step": 126876
    },
    {
      "epoch": 24.36,
      "learning_rate": 0.001,
      "loss": 2.5638,
      "step": 126888
    },
    {
      "epoch": 24.37,
      "learning_rate": 0.001,
      "loss": 2.5577,
      "step": 126900
    },
    {
      "epoch": 24.37,
      "learning_rate": 0.001,
      "loss": 2.5579,
      "step": 126912
    },
    {
      "epoch": 24.37,
      "learning_rate": 0.001,
      "loss": 2.5597,
      "step": 126924
    },
    {
      "epoch": 24.37,
      "learning_rate": 0.001,
      "loss": 2.5638,
      "step": 126936
    },
    {
      "epoch": 24.38,
      "learning_rate": 0.001,
      "loss": 2.5625,
      "step": 126948
    },
    {
      "epoch": 24.38,
      "learning_rate": 0.001,
      "loss": 2.564,
      "step": 126960
    },
    {
      "epoch": 24.38,
      "learning_rate": 0.001,
      "loss": 2.5637,
      "step": 126972
    },
    {
      "epoch": 24.38,
      "learning_rate": 0.001,
      "loss": 2.5598,
      "step": 126984
    },
    {
      "epoch": 24.38,
      "learning_rate": 0.001,
      "loss": 2.5647,
      "step": 126996
    },
    {
      "epoch": 24.39,
      "learning_rate": 0.001,
      "loss": 2.5537,
      "step": 127008
    },
    {
      "epoch": 24.39,
      "learning_rate": 0.001,
      "loss": 2.5536,
      "step": 127020
    },
    {
      "epoch": 24.39,
      "learning_rate": 0.001,
      "loss": 2.5602,
      "step": 127032
    },
    {
      "epoch": 24.39,
      "learning_rate": 0.001,
      "loss": 2.5693,
      "step": 127044
    },
    {
      "epoch": 24.4,
      "learning_rate": 0.001,
      "loss": 2.5591,
      "step": 127056
    },
    {
      "epoch": 24.4,
      "learning_rate": 0.001,
      "loss": 2.5621,
      "step": 127068
    },
    {
      "epoch": 24.4,
      "learning_rate": 0.001,
      "loss": 2.5521,
      "step": 127080
    },
    {
      "epoch": 24.4,
      "learning_rate": 0.001,
      "loss": 2.5578,
      "step": 127092
    },
    {
      "epoch": 24.41,
      "learning_rate": 0.001,
      "loss": 2.5605,
      "step": 127104
    },
    {
      "epoch": 24.41,
      "learning_rate": 0.001,
      "loss": 2.5435,
      "step": 127116
    },
    {
      "epoch": 24.41,
      "learning_rate": 0.001,
      "loss": 2.5597,
      "step": 127128
    },
    {
      "epoch": 24.41,
      "learning_rate": 0.001,
      "loss": 2.5629,
      "step": 127140
    },
    {
      "epoch": 24.41,
      "learning_rate": 0.001,
      "loss": 2.5765,
      "step": 127152
    },
    {
      "epoch": 24.42,
      "learning_rate": 0.001,
      "loss": 2.5616,
      "step": 127164
    },
    {
      "epoch": 24.42,
      "learning_rate": 0.001,
      "loss": 2.5583,
      "step": 127176
    },
    {
      "epoch": 24.42,
      "learning_rate": 0.001,
      "loss": 2.5643,
      "step": 127188
    },
    {
      "epoch": 24.42,
      "learning_rate": 0.001,
      "loss": 2.5734,
      "step": 127200
    },
    {
      "epoch": 24.43,
      "learning_rate": 0.001,
      "loss": 2.555,
      "step": 127212
    },
    {
      "epoch": 24.43,
      "learning_rate": 0.001,
      "loss": 2.5562,
      "step": 127224
    },
    {
      "epoch": 24.43,
      "learning_rate": 0.001,
      "loss": 2.569,
      "step": 127236
    },
    {
      "epoch": 24.43,
      "learning_rate": 0.001,
      "loss": 2.5769,
      "step": 127248
    },
    {
      "epoch": 24.44,
      "learning_rate": 0.001,
      "loss": 2.5634,
      "step": 127260
    },
    {
      "epoch": 24.44,
      "learning_rate": 0.001,
      "loss": 2.5638,
      "step": 127272
    },
    {
      "epoch": 24.44,
      "learning_rate": 0.001,
      "loss": 2.5647,
      "step": 127284
    },
    {
      "epoch": 24.44,
      "learning_rate": 0.001,
      "loss": 2.5585,
      "step": 127296
    },
    {
      "epoch": 24.44,
      "learning_rate": 0.001,
      "loss": 2.5491,
      "step": 127308
    },
    {
      "epoch": 24.45,
      "learning_rate": 0.001,
      "loss": 2.5604,
      "step": 127320
    },
    {
      "epoch": 24.45,
      "learning_rate": 0.001,
      "loss": 2.5643,
      "step": 127332
    },
    {
      "epoch": 24.45,
      "learning_rate": 0.001,
      "loss": 2.572,
      "step": 127344
    },
    {
      "epoch": 24.45,
      "learning_rate": 0.001,
      "loss": 2.5779,
      "step": 127356
    },
    {
      "epoch": 24.46,
      "learning_rate": 0.001,
      "loss": 2.556,
      "step": 127368
    },
    {
      "epoch": 24.46,
      "learning_rate": 0.001,
      "loss": 2.5708,
      "step": 127380
    },
    {
      "epoch": 24.46,
      "learning_rate": 0.001,
      "loss": 2.5754,
      "step": 127392
    },
    {
      "epoch": 24.46,
      "learning_rate": 0.001,
      "loss": 2.5524,
      "step": 127404
    },
    {
      "epoch": 24.47,
      "learning_rate": 0.001,
      "loss": 2.5581,
      "step": 127416
    },
    {
      "epoch": 24.47,
      "learning_rate": 0.001,
      "loss": 2.5617,
      "step": 127428
    },
    {
      "epoch": 24.47,
      "learning_rate": 0.001,
      "loss": 2.5626,
      "step": 127440
    },
    {
      "epoch": 24.47,
      "learning_rate": 0.001,
      "loss": 2.5585,
      "step": 127452
    },
    {
      "epoch": 24.47,
      "learning_rate": 0.001,
      "loss": 2.5667,
      "step": 127464
    },
    {
      "epoch": 24.48,
      "learning_rate": 0.001,
      "loss": 2.5585,
      "step": 127476
    },
    {
      "epoch": 24.48,
      "learning_rate": 0.001,
      "loss": 2.5702,
      "step": 127488
    },
    {
      "epoch": 24.48,
      "learning_rate": 0.001,
      "loss": 2.5611,
      "step": 127500
    },
    {
      "epoch": 24.48,
      "eval_ag_news_accuracy": 0.32275,
      "eval_ag_news_bleu_score": 4.639986234548955,
      "eval_ag_news_bleu_score_sem": 0.15093575212638385,
      "eval_ag_news_emb_cos_sim": 0.8081047534942627,
      "eval_ag_news_emb_cos_sim_sem": 0.007535462045587571,
      "eval_ag_news_emb_top1_equal": 0.2421875,
      "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.562582492828369,
      "eval_ag_news_n_ngrams_match_1": 13.852,
      "eval_ag_news_n_ngrams_match_2": 3.042,
      "eval_ag_news_n_ngrams_match_3": 0.848,
      "eval_ag_news_num_pred_words": 46.446,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 35.25412320697353,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3421019167867688,
      "eval_ag_news_runtime": 10.3419,
      "eval_ag_news_samples_per_second": 48.347,
      "eval_ag_news_steps_per_second": 0.097,
      "eval_ag_news_token_set_f1": 0.3455803030196758,
      "eval_ag_news_token_set_f1_sem": 0.00467891002648212,
      "eval_ag_news_token_set_precision": 0.33008748740328303,
      "eval_ag_news_token_set_recall": 0.37960448336621844,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 127500
    },
    {
      "epoch": 24.48,
      "eval_anthropic_toxic_prompts_accuracy": 0.114125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1050762676535277,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11241941847580554,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6736693382263184,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008920013678039775,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2447471618652344,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.266,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.874,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.714,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.3,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.655222621492257,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 62.9921875,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21439864626368288,
      "eval_anthropic_toxic_prompts_runtime": 9.9393,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.305,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.101,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3553538130328317,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0065486978584849975,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4437251640051009,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3215271663954234,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 127500
    },
    {
      "epoch": 24.48,
      "eval_arxiv_accuracy": 0.34778125,
      "eval_arxiv_bleu_score": 4.448812431247174,
      "eval_arxiv_bleu_score_sem": 0.12925630353778725,
      "eval_arxiv_emb_cos_sim": 0.7683203220367432,
      "eval_arxiv_emb_cos_sim_sem": 0.007284435415856686,
      "eval_arxiv_emb_top1_equal": 0.265625,
      "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.406280755996704,
      "eval_arxiv_n_ngrams_match_1": 15.096,
      "eval_arxiv_n_ngrams_match_2": 2.99,
      "eval_arxiv_n_ngrams_match_3": 0.71,
      "eval_arxiv_num_pred_words": 40.876,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 30.152889498080985,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.35627068013546237,
      "eval_arxiv_runtime": 10.0932,
      "eval_arxiv_samples_per_second": 49.538,
      "eval_arxiv_steps_per_second": 0.099,
      "eval_arxiv_token_set_f1": 0.3528760660577213,
      "eval_arxiv_token_set_f1_sem": 0.004272846584088692,
      "eval_arxiv_token_set_precision": 0.30536737720033885,
      "eval_arxiv_token_set_recall": 0.43913473403758196,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 127500
    },
    {
      "epoch": 24.48,
      "eval_python_code_alpaca_accuracy": 0.1595,
      "eval_python_code_alpaca_bleu_score": 4.365213762966829,
      "eval_python_code_alpaca_bleu_score_sem": 0.13607054543843916,
      "eval_python_code_alpaca_emb_cos_sim": 0.7403180003166199,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010975971851260109,
      "eval_python_code_alpaca_emb_top1_equal": 0.1640625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8966422080993652,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.822,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.798,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.896,
      "eval_python_code_alpaca_num_pred_words": 43.694,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.113222711365232,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.32386381280394927,
      "eval_python_code_alpaca_runtime": 9.9343,
      "eval_python_code_alpaca_samples_per_second": 50.33,
      "eval_python_code_alpaca_steps_per_second": 0.101,
      "eval_python_code_alpaca_token_set_f1": 0.46753360571179126,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005739783072673743,
      "eval_python_code_alpaca_token_set_precision": 0.5370473728284986,
      "eval_python_code_alpaca_token_set_recall": 0.4377501185475591,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 127500
    },
    {
      "epoch": 24.48,
      "eval_wikibio_accuracy": 0.32396875,
      "eval_wikibio_bleu_score": 6.1000228062214825,
      "eval_wikibio_bleu_score_sem": 0.23407866633719634,
      "eval_wikibio_emb_cos_sim": 0.7507628798484802,
      "eval_wikibio_emb_cos_sim_sem": 0.00895993069484664,
      "eval_wikibio_emb_top1_equal": 0.171875,
      "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7186765670776367,
      "eval_wikibio_n_ngrams_match_1": 10.184,
      "eval_wikibio_n_ngrams_match_2": 3.444,
      "eval_wikibio_n_ngrams_match_3": 1.314,
      "eval_wikibio_num_pred_words": 37.072,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 41.209819571760974,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.356131583206422,
      "eval_wikibio_runtime": 10.9648,
      "eval_wikibio_samples_per_second": 45.6,
      "eval_wikibio_steps_per_second": 0.091,
      "eval_wikibio_token_set_f1": 0.3199903781561186,
      "eval_wikibio_token_set_f1_sem": 0.005533735553196984,
      "eval_wikibio_token_set_precision": 0.33100406067427135,
      "eval_wikibio_token_set_recall": 0.32680066525775153,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 127500
    },
    {
      "epoch": 24.48,
      "eval_nq_accuracy": 0.52690625,
      "eval_nq_bleu_score": 11.758171062058048,
      "eval_nq_bleu_score_sem": 0.4951452956106227,
      "eval_nq_emb_cos_sim": 0.8322650194168091,
      "eval_nq_emb_cos_sim_sem": 0.006576643632478003,
      "eval_nq_emb_top1_equal": 0.2734375,
      "eval_nq_emb_top1_equal_sem": 0.03955156411760461,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1930091381073,
      "eval_nq_n_ngrams_match_1": 23.302,
      "eval_nq_n_ngrams_match_2": 8.636,
      "eval_nq_n_ngrams_match_3": 3.942,
      "eval_nq_num_pred_words": 49.612,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.962140899371478,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4474533939548328,
      "eval_nq_runtime": 10.3442,
      "eval_nq_samples_per_second": 48.336,
      "eval_nq_steps_per_second": 0.097,
      "eval_nq_token_set_f1": 0.46476623759754593,
      "eval_nq_token_set_f1_sem": 0.004884904240429779,
      "eval_nq_token_set_precision": 0.4220830535152562,
      "eval_nq_token_set_recall": 0.5242566794244469,
      "eval_nq_true_num_tokens": 64.0,
      "step": 127500
    },
    {
      "epoch": 24.48,
      "learning_rate": 0.001,
      "loss": 2.5632,
      "step": 127512
    },
    {
      "epoch": 24.49,
      "learning_rate": 0.001,
      "loss": 2.56,
      "step": 127524
    },
    {
      "epoch": 24.49,
      "learning_rate": 0.001,
      "loss": 2.5641,
      "step": 127536
    },
    {
      "epoch": 24.49,
      "learning_rate": 0.001,
      "loss": 2.5613,
      "step": 127548
    },
    {
      "epoch": 24.49,
      "learning_rate": 0.001,
      "loss": 2.5545,
      "step": 127560
    },
    {
      "epoch": 24.5,
      "learning_rate": 0.001,
      "loss": 2.5666,
      "step": 127572
    },
    {
      "epoch": 24.5,
      "learning_rate": 0.001,
      "loss": 2.5643,
      "step": 127584
    },
    {
      "epoch": 24.5,
      "learning_rate": 0.001,
      "loss": 2.5573,
      "step": 127596
    },
    {
      "epoch": 24.5,
      "learning_rate": 0.001,
      "loss": 2.557,
      "step": 127608
    },
    {
      "epoch": 24.5,
      "learning_rate": 0.001,
      "loss": 2.5619,
      "step": 127620
    },
    {
      "epoch": 24.51,
      "learning_rate": 0.001,
      "loss": 2.5564,
      "step": 127632
    },
    {
      "epoch": 24.51,
      "learning_rate": 0.001,
      "loss": 2.5662,
      "step": 127644
    },
    {
      "epoch": 24.51,
      "learning_rate": 0.001,
      "loss": 2.5552,
      "step": 127656
    },
    {
      "epoch": 24.51,
      "learning_rate": 0.001,
      "loss": 2.5669,
      "step": 127668
    },
    {
      "epoch": 24.52,
      "learning_rate": 0.001,
      "loss": 2.5679,
      "step": 127680
    },
    {
      "epoch": 24.52,
      "learning_rate": 0.001,
      "loss": 2.5652,
      "step": 127692
    },
    {
      "epoch": 24.52,
      "learning_rate": 0.001,
      "loss": 2.5624,
      "step": 127704
    },
    {
      "epoch": 24.52,
      "learning_rate": 0.001,
      "loss": 2.5621,
      "step": 127716
    },
    {
      "epoch": 24.53,
      "learning_rate": 0.001,
      "loss": 2.5698,
      "step": 127728
    },
    {
      "epoch": 24.53,
      "learning_rate": 0.001,
      "loss": 2.5542,
      "step": 127740
    },
    {
      "epoch": 24.53,
      "learning_rate": 0.001,
      "loss": 2.5687,
      "step": 127752
    },
    {
      "epoch": 24.53,
      "learning_rate": 0.001,
      "loss": 2.5659,
      "step": 127764
    },
    {
      "epoch": 24.53,
      "learning_rate": 0.001,
      "loss": 2.5643,
      "step": 127776
    },
    {
      "epoch": 24.54,
      "learning_rate": 0.001,
      "loss": 2.5698,
      "step": 127788
    },
    {
      "epoch": 24.54,
      "learning_rate": 0.001,
      "loss": 2.5541,
      "step": 127800
    },
    {
      "epoch": 24.54,
      "learning_rate": 0.001,
      "loss": 2.5662,
      "step": 127812
    },
    {
      "epoch": 24.54,
      "learning_rate": 0.001,
      "loss": 2.5659,
      "step": 127824
    },
    {
      "epoch": 24.55,
      "learning_rate": 0.001,
      "loss": 2.5532,
      "step": 127836
    },
    {
      "epoch": 24.55,
      "learning_rate": 0.001,
      "loss": 2.5513,
      "step": 127848
    },
    {
      "epoch": 24.55,
      "learning_rate": 0.001,
      "loss": 2.5729,
      "step": 127860
    },
    {
      "epoch": 24.55,
      "learning_rate": 0.001,
      "loss": 2.5559,
      "step": 127872
    },
    {
      "epoch": 24.56,
      "learning_rate": 0.001,
      "loss": 2.5557,
      "step": 127884
    },
    {
      "epoch": 24.56,
      "learning_rate": 0.001,
      "loss": 2.5519,
      "step": 127896
    },
    {
      "epoch": 24.56,
      "learning_rate": 0.001,
      "loss": 2.5519,
      "step": 127908
    },
    {
      "epoch": 24.56,
      "learning_rate": 0.001,
      "loss": 2.566,
      "step": 127920
    },
    {
      "epoch": 24.56,
      "learning_rate": 0.001,
      "loss": 2.5733,
      "step": 127932
    },
    {
      "epoch": 24.57,
      "learning_rate": 0.001,
      "loss": 2.5615,
      "step": 127944
    },
    {
      "epoch": 24.57,
      "learning_rate": 0.001,
      "loss": 2.5655,
      "step": 127956
    },
    {
      "epoch": 24.57,
      "learning_rate": 0.001,
      "loss": 2.565,
      "step": 127968
    },
    {
      "epoch": 24.57,
      "learning_rate": 0.001,
      "loss": 2.5528,
      "step": 127980
    },
    {
      "epoch": 24.58,
      "learning_rate": 0.001,
      "loss": 2.5627,
      "step": 127992
    },
    {
      "epoch": 24.58,
      "learning_rate": 0.001,
      "loss": 2.574,
      "step": 128004
    },
    {
      "epoch": 24.58,
      "learning_rate": 0.001,
      "loss": 2.5644,
      "step": 128016
    },
    {
      "epoch": 24.58,
      "learning_rate": 0.001,
      "loss": 2.5635,
      "step": 128028
    },
    {
      "epoch": 24.59,
      "learning_rate": 0.001,
      "loss": 2.569,
      "step": 128040
    },
    {
      "epoch": 24.59,
      "learning_rate": 0.001,
      "loss": 2.5648,
      "step": 128052
    },
    {
      "epoch": 24.59,
      "learning_rate": 0.001,
      "loss": 2.56,
      "step": 128064
    },
    {
      "epoch": 24.59,
      "learning_rate": 0.001,
      "loss": 2.5599,
      "step": 128076
    },
    {
      "epoch": 24.59,
      "learning_rate": 0.001,
      "loss": 2.5594,
      "step": 128088
    },
    {
      "epoch": 24.6,
      "learning_rate": 0.001,
      "loss": 2.5635,
      "step": 128100
    },
    {
      "epoch": 24.6,
      "learning_rate": 0.001,
      "loss": 2.5661,
      "step": 128112
    },
    {
      "epoch": 24.6,
      "learning_rate": 0.001,
      "loss": 2.5576,
      "step": 128124
    },
    {
      "epoch": 24.6,
      "eval_ag_news_accuracy": 0.3214375,
      "eval_ag_news_bleu_score": 4.750501466268349,
      "eval_ag_news_bleu_score_sem": 0.15019038992967793,
      "eval_ag_news_emb_cos_sim": 0.8111311793327332,
      "eval_ag_news_emb_cos_sim_sem": 0.006907176340657641,
      "eval_ag_news_emb_top1_equal": 0.2578125,
      "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5570576190948486,
      "eval_ag_news_n_ngrams_match_1": 14.06,
      "eval_ag_news_n_ngrams_match_2": 3.07,
      "eval_ag_news_n_ngrams_match_3": 0.872,
      "eval_ag_news_num_pred_words": 46.858,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 35.05988569062406,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.34809493488834936,
      "eval_ag_news_runtime": 10.653,
      "eval_ag_news_samples_per_second": 46.935,
      "eval_ag_news_steps_per_second": 0.094,
      "eval_ag_news_token_set_f1": 0.3494369119117738,
      "eval_ag_news_token_set_f1_sem": 0.004413964753067884,
      "eval_ag_news_token_set_precision": 0.33502623632371903,
      "eval_ag_news_token_set_recall": 0.3817980944693631,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 128125
    },
    {
      "epoch": 24.6,
      "eval_anthropic_toxic_prompts_accuracy": 0.1135625,
      "eval_anthropic_toxic_prompts_bleu_score": 2.9947039622433658,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10692995469104294,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.674341082572937,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.0093943527504276,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2263243198394775,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.226,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.808,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.63,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.472,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.18690760030586,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2144519553837049,
      "eval_anthropic_toxic_prompts_runtime": 9.8168,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.933,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.102,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35419134220481474,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006446299903105918,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4402190910751929,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3204095056859719,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 128125
    },
    {
      "epoch": 24.6,
      "eval_arxiv_accuracy": 0.34721875,
      "eval_arxiv_bleu_score": 4.285557501452188,
      "eval_arxiv_bleu_score_sem": 0.12125013896119925,
      "eval_arxiv_emb_cos_sim": 0.7653764486312866,
      "eval_arxiv_emb_cos_sim_sem": 0.007201370562886794,
      "eval_arxiv_emb_top1_equal": 0.296875,
      "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3980510234832764,
      "eval_arxiv_n_ngrams_match_1": 15.098,
      "eval_arxiv_n_ngrams_match_2": 2.94,
      "eval_arxiv_n_ngrams_match_3": 0.654,
      "eval_arxiv_num_pred_words": 40.84,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.90575759256494,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3596928326263683,
      "eval_arxiv_runtime": 10.3729,
      "eval_arxiv_samples_per_second": 48.202,
      "eval_arxiv_steps_per_second": 0.096,
      "eval_arxiv_token_set_f1": 0.35664676180739174,
      "eval_arxiv_token_set_f1_sem": 0.004113132817146726,
      "eval_arxiv_token_set_precision": 0.3063978328796045,
      "eval_arxiv_token_set_recall": 0.4440181599383096,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 128125
    },
    {
      "epoch": 24.6,
      "eval_python_code_alpaca_accuracy": 0.15984375,
      "eval_python_code_alpaca_bleu_score": 4.527798796290013,
      "eval_python_code_alpaca_bleu_score_sem": 0.13816159101012032,
      "eval_python_code_alpaca_emb_cos_sim": 0.7694908380508423,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.0074370432274140615,
      "eval_python_code_alpaca_emb_top1_equal": 0.140625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8988540172576904,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.874,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.842,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.916,
      "eval_python_code_alpaca_num_pred_words": 43.818,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.15333004177462,
      "eval_python_code_alpaca_pred_num_tokens": 62.796875,
      "eval_python_code_alpaca_rouge_score": 0.33195087861643935,
      "eval_python_code_alpaca_runtime": 10.3324,
      "eval_python_code_alpaca_samples_per_second": 48.391,
      "eval_python_code_alpaca_steps_per_second": 0.097,
      "eval_python_code_alpaca_token_set_f1": 0.47935385745143244,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005417704378028998,
      "eval_python_code_alpaca_token_set_precision": 0.5396073459899511,
      "eval_python_code_alpaca_token_set_recall": 0.4510487647260865,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 128125
    },
    {
      "epoch": 24.6,
      "eval_wikibio_accuracy": 0.324375,
      "eval_wikibio_bleu_score": 5.886994198339176,
      "eval_wikibio_bleu_score_sem": 0.22194314839117812,
      "eval_wikibio_emb_cos_sim": 0.7401241064071655,
      "eval_wikibio_emb_cos_sim_sem": 0.008463849775527764,
      "eval_wikibio_emb_top1_equal": 0.1875,
      "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.706007719039917,
      "eval_wikibio_n_ngrams_match_1": 10.028,
      "eval_wikibio_n_ngrams_match_2": 3.322,
      "eval_wikibio_n_ngrams_match_3": 1.196,
      "eval_wikibio_num_pred_words": 35.996,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 40.69103179048241,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.351261898116098,
      "eval_wikibio_runtime": 10.0533,
      "eval_wikibio_samples_per_second": 49.735,
      "eval_wikibio_steps_per_second": 0.099,
      "eval_wikibio_token_set_f1": 0.3166181934378616,
      "eval_wikibio_token_set_f1_sem": 0.005451295581210154,
      "eval_wikibio_token_set_precision": 0.3263197820828759,
      "eval_wikibio_token_set_recall": 0.3229714542426315,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 128125
    },
    {
      "epoch": 24.6,
      "eval_nq_accuracy": 0.5265,
      "eval_nq_bleu_score": 11.506156015401555,
      "eval_nq_bleu_score_sem": 0.46702609026551933,
      "eval_nq_emb_cos_sim": 0.8306975960731506,
      "eval_nq_emb_cos_sim_sem": 0.007131957337567383,
      "eval_nq_emb_top1_equal": 0.3203125,
      "eval_nq_emb_top1_equal_sem": 0.041403754790620424,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1885643005371094,
      "eval_nq_n_ngrams_match_1": 22.91,
      "eval_nq_n_ngrams_match_2": 8.414,
      "eval_nq_n_ngrams_match_3": 3.848,
      "eval_nq_num_pred_words": 49.1,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.922394038401565,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.44169443578995443,
      "eval_nq_runtime": 10.5133,
      "eval_nq_samples_per_second": 47.559,
      "eval_nq_steps_per_second": 0.095,
      "eval_nq_token_set_f1": 0.4594846132417338,
      "eval_nq_token_set_f1_sem": 0.004880251654589268,
      "eval_nq_token_set_precision": 0.41670072674744696,
      "eval_nq_token_set_recall": 0.5202501300338103,
      "eval_nq_true_num_tokens": 64.0,
      "step": 128125
    },
    {
      "epoch": 24.6,
      "learning_rate": 0.001,
      "loss": 2.5607,
      "step": 128136
    },
    {
      "epoch": 24.61,
      "learning_rate": 0.001,
      "loss": 2.5523,
      "step": 128148
    },
    {
      "epoch": 24.61,
      "learning_rate": 0.001,
      "loss": 2.5671,
      "step": 128160
    },
    {
      "epoch": 24.61,
      "learning_rate": 0.001,
      "loss": 2.5498,
      "step": 128172
    },
    {
      "epoch": 24.61,
      "learning_rate": 0.001,
      "loss": 2.5572,
      "step": 128184
    },
    {
      "epoch": 24.62,
      "learning_rate": 0.001,
      "loss": 2.569,
      "step": 128196
    },
    {
      "epoch": 24.62,
      "learning_rate": 0.001,
      "loss": 2.5602,
      "step": 128208
    },
    {
      "epoch": 24.62,
      "learning_rate": 0.001,
      "loss": 2.5538,
      "step": 128220
    },
    {
      "epoch": 24.62,
      "learning_rate": 0.001,
      "loss": 2.5505,
      "step": 128232
    },
    {
      "epoch": 24.62,
      "learning_rate": 0.001,
      "loss": 2.568,
      "step": 128244
    },
    {
      "epoch": 24.63,
      "learning_rate": 0.001,
      "loss": 2.5673,
      "step": 128256
    },
    {
      "epoch": 24.63,
      "learning_rate": 0.001,
      "loss": 2.5617,
      "step": 128268
    },
    {
      "epoch": 24.63,
      "learning_rate": 0.001,
      "loss": 2.5574,
      "step": 128280
    },
    {
      "epoch": 24.63,
      "learning_rate": 0.001,
      "loss": 2.5679,
      "step": 128292
    },
    {
      "epoch": 24.64,
      "learning_rate": 0.001,
      "loss": 2.5544,
      "step": 128304
    },
    {
      "epoch": 24.64,
      "learning_rate": 0.001,
      "loss": 2.5668,
      "step": 128316
    },
    {
      "epoch": 24.64,
      "learning_rate": 0.001,
      "loss": 2.5593,
      "step": 128328
    },
    {
      "epoch": 24.64,
      "learning_rate": 0.001,
      "loss": 2.5677,
      "step": 128340
    },
    {
      "epoch": 24.65,
      "learning_rate": 0.001,
      "loss": 2.5686,
      "step": 128352
    },
    {
      "epoch": 24.65,
      "learning_rate": 0.001,
      "loss": 2.5564,
      "step": 128364
    },
    {
      "epoch": 24.65,
      "learning_rate": 0.001,
      "loss": 2.558,
      "step": 128376
    },
    {
      "epoch": 24.65,
      "learning_rate": 0.001,
      "loss": 2.5616,
      "step": 128388
    },
    {
      "epoch": 24.65,
      "learning_rate": 0.001,
      "loss": 2.5562,
      "step": 128400
    },
    {
      "epoch": 24.66,
      "learning_rate": 0.001,
      "loss": 2.5519,
      "step": 128412
    },
    {
      "epoch": 24.66,
      "learning_rate": 0.001,
      "loss": 2.5656,
      "step": 128424
    },
    {
      "epoch": 24.66,
      "learning_rate": 0.001,
      "loss": 2.5659,
      "step": 128436
    },
    {
      "epoch": 24.66,
      "learning_rate": 0.001,
      "loss": 2.5625,
      "step": 128448
    },
    {
      "epoch": 24.67,
      "learning_rate": 0.001,
      "loss": 2.5607,
      "step": 128460
    },
    {
      "epoch": 24.67,
      "learning_rate": 0.001,
      "loss": 2.5608,
      "step": 128472
    },
    {
      "epoch": 24.67,
      "learning_rate": 0.001,
      "loss": 2.5542,
      "step": 128484
    },
    {
      "epoch": 24.67,
      "learning_rate": 0.001,
      "loss": 2.5677,
      "step": 128496
    },
    {
      "epoch": 24.68,
      "learning_rate": 0.001,
      "loss": 2.5654,
      "step": 128508
    },
    {
      "epoch": 24.68,
      "learning_rate": 0.001,
      "loss": 2.5713,
      "step": 128520
    },
    {
      "epoch": 24.68,
      "learning_rate": 0.001,
      "loss": 2.551,
      "step": 128532
    },
    {
      "epoch": 24.68,
      "learning_rate": 0.001,
      "loss": 2.5571,
      "step": 128544
    },
    {
      "epoch": 24.68,
      "learning_rate": 0.001,
      "loss": 2.562,
      "step": 128556
    },
    {
      "epoch": 24.69,
      "learning_rate": 0.001,
      "loss": 2.5591,
      "step": 128568
    },
    {
      "epoch": 24.69,
      "learning_rate": 0.001,
      "loss": 2.5585,
      "step": 128580
    },
    {
      "epoch": 24.69,
      "learning_rate": 0.001,
      "loss": 2.5585,
      "step": 128592
    },
    {
      "epoch": 24.69,
      "learning_rate": 0.001,
      "loss": 2.5445,
      "step": 128604
    },
    {
      "epoch": 24.7,
      "learning_rate": 0.001,
      "loss": 2.5632,
      "step": 128616
    },
    {
      "epoch": 24.7,
      "learning_rate": 0.001,
      "loss": 2.5553,
      "step": 128628
    },
    {
      "epoch": 24.7,
      "learning_rate": 0.001,
      "loss": 2.5568,
      "step": 128640
    },
    {
      "epoch": 24.7,
      "learning_rate": 0.001,
      "loss": 2.5587,
      "step": 128652
    },
    {
      "epoch": 24.71,
      "learning_rate": 0.001,
      "loss": 2.5671,
      "step": 128664
    },
    {
      "epoch": 24.71,
      "learning_rate": 0.001,
      "loss": 2.5626,
      "step": 128676
    },
    {
      "epoch": 24.71,
      "learning_rate": 0.001,
      "loss": 2.5567,
      "step": 128688
    },
    {
      "epoch": 24.71,
      "learning_rate": 0.001,
      "loss": 2.5482,
      "step": 128700
    },
    {
      "epoch": 24.71,
      "learning_rate": 0.001,
      "loss": 2.56,
      "step": 128712
    },
    {
      "epoch": 24.72,
      "learning_rate": 0.001,
      "loss": 2.5585,
      "step": 128724
    },
    {
      "epoch": 24.72,
      "learning_rate": 0.001,
      "loss": 2.5686,
      "step": 128736
    },
    {
      "epoch": 24.72,
      "learning_rate": 0.001,
      "loss": 2.5624,
      "step": 128748
    },
    {
      "epoch": 24.72,
      "eval_ag_news_accuracy": 0.3225,
      "eval_ag_news_bleu_score": 4.8594125617269714,
      "eval_ag_news_bleu_score_sem": 0.1493270373992517,
      "eval_ag_news_emb_cos_sim": 0.8203473091125488,
      "eval_ag_news_emb_cos_sim_sem": 0.006253855407712242,
      "eval_ag_news_emb_top1_equal": 0.2421875,
      "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5523531436920166,
      "eval_ag_news_n_ngrams_match_1": 14.236,
      "eval_ag_news_n_ngrams_match_2": 3.112,
      "eval_ag_news_n_ngrams_match_3": 0.9,
      "eval_ag_news_num_pred_words": 46.576,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 34.89533468732818,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3520427300955037,
      "eval_ag_news_runtime": 10.3902,
      "eval_ag_news_samples_per_second": 48.122,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.35754788054873177,
      "eval_ag_news_token_set_f1_sem": 0.00438505286786259,
      "eval_ag_news_token_set_precision": 0.34227245238066745,
      "eval_ag_news_token_set_recall": 0.3899300240692165,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 128750
    },
    {
      "epoch": 24.72,
      "eval_anthropic_toxic_prompts_accuracy": 0.11378125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1841452691344037,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11833967806378883,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6728819608688354,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008648650535357875,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2363362312316895,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.284,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.982,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.716,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.174,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.440343258124244,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2175659295386394,
      "eval_anthropic_toxic_prompts_runtime": 9.8006,
      "eval_anthropic_toxic_prompts_samples_per_second": 51.017,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.102,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3627906442378746,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006526858455145603,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4439766026920582,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3322530444952403,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 128750
    },
    {
      "epoch": 24.72,
      "eval_arxiv_accuracy": 0.3476875,
      "eval_arxiv_bleu_score": 4.388585308371022,
      "eval_arxiv_bleu_score_sem": 0.12655224546827296,
      "eval_arxiv_emb_cos_sim": 0.7760187387466431,
      "eval_arxiv_emb_cos_sim_sem": 0.006352286079686895,
      "eval_arxiv_emb_top1_equal": 0.3984375,
      "eval_arxiv_emb_top1_equal_sem": 0.04344287990767221,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.40493106842041,
      "eval_arxiv_n_ngrams_match_1": 15.22,
      "eval_arxiv_n_ngrams_match_2": 2.974,
      "eval_arxiv_n_ngrams_match_3": 0.66,
      "eval_arxiv_num_pred_words": 40.964,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 30.112219969488624,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3626776385572391,
      "eval_arxiv_runtime": 10.2292,
      "eval_arxiv_samples_per_second": 48.88,
      "eval_arxiv_steps_per_second": 0.098,
      "eval_arxiv_token_set_f1": 0.35554001998337925,
      "eval_arxiv_token_set_f1_sem": 0.004236998849753365,
      "eval_arxiv_token_set_precision": 0.30881461791339043,
      "eval_arxiv_token_set_recall": 0.43578627201146425,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 128750
    },
    {
      "epoch": 24.72,
      "eval_python_code_alpaca_accuracy": 0.16084375,
      "eval_python_code_alpaca_bleu_score": 4.623035325589885,
      "eval_python_code_alpaca_bleu_score_sem": 0.13889948220375697,
      "eval_python_code_alpaca_emb_cos_sim": 0.7599724531173706,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.00829261108129505,
      "eval_python_code_alpaca_emb_top1_equal": 0.1015625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.026804565886848545,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8961777687072754,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.97,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.876,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.004,
      "eval_python_code_alpaca_num_pred_words": 43.95,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.10481217046481,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33946354671892975,
      "eval_python_code_alpaca_runtime": 10.4155,
      "eval_python_code_alpaca_samples_per_second": 48.005,
      "eval_python_code_alpaca_steps_per_second": 0.096,
      "eval_python_code_alpaca_token_set_f1": 0.47794335008136624,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005361574417404339,
      "eval_python_code_alpaca_token_set_precision": 0.548465843970291,
      "eval_python_code_alpaca_token_set_recall": 0.4426105243920704,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 128750
    },
    {
      "epoch": 24.72,
      "eval_wikibio_accuracy": 0.3264375,
      "eval_wikibio_bleu_score": 6.027325001403926,
      "eval_wikibio_bleu_score_sem": 0.21576941232633043,
      "eval_wikibio_emb_cos_sim": 0.7406089305877686,
      "eval_wikibio_emb_cos_sim_sem": 0.009288847993348196,
      "eval_wikibio_emb_top1_equal": 0.171875,
      "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7055041790008545,
      "eval_wikibio_n_ngrams_match_1": 10.224,
      "eval_wikibio_n_ngrams_match_2": 3.424,
      "eval_wikibio_n_ngrams_match_3": 1.244,
      "eval_wikibio_num_pred_words": 36.738,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 40.670547384537244,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36090803844712627,
      "eval_wikibio_runtime": 10.1752,
      "eval_wikibio_samples_per_second": 49.139,
      "eval_wikibio_steps_per_second": 0.098,
      "eval_wikibio_token_set_f1": 0.32459411007854344,
      "eval_wikibio_token_set_f1_sem": 0.00525416512902063,
      "eval_wikibio_token_set_precision": 0.3334788419930351,
      "eval_wikibio_token_set_recall": 0.33203200127954646,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 128750
    },
    {
      "epoch": 24.72,
      "eval_nq_accuracy": 0.52678125,
      "eval_nq_bleu_score": 12.045270072228162,
      "eval_nq_bleu_score_sem": 0.4974524232186081,
      "eval_nq_emb_cos_sim": 0.8337074518203735,
      "eval_nq_emb_cos_sim_sem": 0.007259930974714649,
      "eval_nq_emb_top1_equal": 0.3125,
      "eval_nq_emb_top1_equal_sem": 0.041130074229814934,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1854989528656006,
      "eval_nq_n_ngrams_match_1": 23.206,
      "eval_nq_n_ngrams_match_2": 8.638,
      "eval_nq_n_ngrams_match_3": 4.086,
      "eval_nq_num_pred_words": 49.276,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.895085674809298,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.446872530303096,
      "eval_nq_runtime": 10.4633,
      "eval_nq_samples_per_second": 47.786,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.46272120122428845,
      "eval_nq_token_set_f1_sem": 0.005098550388567427,
      "eval_nq_token_set_precision": 0.42145287599355963,
      "eval_nq_token_set_recall": 0.5210570214353234,
      "eval_nq_true_num_tokens": 64.0,
      "step": 128750
    },
    {
      "epoch": 24.72,
      "learning_rate": 0.001,
      "loss": 2.5584,
      "step": 128760
    },
    {
      "epoch": 24.73,
      "learning_rate": 0.001,
      "loss": 2.565,
      "step": 128772
    },
    {
      "epoch": 24.73,
      "learning_rate": 0.001,
      "loss": 2.5529,
      "step": 128784
    },
    {
      "epoch": 24.73,
      "learning_rate": 0.001,
      "loss": 2.5602,
      "step": 128796
    },
    {
      "epoch": 24.73,
      "learning_rate": 0.001,
      "loss": 2.5514,
      "step": 128808
    },
    {
      "epoch": 24.74,
      "learning_rate": 0.001,
      "loss": 2.5536,
      "step": 128820
    },
    {
      "epoch": 24.74,
      "learning_rate": 0.001,
      "loss": 2.5646,
      "step": 128832
    },
    {
      "epoch": 24.74,
      "learning_rate": 0.001,
      "loss": 2.5542,
      "step": 128844
    },
    {
      "epoch": 24.74,
      "learning_rate": 0.001,
      "loss": 2.5537,
      "step": 128856
    },
    {
      "epoch": 24.74,
      "learning_rate": 0.001,
      "loss": 2.5499,
      "step": 128868
    },
    {
      "epoch": 24.75,
      "learning_rate": 0.001,
      "loss": 2.5547,
      "step": 128880
    },
    {
      "epoch": 24.75,
      "learning_rate": 0.001,
      "loss": 2.5523,
      "step": 128892
    },
    {
      "epoch": 24.75,
      "learning_rate": 0.001,
      "loss": 2.5522,
      "step": 128904
    },
    {
      "epoch": 24.75,
      "learning_rate": 0.001,
      "loss": 2.5629,
      "step": 128916
    },
    {
      "epoch": 24.76,
      "learning_rate": 0.001,
      "loss": 2.561,
      "step": 128928
    },
    {
      "epoch": 24.76,
      "learning_rate": 0.001,
      "loss": 2.5636,
      "step": 128940
    },
    {
      "epoch": 24.76,
      "learning_rate": 0.001,
      "loss": 2.5586,
      "step": 128952
    },
    {
      "epoch": 24.76,
      "learning_rate": 0.001,
      "loss": 2.5664,
      "step": 128964
    },
    {
      "epoch": 24.76,
      "learning_rate": 0.001,
      "loss": 2.5452,
      "step": 128976
    },
    {
      "epoch": 24.77,
      "learning_rate": 0.001,
      "loss": 2.5504,
      "step": 128988
    },
    {
      "epoch": 24.77,
      "learning_rate": 0.001,
      "loss": 2.5518,
      "step": 129000
    },
    {
      "epoch": 24.77,
      "learning_rate": 0.001,
      "loss": 2.5479,
      "step": 129012
    },
    {
      "epoch": 24.77,
      "learning_rate": 0.001,
      "loss": 2.5624,
      "step": 129024
    },
    {
      "epoch": 24.78,
      "learning_rate": 0.001,
      "loss": 2.5631,
      "step": 129036
    },
    {
      "epoch": 24.78,
      "learning_rate": 0.001,
      "loss": 2.5556,
      "step": 129048
    },
    {
      "epoch": 24.78,
      "learning_rate": 0.001,
      "loss": 2.5669,
      "step": 129060
    },
    {
      "epoch": 24.78,
      "learning_rate": 0.001,
      "loss": 2.5637,
      "step": 129072
    },
    {
      "epoch": 24.79,
      "learning_rate": 0.001,
      "loss": 2.5682,
      "step": 129084
    },
    {
      "epoch": 24.79,
      "learning_rate": 0.001,
      "loss": 2.5614,
      "step": 129096
    },
    {
      "epoch": 24.79,
      "learning_rate": 0.001,
      "loss": 2.5716,
      "step": 129108
    },
    {
      "epoch": 24.79,
      "learning_rate": 0.001,
      "loss": 2.5651,
      "step": 129120
    },
    {
      "epoch": 24.79,
      "learning_rate": 0.001,
      "loss": 2.5562,
      "step": 129132
    },
    {
      "epoch": 24.8,
      "learning_rate": 0.001,
      "loss": 2.5691,
      "step": 129144
    },
    {
      "epoch": 24.8,
      "learning_rate": 0.001,
      "loss": 2.5581,
      "step": 129156
    },
    {
      "epoch": 24.8,
      "learning_rate": 0.001,
      "loss": 2.5634,
      "step": 129168
    },
    {
      "epoch": 24.8,
      "learning_rate": 0.001,
      "loss": 2.5591,
      "step": 129180
    },
    {
      "epoch": 24.81,
      "learning_rate": 0.001,
      "loss": 2.5588,
      "step": 129192
    },
    {
      "epoch": 24.81,
      "learning_rate": 0.001,
      "loss": 2.5587,
      "step": 129204
    },
    {
      "epoch": 24.81,
      "learning_rate": 0.001,
      "loss": 2.5596,
      "step": 129216
    },
    {
      "epoch": 24.81,
      "learning_rate": 0.001,
      "loss": 2.5607,
      "step": 129228
    },
    {
      "epoch": 24.82,
      "learning_rate": 0.001,
      "loss": 2.555,
      "step": 129240
    },
    {
      "epoch": 24.82,
      "learning_rate": 0.001,
      "loss": 2.5719,
      "step": 129252
    },
    {
      "epoch": 24.82,
      "learning_rate": 0.001,
      "loss": 2.5566,
      "step": 129264
    },
    {
      "epoch": 24.82,
      "learning_rate": 0.001,
      "loss": 2.563,
      "step": 129276
    },
    {
      "epoch": 24.82,
      "learning_rate": 0.001,
      "loss": 2.5532,
      "step": 129288
    },
    {
      "epoch": 24.83,
      "learning_rate": 0.001,
      "loss": 2.5645,
      "step": 129300
    },
    {
      "epoch": 24.83,
      "learning_rate": 0.001,
      "loss": 2.5639,
      "step": 129312
    },
    {
      "epoch": 24.83,
      "learning_rate": 0.001,
      "loss": 2.5668,
      "step": 129324
    },
    {
      "epoch": 24.83,
      "learning_rate": 0.001,
      "loss": 2.5592,
      "step": 129336
    },
    {
      "epoch": 24.84,
      "learning_rate": 0.001,
      "loss": 2.5652,
      "step": 129348
    },
    {
      "epoch": 24.84,
      "learning_rate": 0.001,
      "loss": 2.5657,
      "step": 129360
    },
    {
      "epoch": 24.84,
      "learning_rate": 0.001,
      "loss": 2.5548,
      "step": 129372
    },
    {
      "epoch": 24.84,
      "eval_ag_news_accuracy": 0.324625,
      "eval_ag_news_bleu_score": 4.611841310972994,
      "eval_ag_news_bleu_score_sem": 0.139095623438631,
      "eval_ag_news_emb_cos_sim": 0.8163087368011475,
      "eval_ag_news_emb_cos_sim_sem": 0.00677469299071971,
      "eval_ag_news_emb_top1_equal": 0.2578125,
      "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5461506843566895,
      "eval_ag_news_n_ngrams_match_1": 14.162,
      "eval_ag_news_n_ngrams_match_2": 2.99,
      "eval_ag_news_n_ngrams_match_3": 0.796,
      "eval_ag_news_num_pred_words": 46.91,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 34.67956762786532,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3496774758012094,
      "eval_ag_news_runtime": 10.3696,
      "eval_ag_news_samples_per_second": 48.218,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.3550150146183312,
      "eval_ag_news_token_set_f1_sem": 0.004226825967887237,
      "eval_ag_news_token_set_precision": 0.3405986662742473,
      "eval_ag_news_token_set_recall": 0.38454226864413726,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 129375
    },
    {
      "epoch": 24.84,
      "eval_anthropic_toxic_prompts_accuracy": 0.1145,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1030712903873607,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11636478023507556,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6733030080795288,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008804373920982517,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.243640661239624,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.292,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.914,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.712,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.766,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.626850801224577,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21522968185035563,
      "eval_anthropic_toxic_prompts_runtime": 10.1647,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.19,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.098,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3619918156189409,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006453628770575891,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44416642940755013,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.33056648737153715,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 129375
    },
    {
      "epoch": 24.84,
      "eval_arxiv_accuracy": 0.34715625,
      "eval_arxiv_bleu_score": 4.37437253227214,
      "eval_arxiv_bleu_score_sem": 0.12190013051498984,
      "eval_arxiv_emb_cos_sim": 0.7696950435638428,
      "eval_arxiv_emb_cos_sim_sem": 0.006782770697654626,
      "eval_arxiv_emb_top1_equal": 0.3046875,
      "eval_arxiv_emb_top1_equal_sem": 0.04084279867618665,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.39719557762146,
      "eval_arxiv_n_ngrams_match_1": 15.26,
      "eval_arxiv_n_ngrams_match_2": 2.968,
      "eval_arxiv_n_ngrams_match_3": 0.668,
      "eval_arxiv_num_pred_words": 41.218,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.880185775199994,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3615217581165149,
      "eval_arxiv_runtime": 10.3273,
      "eval_arxiv_samples_per_second": 48.415,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.3550864412908191,
      "eval_arxiv_token_set_f1_sem": 0.004074105658219401,
      "eval_arxiv_token_set_precision": 0.30747585101477637,
      "eval_arxiv_token_set_recall": 0.4341225104184517,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 129375
    },
    {
      "epoch": 24.84,
      "eval_python_code_alpaca_accuracy": 0.161,
      "eval_python_code_alpaca_bleu_score": 4.656605964320858,
      "eval_python_code_alpaca_bleu_score_sem": 0.14850754928690157,
      "eval_python_code_alpaca_emb_cos_sim": 0.7533272504806519,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007777214393012535,
      "eval_python_code_alpaca_emb_top1_equal": 0.15625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8644797801971436,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.974,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.98,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.994,
      "eval_python_code_alpaca_num_pred_words": 44.032,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.539926213814688,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3328447913721537,
      "eval_python_code_alpaca_runtime": 10.4122,
      "eval_python_code_alpaca_samples_per_second": 48.021,
      "eval_python_code_alpaca_steps_per_second": 0.096,
      "eval_python_code_alpaca_token_set_f1": 0.47994598316547554,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005217274632447154,
      "eval_python_code_alpaca_token_set_precision": 0.5455145607356641,
      "eval_python_code_alpaca_token_set_recall": 0.4496675056044883,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 129375
    },
    {
      "epoch": 24.84,
      "eval_wikibio_accuracy": 0.32353125,
      "eval_wikibio_bleu_score": 6.030243942921439,
      "eval_wikibio_bleu_score_sem": 0.20807651743859762,
      "eval_wikibio_emb_cos_sim": 0.7456989288330078,
      "eval_wikibio_emb_cos_sim_sem": 0.008595102305780276,
      "eval_wikibio_emb_top1_equal": 0.1875,
      "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.722168445587158,
      "eval_wikibio_n_ngrams_match_1": 10.16,
      "eval_wikibio_n_ngrams_match_2": 3.412,
      "eval_wikibio_n_ngrams_match_3": 1.234,
      "eval_wikibio_num_pred_words": 36.298,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 41.353970787899534,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.35719515166510757,
      "eval_wikibio_runtime": 9.8344,
      "eval_wikibio_samples_per_second": 50.842,
      "eval_wikibio_steps_per_second": 0.102,
      "eval_wikibio_token_set_f1": 0.32339913754212296,
      "eval_wikibio_token_set_f1_sem": 0.00524644653849838,
      "eval_wikibio_token_set_precision": 0.3304765342446991,
      "eval_wikibio_token_set_recall": 0.3320025909337566,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 129375
    },
    {
      "epoch": 24.84,
      "eval_nq_accuracy": 0.52859375,
      "eval_nq_bleu_score": 11.836564485734353,
      "eval_nq_bleu_score_sem": 0.48477659779615984,
      "eval_nq_emb_cos_sim": 0.834022045135498,
      "eval_nq_emb_cos_sim_sem": 0.0076715871466546464,
      "eval_nq_emb_top1_equal": 0.296875,
      "eval_nq_emb_top1_equal_sem": 0.04054163310179599,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1840269565582275,
      "eval_nq_n_ngrams_match_1": 23.274,
      "eval_nq_n_ngrams_match_2": 8.558,
      "eval_nq_n_ngrams_match_3": 3.924,
      "eval_nq_num_pred_words": 49.214,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.882001773631808,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4511581413013653,
      "eval_nq_runtime": 10.7005,
      "eval_nq_samples_per_second": 46.727,
      "eval_nq_steps_per_second": 0.093,
      "eval_nq_token_set_f1": 0.46504651157551585,
      "eval_nq_token_set_f1_sem": 0.0049702645665558485,
      "eval_nq_token_set_precision": 0.4232878176653709,
      "eval_nq_token_set_recall": 0.5234713107335722,
      "eval_nq_true_num_tokens": 64.0,
      "step": 129375
    },
    {
      "epoch": 24.84,
      "learning_rate": 0.001,
      "loss": 2.5641,
      "step": 129384
    },
    {
      "epoch": 24.85,
      "learning_rate": 0.001,
      "loss": 2.5639,
      "step": 129396
    },
    {
      "epoch": 24.85,
      "learning_rate": 0.001,
      "loss": 2.5586,
      "step": 129408
    },
    {
      "epoch": 24.85,
      "learning_rate": 0.001,
      "loss": 2.558,
      "step": 129420
    },
    {
      "epoch": 24.85,
      "learning_rate": 0.001,
      "loss": 2.5631,
      "step": 129432
    },
    {
      "epoch": 24.85,
      "learning_rate": 0.001,
      "loss": 2.5656,
      "step": 129444
    },
    {
      "epoch": 24.86,
      "learning_rate": 0.001,
      "loss": 2.5572,
      "step": 129456
    },
    {
      "epoch": 24.86,
      "learning_rate": 0.001,
      "loss": 2.5584,
      "step": 129468
    },
    {
      "epoch": 24.86,
      "learning_rate": 0.001,
      "loss": 2.5651,
      "step": 129480
    },
    {
      "epoch": 24.86,
      "learning_rate": 0.001,
      "loss": 2.5589,
      "step": 129492
    },
    {
      "epoch": 24.87,
      "learning_rate": 0.001,
      "loss": 2.5496,
      "step": 129504
    },
    {
      "epoch": 24.87,
      "learning_rate": 0.001,
      "loss": 2.5509,
      "step": 129516
    },
    {
      "epoch": 24.87,
      "learning_rate": 0.001,
      "loss": 2.5593,
      "step": 129528
    },
    {
      "epoch": 24.87,
      "learning_rate": 0.001,
      "loss": 2.5632,
      "step": 129540
    },
    {
      "epoch": 24.88,
      "learning_rate": 0.001,
      "loss": 2.5487,
      "step": 129552
    },
    {
      "epoch": 24.88,
      "learning_rate": 0.001,
      "loss": 2.5619,
      "step": 129564
    },
    {
      "epoch": 24.88,
      "learning_rate": 0.001,
      "loss": 2.5577,
      "step": 129576
    },
    {
      "epoch": 24.88,
      "learning_rate": 0.001,
      "loss": 2.5547,
      "step": 129588
    },
    {
      "epoch": 24.88,
      "learning_rate": 0.001,
      "loss": 2.5693,
      "step": 129600
    },
    {
      "epoch": 24.89,
      "learning_rate": 0.001,
      "loss": 2.5627,
      "step": 129612
    },
    {
      "epoch": 24.89,
      "learning_rate": 0.001,
      "loss": 2.5679,
      "step": 129624
    },
    {
      "epoch": 24.89,
      "learning_rate": 0.001,
      "loss": 2.554,
      "step": 129636
    },
    {
      "epoch": 24.89,
      "learning_rate": 0.001,
      "loss": 2.5694,
      "step": 129648
    },
    {
      "epoch": 24.9,
      "learning_rate": 0.001,
      "loss": 2.55,
      "step": 129660
    },
    {
      "epoch": 24.9,
      "learning_rate": 0.001,
      "loss": 2.5591,
      "step": 129672
    },
    {
      "epoch": 24.9,
      "learning_rate": 0.001,
      "loss": 2.5471,
      "step": 129684
    },
    {
      "epoch": 24.9,
      "learning_rate": 0.001,
      "loss": 2.569,
      "step": 129696
    },
    {
      "epoch": 24.91,
      "learning_rate": 0.001,
      "loss": 2.5593,
      "step": 129708
    },
    {
      "epoch": 24.91,
      "learning_rate": 0.001,
      "loss": 2.5584,
      "step": 129720
    },
    {
      "epoch": 24.91,
      "learning_rate": 0.001,
      "loss": 2.5597,
      "step": 129732
    },
    {
      "epoch": 24.91,
      "learning_rate": 0.001,
      "loss": 2.5599,
      "step": 129744
    },
    {
      "epoch": 24.91,
      "learning_rate": 0.001,
      "loss": 2.5616,
      "step": 129756
    },
    {
      "epoch": 24.92,
      "learning_rate": 0.001,
      "loss": 2.5573,
      "step": 129768
    },
    {
      "epoch": 24.92,
      "learning_rate": 0.001,
      "loss": 2.5482,
      "step": 129780
    },
    {
      "epoch": 24.92,
      "learning_rate": 0.001,
      "loss": 2.558,
      "step": 129792
    },
    {
      "epoch": 24.92,
      "learning_rate": 0.001,
      "loss": 2.5573,
      "step": 129804
    },
    {
      "epoch": 24.93,
      "learning_rate": 0.001,
      "loss": 2.5674,
      "step": 129816
    },
    {
      "epoch": 24.93,
      "learning_rate": 0.001,
      "loss": 2.5661,
      "step": 129828
    },
    {
      "epoch": 24.93,
      "learning_rate": 0.001,
      "loss": 2.5579,
      "step": 129840
    },
    {
      "epoch": 24.93,
      "learning_rate": 0.001,
      "loss": 2.5613,
      "step": 129852
    },
    {
      "epoch": 24.94,
      "learning_rate": 0.001,
      "loss": 2.5582,
      "step": 129864
    },
    {
      "epoch": 24.94,
      "learning_rate": 0.001,
      "loss": 2.5575,
      "step": 129876
    },
    {
      "epoch": 24.94,
      "learning_rate": 0.001,
      "loss": 2.563,
      "step": 129888
    },
    {
      "epoch": 24.94,
      "learning_rate": 0.001,
      "loss": 2.5615,
      "step": 129900
    },
    {
      "epoch": 24.94,
      "learning_rate": 0.001,
      "loss": 2.5595,
      "step": 129912
    },
    {
      "epoch": 24.95,
      "learning_rate": 0.001,
      "loss": 2.5586,
      "step": 129924
    },
    {
      "epoch": 24.95,
      "learning_rate": 0.001,
      "loss": 2.5616,
      "step": 129936
    },
    {
      "epoch": 24.95,
      "learning_rate": 0.001,
      "loss": 2.558,
      "step": 129948
    },
    {
      "epoch": 24.95,
      "learning_rate": 0.001,
      "loss": 2.5461,
      "step": 129960
    },
    {
      "epoch": 24.96,
      "learning_rate": 0.001,
      "loss": 2.5536,
      "step": 129972
    },
    {
      "epoch": 24.96,
      "learning_rate": 0.001,
      "loss": 2.5641,
      "step": 129984
    },
    {
      "epoch": 24.96,
      "learning_rate": 0.001,
      "loss": 2.5567,
      "step": 129996
    },
    {
      "epoch": 24.96,
      "eval_ag_news_accuracy": 0.32228125,
      "eval_ag_news_bleu_score": 4.8569670206542535,
      "eval_ag_news_bleu_score_sem": 0.15385616976491615,
      "eval_ag_news_emb_cos_sim": 0.8157208561897278,
      "eval_ag_news_emb_cos_sim_sem": 0.006540289341459325,
      "eval_ag_news_emb_top1_equal": 0.2578125,
      "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5547828674316406,
      "eval_ag_news_n_ngrams_match_1": 14.062,
      "eval_ag_news_n_ngrams_match_2": 3.154,
      "eval_ag_news_n_ngrams_match_3": 0.926,
      "eval_ag_news_num_pred_words": 46.91,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 34.980223797200544,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.346820653733372,
      "eval_ag_news_runtime": 10.3574,
      "eval_ag_news_samples_per_second": 48.275,
      "eval_ag_news_steps_per_second": 0.097,
      "eval_ag_news_token_set_f1": 0.3499038006623323,
      "eval_ag_news_token_set_f1_sem": 0.004577170389225165,
      "eval_ag_news_token_set_precision": 0.33433701648938513,
      "eval_ag_news_token_set_recall": 0.3822383014388413,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 130000
    },
    {
      "epoch": 24.96,
      "eval_anthropic_toxic_prompts_accuracy": 0.1155625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.075671100653126,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11603741744058574,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6625092029571533,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009459481485178075,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.23411226272583,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.152,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.87,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.736,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.548,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.383827603751666,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.20985994770471703,
      "eval_anthropic_toxic_prompts_runtime": 9.7717,
      "eval_anthropic_toxic_prompts_samples_per_second": 51.168,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.102,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3590743707530839,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006544998491800654,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4327766721258468,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3336060762924124,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 130000
    },
    {
      "epoch": 24.96,
      "eval_arxiv_accuracy": 0.34746875,
      "eval_arxiv_bleu_score": 4.20879397179608,
      "eval_arxiv_bleu_score_sem": 0.11361532866047358,
      "eval_arxiv_emb_cos_sim": 0.7663363218307495,
      "eval_arxiv_emb_cos_sim_sem": 0.007776095525893472,
      "eval_arxiv_emb_top1_equal": 0.25,
      "eval_arxiv_emb_top1_equal_sem": 0.03842366440207048,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4092836380004883,
      "eval_arxiv_n_ngrams_match_1": 15.126,
      "eval_arxiv_n_ngrams_match_2": 2.9,
      "eval_arxiv_n_ngrams_match_3": 0.598,
      "eval_arxiv_num_pred_words": 41.1,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 30.243571152328705,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.35988053960868455,
      "eval_arxiv_runtime": 11.026,
      "eval_arxiv_samples_per_second": 45.347,
      "eval_arxiv_steps_per_second": 0.091,
      "eval_arxiv_token_set_f1": 0.35151394019497023,
      "eval_arxiv_token_set_f1_sem": 0.00427413125255946,
      "eval_arxiv_token_set_precision": 0.30549235783273615,
      "eval_arxiv_token_set_recall": 0.43096732321731296,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 130000
    },
    {
      "epoch": 24.96,
      "eval_python_code_alpaca_accuracy": 0.16109375,
      "eval_python_code_alpaca_bleu_score": 4.513227663224373,
      "eval_python_code_alpaca_bleu_score_sem": 0.13573887174341479,
      "eval_python_code_alpaca_emb_cos_sim": 0.7477669715881348,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009026385738236192,
      "eval_python_code_alpaca_emb_top1_equal": 0.1484375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8647093772888184,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.784,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.868,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.938,
      "eval_python_code_alpaca_num_pred_words": 43.394,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.54395379220402,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.32830788366778396,
      "eval_python_code_alpaca_runtime": 9.877,
      "eval_python_code_alpaca_samples_per_second": 50.623,
      "eval_python_code_alpaca_steps_per_second": 0.101,
      "eval_python_code_alpaca_token_set_f1": 0.4799839217138252,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005614848629865109,
      "eval_python_code_alpaca_token_set_precision": 0.5326985761324451,
      "eval_python_code_alpaca_token_set_recall": 0.4626581472235106,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 130000
    },
    {
      "epoch": 24.96,
      "eval_wikibio_accuracy": 0.3193125,
      "eval_wikibio_bleu_score": 5.905620036352893,
      "eval_wikibio_bleu_score_sem": 0.20810696312487797,
      "eval_wikibio_emb_cos_sim": 0.7314857244491577,
      "eval_wikibio_emb_cos_sim_sem": 0.011306030407394,
      "eval_wikibio_emb_top1_equal": 0.203125,
      "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7453036308288574,
      "eval_wikibio_n_ngrams_match_1": 9.974,
      "eval_wikibio_n_ngrams_match_2": 3.396,
      "eval_wikibio_n_ngrams_match_3": 1.258,
      "eval_wikibio_num_pred_words": 36.23,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 42.32185548823443,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.34956803516986734,
      "eval_wikibio_runtime": 9.7783,
      "eval_wikibio_samples_per_second": 51.133,
      "eval_wikibio_steps_per_second": 0.102,
      "eval_wikibio_token_set_f1": 0.31749773947651205,
      "eval_wikibio_token_set_f1_sem": 0.00559183095783198,
      "eval_wikibio_token_set_precision": 0.32478569732171025,
      "eval_wikibio_token_set_recall": 0.32668020057657265,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 130000
    },
    {
      "epoch": 24.96,
      "eval_nq_accuracy": 0.52759375,
      "eval_nq_bleu_score": 11.705387817943874,
      "eval_nq_bleu_score_sem": 0.4739122759241288,
      "eval_nq_emb_cos_sim": 0.8311482667922974,
      "eval_nq_emb_cos_sim_sem": 0.007096436763895811,
      "eval_nq_emb_top1_equal": 0.2578125,
      "eval_nq_emb_top1_equal_sem": 0.038815656435002115,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1851646900177,
      "eval_nq_n_ngrams_match_1": 23.072,
      "eval_nq_n_ngrams_match_2": 8.552,
      "eval_nq_n_ngrams_match_3": 3.956,
      "eval_nq_num_pred_words": 49.61,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.892112875015261,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4450731379087566,
      "eval_nq_runtime": 10.4336,
      "eval_nq_samples_per_second": 47.922,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.461650529745682,
      "eval_nq_token_set_f1_sem": 0.004983275319338765,
      "eval_nq_token_set_precision": 0.4196844084014667,
      "eval_nq_token_set_recall": 0.5218515770661769,
      "eval_nq_true_num_tokens": 64.0,
      "step": 130000
    },
    {
      "epoch": 24.96,
      "learning_rate": 0.001,
      "loss": 2.5577,
      "step": 130008
    },
    {
      "epoch": 24.97,
      "learning_rate": 0.001,
      "loss": 2.5603,
      "step": 130020
    },
    {
      "epoch": 24.97,
      "learning_rate": 0.001,
      "loss": 2.5549,
      "step": 130032
    },
    {
      "epoch": 24.97,
      "learning_rate": 0.001,
      "loss": 2.5665,
      "step": 130044
    },
    {
      "epoch": 24.97,
      "learning_rate": 0.001,
      "loss": 2.5489,
      "step": 130056
    },
    {
      "epoch": 24.97,
      "learning_rate": 0.001,
      "loss": 2.5564,
      "step": 130068
    },
    {
      "epoch": 24.98,
      "learning_rate": 0.001,
      "loss": 2.5632,
      "step": 130080
    },
    {
      "epoch": 24.98,
      "learning_rate": 0.001,
      "loss": 2.5452,
      "step": 130092
    },
    {
      "epoch": 24.98,
      "learning_rate": 0.001,
      "loss": 2.567,
      "step": 130104
    },
    {
      "epoch": 24.98,
      "learning_rate": 0.001,
      "loss": 2.5579,
      "step": 130116
    },
    {
      "epoch": 24.99,
      "learning_rate": 0.001,
      "loss": 2.5676,
      "step": 130128
    },
    {
      "epoch": 24.99,
      "learning_rate": 0.001,
      "loss": 2.5604,
      "step": 130140
    },
    {
      "epoch": 24.99,
      "learning_rate": 0.001,
      "loss": 2.5604,
      "step": 130152
    },
    {
      "epoch": 24.99,
      "learning_rate": 0.001,
      "loss": 2.5575,
      "step": 130164
    },
    {
      "epoch": 25.0,
      "learning_rate": 0.001,
      "loss": 2.555,
      "step": 130176
    },
    {
      "epoch": 25.0,
      "learning_rate": 0.001,
      "loss": 2.5601,
      "step": 130188
    },
    {
      "epoch": 25.0,
      "learning_rate": 0.001,
      "loss": 2.5628,
      "step": 130200
    },
    {
      "epoch": 25.0,
      "learning_rate": 0.001,
      "loss": 2.5459,
      "step": 130212
    },
    {
      "epoch": 25.0,
      "learning_rate": 0.001,
      "loss": 2.5412,
      "step": 130224
    },
    {
      "epoch": 25.01,
      "learning_rate": 0.001,
      "loss": 2.5395,
      "step": 130236
    },
    {
      "epoch": 25.01,
      "learning_rate": 0.001,
      "loss": 2.536,
      "step": 130248
    },
    {
      "epoch": 25.01,
      "learning_rate": 0.001,
      "loss": 2.5427,
      "step": 130260
    },
    {
      "epoch": 25.01,
      "learning_rate": 0.001,
      "loss": 2.5468,
      "step": 130272
    },
    {
      "epoch": 25.02,
      "learning_rate": 0.001,
      "loss": 2.5456,
      "step": 130284
    },
    {
      "epoch": 25.02,
      "learning_rate": 0.001,
      "loss": 2.5434,
      "step": 130296
    },
    {
      "epoch": 25.02,
      "learning_rate": 0.001,
      "loss": 2.5341,
      "step": 130308
    },
    {
      "epoch": 25.02,
      "learning_rate": 0.001,
      "loss": 2.5488,
      "step": 130320
    },
    {
      "epoch": 25.03,
      "learning_rate": 0.001,
      "loss": 2.5425,
      "step": 130332
    },
    {
      "epoch": 25.03,
      "learning_rate": 0.001,
      "loss": 2.5423,
      "step": 130344
    },
    {
      "epoch": 25.03,
      "learning_rate": 0.001,
      "loss": 2.5374,
      "step": 130356
    },
    {
      "epoch": 25.03,
      "learning_rate": 0.001,
      "loss": 2.5421,
      "step": 130368
    },
    {
      "epoch": 25.03,
      "learning_rate": 0.001,
      "loss": 2.5467,
      "step": 130380
    },
    {
      "epoch": 25.04,
      "learning_rate": 0.001,
      "loss": 2.5406,
      "step": 130392
    },
    {
      "epoch": 25.04,
      "learning_rate": 0.001,
      "loss": 2.5408,
      "step": 130404
    },
    {
      "epoch": 25.04,
      "learning_rate": 0.001,
      "loss": 2.5479,
      "step": 130416
    },
    {
      "epoch": 25.04,
      "learning_rate": 0.001,
      "loss": 2.5436,
      "step": 130428
    },
    {
      "epoch": 25.05,
      "learning_rate": 0.001,
      "loss": 2.5413,
      "step": 130440
    },
    {
      "epoch": 25.05,
      "learning_rate": 0.001,
      "loss": 2.5492,
      "step": 130452
    },
    {
      "epoch": 25.05,
      "learning_rate": 0.001,
      "loss": 2.5499,
      "step": 130464
    },
    {
      "epoch": 25.05,
      "learning_rate": 0.001,
      "loss": 2.5383,
      "step": 130476
    },
    {
      "epoch": 25.06,
      "learning_rate": 0.001,
      "loss": 2.553,
      "step": 130488
    },
    {
      "epoch": 25.06,
      "learning_rate": 0.001,
      "loss": 2.5459,
      "step": 130500
    },
    {
      "epoch": 25.06,
      "learning_rate": 0.001,
      "loss": 2.5339,
      "step": 130512
    },
    {
      "epoch": 25.06,
      "learning_rate": 0.001,
      "loss": 2.5441,
      "step": 130524
    },
    {
      "epoch": 25.06,
      "learning_rate": 0.001,
      "loss": 2.5518,
      "step": 130536
    },
    {
      "epoch": 25.07,
      "learning_rate": 0.001,
      "loss": 2.5448,
      "step": 130548
    },
    {
      "epoch": 25.07,
      "learning_rate": 0.001,
      "loss": 2.5415,
      "step": 130560
    },
    {
      "epoch": 25.07,
      "learning_rate": 0.001,
      "loss": 2.5471,
      "step": 130572
    },
    {
      "epoch": 25.07,
      "learning_rate": 0.001,
      "loss": 2.5495,
      "step": 130584
    },
    {
      "epoch": 25.08,
      "learning_rate": 0.001,
      "loss": 2.5428,
      "step": 130596
    },
    {
      "epoch": 25.08,
      "learning_rate": 0.001,
      "loss": 2.5415,
      "step": 130608
    },
    {
      "epoch": 25.08,
      "learning_rate": 0.001,
      "loss": 2.5393,
      "step": 130620
    },
    {
      "epoch": 25.08,
      "eval_ag_news_accuracy": 0.3236875,
      "eval_ag_news_bleu_score": 4.827984117861617,
      "eval_ag_news_bleu_score_sem": 0.15220740571278935,
      "eval_ag_news_emb_cos_sim": 0.8090826272964478,
      "eval_ag_news_emb_cos_sim_sem": 0.007669002359460815,
      "eval_ag_news_emb_top1_equal": 0.28125,
      "eval_ag_news_emb_top1_equal_sem": 0.039896367485272234,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5572659969329834,
      "eval_ag_news_n_ngrams_match_1": 14.072,
      "eval_ag_news_n_ngrams_match_2": 3.15,
      "eval_ag_news_n_ngrams_match_3": 0.926,
      "eval_ag_news_num_pred_words": 46.744,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 35.06719215503571,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.34900649881407964,
      "eval_ag_news_runtime": 10.6427,
      "eval_ag_news_samples_per_second": 46.981,
      "eval_ag_news_steps_per_second": 0.094,
      "eval_ag_news_token_set_f1": 0.35075826680282174,
      "eval_ag_news_token_set_f1_sem": 0.004355908259580651,
      "eval_ag_news_token_set_precision": 0.3363891934917141,
      "eval_ag_news_token_set_recall": 0.38339458619399286,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 130625
    },
    {
      "epoch": 25.08,
      "eval_anthropic_toxic_prompts_accuracy": 0.11515625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1666231315005953,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1254007163647972,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6742832660675049,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009745622366619106,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1328125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030114394778901498,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2476534843444824,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.272,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.916,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.71,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.444,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.729893427868554,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21501414096227137,
      "eval_anthropic_toxic_prompts_runtime": 9.6679,
      "eval_anthropic_toxic_prompts_samples_per_second": 51.717,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.103,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3578723173352975,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006666168942377207,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.43911005805304826,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32638078974927826,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 130625
    },
    {
      "epoch": 25.08,
      "eval_arxiv_accuracy": 0.34775,
      "eval_arxiv_bleu_score": 4.346147228921791,
      "eval_arxiv_bleu_score_sem": 0.12009390396867743,
      "eval_arxiv_emb_cos_sim": 0.7748437523841858,
      "eval_arxiv_emb_cos_sim_sem": 0.00682109140257414,
      "eval_arxiv_emb_top1_equal": 0.3125,
      "eval_arxiv_emb_top1_equal_sem": 0.041130074229814934,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4118895530700684,
      "eval_arxiv_n_ngrams_match_1": 15.442,
      "eval_arxiv_n_ngrams_match_2": 3.056,
      "eval_arxiv_n_ngrams_match_3": 0.65,
      "eval_arxiv_num_pred_words": 41.424,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 30.322486108331084,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.365002423676854,
      "eval_arxiv_runtime": 10.2155,
      "eval_arxiv_samples_per_second": 48.945,
      "eval_arxiv_steps_per_second": 0.098,
      "eval_arxiv_token_set_f1": 0.35929546926595846,
      "eval_arxiv_token_set_f1_sem": 0.004223968695648756,
      "eval_arxiv_token_set_precision": 0.31240521488793094,
      "eval_arxiv_token_set_recall": 0.4388999656542089,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 130625
    },
    {
      "epoch": 25.08,
      "eval_python_code_alpaca_accuracy": 0.16159375,
      "eval_python_code_alpaca_bleu_score": 4.700190184608433,
      "eval_python_code_alpaca_bleu_score_sem": 0.14783404668100003,
      "eval_python_code_alpaca_emb_cos_sim": 0.7699594497680664,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008064217967759024,
      "eval_python_code_alpaca_emb_top1_equal": 0.1484375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.882859468460083,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.07,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.972,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.042,
      "eval_python_code_alpaca_num_pred_words": 44.262,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.865285431112,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33557557881595146,
      "eval_python_code_alpaca_runtime": 10.2308,
      "eval_python_code_alpaca_samples_per_second": 48.872,
      "eval_python_code_alpaca_steps_per_second": 0.098,
      "eval_python_code_alpaca_token_set_f1": 0.48789019025988795,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005465768917660974,
      "eval_python_code_alpaca_token_set_precision": 0.5520485122298064,
      "eval_python_code_alpaca_token_set_recall": 0.4612636806224925,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 130625
    },
    {
      "epoch": 25.08,
      "eval_wikibio_accuracy": 0.3235,
      "eval_wikibio_bleu_score": 5.884867750372959,
      "eval_wikibio_bleu_score_sem": 0.204063171310663,
      "eval_wikibio_emb_cos_sim": 0.7450792789459229,
      "eval_wikibio_emb_cos_sim_sem": 0.009133146339692318,
      "eval_wikibio_emb_top1_equal": 0.1484375,
      "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7214627265930176,
      "eval_wikibio_n_ngrams_match_1": 9.944,
      "eval_wikibio_n_ngrams_match_2": 3.312,
      "eval_wikibio_n_ngrams_match_3": 1.164,
      "eval_wikibio_num_pred_words": 35.418,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 41.32479680076062,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3543727444295229,
      "eval_wikibio_runtime": 11.0114,
      "eval_wikibio_samples_per_second": 45.408,
      "eval_wikibio_steps_per_second": 0.091,
      "eval_wikibio_token_set_f1": 0.3176341096998218,
      "eval_wikibio_token_set_f1_sem": 0.0055624397332888,
      "eval_wikibio_token_set_precision": 0.3237973147406713,
      "eval_wikibio_token_set_recall": 0.32794535105596967,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 130625
    },
    {
      "epoch": 25.08,
      "eval_nq_accuracy": 0.528625,
      "eval_nq_bleu_score": 11.67742032842143,
      "eval_nq_bleu_score_sem": 0.4638451677287935,
      "eval_nq_emb_cos_sim": 0.8356366157531738,
      "eval_nq_emb_cos_sim_sem": 0.006841456891483506,
      "eval_nq_emb_top1_equal": 0.3125,
      "eval_nq_emb_top1_equal_sem": 0.041130074229814934,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1816728115081787,
      "eval_nq_n_ngrams_match_1": 23.31,
      "eval_nq_n_ngrams_match_2": 8.57,
      "eval_nq_n_ngrams_match_3": 3.844,
      "eval_nq_num_pred_words": 49.382,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.86111684584193,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4483338264516282,
      "eval_nq_runtime": 10.3559,
      "eval_nq_samples_per_second": 48.282,
      "eval_nq_steps_per_second": 0.097,
      "eval_nq_token_set_f1": 0.4641876513952255,
      "eval_nq_token_set_f1_sem": 0.00501728527338106,
      "eval_nq_token_set_precision": 0.42385992188355665,
      "eval_nq_token_set_recall": 0.5197076134214403,
      "eval_nq_true_num_tokens": 64.0,
      "step": 130625
    },
    {
      "epoch": 25.08,
      "learning_rate": 0.001,
      "loss": 2.5493,
      "step": 130632
    },
    {
      "epoch": 25.09,
      "learning_rate": 0.001,
      "loss": 2.5411,
      "step": 130644
    },
    {
      "epoch": 25.09,
      "learning_rate": 0.001,
      "loss": 2.5431,
      "step": 130656
    },
    {
      "epoch": 25.09,
      "learning_rate": 0.001,
      "loss": 2.548,
      "step": 130668
    },
    {
      "epoch": 25.09,
      "learning_rate": 0.001,
      "loss": 2.5458,
      "step": 130680
    },
    {
      "epoch": 25.09,
      "learning_rate": 0.001,
      "loss": 2.5407,
      "step": 130692
    },
    {
      "epoch": 25.1,
      "learning_rate": 0.001,
      "loss": 2.5408,
      "step": 130704
    },
    {
      "epoch": 25.1,
      "learning_rate": 0.001,
      "loss": 2.5428,
      "step": 130716
    },
    {
      "epoch": 25.1,
      "learning_rate": 0.001,
      "loss": 2.5384,
      "step": 130728
    },
    {
      "epoch": 25.1,
      "learning_rate": 0.001,
      "loss": 2.5457,
      "step": 130740
    },
    {
      "epoch": 25.11,
      "learning_rate": 0.001,
      "loss": 2.5332,
      "step": 130752
    },
    {
      "epoch": 25.11,
      "learning_rate": 0.001,
      "loss": 2.5523,
      "step": 130764
    },
    {
      "epoch": 25.11,
      "learning_rate": 0.001,
      "loss": 2.5439,
      "step": 130776
    },
    {
      "epoch": 25.11,
      "learning_rate": 0.001,
      "loss": 2.5418,
      "step": 130788
    },
    {
      "epoch": 25.12,
      "learning_rate": 0.001,
      "loss": 2.5474,
      "step": 130800
    },
    {
      "epoch": 25.12,
      "learning_rate": 0.001,
      "loss": 2.5523,
      "step": 130812
    },
    {
      "epoch": 25.12,
      "learning_rate": 0.001,
      "loss": 2.5497,
      "step": 130824
    },
    {
      "epoch": 25.12,
      "learning_rate": 0.001,
      "loss": 2.5476,
      "step": 130836
    },
    {
      "epoch": 25.12,
      "learning_rate": 0.001,
      "loss": 2.5469,
      "step": 130848
    },
    {
      "epoch": 25.13,
      "learning_rate": 0.001,
      "loss": 2.54,
      "step": 130860
    },
    {
      "epoch": 25.13,
      "learning_rate": 0.001,
      "loss": 2.5478,
      "step": 130872
    },
    {
      "epoch": 25.13,
      "learning_rate": 0.001,
      "loss": 2.5469,
      "step": 130884
    },
    {
      "epoch": 25.13,
      "learning_rate": 0.001,
      "loss": 2.5347,
      "step": 130896
    },
    {
      "epoch": 25.14,
      "learning_rate": 0.001,
      "loss": 2.565,
      "step": 130908
    },
    {
      "epoch": 25.14,
      "learning_rate": 0.001,
      "loss": 2.5384,
      "step": 130920
    },
    {
      "epoch": 25.14,
      "learning_rate": 0.001,
      "loss": 2.5416,
      "step": 130932
    },
    {
      "epoch": 25.14,
      "learning_rate": 0.001,
      "loss": 2.5386,
      "step": 130944
    },
    {
      "epoch": 25.15,
      "learning_rate": 0.001,
      "loss": 2.544,
      "step": 130956
    },
    {
      "epoch": 25.15,
      "learning_rate": 0.001,
      "loss": 2.545,
      "step": 130968
    },
    {
      "epoch": 25.15,
      "learning_rate": 0.001,
      "loss": 2.5456,
      "step": 130980
    },
    {
      "epoch": 25.15,
      "learning_rate": 0.001,
      "loss": 2.5493,
      "step": 130992
    },
    {
      "epoch": 25.15,
      "learning_rate": 0.001,
      "loss": 2.5358,
      "step": 131004
    },
    {
      "epoch": 25.16,
      "learning_rate": 0.001,
      "loss": 2.5487,
      "step": 131016
    },
    {
      "epoch": 25.16,
      "learning_rate": 0.001,
      "loss": 2.5443,
      "step": 131028
    },
    {
      "epoch": 25.16,
      "learning_rate": 0.001,
      "loss": 2.5406,
      "step": 131040
    },
    {
      "epoch": 25.16,
      "learning_rate": 0.001,
      "loss": 2.546,
      "step": 131052
    },
    {
      "epoch": 25.17,
      "learning_rate": 0.001,
      "loss": 2.5387,
      "step": 131064
    },
    {
      "epoch": 25.17,
      "learning_rate": 0.001,
      "loss": 2.551,
      "step": 131076
    },
    {
      "epoch": 25.17,
      "learning_rate": 0.001,
      "loss": 2.5438,
      "step": 131088
    },
    {
      "epoch": 25.17,
      "learning_rate": 0.001,
      "loss": 2.5598,
      "step": 131100
    },
    {
      "epoch": 25.18,
      "learning_rate": 0.001,
      "loss": 2.5459,
      "step": 131112
    },
    {
      "epoch": 25.18,
      "learning_rate": 0.001,
      "loss": 2.5462,
      "step": 131124
    },
    {
      "epoch": 25.18,
      "learning_rate": 0.001,
      "loss": 2.5589,
      "step": 131136
    },
    {
      "epoch": 25.18,
      "learning_rate": 0.001,
      "loss": 2.5446,
      "step": 131148
    },
    {
      "epoch": 25.18,
      "learning_rate": 0.001,
      "loss": 2.5502,
      "step": 131160
    },
    {
      "epoch": 25.19,
      "learning_rate": 0.001,
      "loss": 2.5457,
      "step": 131172
    },
    {
      "epoch": 25.19,
      "learning_rate": 0.001,
      "loss": 2.5419,
      "step": 131184
    },
    {
      "epoch": 25.19,
      "learning_rate": 0.001,
      "loss": 2.5449,
      "step": 131196
    },
    {
      "epoch": 25.19,
      "learning_rate": 0.001,
      "loss": 2.5444,
      "step": 131208
    },
    {
      "epoch": 25.2,
      "learning_rate": 0.001,
      "loss": 2.5546,
      "step": 131220
    },
    {
      "epoch": 25.2,
      "learning_rate": 0.001,
      "loss": 2.5532,
      "step": 131232
    },
    {
      "epoch": 25.2,
      "learning_rate": 0.001,
      "loss": 2.5495,
      "step": 131244
    },
    {
      "epoch": 25.2,
      "eval_ag_news_accuracy": 0.32171875,
      "eval_ag_news_bleu_score": 4.805631448252006,
      "eval_ag_news_bleu_score_sem": 0.15731309758821782,
      "eval_ag_news_emb_cos_sim": 0.8115901947021484,
      "eval_ag_news_emb_cos_sim_sem": 0.007044800947569394,
      "eval_ag_news_emb_top1_equal": 0.28125,
      "eval_ag_news_emb_top1_equal_sem": 0.039896367485272234,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5540506839752197,
      "eval_ag_news_n_ngrams_match_1": 14.088,
      "eval_ag_news_n_ngrams_match_2": 3.062,
      "eval_ag_news_n_ngrams_match_3": 0.91,
      "eval_ag_news_num_pred_words": 46.798,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 34.95462123006617,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3487181900166695,
      "eval_ag_news_runtime": 10.2804,
      "eval_ag_news_samples_per_second": 48.636,
      "eval_ag_news_steps_per_second": 0.097,
      "eval_ag_news_token_set_f1": 0.3539297425194083,
      "eval_ag_news_token_set_f1_sem": 0.004664780213336639,
      "eval_ag_news_token_set_precision": 0.3368518935516282,
      "eval_ag_news_token_set_recall": 0.39110883270092806,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 131250
    },
    {
      "epoch": 25.2,
      "eval_anthropic_toxic_prompts_accuracy": 0.11459375,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1142799442175075,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1180712984255266,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6735135316848755,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.0087461631962884,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2482686042785645,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.348,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.928,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.684,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.684,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.74572526695856,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21633015064721026,
      "eval_anthropic_toxic_prompts_runtime": 10.6216,
      "eval_anthropic_toxic_prompts_samples_per_second": 47.074,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.094,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3698789348153006,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006514893728509565,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44766389158759734,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.34046532730912593,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 131250
    },
    {
      "epoch": 25.2,
      "eval_arxiv_accuracy": 0.34784375,
      "eval_arxiv_bleu_score": 4.252698195553729,
      "eval_arxiv_bleu_score_sem": 0.125878825147803,
      "eval_arxiv_emb_cos_sim": 0.7599482536315918,
      "eval_arxiv_emb_cos_sim_sem": 0.008686516186022375,
      "eval_arxiv_emb_top1_equal": 0.296875,
      "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.402564764022827,
      "eval_arxiv_n_ngrams_match_1": 14.614,
      "eval_arxiv_n_ngrams_match_2": 2.878,
      "eval_arxiv_n_ngrams_match_3": 0.644,
      "eval_arxiv_num_pred_words": 39.636,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 30.04104952962554,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.35258337465492606,
      "eval_arxiv_runtime": 10.1102,
      "eval_arxiv_samples_per_second": 49.455,
      "eval_arxiv_steps_per_second": 0.099,
      "eval_arxiv_token_set_f1": 0.3471174740892369,
      "eval_arxiv_token_set_f1_sem": 0.004402086723114342,
      "eval_arxiv_token_set_precision": 0.29591355807088987,
      "eval_arxiv_token_set_recall": 0.4433432911664083,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 131250
    },
    {
      "epoch": 25.2,
      "eval_python_code_alpaca_accuracy": 0.16275,
      "eval_python_code_alpaca_bleu_score": 4.569554837567205,
      "eval_python_code_alpaca_bleu_score_sem": 0.13904510355918395,
      "eval_python_code_alpaca_emb_cos_sim": 0.7596986293792725,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008280630181061321,
      "eval_python_code_alpaca_emb_top1_equal": 0.1328125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8721566200256348,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.79,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.822,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.936,
      "eval_python_code_alpaca_num_pred_words": 43.292,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.675095590953486,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3358129209246936,
      "eval_python_code_alpaca_runtime": 9.8984,
      "eval_python_code_alpaca_samples_per_second": 50.513,
      "eval_python_code_alpaca_steps_per_second": 0.101,
      "eval_python_code_alpaca_token_set_f1": 0.4789527117372141,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005252355757502807,
      "eval_python_code_alpaca_token_set_precision": 0.5335525771130952,
      "eval_python_code_alpaca_token_set_recall": 0.4589934698895055,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 131250
    },
    {
      "epoch": 25.2,
      "eval_wikibio_accuracy": 0.32084375,
      "eval_wikibio_bleu_score": 5.760687101937728,
      "eval_wikibio_bleu_score_sem": 0.19543921355987715,
      "eval_wikibio_emb_cos_sim": 0.7488494515419006,
      "eval_wikibio_emb_cos_sim_sem": 0.009071617867766984,
      "eval_wikibio_emb_top1_equal": 0.171875,
      "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.730569362640381,
      "eval_wikibio_n_ngrams_match_1": 10.176,
      "eval_wikibio_n_ngrams_match_2": 3.314,
      "eval_wikibio_n_ngrams_match_3": 1.156,
      "eval_wikibio_num_pred_words": 36.604,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 41.70284544802238,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3533984380167211,
      "eval_wikibio_runtime": 11.2338,
      "eval_wikibio_samples_per_second": 44.509,
      "eval_wikibio_steps_per_second": 0.089,
      "eval_wikibio_token_set_f1": 0.3214486717882299,
      "eval_wikibio_token_set_f1_sem": 0.0053460076038565115,
      "eval_wikibio_token_set_precision": 0.3303092102677939,
      "eval_wikibio_token_set_recall": 0.32763994857562584,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 131250
    },
    {
      "epoch": 25.2,
      "eval_nq_accuracy": 0.52728125,
      "eval_nq_bleu_score": 11.561191462132927,
      "eval_nq_bleu_score_sem": 0.4662156399093546,
      "eval_nq_emb_cos_sim": 0.8296736478805542,
      "eval_nq_emb_cos_sim_sem": 0.008866525376012402,
      "eval_nq_emb_top1_equal": 0.3359375,
      "eval_nq_emb_top1_equal_sem": 0.04191137143408563,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1852967739105225,
      "eval_nq_n_ngrams_match_1": 23.062,
      "eval_nq_n_ngrams_match_2": 8.428,
      "eval_nq_n_ngrams_match_3": 3.824,
      "eval_nq_num_pred_words": 49.076,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.893287457469212,
      "eval_nq_pred_num_tokens": 62.9921875,
      "eval_nq_rouge_score": 0.44907523634455804,
      "eval_nq_runtime": 10.4274,
      "eval_nq_samples_per_second": 47.951,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.46337964098634604,
      "eval_nq_token_set_f1_sem": 0.004823173527116388,
      "eval_nq_token_set_precision": 0.4205088953532891,
      "eval_nq_token_set_recall": 0.5258761476364512,
      "eval_nq_true_num_tokens": 64.0,
      "step": 131250
    },
    {
      "epoch": 25.2,
      "learning_rate": 0.001,
      "loss": 2.5443,
      "step": 131256
    },
    {
      "epoch": 25.21,
      "learning_rate": 0.001,
      "loss": 2.5586,
      "step": 131268
    },
    {
      "epoch": 25.21,
      "learning_rate": 0.001,
      "loss": 2.5533,
      "step": 131280
    },
    {
      "epoch": 25.21,
      "learning_rate": 0.001,
      "loss": 2.5457,
      "step": 131292
    },
    {
      "epoch": 25.21,
      "learning_rate": 0.001,
      "loss": 2.5498,
      "step": 131304
    },
    {
      "epoch": 25.21,
      "learning_rate": 0.001,
      "loss": 2.5542,
      "step": 131316
    },
    {
      "epoch": 25.22,
      "learning_rate": 0.001,
      "loss": 2.5348,
      "step": 131328
    },
    {
      "epoch": 25.22,
      "learning_rate": 0.001,
      "loss": 2.5387,
      "step": 131340
    },
    {
      "epoch": 25.22,
      "learning_rate": 0.001,
      "loss": 2.5455,
      "step": 131352
    },
    {
      "epoch": 25.22,
      "learning_rate": 0.001,
      "loss": 2.5472,
      "step": 131364
    },
    {
      "epoch": 25.23,
      "learning_rate": 0.001,
      "loss": 2.5423,
      "step": 131376
    },
    {
      "epoch": 25.23,
      "learning_rate": 0.001,
      "loss": 2.5479,
      "step": 131388
    },
    {
      "epoch": 25.23,
      "learning_rate": 0.001,
      "loss": 2.5509,
      "step": 131400
    },
    {
      "epoch": 25.23,
      "learning_rate": 0.001,
      "loss": 2.5557,
      "step": 131412
    },
    {
      "epoch": 25.24,
      "learning_rate": 0.001,
      "loss": 2.549,
      "step": 131424
    },
    {
      "epoch": 25.24,
      "learning_rate": 0.001,
      "loss": 2.5403,
      "step": 131436
    },
    {
      "epoch": 25.24,
      "learning_rate": 0.001,
      "loss": 2.5495,
      "step": 131448
    },
    {
      "epoch": 25.24,
      "learning_rate": 0.001,
      "loss": 2.5548,
      "step": 131460
    },
    {
      "epoch": 25.24,
      "learning_rate": 0.001,
      "loss": 2.5526,
      "step": 131472
    },
    {
      "epoch": 25.25,
      "learning_rate": 0.001,
      "loss": 2.5512,
      "step": 131484
    },
    {
      "epoch": 25.25,
      "learning_rate": 0.001,
      "loss": 2.549,
      "step": 131496
    },
    {
      "epoch": 25.25,
      "learning_rate": 0.001,
      "loss": 2.5532,
      "step": 131508
    },
    {
      "epoch": 25.25,
      "learning_rate": 0.001,
      "loss": 2.5473,
      "step": 131520
    },
    {
      "epoch": 25.26,
      "learning_rate": 0.001,
      "loss": 2.5534,
      "step": 131532
    },
    {
      "epoch": 25.26,
      "learning_rate": 0.001,
      "loss": 2.5431,
      "step": 131544
    },
    {
      "epoch": 25.26,
      "learning_rate": 0.001,
      "loss": 2.551,
      "step": 131556
    },
    {
      "epoch": 25.26,
      "learning_rate": 0.001,
      "loss": 2.5491,
      "step": 131568
    },
    {
      "epoch": 25.26,
      "learning_rate": 0.001,
      "loss": 2.5426,
      "step": 131580
    },
    {
      "epoch": 25.27,
      "learning_rate": 0.001,
      "loss": 2.551,
      "step": 131592
    },
    {
      "epoch": 25.27,
      "learning_rate": 0.001,
      "loss": 2.5327,
      "step": 131604
    },
    {
      "epoch": 25.27,
      "learning_rate": 0.001,
      "loss": 2.5533,
      "step": 131616
    },
    {
      "epoch": 25.27,
      "learning_rate": 0.001,
      "loss": 2.5523,
      "step": 131628
    },
    {
      "epoch": 25.28,
      "learning_rate": 0.001,
      "loss": 2.5453,
      "step": 131640
    },
    {
      "epoch": 25.28,
      "learning_rate": 0.001,
      "loss": 2.5584,
      "step": 131652
    },
    {
      "epoch": 25.28,
      "learning_rate": 0.001,
      "loss": 2.5561,
      "step": 131664
    },
    {
      "epoch": 25.28,
      "learning_rate": 0.001,
      "loss": 2.5506,
      "step": 131676
    },
    {
      "epoch": 25.29,
      "learning_rate": 0.001,
      "loss": 2.5584,
      "step": 131688
    },
    {
      "epoch": 25.29,
      "learning_rate": 0.001,
      "loss": 2.5578,
      "step": 131700
    },
    {
      "epoch": 25.29,
      "learning_rate": 0.001,
      "loss": 2.5476,
      "step": 131712
    },
    {
      "epoch": 25.29,
      "learning_rate": 0.001,
      "loss": 2.5587,
      "step": 131724
    },
    {
      "epoch": 25.29,
      "learning_rate": 0.001,
      "loss": 2.5574,
      "step": 131736
    },
    {
      "epoch": 25.3,
      "learning_rate": 0.001,
      "loss": 2.5491,
      "step": 131748
    },
    {
      "epoch": 25.3,
      "learning_rate": 0.001,
      "loss": 2.5475,
      "step": 131760
    },
    {
      "epoch": 25.3,
      "learning_rate": 0.001,
      "loss": 2.5471,
      "step": 131772
    },
    {
      "epoch": 25.3,
      "learning_rate": 0.001,
      "loss": 2.5556,
      "step": 131784
    },
    {
      "epoch": 25.31,
      "learning_rate": 0.001,
      "loss": 2.5436,
      "step": 131796
    },
    {
      "epoch": 25.31,
      "learning_rate": 0.001,
      "loss": 2.5512,
      "step": 131808
    },
    {
      "epoch": 25.31,
      "learning_rate": 0.001,
      "loss": 2.551,
      "step": 131820
    },
    {
      "epoch": 25.31,
      "learning_rate": 0.001,
      "loss": 2.5514,
      "step": 131832
    },
    {
      "epoch": 25.32,
      "learning_rate": 0.001,
      "loss": 2.552,
      "step": 131844
    },
    {
      "epoch": 25.32,
      "learning_rate": 0.001,
      "loss": 2.5532,
      "step": 131856
    },
    {
      "epoch": 25.32,
      "learning_rate": 0.001,
      "loss": 2.5392,
      "step": 131868
    },
    {
      "epoch": 25.32,
      "eval_ag_news_accuracy": 0.3233125,
      "eval_ag_news_bleu_score": 4.914054492516313,
      "eval_ag_news_bleu_score_sem": 0.15882457708417028,
      "eval_ag_news_emb_cos_sim": 0.8165811896324158,
      "eval_ag_news_emb_cos_sim_sem": 0.006833821067913446,
      "eval_ag_news_emb_top1_equal": 0.3046875,
      "eval_ag_news_emb_top1_equal_sem": 0.04084279867618665,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5426907539367676,
      "eval_ag_news_n_ngrams_match_1": 14.02,
      "eval_ag_news_n_ngrams_match_2": 3.194,
      "eval_ag_news_n_ngrams_match_3": 0.924,
      "eval_ag_news_num_pred_words": 46.3,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 34.55978607429364,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3476716971761488,
      "eval_ag_news_runtime": 10.1809,
      "eval_ag_news_samples_per_second": 49.112,
      "eval_ag_news_steps_per_second": 0.098,
      "eval_ag_news_token_set_f1": 0.3514273899288549,
      "eval_ag_news_token_set_f1_sem": 0.004484274728591805,
      "eval_ag_news_token_set_precision": 0.33579671758364776,
      "eval_ag_news_token_set_recall": 0.38635494547837046,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 131875
    },
    {
      "epoch": 25.32,
      "eval_anthropic_toxic_prompts_accuracy": 0.11375,
      "eval_anthropic_toxic_prompts_bleu_score": 3.085019595742232,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11654913967861044,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6766707897186279,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008607759531223705,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.253098726272583,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.142,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.85,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.676,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.986,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.87038106983766,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21420840518099032,
      "eval_anthropic_toxic_prompts_runtime": 9.6824,
      "eval_anthropic_toxic_prompts_samples_per_second": 51.64,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.103,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3577461416490362,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006384952942136405,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4404934531102299,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3272516559822401,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 131875
    },
    {
      "epoch": 25.32,
      "eval_arxiv_accuracy": 0.34528125,
      "eval_arxiv_bleu_score": 4.265711408339266,
      "eval_arxiv_bleu_score_sem": 0.12273177469112136,
      "eval_arxiv_emb_cos_sim": 0.7665755748748779,
      "eval_arxiv_emb_cos_sim_sem": 0.00850778523092234,
      "eval_arxiv_emb_top1_equal": 0.2421875,
      "eval_arxiv_emb_top1_equal_sem": 0.038014990119662626,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4275219440460205,
      "eval_arxiv_n_ngrams_match_1": 15.034,
      "eval_arxiv_n_ngrams_match_2": 2.91,
      "eval_arxiv_n_ngrams_match_3": 0.61,
      "eval_arxiv_num_pred_words": 40.14,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 30.800223426058317,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36384411351929746,
      "eval_arxiv_runtime": 10.0873,
      "eval_arxiv_samples_per_second": 49.567,
      "eval_arxiv_steps_per_second": 0.099,
      "eval_arxiv_token_set_f1": 0.3560836132591048,
      "eval_arxiv_token_set_f1_sem": 0.004144378183015886,
      "eval_arxiv_token_set_precision": 0.30640336475178753,
      "eval_arxiv_token_set_recall": 0.4413355803588468,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 131875
    },
    {
      "epoch": 25.32,
      "eval_python_code_alpaca_accuracy": 0.16096875,
      "eval_python_code_alpaca_bleu_score": 4.849660229188227,
      "eval_python_code_alpaca_bleu_score_sem": 0.15057476290733582,
      "eval_python_code_alpaca_emb_cos_sim": 0.7644432783126831,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.00885730706998966,
      "eval_python_code_alpaca_emb_top1_equal": 0.140625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.857652187347412,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.896,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.948,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.032,
      "eval_python_code_alpaca_num_pred_words": 42.368,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.420578630989453,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.34201870244315064,
      "eval_python_code_alpaca_runtime": 9.6805,
      "eval_python_code_alpaca_samples_per_second": 51.65,
      "eval_python_code_alpaca_steps_per_second": 0.103,
      "eval_python_code_alpaca_token_set_f1": 0.4788301706153694,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005446574107318026,
      "eval_python_code_alpaca_token_set_precision": 0.5419286879636789,
      "eval_python_code_alpaca_token_set_recall": 0.45094325898384224,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 131875
    },
    {
      "epoch": 25.32,
      "eval_wikibio_accuracy": 0.32046875,
      "eval_wikibio_bleu_score": 6.115802189788201,
      "eval_wikibio_bleu_score_sem": 0.2152826429351957,
      "eval_wikibio_emb_cos_sim": 0.7278193235397339,
      "eval_wikibio_emb_cos_sim_sem": 0.010483447072284342,
      "eval_wikibio_emb_top1_equal": 0.1796875,
      "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7603769302368164,
      "eval_wikibio_n_ngrams_match_1": 10.044,
      "eval_wikibio_n_ngrams_match_2": 3.428,
      "eval_wikibio_n_ngrams_match_3": 1.27,
      "eval_wikibio_num_pred_words": 35.51,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 42.96461759050041,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3536068608678047,
      "eval_wikibio_runtime": 9.9175,
      "eval_wikibio_samples_per_second": 50.416,
      "eval_wikibio_steps_per_second": 0.101,
      "eval_wikibio_token_set_f1": 0.31950334286278564,
      "eval_wikibio_token_set_f1_sem": 0.0054629541468927826,
      "eval_wikibio_token_set_precision": 0.3268275953142922,
      "eval_wikibio_token_set_recall": 0.32785432442488327,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 131875
    },
    {
      "epoch": 25.32,
      "eval_nq_accuracy": 0.52884375,
      "eval_nq_bleu_score": 11.720867354101722,
      "eval_nq_bleu_score_sem": 0.47696404351242483,
      "eval_nq_emb_cos_sim": 0.8370710611343384,
      "eval_nq_emb_cos_sim_sem": 0.00703817862399712,
      "eval_nq_emb_top1_equal": 0.328125,
      "eval_nq_emb_top1_equal_sem": 0.041664103776406315,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.183683395385742,
      "eval_nq_n_ngrams_match_1": 22.91,
      "eval_nq_n_ngrams_match_2": 8.512,
      "eval_nq_n_ngrams_match_3": 3.944,
      "eval_nq_num_pred_words": 48.704,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.878950786818756,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4457343118349473,
      "eval_nq_runtime": 11.5829,
      "eval_nq_samples_per_second": 43.167,
      "eval_nq_steps_per_second": 0.086,
      "eval_nq_token_set_f1": 0.4591167797022442,
      "eval_nq_token_set_f1_sem": 0.004982949629145637,
      "eval_nq_token_set_precision": 0.4157857532761781,
      "eval_nq_token_set_recall": 0.5211131689654769,
      "eval_nq_true_num_tokens": 64.0,
      "step": 131875
    },
    {
      "epoch": 25.32,
      "learning_rate": 0.001,
      "loss": 2.5571,
      "step": 131880
    },
    {
      "epoch": 25.32,
      "learning_rate": 0.001,
      "loss": 2.5511,
      "step": 131892
    },
    {
      "epoch": 25.33,
      "learning_rate": 0.001,
      "loss": 2.5515,
      "step": 131904
    },
    {
      "epoch": 25.33,
      "learning_rate": 0.001,
      "loss": 2.5486,
      "step": 131916
    },
    {
      "epoch": 25.33,
      "learning_rate": 0.001,
      "loss": 2.5511,
      "step": 131928
    },
    {
      "epoch": 25.33,
      "learning_rate": 0.001,
      "loss": 2.5485,
      "step": 131940
    },
    {
      "epoch": 25.34,
      "learning_rate": 0.001,
      "loss": 2.5545,
      "step": 131952
    },
    {
      "epoch": 25.34,
      "learning_rate": 0.001,
      "loss": 2.5389,
      "step": 131964
    },
    {
      "epoch": 25.34,
      "learning_rate": 0.001,
      "loss": 2.5436,
      "step": 131976
    },
    {
      "epoch": 25.34,
      "learning_rate": 0.001,
      "loss": 2.559,
      "step": 131988
    },
    {
      "epoch": 25.35,
      "learning_rate": 0.001,
      "loss": 2.5575,
      "step": 132000
    },
    {
      "epoch": 25.35,
      "learning_rate": 0.001,
      "loss": 2.5597,
      "step": 132012
    },
    {
      "epoch": 25.35,
      "learning_rate": 0.001,
      "loss": 2.5486,
      "step": 132024
    },
    {
      "epoch": 25.35,
      "learning_rate": 0.001,
      "loss": 2.5592,
      "step": 132036
    },
    {
      "epoch": 25.35,
      "learning_rate": 0.001,
      "loss": 2.5445,
      "step": 132048
    },
    {
      "epoch": 25.36,
      "learning_rate": 0.001,
      "loss": 2.5414,
      "step": 132060
    },
    {
      "epoch": 25.36,
      "learning_rate": 0.001,
      "loss": 2.5517,
      "step": 132072
    },
    {
      "epoch": 25.36,
      "learning_rate": 0.001,
      "loss": 2.5482,
      "step": 132084
    },
    {
      "epoch": 25.36,
      "learning_rate": 0.001,
      "loss": 2.5486,
      "step": 132096
    },
    {
      "epoch": 25.37,
      "learning_rate": 0.001,
      "loss": 2.5469,
      "step": 132108
    },
    {
      "epoch": 25.37,
      "learning_rate": 0.001,
      "loss": 2.5631,
      "step": 132120
    },
    {
      "epoch": 25.37,
      "learning_rate": 0.001,
      "loss": 2.5572,
      "step": 132132
    },
    {
      "epoch": 25.37,
      "learning_rate": 0.001,
      "loss": 2.546,
      "step": 132144
    },
    {
      "epoch": 25.38,
      "learning_rate": 0.001,
      "loss": 2.5429,
      "step": 132156
    },
    {
      "epoch": 25.38,
      "learning_rate": 0.001,
      "loss": 2.5437,
      "step": 132168
    },
    {
      "epoch": 25.38,
      "learning_rate": 0.001,
      "loss": 2.5443,
      "step": 132180
    },
    {
      "epoch": 25.38,
      "learning_rate": 0.001,
      "loss": 2.5482,
      "step": 132192
    },
    {
      "epoch": 25.38,
      "learning_rate": 0.001,
      "loss": 2.5437,
      "step": 132204
    },
    {
      "epoch": 25.39,
      "learning_rate": 0.001,
      "loss": 2.5454,
      "step": 132216
    },
    {
      "epoch": 25.39,
      "learning_rate": 0.001,
      "loss": 2.551,
      "step": 132228
    },
    {
      "epoch": 25.39,
      "learning_rate": 0.001,
      "loss": 2.553,
      "step": 132240
    },
    {
      "epoch": 25.39,
      "learning_rate": 0.001,
      "loss": 2.555,
      "step": 132252
    },
    {
      "epoch": 25.4,
      "learning_rate": 0.001,
      "loss": 2.5459,
      "step": 132264
    },
    {
      "epoch": 25.4,
      "learning_rate": 0.001,
      "loss": 2.553,
      "step": 132276
    },
    {
      "epoch": 25.4,
      "learning_rate": 0.001,
      "loss": 2.545,
      "step": 132288
    },
    {
      "epoch": 25.4,
      "learning_rate": 0.001,
      "loss": 2.5485,
      "step": 132300
    },
    {
      "epoch": 25.41,
      "learning_rate": 0.001,
      "loss": 2.5493,
      "step": 132312
    },
    {
      "epoch": 25.41,
      "learning_rate": 0.001,
      "loss": 2.5455,
      "step": 132324
    },
    {
      "epoch": 25.41,
      "learning_rate": 0.001,
      "loss": 2.5535,
      "step": 132336
    },
    {
      "epoch": 25.41,
      "learning_rate": 0.001,
      "loss": 2.5464,
      "step": 132348
    },
    {
      "epoch": 25.41,
      "learning_rate": 0.001,
      "loss": 2.5491,
      "step": 132360
    },
    {
      "epoch": 25.42,
      "learning_rate": 0.001,
      "loss": 2.5601,
      "step": 132372
    },
    {
      "epoch": 25.42,
      "learning_rate": 0.001,
      "loss": 2.5565,
      "step": 132384
    },
    {
      "epoch": 25.42,
      "learning_rate": 0.001,
      "loss": 2.5552,
      "step": 132396
    },
    {
      "epoch": 25.42,
      "learning_rate": 0.001,
      "loss": 2.5515,
      "step": 132408
    },
    {
      "epoch": 25.43,
      "learning_rate": 0.001,
      "loss": 2.552,
      "step": 132420
    },
    {
      "epoch": 25.43,
      "learning_rate": 0.001,
      "loss": 2.552,
      "step": 132432
    },
    {
      "epoch": 25.43,
      "learning_rate": 0.001,
      "loss": 2.553,
      "step": 132444
    },
    {
      "epoch": 25.43,
      "learning_rate": 0.001,
      "loss": 2.542,
      "step": 132456
    },
    {
      "epoch": 25.44,
      "learning_rate": 0.001,
      "loss": 2.5554,
      "step": 132468
    },
    {
      "epoch": 25.44,
      "learning_rate": 0.001,
      "loss": 2.5429,
      "step": 132480
    },
    {
      "epoch": 25.44,
      "learning_rate": 0.001,
      "loss": 2.5423,
      "step": 132492
    },
    {
      "epoch": 25.44,
      "eval_ag_news_accuracy": 0.322875,
      "eval_ag_news_bleu_score": 4.7698868245981245,
      "eval_ag_news_bleu_score_sem": 0.15752912959053497,
      "eval_ag_news_emb_cos_sim": 0.8068827390670776,
      "eval_ag_news_emb_cos_sim_sem": 0.007690376080011901,
      "eval_ag_news_emb_top1_equal": 0.2109375,
      "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5438425540924072,
      "eval_ag_news_n_ngrams_match_1": 13.864,
      "eval_ag_news_n_ngrams_match_2": 3.016,
      "eval_ag_news_n_ngrams_match_3": 0.88,
      "eval_ag_news_num_pred_words": 46.124,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 34.59961497433629,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3477881285632682,
      "eval_ag_news_runtime": 10.7334,
      "eval_ag_news_samples_per_second": 46.584,
      "eval_ag_news_steps_per_second": 0.093,
      "eval_ag_news_token_set_f1": 0.3470764898765906,
      "eval_ag_news_token_set_f1_sem": 0.0044581620414478424,
      "eval_ag_news_token_set_precision": 0.3307261801418196,
      "eval_ag_news_token_set_recall": 0.3841999551202603,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 132500
    },
    {
      "epoch": 25.44,
      "eval_anthropic_toxic_prompts_accuracy": 0.11340625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.089625103414488,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11688325696314535,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.669338583946228,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00898543283091827,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.279155731201172,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.098,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.864,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.688,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.358,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.553345072983696,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2102696532187595,
      "eval_anthropic_toxic_prompts_runtime": 10.1677,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.176,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.098,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36356602580647374,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006562408310480911,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.431138789570558,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.34125571557806916,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 132500
    },
    {
      "epoch": 25.44,
      "eval_arxiv_accuracy": 0.34915625,
      "eval_arxiv_bleu_score": 4.351104214910977,
      "eval_arxiv_bleu_score_sem": 0.1201916460933994,
      "eval_arxiv_emb_cos_sim": 0.7684433460235596,
      "eval_arxiv_emb_cos_sim_sem": 0.007544569057588464,
      "eval_arxiv_emb_top1_equal": 0.296875,
      "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.400456428527832,
      "eval_arxiv_n_ngrams_match_1": 15.238,
      "eval_arxiv_n_ngrams_match_2": 2.966,
      "eval_arxiv_n_ngrams_match_3": 0.652,
      "eval_arxiv_num_pred_words": 40.916,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.977779639109926,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3641389821534941,
      "eval_arxiv_runtime": 10.3876,
      "eval_arxiv_samples_per_second": 48.134,
      "eval_arxiv_steps_per_second": 0.096,
      "eval_arxiv_token_set_f1": 0.3555969960165585,
      "eval_arxiv_token_set_f1_sem": 0.003999183443474901,
      "eval_arxiv_token_set_precision": 0.3098904766935479,
      "eval_arxiv_token_set_recall": 0.4295684457700368,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 132500
    },
    {
      "epoch": 25.44,
      "eval_python_code_alpaca_accuracy": 0.15925,
      "eval_python_code_alpaca_bleu_score": 4.565926151490827,
      "eval_python_code_alpaca_bleu_score_sem": 0.15168405190953407,
      "eval_python_code_alpaca_emb_cos_sim": 0.7494837045669556,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009453031700266892,
      "eval_python_code_alpaca_emb_top1_equal": 0.15625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.9003562927246094,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.74,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.772,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.936,
      "eval_python_code_alpaca_num_pred_words": 43.198,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.18062183890515,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3312216624541677,
      "eval_python_code_alpaca_runtime": 11.5417,
      "eval_python_code_alpaca_samples_per_second": 43.321,
      "eval_python_code_alpaca_steps_per_second": 0.087,
      "eval_python_code_alpaca_token_set_f1": 0.4819077003065002,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005277984873226256,
      "eval_python_code_alpaca_token_set_precision": 0.5340350551546666,
      "eval_python_code_alpaca_token_set_recall": 0.45880801977434044,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 132500
    },
    {
      "epoch": 25.44,
      "eval_wikibio_accuracy": 0.32434375,
      "eval_wikibio_bleu_score": 6.027446613976045,
      "eval_wikibio_bleu_score_sem": 0.22882628857670487,
      "eval_wikibio_emb_cos_sim": 0.7307478785514832,
      "eval_wikibio_emb_cos_sim_sem": 0.00914073277242428,
      "eval_wikibio_emb_top1_equal": 0.2109375,
      "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.695962429046631,
      "eval_wikibio_n_ngrams_match_1": 9.754,
      "eval_wikibio_n_ngrams_match_2": 3.346,
      "eval_wikibio_n_ngrams_match_3": 1.246,
      "eval_wikibio_num_pred_words": 35.07,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 40.28432474113627,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.34753822265327605,
      "eval_wikibio_runtime": 10.1668,
      "eval_wikibio_samples_per_second": 49.18,
      "eval_wikibio_steps_per_second": 0.098,
      "eval_wikibio_token_set_f1": 0.3134439116195845,
      "eval_wikibio_token_set_f1_sem": 0.005674375377611694,
      "eval_wikibio_token_set_precision": 0.3178393349170109,
      "eval_wikibio_token_set_recall": 0.32681329408075077,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 132500
    },
    {
      "epoch": 25.44,
      "eval_nq_accuracy": 0.52809375,
      "eval_nq_bleu_score": 11.53092895746729,
      "eval_nq_bleu_score_sem": 0.4843067273193696,
      "eval_nq_emb_cos_sim": 0.8284263014793396,
      "eval_nq_emb_cos_sim_sem": 0.007505142656440047,
      "eval_nq_emb_top1_equal": 0.28125,
      "eval_nq_emb_top1_equal_sem": 0.039896367485272234,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1828134059906006,
      "eval_nq_n_ngrams_match_1": 22.816,
      "eval_nq_n_ngrams_match_2": 8.256,
      "eval_nq_n_ngrams_match_3": 3.816,
      "eval_nq_num_pred_words": 48.766,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.871229552977024,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.44225201895672694,
      "eval_nq_runtime": 10.5392,
      "eval_nq_samples_per_second": 47.442,
      "eval_nq_steps_per_second": 0.095,
      "eval_nq_token_set_f1": 0.456554878112556,
      "eval_nq_token_set_f1_sem": 0.004977605428584864,
      "eval_nq_token_set_precision": 0.41275398990833295,
      "eval_nq_token_set_recall": 0.520890101964246,
      "eval_nq_true_num_tokens": 64.0,
      "step": 132500
    },
    {
      "epoch": 25.44,
      "learning_rate": 0.001,
      "loss": 2.544,
      "step": 132504
    },
    {
      "epoch": 25.44,
      "learning_rate": 0.001,
      "loss": 2.5508,
      "step": 132516
    },
    {
      "epoch": 25.45,
      "learning_rate": 0.001,
      "loss": 2.5597,
      "step": 132528
    },
    {
      "epoch": 25.45,
      "learning_rate": 0.001,
      "loss": 2.5504,
      "step": 132540
    },
    {
      "epoch": 25.45,
      "learning_rate": 0.001,
      "loss": 2.5359,
      "step": 132552
    },
    {
      "epoch": 25.45,
      "learning_rate": 0.001,
      "loss": 2.5543,
      "step": 132564
    },
    {
      "epoch": 25.46,
      "learning_rate": 0.001,
      "loss": 2.541,
      "step": 132576
    },
    {
      "epoch": 25.46,
      "learning_rate": 0.001,
      "loss": 2.5508,
      "step": 132588
    },
    {
      "epoch": 25.46,
      "learning_rate": 0.001,
      "loss": 2.5478,
      "step": 132600
    },
    {
      "epoch": 25.46,
      "learning_rate": 0.001,
      "loss": 2.5501,
      "step": 132612
    },
    {
      "epoch": 25.47,
      "learning_rate": 0.001,
      "loss": 2.5553,
      "step": 132624
    },
    {
      "epoch": 25.47,
      "learning_rate": 0.001,
      "loss": 2.5632,
      "step": 132636
    },
    {
      "epoch": 25.47,
      "learning_rate": 0.001,
      "loss": 2.5462,
      "step": 132648
    },
    {
      "epoch": 25.47,
      "learning_rate": 0.001,
      "loss": 2.5589,
      "step": 132660
    },
    {
      "epoch": 25.47,
      "learning_rate": 0.001,
      "loss": 2.5519,
      "step": 132672
    },
    {
      "epoch": 25.48,
      "learning_rate": 0.001,
      "loss": 2.5515,
      "step": 132684
    },
    {
      "epoch": 25.48,
      "learning_rate": 0.001,
      "loss": 2.5512,
      "step": 132696
    },
    {
      "epoch": 25.48,
      "learning_rate": 0.001,
      "loss": 2.5441,
      "step": 132708
    },
    {
      "epoch": 25.48,
      "learning_rate": 0.001,
      "loss": 2.5476,
      "step": 132720
    },
    {
      "epoch": 25.49,
      "learning_rate": 0.001,
      "loss": 2.5537,
      "step": 132732
    },
    {
      "epoch": 25.49,
      "learning_rate": 0.001,
      "loss": 2.5441,
      "step": 132744
    },
    {
      "epoch": 25.49,
      "learning_rate": 0.001,
      "loss": 2.5513,
      "step": 132756
    },
    {
      "epoch": 25.49,
      "learning_rate": 0.001,
      "loss": 2.5519,
      "step": 132768
    },
    {
      "epoch": 25.5,
      "learning_rate": 0.001,
      "loss": 2.5369,
      "step": 132780
    },
    {
      "epoch": 25.5,
      "learning_rate": 0.001,
      "loss": 2.5437,
      "step": 132792
    },
    {
      "epoch": 25.5,
      "learning_rate": 0.001,
      "loss": 2.5556,
      "step": 132804
    },
    {
      "epoch": 25.5,
      "learning_rate": 0.001,
      "loss": 2.554,
      "step": 132816
    },
    {
      "epoch": 25.5,
      "learning_rate": 0.001,
      "loss": 2.5416,
      "step": 132828
    },
    {
      "epoch": 25.51,
      "learning_rate": 0.001,
      "loss": 2.5463,
      "step": 132840
    },
    {
      "epoch": 25.51,
      "learning_rate": 0.001,
      "loss": 2.5556,
      "step": 132852
    },
    {
      "epoch": 25.51,
      "learning_rate": 0.001,
      "loss": 2.5537,
      "step": 132864
    },
    {
      "epoch": 25.51,
      "learning_rate": 0.001,
      "loss": 2.5487,
      "step": 132876
    },
    {
      "epoch": 25.52,
      "learning_rate": 0.001,
      "loss": 2.5504,
      "step": 132888
    },
    {
      "epoch": 25.52,
      "learning_rate": 0.001,
      "loss": 2.5531,
      "step": 132900
    },
    {
      "epoch": 25.52,
      "learning_rate": 0.001,
      "loss": 2.541,
      "step": 132912
    },
    {
      "epoch": 25.52,
      "learning_rate": 0.001,
      "loss": 2.5554,
      "step": 132924
    },
    {
      "epoch": 25.53,
      "learning_rate": 0.001,
      "loss": 2.5534,
      "step": 132936
    },
    {
      "epoch": 25.53,
      "learning_rate": 0.001,
      "loss": 2.5548,
      "step": 132948
    },
    {
      "epoch": 25.53,
      "learning_rate": 0.001,
      "loss": 2.5521,
      "step": 132960
    },
    {
      "epoch": 25.53,
      "learning_rate": 0.001,
      "loss": 2.5491,
      "step": 132972
    },
    {
      "epoch": 25.53,
      "learning_rate": 0.001,
      "loss": 2.5503,
      "step": 132984
    },
    {
      "epoch": 25.54,
      "learning_rate": 0.001,
      "loss": 2.5521,
      "step": 132996
    },
    {
      "epoch": 25.54,
      "learning_rate": 0.001,
      "loss": 2.5532,
      "step": 133008
    },
    {
      "epoch": 25.54,
      "learning_rate": 0.001,
      "loss": 2.5467,
      "step": 133020
    },
    {
      "epoch": 25.54,
      "learning_rate": 0.001,
      "loss": 2.5509,
      "step": 133032
    },
    {
      "epoch": 25.55,
      "learning_rate": 0.001,
      "loss": 2.5609,
      "step": 133044
    },
    {
      "epoch": 25.55,
      "learning_rate": 0.001,
      "loss": 2.5425,
      "step": 133056
    },
    {
      "epoch": 25.55,
      "learning_rate": 0.001,
      "loss": 2.555,
      "step": 133068
    },
    {
      "epoch": 25.55,
      "learning_rate": 0.001,
      "loss": 2.555,
      "step": 133080
    },
    {
      "epoch": 25.56,
      "learning_rate": 0.001,
      "loss": 2.5527,
      "step": 133092
    },
    {
      "epoch": 25.56,
      "learning_rate": 0.001,
      "loss": 2.5519,
      "step": 133104
    },
    {
      "epoch": 25.56,
      "learning_rate": 0.001,
      "loss": 2.5608,
      "step": 133116
    },
    {
      "epoch": 25.56,
      "eval_ag_news_accuracy": 0.32365625,
      "eval_ag_news_bleu_score": 4.855899871609612,
      "eval_ag_news_bleu_score_sem": 0.15629417852463312,
      "eval_ag_news_emb_cos_sim": 0.80861496925354,
      "eval_ag_news_emb_cos_sim_sem": 0.007653365220118765,
      "eval_ag_news_emb_top1_equal": 0.21875,
      "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5360374450683594,
      "eval_ag_news_n_ngrams_match_1": 14.054,
      "eval_ag_news_n_ngrams_match_2": 3.138,
      "eval_ag_news_n_ngrams_match_3": 0.848,
      "eval_ag_news_num_pred_words": 46.054,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 34.33061237022698,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35103040909370664,
      "eval_ag_news_runtime": 10.5655,
      "eval_ag_news_samples_per_second": 47.324,
      "eval_ag_news_steps_per_second": 0.095,
      "eval_ag_news_token_set_f1": 0.3513876953873009,
      "eval_ag_news_token_set_f1_sem": 0.004428657807683609,
      "eval_ag_news_token_set_precision": 0.33518842970883794,
      "eval_ag_news_token_set_recall": 0.38715639119014816,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 133125
    },
    {
      "epoch": 25.56,
      "eval_anthropic_toxic_prompts_accuracy": 0.11446875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1903966164722344,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12270087868482252,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6759926080703735,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008792292263439066,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.229008674621582,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.164,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.926,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.73,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.776,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.254609022834476,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 62.9765625,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21487705897450765,
      "eval_anthropic_toxic_prompts_runtime": 10.453,
      "eval_anthropic_toxic_prompts_samples_per_second": 47.833,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.096,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3626626272298002,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006788346091085388,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4399205435156726,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3366358630871735,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 133125
    },
    {
      "epoch": 25.56,
      "eval_arxiv_accuracy": 0.34753125,
      "eval_arxiv_bleu_score": 4.314217105866603,
      "eval_arxiv_bleu_score_sem": 0.12240891262977042,
      "eval_arxiv_emb_cos_sim": 0.7705197334289551,
      "eval_arxiv_emb_cos_sim_sem": 0.006838412440223102,
      "eval_arxiv_emb_top1_equal": 0.265625,
      "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3973591327667236,
      "eval_arxiv_n_ngrams_match_1": 15.024,
      "eval_arxiv_n_ngrams_match_2": 2.908,
      "eval_arxiv_n_ngrams_match_3": 0.63,
      "eval_arxiv_num_pred_words": 40.222,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.885073232998504,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36388975262662504,
      "eval_arxiv_runtime": 10.3797,
      "eval_arxiv_samples_per_second": 48.171,
      "eval_arxiv_steps_per_second": 0.096,
      "eval_arxiv_token_set_f1": 0.35392897313565735,
      "eval_arxiv_token_set_f1_sem": 0.004024507589042814,
      "eval_arxiv_token_set_precision": 0.3062499404385056,
      "eval_arxiv_token_set_recall": 0.4355530940608332,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 133125
    },
    {
      "epoch": 25.56,
      "eval_python_code_alpaca_accuracy": 0.16053125,
      "eval_python_code_alpaca_bleu_score": 4.788908634479267,
      "eval_python_code_alpaca_bleu_score_sem": 0.1513937749102946,
      "eval_python_code_alpaca_emb_cos_sim": 0.7552530765533447,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008792524071105876,
      "eval_python_code_alpaca_emb_top1_equal": 0.1796875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.034068008879424266,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8843941688537598,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.996,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.004,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.004,
      "eval_python_code_alpaca_num_pred_words": 43.416,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.892724341564026,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.34000495503536365,
      "eval_python_code_alpaca_runtime": 10.0607,
      "eval_python_code_alpaca_samples_per_second": 49.698,
      "eval_python_code_alpaca_steps_per_second": 0.099,
      "eval_python_code_alpaca_token_set_f1": 0.48455450275954537,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005331723089022349,
      "eval_python_code_alpaca_token_set_precision": 0.5457429878672126,
      "eval_python_code_alpaca_token_set_recall": 0.4585016187534243,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 133125
    },
    {
      "epoch": 25.56,
      "eval_wikibio_accuracy": 0.32196875,
      "eval_wikibio_bleu_score": 6.062982074780387,
      "eval_wikibio_bleu_score_sem": 0.2025494902855392,
      "eval_wikibio_emb_cos_sim": 0.744976282119751,
      "eval_wikibio_emb_cos_sim_sem": 0.010711892869381007,
      "eval_wikibio_emb_top1_equal": 0.21875,
      "eval_wikibio_emb_top1_equal_sem": 0.03668319712192295,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7191264629364014,
      "eval_wikibio_n_ngrams_match_1": 10.364,
      "eval_wikibio_n_ngrams_match_2": 3.566,
      "eval_wikibio_n_ngrams_match_3": 1.288,
      "eval_wikibio_num_pred_words": 36.894,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 41.22836387011547,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3639462322648924,
      "eval_wikibio_runtime": 10.6158,
      "eval_wikibio_samples_per_second": 47.1,
      "eval_wikibio_steps_per_second": 0.094,
      "eval_wikibio_token_set_f1": 0.32465973139475823,
      "eval_wikibio_token_set_f1_sem": 0.005354998232170549,
      "eval_wikibio_token_set_precision": 0.33592074502898145,
      "eval_wikibio_token_set_recall": 0.3281273416307827,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 133125
    },
    {
      "epoch": 25.56,
      "eval_nq_accuracy": 0.52859375,
      "eval_nq_bleu_score": 11.766816965162063,
      "eval_nq_bleu_score_sem": 0.4753187977408753,
      "eval_nq_emb_cos_sim": 0.8320537805557251,
      "eval_nq_emb_cos_sim_sem": 0.00716613118772013,
      "eval_nq_emb_top1_equal": 0.3203125,
      "eval_nq_emb_top1_equal_sem": 0.041403754790620424,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1797211170196533,
      "eval_nq_n_ngrams_match_1": 23.184,
      "eval_nq_n_ngrams_match_2": 8.492,
      "eval_nq_n_ngrams_match_3": 3.916,
      "eval_nq_num_pred_words": 49.2,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.843839518448318,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4488687975217286,
      "eval_nq_runtime": 10.4915,
      "eval_nq_samples_per_second": 47.658,
      "eval_nq_steps_per_second": 0.095,
      "eval_nq_token_set_f1": 0.4619329690907166,
      "eval_nq_token_set_f1_sem": 0.005041814777617996,
      "eval_nq_token_set_precision": 0.42067824182411534,
      "eval_nq_token_set_recall": 0.52084940326046,
      "eval_nq_true_num_tokens": 64.0,
      "step": 133125
    },
    {
      "epoch": 25.56,
      "learning_rate": 0.001,
      "loss": 2.5465,
      "step": 133128
    },
    {
      "epoch": 25.56,
      "learning_rate": 0.001,
      "loss": 2.5595,
      "step": 133140
    },
    {
      "epoch": 25.57,
      "learning_rate": 0.001,
      "loss": 2.5514,
      "step": 133152
    },
    {
      "epoch": 25.57,
      "learning_rate": 0.001,
      "loss": 2.5485,
      "step": 133164
    },
    {
      "epoch": 25.57,
      "learning_rate": 0.001,
      "loss": 2.5525,
      "step": 133176
    },
    {
      "epoch": 25.57,
      "learning_rate": 0.001,
      "loss": 2.5533,
      "step": 133188
    },
    {
      "epoch": 25.58,
      "learning_rate": 0.001,
      "loss": 2.5619,
      "step": 133200
    },
    {
      "epoch": 25.58,
      "learning_rate": 0.001,
      "loss": 2.5421,
      "step": 133212
    },
    {
      "epoch": 25.58,
      "learning_rate": 0.001,
      "loss": 2.5536,
      "step": 133224
    },
    {
      "epoch": 25.58,
      "learning_rate": 0.001,
      "loss": 2.5509,
      "step": 133236
    },
    {
      "epoch": 25.59,
      "learning_rate": 0.001,
      "loss": 2.5476,
      "step": 133248
    },
    {
      "epoch": 25.59,
      "learning_rate": 0.001,
      "loss": 2.5514,
      "step": 133260
    },
    {
      "epoch": 25.59,
      "learning_rate": 0.001,
      "loss": 2.5602,
      "step": 133272
    },
    {
      "epoch": 25.59,
      "learning_rate": 0.001,
      "loss": 2.5443,
      "step": 133284
    },
    {
      "epoch": 25.59,
      "learning_rate": 0.001,
      "loss": 2.5605,
      "step": 133296
    },
    {
      "epoch": 25.6,
      "learning_rate": 0.001,
      "loss": 2.5518,
      "step": 133308
    },
    {
      "epoch": 25.6,
      "learning_rate": 0.001,
      "loss": 2.5418,
      "step": 133320
    },
    {
      "epoch": 25.6,
      "learning_rate": 0.001,
      "loss": 2.5683,
      "step": 133332
    },
    {
      "epoch": 25.6,
      "learning_rate": 0.001,
      "loss": 2.557,
      "step": 133344
    },
    {
      "epoch": 25.61,
      "learning_rate": 0.001,
      "loss": 2.5578,
      "step": 133356
    },
    {
      "epoch": 25.61,
      "learning_rate": 0.001,
      "loss": 2.553,
      "step": 133368
    },
    {
      "epoch": 25.61,
      "learning_rate": 0.001,
      "loss": 2.5526,
      "step": 133380
    },
    {
      "epoch": 25.61,
      "learning_rate": 0.001,
      "loss": 2.5452,
      "step": 133392
    },
    {
      "epoch": 25.62,
      "learning_rate": 0.001,
      "loss": 2.556,
      "step": 133404
    },
    {
      "epoch": 25.62,
      "learning_rate": 0.001,
      "loss": 2.5482,
      "step": 133416
    },
    {
      "epoch": 25.62,
      "learning_rate": 0.001,
      "loss": 2.5575,
      "step": 133428
    },
    {
      "epoch": 25.62,
      "learning_rate": 0.001,
      "loss": 2.557,
      "step": 133440
    },
    {
      "epoch": 25.62,
      "learning_rate": 0.001,
      "loss": 2.5483,
      "step": 133452
    },
    {
      "epoch": 25.63,
      "learning_rate": 0.001,
      "loss": 2.5489,
      "step": 133464
    },
    {
      "epoch": 25.63,
      "learning_rate": 0.001,
      "loss": 2.544,
      "step": 133476
    },
    {
      "epoch": 25.63,
      "learning_rate": 0.001,
      "loss": 2.5498,
      "step": 133488
    },
    {
      "epoch": 25.63,
      "learning_rate": 0.001,
      "loss": 2.5628,
      "step": 133500
    },
    {
      "epoch": 25.64,
      "learning_rate": 0.001,
      "loss": 2.5466,
      "step": 133512
    },
    {
      "epoch": 25.64,
      "learning_rate": 0.001,
      "loss": 2.5534,
      "step": 133524
    },
    {
      "epoch": 25.64,
      "learning_rate": 0.001,
      "loss": 2.5563,
      "step": 133536
    },
    {
      "epoch": 25.64,
      "learning_rate": 0.001,
      "loss": 2.5556,
      "step": 133548
    },
    {
      "epoch": 25.65,
      "learning_rate": 0.001,
      "loss": 2.552,
      "step": 133560
    },
    {
      "epoch": 25.65,
      "learning_rate": 0.001,
      "loss": 2.5457,
      "step": 133572
    },
    {
      "epoch": 25.65,
      "learning_rate": 0.001,
      "loss": 2.5553,
      "step": 133584
    },
    {
      "epoch": 25.65,
      "learning_rate": 0.001,
      "loss": 2.545,
      "step": 133596
    },
    {
      "epoch": 25.65,
      "learning_rate": 0.001,
      "loss": 2.5456,
      "step": 133608
    },
    {
      "epoch": 25.66,
      "learning_rate": 0.001,
      "loss": 2.55,
      "step": 133620
    },
    {
      "epoch": 25.66,
      "learning_rate": 0.001,
      "loss": 2.5535,
      "step": 133632
    },
    {
      "epoch": 25.66,
      "learning_rate": 0.001,
      "loss": 2.5545,
      "step": 133644
    },
    {
      "epoch": 25.66,
      "learning_rate": 0.001,
      "loss": 2.5533,
      "step": 133656
    },
    {
      "epoch": 25.67,
      "learning_rate": 0.001,
      "loss": 2.5512,
      "step": 133668
    },
    {
      "epoch": 25.67,
      "learning_rate": 0.001,
      "loss": 2.5581,
      "step": 133680
    },
    {
      "epoch": 25.67,
      "learning_rate": 0.001,
      "loss": 2.5548,
      "step": 133692
    },
    {
      "epoch": 25.67,
      "learning_rate": 0.001,
      "loss": 2.5543,
      "step": 133704
    },
    {
      "epoch": 25.68,
      "learning_rate": 0.001,
      "loss": 2.5394,
      "step": 133716
    },
    {
      "epoch": 25.68,
      "learning_rate": 0.001,
      "loss": 2.5536,
      "step": 133728
    },
    {
      "epoch": 25.68,
      "learning_rate": 0.001,
      "loss": 2.5561,
      "step": 133740
    },
    {
      "epoch": 25.68,
      "eval_ag_news_accuracy": 0.32309375,
      "eval_ag_news_bleu_score": 4.888573567916063,
      "eval_ag_news_bleu_score_sem": 0.15650234872976646,
      "eval_ag_news_emb_cos_sim": 0.8169499635696411,
      "eval_ag_news_emb_cos_sim_sem": 0.006926774625215207,
      "eval_ag_news_emb_top1_equal": 0.28125,
      "eval_ag_news_emb_top1_equal_sem": 0.039896367485272234,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.543426752090454,
      "eval_ag_news_n_ngrams_match_1": 14.258,
      "eval_ag_news_n_ngrams_match_2": 3.242,
      "eval_ag_news_n_ngrams_match_3": 0.92,
      "eval_ag_news_num_pred_words": 46.846,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 34.585231375734935,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3526426557351461,
      "eval_ag_news_runtime": 10.9592,
      "eval_ag_news_samples_per_second": 45.624,
      "eval_ag_news_steps_per_second": 0.091,
      "eval_ag_news_token_set_f1": 0.3539517565845531,
      "eval_ag_news_token_set_f1_sem": 0.0043802896680215555,
      "eval_ag_news_token_set_precision": 0.33947344760386655,
      "eval_ag_news_token_set_recall": 0.3875487690266719,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 133750
    },
    {
      "epoch": 25.68,
      "eval_anthropic_toxic_prompts_accuracy": 0.11534375,
      "eval_anthropic_toxic_prompts_bleu_score": 3.24550285156498,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1275891704996912,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6736080646514893,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008341823453832092,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2241549491882324,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.334,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.938,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.736,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.882,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.13232708625022,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21755447094292285,
      "eval_anthropic_toxic_prompts_runtime": 9.9447,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.278,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.101,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36727662832547525,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006746261540125967,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44669465724382945,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3390676853152514,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 133750
    },
    {
      "epoch": 25.68,
      "eval_arxiv_accuracy": 0.34921875,
      "eval_arxiv_bleu_score": 4.2929043807675695,
      "eval_arxiv_bleu_score_sem": 0.12803835219053986,
      "eval_arxiv_emb_cos_sim": 0.7631653547286987,
      "eval_arxiv_emb_cos_sim_sem": 0.008592578104681179,
      "eval_arxiv_emb_top1_equal": 0.3046875,
      "eval_arxiv_emb_top1_equal_sem": 0.04084279867618665,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3955345153808594,
      "eval_arxiv_n_ngrams_match_1": 15.068,
      "eval_arxiv_n_ngrams_match_2": 2.97,
      "eval_arxiv_n_ngrams_match_3": 0.65,
      "eval_arxiv_num_pred_words": 40.42,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.83059412567739,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3608827035761182,
      "eval_arxiv_runtime": 10.2855,
      "eval_arxiv_samples_per_second": 48.612,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.3567580426630008,
      "eval_arxiv_token_set_f1_sem": 0.004291661397986758,
      "eval_arxiv_token_set_precision": 0.3065091526444031,
      "eval_arxiv_token_set_recall": 0.4455519866199295,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 133750
    },
    {
      "epoch": 25.68,
      "eval_python_code_alpaca_accuracy": 0.16246875,
      "eval_python_code_alpaca_bleu_score": 4.5237646833939325,
      "eval_python_code_alpaca_bleu_score_sem": 0.13009332769464257,
      "eval_python_code_alpaca_emb_cos_sim": 0.7561140656471252,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009422573358228183,
      "eval_python_code_alpaca_emb_top1_equal": 0.1484375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8891215324401855,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.89,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.898,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.926,
      "eval_python_code_alpaca_num_pred_words": 43.184,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.97751000350488,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3319159991278319,
      "eval_python_code_alpaca_runtime": 9.9612,
      "eval_python_code_alpaca_samples_per_second": 50.195,
      "eval_python_code_alpaca_steps_per_second": 0.1,
      "eval_python_code_alpaca_token_set_f1": 0.4817946429770144,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005398740906617264,
      "eval_python_code_alpaca_token_set_precision": 0.54204073260342,
      "eval_python_code_alpaca_token_set_recall": 0.4575423473347422,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 133750
    },
    {
      "epoch": 25.68,
      "eval_wikibio_accuracy": 0.32628125,
      "eval_wikibio_bleu_score": 6.0395154360742715,
      "eval_wikibio_bleu_score_sem": 0.2209590227537619,
      "eval_wikibio_emb_cos_sim": 0.7338325381278992,
      "eval_wikibio_emb_cos_sim_sem": 0.010597537274114315,
      "eval_wikibio_emb_top1_equal": 0.1484375,
      "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7107479572296143,
      "eval_wikibio_n_ngrams_match_1": 9.85,
      "eval_wikibio_n_ngrams_match_2": 3.338,
      "eval_wikibio_n_ngrams_match_3": 1.248,
      "eval_wikibio_num_pred_words": 35.674,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 40.88437485741616,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.34899591740478264,
      "eval_wikibio_runtime": 13.1109,
      "eval_wikibio_samples_per_second": 38.136,
      "eval_wikibio_steps_per_second": 0.076,
      "eval_wikibio_token_set_f1": 0.3153780916314267,
      "eval_wikibio_token_set_f1_sem": 0.005680015025179767,
      "eval_wikibio_token_set_precision": 0.32243100120275797,
      "eval_wikibio_token_set_recall": 0.32583361109637027,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 133750
    },
    {
      "epoch": 25.68,
      "eval_nq_accuracy": 0.52840625,
      "eval_nq_bleu_score": 11.779300615308497,
      "eval_nq_bleu_score_sem": 0.4967497881099778,
      "eval_nq_emb_cos_sim": 0.8297677040100098,
      "eval_nq_emb_cos_sim_sem": 0.007091601730118202,
      "eval_nq_emb_top1_equal": 0.265625,
      "eval_nq_emb_top1_equal_sem": 0.03919146934646163,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1775362491607666,
      "eval_nq_n_ngrams_match_1": 23.152,
      "eval_nq_n_ngrams_match_2": 8.516,
      "eval_nq_n_ngrams_match_3": 3.914,
      "eval_nq_num_pred_words": 49.208,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.824537991056944,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.44897050359226154,
      "eval_nq_runtime": 10.4495,
      "eval_nq_samples_per_second": 47.849,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.46081202162357,
      "eval_nq_token_set_f1_sem": 0.005103763159529631,
      "eval_nq_token_set_precision": 0.41988725971033675,
      "eval_nq_token_set_recall": 0.5174729913996847,
      "eval_nq_true_num_tokens": 64.0,
      "step": 133750
    },
    {
      "epoch": 25.68,
      "learning_rate": 0.001,
      "loss": 2.5561,
      "step": 133752
    },
    {
      "epoch": 25.68,
      "learning_rate": 0.001,
      "loss": 2.5492,
      "step": 133764
    },
    {
      "epoch": 25.69,
      "learning_rate": 0.001,
      "loss": 2.5532,
      "step": 133776
    },
    {
      "epoch": 25.69,
      "learning_rate": 0.001,
      "loss": 2.5429,
      "step": 133788
    },
    {
      "epoch": 25.69,
      "learning_rate": 0.001,
      "loss": 2.5546,
      "step": 133800
    },
    {
      "epoch": 25.69,
      "learning_rate": 0.001,
      "loss": 2.5462,
      "step": 133812
    },
    {
      "epoch": 25.7,
      "learning_rate": 0.001,
      "loss": 2.5595,
      "step": 133824
    },
    {
      "epoch": 25.7,
      "learning_rate": 0.001,
      "loss": 2.5498,
      "step": 133836
    },
    {
      "epoch": 25.7,
      "learning_rate": 0.001,
      "loss": 2.5456,
      "step": 133848
    },
    {
      "epoch": 25.7,
      "learning_rate": 0.001,
      "loss": 2.5598,
      "step": 133860
    },
    {
      "epoch": 25.71,
      "learning_rate": 0.001,
      "loss": 2.5627,
      "step": 133872
    },
    {
      "epoch": 25.71,
      "learning_rate": 0.001,
      "loss": 2.5553,
      "step": 133884
    },
    {
      "epoch": 25.71,
      "learning_rate": 0.001,
      "loss": 2.5495,
      "step": 133896
    },
    {
      "epoch": 25.71,
      "learning_rate": 0.001,
      "loss": 2.5589,
      "step": 133908
    },
    {
      "epoch": 25.71,
      "learning_rate": 0.001,
      "loss": 2.5566,
      "step": 133920
    },
    {
      "epoch": 25.72,
      "learning_rate": 0.001,
      "loss": 2.5512,
      "step": 133932
    },
    {
      "epoch": 25.72,
      "learning_rate": 0.001,
      "loss": 2.5611,
      "step": 133944
    },
    {
      "epoch": 25.72,
      "learning_rate": 0.001,
      "loss": 2.5531,
      "step": 133956
    },
    {
      "epoch": 25.72,
      "learning_rate": 0.001,
      "loss": 2.5518,
      "step": 133968
    },
    {
      "epoch": 25.73,
      "learning_rate": 0.001,
      "loss": 2.5496,
      "step": 133980
    },
    {
      "epoch": 25.73,
      "learning_rate": 0.001,
      "loss": 2.5587,
      "step": 133992
    },
    {
      "epoch": 25.73,
      "learning_rate": 0.001,
      "loss": 2.5533,
      "step": 134004
    },
    {
      "epoch": 25.73,
      "learning_rate": 0.001,
      "loss": 2.5594,
      "step": 134016
    },
    {
      "epoch": 25.74,
      "learning_rate": 0.001,
      "loss": 2.5548,
      "step": 134028
    },
    {
      "epoch": 25.74,
      "learning_rate": 0.001,
      "loss": 2.5538,
      "step": 134040
    },
    {
      "epoch": 25.74,
      "learning_rate": 0.001,
      "loss": 2.543,
      "step": 134052
    },
    {
      "epoch": 25.74,
      "learning_rate": 0.001,
      "loss": 2.5592,
      "step": 134064
    },
    {
      "epoch": 25.74,
      "learning_rate": 0.001,
      "loss": 2.5569,
      "step": 134076
    },
    {
      "epoch": 25.75,
      "learning_rate": 0.001,
      "loss": 2.5549,
      "step": 134088
    },
    {
      "epoch": 25.75,
      "learning_rate": 0.001,
      "loss": 2.5505,
      "step": 134100
    },
    {
      "epoch": 25.75,
      "learning_rate": 0.001,
      "loss": 2.5561,
      "step": 134112
    },
    {
      "epoch": 25.75,
      "learning_rate": 0.001,
      "loss": 2.5574,
      "step": 134124
    },
    {
      "epoch": 25.76,
      "learning_rate": 0.001,
      "loss": 2.5496,
      "step": 134136
    },
    {
      "epoch": 25.76,
      "learning_rate": 0.001,
      "loss": 2.5486,
      "step": 134148
    },
    {
      "epoch": 25.76,
      "learning_rate": 0.001,
      "loss": 2.5599,
      "step": 134160
    },
    {
      "epoch": 25.76,
      "learning_rate": 0.001,
      "loss": 2.5481,
      "step": 134172
    },
    {
      "epoch": 25.76,
      "learning_rate": 0.001,
      "loss": 2.5656,
      "step": 134184
    },
    {
      "epoch": 25.77,
      "learning_rate": 0.001,
      "loss": 2.5588,
      "step": 134196
    },
    {
      "epoch": 25.77,
      "learning_rate": 0.001,
      "loss": 2.5411,
      "step": 134208
    },
    {
      "epoch": 25.77,
      "learning_rate": 0.001,
      "loss": 2.5507,
      "step": 134220
    },
    {
      "epoch": 25.77,
      "learning_rate": 0.001,
      "loss": 2.5548,
      "step": 134232
    },
    {
      "epoch": 25.78,
      "learning_rate": 0.001,
      "loss": 2.5514,
      "step": 134244
    },
    {
      "epoch": 25.78,
      "learning_rate": 0.001,
      "loss": 2.5507,
      "step": 134256
    },
    {
      "epoch": 25.78,
      "learning_rate": 0.001,
      "loss": 2.557,
      "step": 134268
    },
    {
      "epoch": 25.78,
      "learning_rate": 0.001,
      "loss": 2.5576,
      "step": 134280
    },
    {
      "epoch": 25.79,
      "learning_rate": 0.001,
      "loss": 2.5559,
      "step": 134292
    },
    {
      "epoch": 25.79,
      "learning_rate": 0.001,
      "loss": 2.5536,
      "step": 134304
    },
    {
      "epoch": 25.79,
      "learning_rate": 0.001,
      "loss": 2.5482,
      "step": 134316
    },
    {
      "epoch": 25.79,
      "learning_rate": 0.001,
      "loss": 2.5566,
      "step": 134328
    },
    {
      "epoch": 25.79,
      "learning_rate": 0.001,
      "loss": 2.5672,
      "step": 134340
    },
    {
      "epoch": 25.8,
      "learning_rate": 0.001,
      "loss": 2.5651,
      "step": 134352
    },
    {
      "epoch": 25.8,
      "learning_rate": 0.001,
      "loss": 2.556,
      "step": 134364
    },
    {
      "epoch": 25.8,
      "eval_ag_news_accuracy": 0.3225625,
      "eval_ag_news_bleu_score": 4.936972664786717,
      "eval_ag_news_bleu_score_sem": 0.16388769154047406,
      "eval_ag_news_emb_cos_sim": 0.8119990825653076,
      "eval_ag_news_emb_cos_sim_sem": 0.0071752744196689625,
      "eval_ag_news_emb_top1_equal": 0.234375,
      "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5403366088867188,
      "eval_ag_news_n_ngrams_match_1": 14.07,
      "eval_ag_news_n_ngrams_match_2": 3.112,
      "eval_ag_news_n_ngrams_match_3": 0.88,
      "eval_ag_news_num_pred_words": 46.376,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 34.47852301502045,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3486201575707193,
      "eval_ag_news_runtime": 10.4267,
      "eval_ag_news_samples_per_second": 47.954,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.35000945644344394,
      "eval_ag_news_token_set_f1_sem": 0.004259476031331064,
      "eval_ag_news_token_set_precision": 0.33525634181367797,
      "eval_ag_news_token_set_recall": 0.3812157984813743,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 134375
    },
    {
      "epoch": 25.8,
      "eval_anthropic_toxic_prompts_accuracy": 0.1145,
      "eval_anthropic_toxic_prompts_bleu_score": 2.9194052884215744,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10985525394116184,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6746144890785217,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009121698860510055,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2397563457489014,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.25,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.786,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.624,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.65,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.52750110537512,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21325897167956348,
      "eval_anthropic_toxic_prompts_runtime": 10.0563,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.72,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.099,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3547631983238783,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0066454033441821735,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.43760832773443115,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3234539542376684,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 134375
    },
    {
      "epoch": 25.8,
      "eval_arxiv_accuracy": 0.34865625,
      "eval_arxiv_bleu_score": 4.359990279945595,
      "eval_arxiv_bleu_score_sem": 0.12775837319983965,
      "eval_arxiv_emb_cos_sim": 0.7650954127311707,
      "eval_arxiv_emb_cos_sim_sem": 0.008417891929949175,
      "eval_arxiv_emb_top1_equal": 0.2734375,
      "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.400831460952759,
      "eval_arxiv_n_ngrams_match_1": 15.302,
      "eval_arxiv_n_ngrams_match_2": 2.956,
      "eval_arxiv_n_ngrams_match_3": 0.684,
      "eval_arxiv_num_pred_words": 40.93,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.989024386942628,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.362906544609063,
      "eval_arxiv_runtime": 10.2873,
      "eval_arxiv_samples_per_second": 48.603,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.35605140419661274,
      "eval_arxiv_token_set_f1_sem": 0.004243659280433926,
      "eval_arxiv_token_set_precision": 0.30981840333793964,
      "eval_arxiv_token_set_recall": 0.43443289292970755,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 134375
    },
    {
      "epoch": 25.8,
      "eval_python_code_alpaca_accuracy": 0.16234375,
      "eval_python_code_alpaca_bleu_score": 4.628174647388354,
      "eval_python_code_alpaca_bleu_score_sem": 0.14117937250506507,
      "eval_python_code_alpaca_emb_cos_sim": 0.7622000575065613,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007454475559084872,
      "eval_python_code_alpaca_emb_top1_equal": 0.15625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.89320969581604,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.036,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.036,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.026,
      "eval_python_code_alpaca_num_pred_words": 44.668,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.0511554362005,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.330449537463229,
      "eval_python_code_alpaca_runtime": 14.1576,
      "eval_python_code_alpaca_samples_per_second": 35.317,
      "eval_python_code_alpaca_steps_per_second": 0.071,
      "eval_python_code_alpaca_token_set_f1": 0.488121423386487,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005540800217105159,
      "eval_python_code_alpaca_token_set_precision": 0.5517540567682055,
      "eval_python_code_alpaca_token_set_recall": 0.4590513954978373,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 134375
    },
    {
      "epoch": 25.8,
      "eval_wikibio_accuracy": 0.32578125,
      "eval_wikibio_bleu_score": 5.709751126467004,
      "eval_wikibio_bleu_score_sem": 0.20657633570043119,
      "eval_wikibio_emb_cos_sim": 0.7230824828147888,
      "eval_wikibio_emb_cos_sim_sem": 0.010519697971815265,
      "eval_wikibio_emb_top1_equal": 0.21875,
      "eval_wikibio_emb_top1_equal_sem": 0.03668319712192295,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6777310371398926,
      "eval_wikibio_n_ngrams_match_1": 9.974,
      "eval_wikibio_n_ngrams_match_2": 3.304,
      "eval_wikibio_n_ngrams_match_3": 1.17,
      "eval_wikibio_num_pred_words": 36.46,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 39.556539853376734,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3462751251601449,
      "eval_wikibio_runtime": 10.258,
      "eval_wikibio_samples_per_second": 48.742,
      "eval_wikibio_steps_per_second": 0.097,
      "eval_wikibio_token_set_f1": 0.3137252096935702,
      "eval_wikibio_token_set_f1_sem": 0.0056553984606375176,
      "eval_wikibio_token_set_precision": 0.3214622151294059,
      "eval_wikibio_token_set_recall": 0.3245149902879184,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 134375
    },
    {
      "epoch": 25.8,
      "eval_nq_accuracy": 0.52803125,
      "eval_nq_bleu_score": 11.514893952271498,
      "eval_nq_bleu_score_sem": 0.4745440691142446,
      "eval_nq_emb_cos_sim": 0.8224170207977295,
      "eval_nq_emb_cos_sim_sem": 0.008182158679327173,
      "eval_nq_emb_top1_equal": 0.3203125,
      "eval_nq_emb_top1_equal_sem": 0.041403754790620424,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.180854082107544,
      "eval_nq_n_ngrams_match_1": 23.15,
      "eval_nq_n_ngrams_match_2": 8.46,
      "eval_nq_n_ngrams_match_3": 3.842,
      "eval_nq_num_pred_words": 49.18,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.85386495802975,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.44760593975440643,
      "eval_nq_runtime": 10.3444,
      "eval_nq_samples_per_second": 48.335,
      "eval_nq_steps_per_second": 0.097,
      "eval_nq_token_set_f1": 0.4628755583514812,
      "eval_nq_token_set_f1_sem": 0.005065217356340255,
      "eval_nq_token_set_precision": 0.4200706643271948,
      "eval_nq_token_set_recall": 0.5255489521942965,
      "eval_nq_true_num_tokens": 64.0,
      "step": 134375
    },
    {
      "epoch": 25.8,
      "learning_rate": 0.001,
      "loss": 2.5603,
      "step": 134376
    },
    {
      "epoch": 25.8,
      "learning_rate": 0.001,
      "loss": 2.5637,
      "step": 134388
    },
    {
      "epoch": 25.81,
      "learning_rate": 0.001,
      "loss": 2.5573,
      "step": 134400
    },
    {
      "epoch": 25.81,
      "learning_rate": 0.001,
      "loss": 2.5497,
      "step": 134412
    },
    {
      "epoch": 25.81,
      "learning_rate": 0.001,
      "loss": 2.5634,
      "step": 134424
    },
    {
      "epoch": 25.81,
      "learning_rate": 0.001,
      "loss": 2.5543,
      "step": 134436
    },
    {
      "epoch": 25.82,
      "learning_rate": 0.001,
      "loss": 2.5503,
      "step": 134448
    },
    {
      "epoch": 25.82,
      "learning_rate": 0.001,
      "loss": 2.5453,
      "step": 134460
    },
    {
      "epoch": 25.82,
      "learning_rate": 0.001,
      "loss": 2.5472,
      "step": 134472
    },
    {
      "epoch": 25.82,
      "learning_rate": 0.001,
      "loss": 2.554,
      "step": 134484
    },
    {
      "epoch": 25.82,
      "learning_rate": 0.001,
      "loss": 2.5515,
      "step": 134496
    },
    {
      "epoch": 25.83,
      "learning_rate": 0.001,
      "loss": 2.5576,
      "step": 134508
    },
    {
      "epoch": 25.83,
      "learning_rate": 0.001,
      "loss": 2.5541,
      "step": 134520
    },
    {
      "epoch": 25.83,
      "learning_rate": 0.001,
      "loss": 2.5467,
      "step": 134532
    },
    {
      "epoch": 25.83,
      "learning_rate": 0.001,
      "loss": 2.5605,
      "step": 134544
    },
    {
      "epoch": 25.84,
      "learning_rate": 0.001,
      "loss": 2.5378,
      "step": 134556
    },
    {
      "epoch": 25.84,
      "learning_rate": 0.001,
      "loss": 2.5471,
      "step": 134568
    },
    {
      "epoch": 25.84,
      "learning_rate": 0.001,
      "loss": 2.5549,
      "step": 134580
    },
    {
      "epoch": 25.84,
      "learning_rate": 0.001,
      "loss": 2.5548,
      "step": 134592
    },
    {
      "epoch": 25.85,
      "learning_rate": 0.001,
      "loss": 2.5609,
      "step": 134604
    },
    {
      "epoch": 25.85,
      "learning_rate": 0.001,
      "loss": 2.5601,
      "step": 134616
    },
    {
      "epoch": 25.85,
      "learning_rate": 0.001,
      "loss": 2.5621,
      "step": 134628
    },
    {
      "epoch": 25.85,
      "learning_rate": 0.001,
      "loss": 2.5489,
      "step": 134640
    },
    {
      "epoch": 25.85,
      "learning_rate": 0.001,
      "loss": 2.5566,
      "step": 134652
    },
    {
      "epoch": 25.86,
      "learning_rate": 0.001,
      "loss": 2.553,
      "step": 134664
    },
    {
      "epoch": 25.86,
      "learning_rate": 0.001,
      "loss": 2.5633,
      "step": 134676
    },
    {
      "epoch": 25.86,
      "learning_rate": 0.001,
      "loss": 2.5627,
      "step": 134688
    },
    {
      "epoch": 25.86,
      "learning_rate": 0.001,
      "loss": 2.5493,
      "step": 134700
    },
    {
      "epoch": 25.87,
      "learning_rate": 0.001,
      "loss": 2.5442,
      "step": 134712
    },
    {
      "epoch": 25.87,
      "learning_rate": 0.001,
      "loss": 2.5463,
      "step": 134724
    },
    {
      "epoch": 25.87,
      "learning_rate": 0.001,
      "loss": 2.5586,
      "step": 134736
    },
    {
      "epoch": 25.87,
      "learning_rate": 0.001,
      "loss": 2.5439,
      "step": 134748
    },
    {
      "epoch": 25.88,
      "learning_rate": 0.001,
      "loss": 2.5519,
      "step": 134760
    },
    {
      "epoch": 25.88,
      "learning_rate": 0.001,
      "loss": 2.5455,
      "step": 134772
    },
    {
      "epoch": 25.88,
      "learning_rate": 0.001,
      "loss": 2.5573,
      "step": 134784
    },
    {
      "epoch": 25.88,
      "learning_rate": 0.001,
      "loss": 2.5517,
      "step": 134796
    },
    {
      "epoch": 25.88,
      "learning_rate": 0.001,
      "loss": 2.5482,
      "step": 134808
    },
    {
      "epoch": 25.89,
      "learning_rate": 0.001,
      "loss": 2.5477,
      "step": 134820
    },
    {
      "epoch": 25.89,
      "learning_rate": 0.001,
      "loss": 2.5526,
      "step": 134832
    },
    {
      "epoch": 25.89,
      "learning_rate": 0.001,
      "loss": 2.5526,
      "step": 134844
    },
    {
      "epoch": 25.89,
      "learning_rate": 0.001,
      "loss": 2.5628,
      "step": 134856
    },
    {
      "epoch": 25.9,
      "learning_rate": 0.001,
      "loss": 2.5582,
      "step": 134868
    },
    {
      "epoch": 25.9,
      "learning_rate": 0.001,
      "loss": 2.5622,
      "step": 134880
    },
    {
      "epoch": 25.9,
      "learning_rate": 0.001,
      "loss": 2.5539,
      "step": 134892
    },
    {
      "epoch": 25.9,
      "learning_rate": 0.001,
      "loss": 2.5589,
      "step": 134904
    },
    {
      "epoch": 25.91,
      "learning_rate": 0.001,
      "loss": 2.5581,
      "step": 134916
    },
    {
      "epoch": 25.91,
      "learning_rate": 0.001,
      "loss": 2.5435,
      "step": 134928
    },
    {
      "epoch": 25.91,
      "learning_rate": 0.001,
      "loss": 2.5573,
      "step": 134940
    },
    {
      "epoch": 25.91,
      "learning_rate": 0.001,
      "loss": 2.5522,
      "step": 134952
    },
    {
      "epoch": 25.91,
      "learning_rate": 0.001,
      "loss": 2.5552,
      "step": 134964
    },
    {
      "epoch": 25.92,
      "learning_rate": 0.001,
      "loss": 2.5626,
      "step": 134976
    },
    {
      "epoch": 25.92,
      "learning_rate": 0.001,
      "loss": 2.5491,
      "step": 134988
    },
    {
      "epoch": 25.92,
      "learning_rate": 0.001,
      "loss": 2.552,
      "step": 135000
    },
    {
      "epoch": 25.92,
      "eval_ag_news_accuracy": 0.324625,
      "eval_ag_news_bleu_score": 4.8215466061035945,
      "eval_ag_news_bleu_score_sem": 0.15782144989987337,
      "eval_ag_news_emb_cos_sim": 0.8168601989746094,
      "eval_ag_news_emb_cos_sim_sem": 0.005857727056725968,
      "eval_ag_news_emb_top1_equal": 0.2421875,
      "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5389437675476074,
      "eval_ag_news_n_ngrams_match_1": 14.146,
      "eval_ag_news_n_ngrams_match_2": 3.096,
      "eval_ag_news_n_ngrams_match_3": 0.892,
      "eval_ag_news_num_pred_words": 46.884,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 34.43053333161943,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3530551643132303,
      "eval_ag_news_runtime": 10.4201,
      "eval_ag_news_samples_per_second": 47.984,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.34989439806463696,
      "eval_ag_news_token_set_f1_sem": 0.004337027871653999,
      "eval_ag_news_token_set_precision": 0.33528392211218166,
      "eval_ag_news_token_set_recall": 0.38263716450838275,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 135000
    },
    {
      "epoch": 25.92,
      "eval_anthropic_toxic_prompts_accuracy": 0.1155625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.248820541653724,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12388414549316776,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6773720979690552,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008940576071757028,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2112584114074707,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.392,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.964,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.744,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.088,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.81028813766348,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2209994635794737,
      "eval_anthropic_toxic_prompts_runtime": 10.3619,
      "eval_anthropic_toxic_prompts_samples_per_second": 48.254,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.097,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3608351765559386,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006268429188372862,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.452080697008561,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3260422245809032,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 135000
    },
    {
      "epoch": 25.92,
      "eval_arxiv_accuracy": 0.34821875,
      "eval_arxiv_bleu_score": 4.414458346154399,
      "eval_arxiv_bleu_score_sem": 0.12981745992836125,
      "eval_arxiv_emb_cos_sim": 0.7740839719772339,
      "eval_arxiv_emb_cos_sim_sem": 0.007271887508801583,
      "eval_arxiv_emb_top1_equal": 0.2734375,
      "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3873417377471924,
      "eval_arxiv_n_ngrams_match_1": 15.364,
      "eval_arxiv_n_ngrams_match_2": 3.0,
      "eval_arxiv_n_ngrams_match_3": 0.68,
      "eval_arxiv_num_pred_words": 40.734,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.587197111563878,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3670790227490169,
      "eval_arxiv_runtime": 13.9243,
      "eval_arxiv_samples_per_second": 35.908,
      "eval_arxiv_steps_per_second": 0.072,
      "eval_arxiv_token_set_f1": 0.3582978618413922,
      "eval_arxiv_token_set_f1_sem": 0.004188978589808033,
      "eval_arxiv_token_set_precision": 0.3103418486459011,
      "eval_arxiv_token_set_recall": 0.44124318320252637,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 135000
    },
    {
      "epoch": 25.92,
      "eval_python_code_alpaca_accuracy": 0.16065625,
      "eval_python_code_alpaca_bleu_score": 4.612681237207016,
      "eval_python_code_alpaca_bleu_score_sem": 0.142013135243516,
      "eval_python_code_alpaca_emb_cos_sim": 0.7625421285629272,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.00792974778904051,
      "eval_python_code_alpaca_emb_top1_equal": 0.171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03347745514062371,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8573057651519775,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.184,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.998,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.014,
      "eval_python_code_alpaca_num_pred_words": 44.598,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.41454480108101,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3390603081545783,
      "eval_python_code_alpaca_runtime": 10.2266,
      "eval_python_code_alpaca_samples_per_second": 48.892,
      "eval_python_code_alpaca_steps_per_second": 0.098,
      "eval_python_code_alpaca_token_set_f1": 0.4834815272456369,
      "eval_python_code_alpaca_token_set_f1_sem": 0.00544191619967506,
      "eval_python_code_alpaca_token_set_precision": 0.5563364195786793,
      "eval_python_code_alpaca_token_set_recall": 0.447065452883681,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 135000
    },
    {
      "epoch": 25.92,
      "eval_wikibio_accuracy": 0.3250625,
      "eval_wikibio_bleu_score": 5.7693631875653395,
      "eval_wikibio_bleu_score_sem": 0.19686744688937188,
      "eval_wikibio_emb_cos_sim": 0.7337939739227295,
      "eval_wikibio_emb_cos_sim_sem": 0.00984931546775705,
      "eval_wikibio_emb_top1_equal": 0.1171875,
      "eval_wikibio_emb_top1_equal_sem": 0.02854125312152025,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7203829288482666,
      "eval_wikibio_n_ngrams_match_1": 10.166,
      "eval_wikibio_n_ngrams_match_2": 3.396,
      "eval_wikibio_n_ngrams_match_3": 1.176,
      "eval_wikibio_num_pred_words": 36.68,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 41.28019846129939,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.35702674164601056,
      "eval_wikibio_runtime": 10.0444,
      "eval_wikibio_samples_per_second": 49.779,
      "eval_wikibio_steps_per_second": 0.1,
      "eval_wikibio_token_set_f1": 0.32086572052134377,
      "eval_wikibio_token_set_f1_sem": 0.005320723673554833,
      "eval_wikibio_token_set_precision": 0.32908280801527406,
      "eval_wikibio_token_set_recall": 0.3299145150312103,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 135000
    },
    {
      "epoch": 25.92,
      "eval_nq_accuracy": 0.53034375,
      "eval_nq_bleu_score": 11.852041455321265,
      "eval_nq_bleu_score_sem": 0.48372725850744325,
      "eval_nq_emb_cos_sim": 0.8295139074325562,
      "eval_nq_emb_cos_sim_sem": 0.007649390245468581,
      "eval_nq_emb_top1_equal": 0.328125,
      "eval_nq_emb_top1_equal_sem": 0.041664103776406315,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1738548278808594,
      "eval_nq_n_ngrams_match_1": 23.166,
      "eval_nq_n_ngrams_match_2": 8.558,
      "eval_nq_n_ngrams_match_3": 3.972,
      "eval_nq_num_pred_words": 49.102,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.792110874672815,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4505863833185626,
      "eval_nq_runtime": 10.5048,
      "eval_nq_samples_per_second": 47.597,
      "eval_nq_steps_per_second": 0.095,
      "eval_nq_token_set_f1": 0.4626684146917264,
      "eval_nq_token_set_f1_sem": 0.005149669122032737,
      "eval_nq_token_set_precision": 0.42074960115249344,
      "eval_nq_token_set_recall": 0.5229615161500419,
      "eval_nq_true_num_tokens": 64.0,
      "step": 135000
    },
    {
      "epoch": 25.92,
      "learning_rate": 0.001,
      "loss": 2.546,
      "step": 135012
    },
    {
      "epoch": 25.93,
      "learning_rate": 0.001,
      "loss": 2.5593,
      "step": 135024
    },
    {
      "epoch": 25.93,
      "learning_rate": 0.001,
      "loss": 2.548,
      "step": 135036
    },
    {
      "epoch": 25.93,
      "learning_rate": 0.001,
      "loss": 2.556,
      "step": 135048
    },
    {
      "epoch": 25.93,
      "learning_rate": 0.001,
      "loss": 2.5558,
      "step": 135060
    },
    {
      "epoch": 25.94,
      "learning_rate": 0.001,
      "loss": 2.5578,
      "step": 135072
    },
    {
      "epoch": 25.94,
      "learning_rate": 0.001,
      "loss": 2.5459,
      "step": 135084
    },
    {
      "epoch": 25.94,
      "learning_rate": 0.001,
      "loss": 2.5426,
      "step": 135096
    },
    {
      "epoch": 25.94,
      "learning_rate": 0.001,
      "loss": 2.5539,
      "step": 135108
    },
    {
      "epoch": 25.94,
      "learning_rate": 0.001,
      "loss": 2.5569,
      "step": 135120
    },
    {
      "epoch": 25.95,
      "learning_rate": 0.001,
      "loss": 2.5658,
      "step": 135132
    },
    {
      "epoch": 25.95,
      "learning_rate": 0.001,
      "loss": 2.5496,
      "step": 135144
    },
    {
      "epoch": 25.95,
      "learning_rate": 0.001,
      "loss": 2.56,
      "step": 135156
    },
    {
      "epoch": 25.95,
      "learning_rate": 0.001,
      "loss": 2.5504,
      "step": 135168
    },
    {
      "epoch": 25.96,
      "learning_rate": 0.001,
      "loss": 2.5573,
      "step": 135180
    },
    {
      "epoch": 25.96,
      "learning_rate": 0.001,
      "loss": 2.564,
      "step": 135192
    },
    {
      "epoch": 25.96,
      "learning_rate": 0.001,
      "loss": 2.5605,
      "step": 135204
    },
    {
      "epoch": 25.96,
      "learning_rate": 0.001,
      "loss": 2.5438,
      "step": 135216
    },
    {
      "epoch": 25.97,
      "learning_rate": 0.001,
      "loss": 2.5501,
      "step": 135228
    },
    {
      "epoch": 25.97,
      "learning_rate": 0.001,
      "loss": 2.5478,
      "step": 135240
    },
    {
      "epoch": 25.97,
      "learning_rate": 0.001,
      "loss": 2.5581,
      "step": 135252
    },
    {
      "epoch": 25.97,
      "learning_rate": 0.001,
      "loss": 2.5461,
      "step": 135264
    },
    {
      "epoch": 25.97,
      "learning_rate": 0.001,
      "loss": 2.5541,
      "step": 135276
    },
    {
      "epoch": 25.98,
      "learning_rate": 0.001,
      "loss": 2.5541,
      "step": 135288
    },
    {
      "epoch": 25.98,
      "learning_rate": 0.001,
      "loss": 2.5516,
      "step": 135300
    },
    {
      "epoch": 25.98,
      "learning_rate": 0.001,
      "loss": 2.554,
      "step": 135312
    },
    {
      "epoch": 25.98,
      "learning_rate": 0.001,
      "loss": 2.5474,
      "step": 135324
    },
    {
      "epoch": 25.99,
      "learning_rate": 0.001,
      "loss": 2.5568,
      "step": 135336
    },
    {
      "epoch": 25.99,
      "learning_rate": 0.001,
      "loss": 2.5468,
      "step": 135348
    },
    {
      "epoch": 25.99,
      "learning_rate": 0.001,
      "loss": 2.5571,
      "step": 135360
    },
    {
      "epoch": 25.99,
      "learning_rate": 0.001,
      "loss": 2.5516,
      "step": 135372
    },
    {
      "epoch": 26.0,
      "learning_rate": 0.001,
      "loss": 2.5514,
      "step": 135384
    },
    {
      "epoch": 26.0,
      "learning_rate": 0.001,
      "loss": 2.5586,
      "step": 135396
    },
    {
      "epoch": 26.0,
      "learning_rate": 0.001,
      "loss": 2.5505,
      "step": 135408
    },
    {
      "epoch": 26.0,
      "learning_rate": 0.001,
      "loss": 2.5353,
      "step": 135420
    },
    {
      "epoch": 26.0,
      "learning_rate": 0.001,
      "loss": 2.5458,
      "step": 135432
    },
    {
      "epoch": 26.01,
      "learning_rate": 0.001,
      "loss": 2.5356,
      "step": 135444
    },
    {
      "epoch": 26.01,
      "learning_rate": 0.001,
      "loss": 2.5402,
      "step": 135456
    },
    {
      "epoch": 26.01,
      "learning_rate": 0.001,
      "loss": 2.5342,
      "step": 135468
    },
    {
      "epoch": 26.01,
      "learning_rate": 0.001,
      "loss": 2.5409,
      "step": 135480
    },
    {
      "epoch": 26.02,
      "learning_rate": 0.001,
      "loss": 2.5358,
      "step": 135492
    },
    {
      "epoch": 26.02,
      "learning_rate": 0.001,
      "loss": 2.5438,
      "step": 135504
    },
    {
      "epoch": 26.02,
      "learning_rate": 0.001,
      "loss": 2.5355,
      "step": 135516
    },
    {
      "epoch": 26.02,
      "learning_rate": 0.001,
      "loss": 2.5304,
      "step": 135528
    },
    {
      "epoch": 26.03,
      "learning_rate": 0.001,
      "loss": 2.5335,
      "step": 135540
    },
    {
      "epoch": 26.03,
      "learning_rate": 0.001,
      "loss": 2.5385,
      "step": 135552
    },
    {
      "epoch": 26.03,
      "learning_rate": 0.001,
      "loss": 2.531,
      "step": 135564
    },
    {
      "epoch": 26.03,
      "learning_rate": 0.001,
      "loss": 2.5355,
      "step": 135576
    },
    {
      "epoch": 26.03,
      "learning_rate": 0.001,
      "loss": 2.5331,
      "step": 135588
    },
    {
      "epoch": 26.04,
      "learning_rate": 0.001,
      "loss": 2.54,
      "step": 135600
    },
    {
      "epoch": 26.04,
      "learning_rate": 0.001,
      "loss": 2.5499,
      "step": 135612
    },
    {
      "epoch": 26.04,
      "learning_rate": 0.001,
      "loss": 2.5415,
      "step": 135624
    },
    {
      "epoch": 26.04,
      "eval_ag_news_accuracy": 0.32403125,
      "eval_ag_news_bleu_score": 4.590910691753977,
      "eval_ag_news_bleu_score_sem": 0.14774585171485072,
      "eval_ag_news_emb_cos_sim": 0.8111287951469421,
      "eval_ag_news_emb_cos_sim_sem": 0.007094043613153802,
      "eval_ag_news_emb_top1_equal": 0.2734375,
      "eval_ag_news_emb_top1_equal_sem": 0.03955156411760461,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.54253888130188,
      "eval_ag_news_n_ngrams_match_1": 14.126,
      "eval_ag_news_n_ngrams_match_2": 3.03,
      "eval_ag_news_n_ngrams_match_3": 0.778,
      "eval_ag_news_num_pred_words": 46.774,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 34.554537787067076,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35369404731794807,
      "eval_ag_news_runtime": 10.3381,
      "eval_ag_news_samples_per_second": 48.365,
      "eval_ag_news_steps_per_second": 0.097,
      "eval_ag_news_token_set_f1": 0.35162081455483996,
      "eval_ag_news_token_set_f1_sem": 0.004197430205313869,
      "eval_ag_news_token_set_precision": 0.33796050845385917,
      "eval_ag_news_token_set_recall": 0.3816561790739224,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 135625
    },
    {
      "epoch": 26.04,
      "eval_anthropic_toxic_prompts_accuracy": 0.1141875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2594359282646783,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1246410124995687,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6739163398742676,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00865119844405926,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2369542121887207,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.374,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.996,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.774,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.302,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.45606976463838,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2177168882032835,
      "eval_anthropic_toxic_prompts_runtime": 10.3373,
      "eval_anthropic_toxic_prompts_samples_per_second": 48.369,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.097,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3632432499569225,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0064447337418982445,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4506778821014783,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3275703381121131,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 135625
    },
    {
      "epoch": 26.04,
      "eval_arxiv_accuracy": 0.3488125,
      "eval_arxiv_bleu_score": 4.344606939093127,
      "eval_arxiv_bleu_score_sem": 0.12211452841307469,
      "eval_arxiv_emb_cos_sim": 0.7621860504150391,
      "eval_arxiv_emb_cos_sim_sem": 0.00938866292587863,
      "eval_arxiv_emb_top1_equal": 0.25,
      "eval_arxiv_emb_top1_equal_sem": 0.03842366440207048,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3966190814971924,
      "eval_arxiv_n_ngrams_match_1": 15.316,
      "eval_arxiv_n_ngrams_match_2": 2.98,
      "eval_arxiv_n_ngrams_match_3": 0.658,
      "eval_arxiv_num_pred_words": 41.302,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.862964928260897,
      "eval_arxiv_pred_num_tokens": 62.9921875,
      "eval_arxiv_rouge_score": 0.362235737094995,
      "eval_arxiv_runtime": 10.0227,
      "eval_arxiv_samples_per_second": 49.887,
      "eval_arxiv_steps_per_second": 0.1,
      "eval_arxiv_token_set_f1": 0.35471993227548804,
      "eval_arxiv_token_set_f1_sem": 0.004285801798175079,
      "eval_arxiv_token_set_precision": 0.308939019407551,
      "eval_arxiv_token_set_recall": 0.4333996262695078,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 135625
    },
    {
      "epoch": 26.04,
      "eval_python_code_alpaca_accuracy": 0.16053125,
      "eval_python_code_alpaca_bleu_score": 4.555752557558127,
      "eval_python_code_alpaca_bleu_score_sem": 0.13748333417367972,
      "eval_python_code_alpaca_emb_cos_sim": 0.7590852975845337,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007779874912824785,
      "eval_python_code_alpaca_emb_top1_equal": 0.109375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8728814125061035,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.992,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.926,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.946,
      "eval_python_code_alpaca_num_pred_words": 43.998,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.687911011028405,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3393173819108022,
      "eval_python_code_alpaca_runtime": 9.8497,
      "eval_python_code_alpaca_samples_per_second": 50.763,
      "eval_python_code_alpaca_steps_per_second": 0.102,
      "eval_python_code_alpaca_token_set_f1": 0.48191817785688534,
      "eval_python_code_alpaca_token_set_f1_sem": 0.00522353860919341,
      "eval_python_code_alpaca_token_set_precision": 0.5477991796605628,
      "eval_python_code_alpaca_token_set_recall": 0.45174149684965764,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 135625
    },
    {
      "epoch": 26.04,
      "eval_wikibio_accuracy": 0.3250625,
      "eval_wikibio_bleu_score": 5.935405123280823,
      "eval_wikibio_bleu_score_sem": 0.21304024124245813,
      "eval_wikibio_emb_cos_sim": 0.7358399629592896,
      "eval_wikibio_emb_cos_sim_sem": 0.01074842496741684,
      "eval_wikibio_emb_top1_equal": 0.15625,
      "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7152676582336426,
      "eval_wikibio_n_ngrams_match_1": 9.982,
      "eval_wikibio_n_ngrams_match_2": 3.394,
      "eval_wikibio_n_ngrams_match_3": 1.236,
      "eval_wikibio_num_pred_words": 35.552,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 41.069578224156515,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.34920475810298784,
      "eval_wikibio_runtime": 10.1291,
      "eval_wikibio_samples_per_second": 49.363,
      "eval_wikibio_steps_per_second": 0.099,
      "eval_wikibio_token_set_f1": 0.31497383578099925,
      "eval_wikibio_token_set_f1_sem": 0.005805803427327527,
      "eval_wikibio_token_set_precision": 0.32213102834355295,
      "eval_wikibio_token_set_recall": 0.3237996382968531,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 135625
    },
    {
      "epoch": 26.04,
      "eval_nq_accuracy": 0.52965625,
      "eval_nq_bleu_score": 11.926697426339699,
      "eval_nq_bleu_score_sem": 0.492345157062038,
      "eval_nq_emb_cos_sim": 0.8344102501869202,
      "eval_nq_emb_cos_sim_sem": 0.00695168538831867,
      "eval_nq_emb_top1_equal": 0.3125,
      "eval_nq_emb_top1_equal_sem": 0.041130074229814934,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1763744354248047,
      "eval_nq_n_ngrams_match_1": 23.292,
      "eval_nq_n_ngrams_match_2": 8.574,
      "eval_nq_n_ngrams_match_3": 4.026,
      "eval_nq_num_pred_words": 49.42,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.81429147502952,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45102300446386084,
      "eval_nq_runtime": 10.9002,
      "eval_nq_samples_per_second": 45.871,
      "eval_nq_steps_per_second": 0.092,
      "eval_nq_token_set_f1": 0.46501276606574626,
      "eval_nq_token_set_f1_sem": 0.005065437908397407,
      "eval_nq_token_set_precision": 0.42433663644825226,
      "eval_nq_token_set_recall": 0.5210483141291445,
      "eval_nq_true_num_tokens": 64.0,
      "step": 135625
    },
    {
      "epoch": 26.04,
      "learning_rate": 0.001,
      "loss": 2.5297,
      "step": 135636
    },
    {
      "epoch": 26.05,
      "learning_rate": 0.001,
      "loss": 2.5346,
      "step": 135648
    },
    {
      "epoch": 26.05,
      "learning_rate": 0.001,
      "loss": 2.5403,
      "step": 135660
    },
    {
      "epoch": 26.05,
      "learning_rate": 0.001,
      "loss": 2.5348,
      "step": 135672
    },
    {
      "epoch": 26.05,
      "learning_rate": 0.001,
      "loss": 2.5346,
      "step": 135684
    },
    {
      "epoch": 26.06,
      "learning_rate": 0.001,
      "loss": 2.5356,
      "step": 135696
    },
    {
      "epoch": 26.06,
      "learning_rate": 0.001,
      "loss": 2.5363,
      "step": 135708
    },
    {
      "epoch": 26.06,
      "learning_rate": 0.001,
      "loss": 2.538,
      "step": 135720
    },
    {
      "epoch": 26.06,
      "learning_rate": 0.001,
      "loss": 2.539,
      "step": 135732
    },
    {
      "epoch": 26.06,
      "learning_rate": 0.001,
      "loss": 2.5445,
      "step": 135744
    },
    {
      "epoch": 26.07,
      "learning_rate": 0.001,
      "loss": 2.536,
      "step": 135756
    },
    {
      "epoch": 26.07,
      "learning_rate": 0.001,
      "loss": 2.537,
      "step": 135768
    },
    {
      "epoch": 26.07,
      "learning_rate": 0.001,
      "loss": 2.5311,
      "step": 135780
    },
    {
      "epoch": 26.07,
      "learning_rate": 0.001,
      "loss": 2.5385,
      "step": 135792
    },
    {
      "epoch": 26.08,
      "learning_rate": 0.001,
      "loss": 2.538,
      "step": 135804
    },
    {
      "epoch": 26.08,
      "learning_rate": 0.001,
      "loss": 2.5442,
      "step": 135816
    },
    {
      "epoch": 26.08,
      "learning_rate": 0.001,
      "loss": 2.5462,
      "step": 135828
    },
    {
      "epoch": 26.08,
      "learning_rate": 0.001,
      "loss": 2.5421,
      "step": 135840
    },
    {
      "epoch": 26.09,
      "learning_rate": 0.001,
      "loss": 2.5455,
      "step": 135852
    },
    {
      "epoch": 26.09,
      "learning_rate": 0.001,
      "loss": 2.5279,
      "step": 135864
    },
    {
      "epoch": 26.09,
      "learning_rate": 0.001,
      "loss": 2.533,
      "step": 135876
    },
    {
      "epoch": 26.09,
      "learning_rate": 0.001,
      "loss": 2.5463,
      "step": 135888
    },
    {
      "epoch": 26.09,
      "learning_rate": 0.001,
      "loss": 2.535,
      "step": 135900
    },
    {
      "epoch": 26.1,
      "learning_rate": 0.001,
      "loss": 2.5455,
      "step": 135912
    },
    {
      "epoch": 26.1,
      "learning_rate": 0.001,
      "loss": 2.5316,
      "step": 135924
    },
    {
      "epoch": 26.1,
      "learning_rate": 0.001,
      "loss": 2.5401,
      "step": 135936
    },
    {
      "epoch": 26.1,
      "learning_rate": 0.001,
      "loss": 2.5417,
      "step": 135948
    },
    {
      "epoch": 26.11,
      "learning_rate": 0.001,
      "loss": 2.5312,
      "step": 135960
    },
    {
      "epoch": 26.11,
      "learning_rate": 0.001,
      "loss": 2.5389,
      "step": 135972
    },
    {
      "epoch": 26.11,
      "learning_rate": 0.001,
      "loss": 2.5459,
      "step": 135984
    },
    {
      "epoch": 26.11,
      "learning_rate": 0.001,
      "loss": 2.5362,
      "step": 135996
    },
    {
      "epoch": 26.12,
      "learning_rate": 0.001,
      "loss": 2.5436,
      "step": 136008
    },
    {
      "epoch": 26.12,
      "learning_rate": 0.001,
      "loss": 2.5379,
      "step": 136020
    },
    {
      "epoch": 26.12,
      "learning_rate": 0.001,
      "loss": 2.5434,
      "step": 136032
    },
    {
      "epoch": 26.12,
      "learning_rate": 0.001,
      "loss": 2.553,
      "step": 136044
    },
    {
      "epoch": 26.12,
      "learning_rate": 0.001,
      "loss": 2.5474,
      "step": 136056
    },
    {
      "epoch": 26.13,
      "learning_rate": 0.001,
      "loss": 2.5397,
      "step": 136068
    },
    {
      "epoch": 26.13,
      "learning_rate": 0.001,
      "loss": 2.5478,
      "step": 136080
    },
    {
      "epoch": 26.13,
      "learning_rate": 0.001,
      "loss": 2.5463,
      "step": 136092
    },
    {
      "epoch": 26.13,
      "learning_rate": 0.001,
      "loss": 2.5378,
      "step": 136104
    },
    {
      "epoch": 26.14,
      "learning_rate": 0.001,
      "loss": 2.5536,
      "step": 136116
    },
    {
      "epoch": 26.14,
      "learning_rate": 0.001,
      "loss": 2.534,
      "step": 136128
    },
    {
      "epoch": 26.14,
      "learning_rate": 0.001,
      "loss": 2.5429,
      "step": 136140
    },
    {
      "epoch": 26.14,
      "learning_rate": 0.001,
      "loss": 2.5315,
      "step": 136152
    },
    {
      "epoch": 26.15,
      "learning_rate": 0.001,
      "loss": 2.5361,
      "step": 136164
    },
    {
      "epoch": 26.15,
      "learning_rate": 0.001,
      "loss": 2.5358,
      "step": 136176
    },
    {
      "epoch": 26.15,
      "learning_rate": 0.001,
      "loss": 2.5411,
      "step": 136188
    },
    {
      "epoch": 26.15,
      "learning_rate": 0.001,
      "loss": 2.5442,
      "step": 136200
    },
    {
      "epoch": 26.15,
      "learning_rate": 0.001,
      "loss": 2.5377,
      "step": 136212
    },
    {
      "epoch": 26.16,
      "learning_rate": 0.001,
      "loss": 2.5326,
      "step": 136224
    },
    {
      "epoch": 26.16,
      "learning_rate": 0.001,
      "loss": 2.5373,
      "step": 136236
    },
    {
      "epoch": 26.16,
      "learning_rate": 0.001,
      "loss": 2.5399,
      "step": 136248
    },
    {
      "epoch": 26.16,
      "eval_ag_news_accuracy": 0.32609375,
      "eval_ag_news_bleu_score": 4.7534053863205346,
      "eval_ag_news_bleu_score_sem": 0.1436586508635874,
      "eval_ag_news_emb_cos_sim": 0.8094553351402283,
      "eval_ag_news_emb_cos_sim_sem": 0.006793587291198242,
      "eval_ag_news_emb_top1_equal": 0.203125,
      "eval_ag_news_emb_top1_equal_sem": 0.03570055125142555,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5403218269348145,
      "eval_ag_news_n_ngrams_match_1": 14.188,
      "eval_ag_news_n_ngrams_match_2": 3.094,
      "eval_ag_news_n_ngrams_match_3": 0.832,
      "eval_ag_news_num_pred_words": 46.52,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 34.478013358918375,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35556657263447833,
      "eval_ag_news_runtime": 10.7699,
      "eval_ag_news_samples_per_second": 46.426,
      "eval_ag_news_steps_per_second": 0.093,
      "eval_ag_news_token_set_f1": 0.35001001072982785,
      "eval_ag_news_token_set_f1_sem": 0.004270852844131644,
      "eval_ag_news_token_set_precision": 0.33776192722003534,
      "eval_ag_news_token_set_recall": 0.3751954875034251,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 136250
    },
    {
      "epoch": 26.16,
      "eval_anthropic_toxic_prompts_accuracy": 0.11446875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1740314544838166,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1236341983682375,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6758701801300049,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008016001314518052,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2240450382232666,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.246,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.862,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.712,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.144,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.12956491972731,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21463533868913415,
      "eval_anthropic_toxic_prompts_runtime": 10.0734,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.635,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.099,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35637729894753334,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006495450137988693,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.43936010001946035,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3253769992586194,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 136250
    },
    {
      "epoch": 26.16,
      "eval_arxiv_accuracy": 0.34896875,
      "eval_arxiv_bleu_score": 4.198809583659221,
      "eval_arxiv_bleu_score_sem": 0.11817792796855849,
      "eval_arxiv_emb_cos_sim": 0.7708814144134521,
      "eval_arxiv_emb_cos_sim_sem": 0.0073460185469761815,
      "eval_arxiv_emb_top1_equal": 0.265625,
      "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.404740571975708,
      "eval_arxiv_n_ngrams_match_1": 15.02,
      "eval_arxiv_n_ngrams_match_2": 2.862,
      "eval_arxiv_n_ngrams_match_3": 0.606,
      "eval_arxiv_num_pred_words": 40.278,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 30.106484244977256,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36243692727416343,
      "eval_arxiv_runtime": 10.4605,
      "eval_arxiv_samples_per_second": 47.799,
      "eval_arxiv_steps_per_second": 0.096,
      "eval_arxiv_token_set_f1": 0.3531461121161466,
      "eval_arxiv_token_set_f1_sem": 0.004170485006289717,
      "eval_arxiv_token_set_precision": 0.30482242880959065,
      "eval_arxiv_token_set_recall": 0.43681992029202804,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 136250
    },
    {
      "epoch": 26.16,
      "eval_python_code_alpaca_accuracy": 0.161125,
      "eval_python_code_alpaca_bleu_score": 4.553928158502647,
      "eval_python_code_alpaca_bleu_score_sem": 0.13522952121475498,
      "eval_python_code_alpaca_emb_cos_sim": 0.7599102258682251,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008040670391787085,
      "eval_python_code_alpaca_emb_top1_equal": 0.0859375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02487009666300537,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8596158027648926,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.016,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.93,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.954,
      "eval_python_code_alpaca_num_pred_words": 43.546,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.45481955477148,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3438928807825201,
      "eval_python_code_alpaca_runtime": 9.6393,
      "eval_python_code_alpaca_samples_per_second": 51.871,
      "eval_python_code_alpaca_steps_per_second": 0.104,
      "eval_python_code_alpaca_token_set_f1": 0.4784491578718069,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005472352866820055,
      "eval_python_code_alpaca_token_set_precision": 0.5473837131364725,
      "eval_python_code_alpaca_token_set_recall": 0.44501932904987707,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 136250
    },
    {
      "epoch": 26.16,
      "eval_wikibio_accuracy": 0.32453125,
      "eval_wikibio_bleu_score": 5.878704931551258,
      "eval_wikibio_bleu_score_sem": 0.20375367412176457,
      "eval_wikibio_emb_cos_sim": 0.7443228363990784,
      "eval_wikibio_emb_cos_sim_sem": 0.0086008046426749,
      "eval_wikibio_emb_top1_equal": 0.203125,
      "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.736111879348755,
      "eval_wikibio_n_ngrams_match_1": 10.048,
      "eval_wikibio_n_ngrams_match_2": 3.29,
      "eval_wikibio_n_ngrams_match_3": 1.198,
      "eval_wikibio_num_pred_words": 36.232,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 41.93462589585937,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.35490149540642446,
      "eval_wikibio_runtime": 9.9723,
      "eval_wikibio_samples_per_second": 50.139,
      "eval_wikibio_steps_per_second": 0.1,
      "eval_wikibio_token_set_f1": 0.3177924991133672,
      "eval_wikibio_token_set_f1_sem": 0.005372607721016776,
      "eval_wikibio_token_set_precision": 0.32588086357287527,
      "eval_wikibio_token_set_recall": 0.3258573969922704,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 136250
    },
    {
      "epoch": 26.16,
      "eval_nq_accuracy": 0.52878125,
      "eval_nq_bleu_score": 11.619591471832347,
      "eval_nq_bleu_score_sem": 0.4760657884699022,
      "eval_nq_emb_cos_sim": 0.8333326578140259,
      "eval_nq_emb_cos_sim_sem": 0.007278220731334853,
      "eval_nq_emb_top1_equal": 0.25,
      "eval_nq_emb_top1_equal_sem": 0.03842366440207048,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1724135875701904,
      "eval_nq_n_ngrams_match_1": 23.06,
      "eval_nq_n_ngrams_match_2": 8.314,
      "eval_nq_n_ngrams_match_3": 3.802,
      "eval_nq_num_pred_words": 49.046,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.779448457049556,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.44818040968729433,
      "eval_nq_runtime": 10.4038,
      "eval_nq_samples_per_second": 48.059,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.4599892953133288,
      "eval_nq_token_set_f1_sem": 0.005008508671166199,
      "eval_nq_token_set_precision": 0.4193728890237574,
      "eval_nq_token_set_recall": 0.5177488915911149,
      "eval_nq_true_num_tokens": 64.0,
      "step": 136250
    },
    {
      "epoch": 26.16,
      "learning_rate": 0.001,
      "loss": 2.5232,
      "step": 136260
    },
    {
      "epoch": 26.17,
      "learning_rate": 0.001,
      "loss": 2.5416,
      "step": 136272
    },
    {
      "epoch": 26.17,
      "learning_rate": 0.001,
      "loss": 2.5346,
      "step": 136284
    },
    {
      "epoch": 26.17,
      "learning_rate": 0.001,
      "loss": 2.5479,
      "step": 136296
    },
    {
      "epoch": 26.17,
      "learning_rate": 0.001,
      "loss": 2.5503,
      "step": 136308
    },
    {
      "epoch": 26.18,
      "learning_rate": 0.001,
      "loss": 2.543,
      "step": 136320
    },
    {
      "epoch": 26.18,
      "learning_rate": 0.001,
      "loss": 2.5389,
      "step": 136332
    },
    {
      "epoch": 26.18,
      "learning_rate": 0.001,
      "loss": 2.5379,
      "step": 136344
    },
    {
      "epoch": 26.18,
      "learning_rate": 0.001,
      "loss": 2.5371,
      "step": 136356
    },
    {
      "epoch": 26.18,
      "learning_rate": 0.001,
      "loss": 2.5402,
      "step": 136368
    },
    {
      "epoch": 26.19,
      "learning_rate": 0.001,
      "loss": 2.547,
      "step": 136380
    },
    {
      "epoch": 26.19,
      "learning_rate": 0.001,
      "loss": 2.5392,
      "step": 136392
    },
    {
      "epoch": 26.19,
      "learning_rate": 0.001,
      "loss": 2.5382,
      "step": 136404
    },
    {
      "epoch": 26.19,
      "learning_rate": 0.001,
      "loss": 2.5393,
      "step": 136416
    },
    {
      "epoch": 26.2,
      "learning_rate": 0.001,
      "loss": 2.5394,
      "step": 136428
    },
    {
      "epoch": 26.2,
      "learning_rate": 0.001,
      "loss": 2.5422,
      "step": 136440
    },
    {
      "epoch": 26.2,
      "learning_rate": 0.001,
      "loss": 2.5372,
      "step": 136452
    },
    {
      "epoch": 26.2,
      "learning_rate": 0.001,
      "loss": 2.5409,
      "step": 136464
    },
    {
      "epoch": 26.21,
      "learning_rate": 0.001,
      "loss": 2.5433,
      "step": 136476
    },
    {
      "epoch": 26.21,
      "learning_rate": 0.001,
      "loss": 2.5402,
      "step": 136488
    },
    {
      "epoch": 26.21,
      "learning_rate": 0.001,
      "loss": 2.5476,
      "step": 136500
    },
    {
      "epoch": 26.21,
      "learning_rate": 0.001,
      "loss": 2.5464,
      "step": 136512
    },
    {
      "epoch": 26.21,
      "learning_rate": 0.001,
      "loss": 2.5333,
      "step": 136524
    },
    {
      "epoch": 26.22,
      "learning_rate": 0.001,
      "loss": 2.5323,
      "step": 136536
    },
    {
      "epoch": 26.22,
      "learning_rate": 0.001,
      "loss": 2.5328,
      "step": 136548
    },
    {
      "epoch": 26.22,
      "learning_rate": 0.001,
      "loss": 2.5398,
      "step": 136560
    },
    {
      "epoch": 26.22,
      "learning_rate": 0.001,
      "loss": 2.54,
      "step": 136572
    },
    {
      "epoch": 26.23,
      "learning_rate": 0.001,
      "loss": 2.5462,
      "step": 136584
    },
    {
      "epoch": 26.23,
      "learning_rate": 0.001,
      "loss": 2.546,
      "step": 136596
    },
    {
      "epoch": 26.23,
      "learning_rate": 0.001,
      "loss": 2.5397,
      "step": 136608
    },
    {
      "epoch": 26.23,
      "learning_rate": 0.001,
      "loss": 2.5364,
      "step": 136620
    },
    {
      "epoch": 26.24,
      "learning_rate": 0.001,
      "loss": 2.5447,
      "step": 136632
    },
    {
      "epoch": 26.24,
      "learning_rate": 0.001,
      "loss": 2.5396,
      "step": 136644
    },
    {
      "epoch": 26.24,
      "learning_rate": 0.001,
      "loss": 2.5401,
      "step": 136656
    },
    {
      "epoch": 26.24,
      "learning_rate": 0.001,
      "loss": 2.5446,
      "step": 136668
    },
    {
      "epoch": 26.24,
      "learning_rate": 0.001,
      "loss": 2.5442,
      "step": 136680
    },
    {
      "epoch": 26.25,
      "learning_rate": 0.001,
      "loss": 2.5444,
      "step": 136692
    },
    {
      "epoch": 26.25,
      "learning_rate": 0.001,
      "loss": 2.5428,
      "step": 136704
    },
    {
      "epoch": 26.25,
      "learning_rate": 0.001,
      "loss": 2.5469,
      "step": 136716
    },
    {
      "epoch": 26.25,
      "learning_rate": 0.001,
      "loss": 2.5333,
      "step": 136728
    },
    {
      "epoch": 26.26,
      "learning_rate": 0.001,
      "loss": 2.5499,
      "step": 136740
    },
    {
      "epoch": 26.26,
      "learning_rate": 0.001,
      "loss": 2.5453,
      "step": 136752
    },
    {
      "epoch": 26.26,
      "learning_rate": 0.001,
      "loss": 2.5419,
      "step": 136764
    },
    {
      "epoch": 26.26,
      "learning_rate": 0.001,
      "loss": 2.5428,
      "step": 136776
    },
    {
      "epoch": 26.26,
      "learning_rate": 0.001,
      "loss": 2.5406,
      "step": 136788
    },
    {
      "epoch": 26.27,
      "learning_rate": 0.001,
      "loss": 2.5462,
      "step": 136800
    },
    {
      "epoch": 26.27,
      "learning_rate": 0.001,
      "loss": 2.5398,
      "step": 136812
    },
    {
      "epoch": 26.27,
      "learning_rate": 0.001,
      "loss": 2.5384,
      "step": 136824
    },
    {
      "epoch": 26.27,
      "learning_rate": 0.001,
      "loss": 2.5467,
      "step": 136836
    },
    {
      "epoch": 26.28,
      "learning_rate": 0.001,
      "loss": 2.5396,
      "step": 136848
    },
    {
      "epoch": 26.28,
      "learning_rate": 0.001,
      "loss": 2.5453,
      "step": 136860
    },
    {
      "epoch": 26.28,
      "learning_rate": 0.001,
      "loss": 2.5444,
      "step": 136872
    },
    {
      "epoch": 26.28,
      "eval_ag_news_accuracy": 0.32390625,
      "eval_ag_news_bleu_score": 5.010337133307245,
      "eval_ag_news_bleu_score_sem": 0.16365978257351546,
      "eval_ag_news_emb_cos_sim": 0.8188580870628357,
      "eval_ag_news_emb_cos_sim_sem": 0.006255775064953011,
      "eval_ag_news_emb_top1_equal": 0.2734375,
      "eval_ag_news_emb_top1_equal_sem": 0.03955156411760461,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5462374687194824,
      "eval_ag_news_n_ngrams_match_1": 14.176,
      "eval_ag_news_n_ngrams_match_2": 3.226,
      "eval_ag_news_n_ngrams_match_3": 0.954,
      "eval_ag_news_num_pred_words": 46.444,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 34.68257740264264,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3518776534142356,
      "eval_ag_news_runtime": 11.4499,
      "eval_ag_news_samples_per_second": 43.668,
      "eval_ag_news_steps_per_second": 0.087,
      "eval_ag_news_token_set_f1": 0.3537365554646819,
      "eval_ag_news_token_set_f1_sem": 0.004595980424030815,
      "eval_ag_news_token_set_precision": 0.33868839092978453,
      "eval_ag_news_token_set_recall": 0.38467899536128697,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 136875
    },
    {
      "epoch": 26.28,
      "eval_anthropic_toxic_prompts_accuracy": 0.11384375,
      "eval_anthropic_toxic_prompts_bleu_score": 3.113344748058637,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11685076588236008,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.669208288192749,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009204963252465866,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.23777437210083,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.21,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.894,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.718,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.676,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.476956376589303,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21266123423207317,
      "eval_anthropic_toxic_prompts_runtime": 9.9588,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.207,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.1,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3591344786901012,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006655155952238647,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4441902119053794,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32864163490749215,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 136875
    },
    {
      "epoch": 26.28,
      "eval_arxiv_accuracy": 0.34915625,
      "eval_arxiv_bleu_score": 4.333011328166027,
      "eval_arxiv_bleu_score_sem": 0.12615184594541862,
      "eval_arxiv_emb_cos_sim": 0.7579234838485718,
      "eval_arxiv_emb_cos_sim_sem": 0.007518748844519477,
      "eval_arxiv_emb_top1_equal": 0.2578125,
      "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.38987398147583,
      "eval_arxiv_n_ngrams_match_1": 15.066,
      "eval_arxiv_n_ngrams_match_2": 2.938,
      "eval_arxiv_n_ngrams_match_3": 0.65,
      "eval_arxiv_num_pred_words": 40.424,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.662214046393327,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3604611427623603,
      "eval_arxiv_runtime": 10.3209,
      "eval_arxiv_samples_per_second": 48.445,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.3548279845397079,
      "eval_arxiv_token_set_f1_sem": 0.004075179992290366,
      "eval_arxiv_token_set_precision": 0.3052151464510861,
      "eval_arxiv_token_set_recall": 0.44324363720376597,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 136875
    },
    {
      "epoch": 26.28,
      "eval_python_code_alpaca_accuracy": 0.163375,
      "eval_python_code_alpaca_bleu_score": 4.715599250215951,
      "eval_python_code_alpaca_bleu_score_sem": 0.14824706152068332,
      "eval_python_code_alpaca_emb_cos_sim": 0.7673449516296387,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008126806037797701,
      "eval_python_code_alpaca_emb_top1_equal": 0.1640625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8577284812927246,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.16,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.056,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.052,
      "eval_python_code_alpaca_num_pred_words": 44.004,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.421907766364683,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3412971944011274,
      "eval_python_code_alpaca_runtime": 9.7176,
      "eval_python_code_alpaca_samples_per_second": 51.453,
      "eval_python_code_alpaca_steps_per_second": 0.103,
      "eval_python_code_alpaca_token_set_f1": 0.484713177447905,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005649177262070943,
      "eval_python_code_alpaca_token_set_precision": 0.5539526744933355,
      "eval_python_code_alpaca_token_set_recall": 0.4492507534197922,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 136875
    },
    {
      "epoch": 26.28,
      "eval_wikibio_accuracy": 0.3281875,
      "eval_wikibio_bleu_score": 5.818690450125476,
      "eval_wikibio_bleu_score_sem": 0.21608202173265756,
      "eval_wikibio_emb_cos_sim": 0.7250394821166992,
      "eval_wikibio_emb_cos_sim_sem": 0.01122964056155702,
      "eval_wikibio_emb_top1_equal": 0.171875,
      "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.692779541015625,
      "eval_wikibio_n_ngrams_match_1": 9.58,
      "eval_wikibio_n_ngrams_match_2": 3.244,
      "eval_wikibio_n_ngrams_match_3": 1.192,
      "eval_wikibio_num_pred_words": 34.72,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 40.156308085496704,
      "eval_wikibio_pred_num_tokens": 62.9921875,
      "eval_wikibio_rouge_score": 0.33666921256597937,
      "eval_wikibio_runtime": 9.8949,
      "eval_wikibio_samples_per_second": 50.531,
      "eval_wikibio_steps_per_second": 0.101,
      "eval_wikibio_token_set_f1": 0.31053074019118176,
      "eval_wikibio_token_set_f1_sem": 0.005821424953035725,
      "eval_wikibio_token_set_precision": 0.31318598808710874,
      "eval_wikibio_token_set_recall": 0.3302876270624791,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 136875
    },
    {
      "epoch": 26.28,
      "eval_nq_accuracy": 0.5296875,
      "eval_nq_bleu_score": 11.965890207913443,
      "eval_nq_bleu_score_sem": 0.5056489717638976,
      "eval_nq_emb_cos_sim": 0.8246257901191711,
      "eval_nq_emb_cos_sim_sem": 0.007787292758162702,
      "eval_nq_emb_top1_equal": 0.3515625,
      "eval_nq_emb_top1_equal_sem": 0.04236756101983345,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1769216060638428,
      "eval_nq_n_ngrams_match_1": 23.036,
      "eval_nq_n_ngrams_match_2": 8.636,
      "eval_nq_n_ngrams_match_3": 4.038,
      "eval_nq_num_pred_words": 49.032,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.819115716249794,
      "eval_nq_pred_num_tokens": 62.96875,
      "eval_nq_rouge_score": 0.4445829886526741,
      "eval_nq_runtime": 10.7013,
      "eval_nq_samples_per_second": 46.723,
      "eval_nq_steps_per_second": 0.093,
      "eval_nq_token_set_f1": 0.4579759030373418,
      "eval_nq_token_set_f1_sem": 0.005142090637970512,
      "eval_nq_token_set_precision": 0.4153846846481496,
      "eval_nq_token_set_recall": 0.5205017468738073,
      "eval_nq_true_num_tokens": 64.0,
      "step": 136875
    },
    {
      "epoch": 26.28,
      "learning_rate": 0.001,
      "loss": 2.5439,
      "step": 136884
    },
    {
      "epoch": 26.29,
      "learning_rate": 0.001,
      "loss": 2.5399,
      "step": 136896
    },
    {
      "epoch": 26.29,
      "learning_rate": 0.001,
      "loss": 2.541,
      "step": 136908
    },
    {
      "epoch": 26.29,
      "learning_rate": 0.001,
      "loss": 2.538,
      "step": 136920
    },
    {
      "epoch": 26.29,
      "learning_rate": 0.001,
      "loss": 2.5345,
      "step": 136932
    },
    {
      "epoch": 26.29,
      "learning_rate": 0.001,
      "loss": 2.5462,
      "step": 136944
    },
    {
      "epoch": 26.3,
      "learning_rate": 0.001,
      "loss": 2.5426,
      "step": 136956
    },
    {
      "epoch": 26.3,
      "learning_rate": 0.001,
      "loss": 2.5268,
      "step": 136968
    },
    {
      "epoch": 26.3,
      "learning_rate": 0.001,
      "loss": 2.5376,
      "step": 136980
    },
    {
      "epoch": 26.3,
      "learning_rate": 0.001,
      "loss": 2.5451,
      "step": 136992
    },
    {
      "epoch": 26.31,
      "learning_rate": 0.001,
      "loss": 2.5382,
      "step": 137004
    },
    {
      "epoch": 26.31,
      "learning_rate": 0.001,
      "loss": 2.5407,
      "step": 137016
    },
    {
      "epoch": 26.31,
      "learning_rate": 0.001,
      "loss": 2.5418,
      "step": 137028
    },
    {
      "epoch": 26.31,
      "learning_rate": 0.001,
      "loss": 2.538,
      "step": 137040
    },
    {
      "epoch": 26.32,
      "learning_rate": 0.001,
      "loss": 2.5424,
      "step": 137052
    },
    {
      "epoch": 26.32,
      "learning_rate": 0.001,
      "loss": 2.543,
      "step": 137064
    },
    {
      "epoch": 26.32,
      "learning_rate": 0.001,
      "loss": 2.5444,
      "step": 137076
    },
    {
      "epoch": 26.32,
      "learning_rate": 0.001,
      "loss": 2.5366,
      "step": 137088
    },
    {
      "epoch": 26.32,
      "learning_rate": 0.001,
      "loss": 2.5438,
      "step": 137100
    },
    {
      "epoch": 26.33,
      "learning_rate": 0.001,
      "loss": 2.5408,
      "step": 137112
    },
    {
      "epoch": 26.33,
      "learning_rate": 0.001,
      "loss": 2.5392,
      "step": 137124
    },
    {
      "epoch": 26.33,
      "learning_rate": 0.001,
      "loss": 2.5473,
      "step": 137136
    },
    {
      "epoch": 26.33,
      "learning_rate": 0.001,
      "loss": 2.5439,
      "step": 137148
    },
    {
      "epoch": 26.34,
      "learning_rate": 0.001,
      "loss": 2.5435,
      "step": 137160
    },
    {
      "epoch": 26.34,
      "learning_rate": 0.001,
      "loss": 2.5485,
      "step": 137172
    },
    {
      "epoch": 26.34,
      "learning_rate": 0.001,
      "loss": 2.5399,
      "step": 137184
    },
    {
      "epoch": 26.34,
      "learning_rate": 0.001,
      "loss": 2.5395,
      "step": 137196
    },
    {
      "epoch": 26.35,
      "learning_rate": 0.001,
      "loss": 2.5419,
      "step": 137208
    },
    {
      "epoch": 26.35,
      "learning_rate": 0.001,
      "loss": 2.542,
      "step": 137220
    },
    {
      "epoch": 26.35,
      "learning_rate": 0.001,
      "loss": 2.5421,
      "step": 137232
    },
    {
      "epoch": 26.35,
      "learning_rate": 0.001,
      "loss": 2.5367,
      "step": 137244
    },
    {
      "epoch": 26.35,
      "learning_rate": 0.001,
      "loss": 2.5371,
      "step": 137256
    },
    {
      "epoch": 26.36,
      "learning_rate": 0.001,
      "loss": 2.5447,
      "step": 137268
    },
    {
      "epoch": 26.36,
      "learning_rate": 0.001,
      "loss": 2.5421,
      "step": 137280
    },
    {
      "epoch": 26.36,
      "learning_rate": 0.001,
      "loss": 2.5344,
      "step": 137292
    },
    {
      "epoch": 26.36,
      "learning_rate": 0.001,
      "loss": 2.5344,
      "step": 137304
    },
    {
      "epoch": 26.37,
      "learning_rate": 0.001,
      "loss": 2.5371,
      "step": 137316
    },
    {
      "epoch": 26.37,
      "learning_rate": 0.001,
      "loss": 2.5421,
      "step": 137328
    },
    {
      "epoch": 26.37,
      "learning_rate": 0.001,
      "loss": 2.5357,
      "step": 137340
    },
    {
      "epoch": 26.37,
      "learning_rate": 0.001,
      "loss": 2.5447,
      "step": 137352
    },
    {
      "epoch": 26.38,
      "learning_rate": 0.001,
      "loss": 2.547,
      "step": 137364
    },
    {
      "epoch": 26.38,
      "learning_rate": 0.001,
      "loss": 2.5404,
      "step": 137376
    },
    {
      "epoch": 26.38,
      "learning_rate": 0.001,
      "loss": 2.5455,
      "step": 137388
    },
    {
      "epoch": 26.38,
      "learning_rate": 0.001,
      "loss": 2.5404,
      "step": 137400
    },
    {
      "epoch": 26.38,
      "learning_rate": 0.001,
      "loss": 2.5405,
      "step": 137412
    },
    {
      "epoch": 26.39,
      "learning_rate": 0.001,
      "loss": 2.5457,
      "step": 137424
    },
    {
      "epoch": 26.39,
      "learning_rate": 0.001,
      "loss": 2.5587,
      "step": 137436
    },
    {
      "epoch": 26.39,
      "learning_rate": 0.001,
      "loss": 2.5451,
      "step": 137448
    },
    {
      "epoch": 26.39,
      "learning_rate": 0.001,
      "loss": 2.5477,
      "step": 137460
    },
    {
      "epoch": 26.4,
      "learning_rate": 0.001,
      "loss": 2.5497,
      "step": 137472
    },
    {
      "epoch": 26.4,
      "learning_rate": 0.001,
      "loss": 2.5451,
      "step": 137484
    },
    {
      "epoch": 26.4,
      "learning_rate": 0.001,
      "loss": 2.5452,
      "step": 137496
    },
    {
      "epoch": 26.4,
      "eval_ag_news_accuracy": 0.3243125,
      "eval_ag_news_bleu_score": 4.955759470323633,
      "eval_ag_news_bleu_score_sem": 0.15887039219714075,
      "eval_ag_news_emb_cos_sim": 0.8114238977432251,
      "eval_ag_news_emb_cos_sim_sem": 0.007117613579639011,
      "eval_ag_news_emb_top1_equal": 0.28125,
      "eval_ag_news_emb_top1_equal_sem": 0.039896367485272234,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.533785581588745,
      "eval_ag_news_n_ngrams_match_1": 14.126,
      "eval_ag_news_n_ngrams_match_2": 3.226,
      "eval_ag_news_n_ngrams_match_3": 0.932,
      "eval_ag_news_num_pred_words": 46.806,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 34.253391496062406,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35256318161874883,
      "eval_ag_news_runtime": 10.8308,
      "eval_ag_news_samples_per_second": 46.164,
      "eval_ag_news_steps_per_second": 0.092,
      "eval_ag_news_token_set_f1": 0.35316782346785075,
      "eval_ag_news_token_set_f1_sem": 0.004266996819858838,
      "eval_ag_news_token_set_precision": 0.3376001556690009,
      "eval_ag_news_token_set_recall": 0.3861518414493657,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 137500
    },
    {
      "epoch": 26.4,
      "eval_anthropic_toxic_prompts_accuracy": 0.11515625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1110553648078065,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11634531628327847,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6803028583526611,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008580157955260626,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2036924362182617,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.348,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.932,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.702,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.6,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.62328244546882,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21807319733408637,
      "eval_anthropic_toxic_prompts_runtime": 10.0044,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.978,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.1,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36441856595593025,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006599289802246864,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4495157000227447,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.33079276349794706,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 137500
    },
    {
      "epoch": 26.4,
      "eval_arxiv_accuracy": 0.34925,
      "eval_arxiv_bleu_score": 4.179773553975574,
      "eval_arxiv_bleu_score_sem": 0.11278790991979552,
      "eval_arxiv_emb_cos_sim": 0.7635753750801086,
      "eval_arxiv_emb_cos_sim_sem": 0.007356731090487537,
      "eval_arxiv_emb_top1_equal": 0.265625,
      "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3825840950012207,
      "eval_arxiv_n_ngrams_match_1": 15.188,
      "eval_arxiv_n_ngrams_match_2": 2.888,
      "eval_arxiv_n_ngrams_match_3": 0.614,
      "eval_arxiv_num_pred_words": 41.094,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.446766122979263,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.35820472439116297,
      "eval_arxiv_runtime": 10.1665,
      "eval_arxiv_samples_per_second": 49.181,
      "eval_arxiv_steps_per_second": 0.098,
      "eval_arxiv_token_set_f1": 0.35417310242164035,
      "eval_arxiv_token_set_f1_sem": 0.004257793044281869,
      "eval_arxiv_token_set_precision": 0.3075897923303374,
      "eval_arxiv_token_set_recall": 0.43718891607634053,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 137500
    },
    {
      "epoch": 26.4,
      "eval_python_code_alpaca_accuracy": 0.15865625,
      "eval_python_code_alpaca_bleu_score": 4.717075394164557,
      "eval_python_code_alpaca_bleu_score_sem": 0.1513849380381095,
      "eval_python_code_alpaca_emb_cos_sim": 0.7640969753265381,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007780952291639845,
      "eval_python_code_alpaca_emb_top1_equal": 0.125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.894763946533203,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.204,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.026,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.064,
      "eval_python_code_alpaca_num_pred_words": 44.904,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.079233271828627,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3398542276247796,
      "eval_python_code_alpaca_runtime": 10.1131,
      "eval_python_code_alpaca_samples_per_second": 49.441,
      "eval_python_code_alpaca_steps_per_second": 0.099,
      "eval_python_code_alpaca_token_set_f1": 0.485626048143985,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005575612169828964,
      "eval_python_code_alpaca_token_set_precision": 0.5573860876167339,
      "eval_python_code_alpaca_token_set_recall": 0.4494263120640594,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 137500
    },
    {
      "epoch": 26.4,
      "eval_wikibio_accuracy": 0.32596875,
      "eval_wikibio_bleu_score": 5.881323417754504,
      "eval_wikibio_bleu_score_sem": 0.23294533006336862,
      "eval_wikibio_emb_cos_sim": 0.7359535694122314,
      "eval_wikibio_emb_cos_sim_sem": 0.009895827149266853,
      "eval_wikibio_emb_top1_equal": 0.171875,
      "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7024004459381104,
      "eval_wikibio_n_ngrams_match_1": 9.672,
      "eval_wikibio_n_ngrams_match_2": 3.222,
      "eval_wikibio_n_ngrams_match_3": 1.182,
      "eval_wikibio_num_pred_words": 34.98,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 40.54451255235392,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.34343898460069355,
      "eval_wikibio_runtime": 10.0527,
      "eval_wikibio_samples_per_second": 49.738,
      "eval_wikibio_steps_per_second": 0.099,
      "eval_wikibio_token_set_f1": 0.3096431583939992,
      "eval_wikibio_token_set_f1_sem": 0.005894352753507593,
      "eval_wikibio_token_set_precision": 0.3128749873512374,
      "eval_wikibio_token_set_recall": 0.32681829243231664,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 137500
    },
    {
      "epoch": 26.4,
      "eval_nq_accuracy": 0.52903125,
      "eval_nq_bleu_score": 11.424583829580019,
      "eval_nq_bleu_score_sem": 0.4727421297078216,
      "eval_nq_emb_cos_sim": 0.8270809650421143,
      "eval_nq_emb_cos_sim_sem": 0.008246695382564971,
      "eval_nq_emb_top1_equal": 0.265625,
      "eval_nq_emb_top1_equal_sem": 0.03919146934646163,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1732428073883057,
      "eval_nq_n_ngrams_match_1": 23.002,
      "eval_nq_n_ngrams_match_2": 8.284,
      "eval_nq_n_ngrams_match_3": 3.794,
      "eval_nq_num_pred_words": 49.4,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.786731568935295,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.44401144925390756,
      "eval_nq_runtime": 10.3349,
      "eval_nq_samples_per_second": 48.38,
      "eval_nq_steps_per_second": 0.097,
      "eval_nq_token_set_f1": 0.4577020947749211,
      "eval_nq_token_set_f1_sem": 0.004893191246329938,
      "eval_nq_token_set_precision": 0.41763552281650806,
      "eval_nq_token_set_recall": 0.5173763896640758,
      "eval_nq_true_num_tokens": 64.0,
      "step": 137500
    },
    {
      "epoch": 26.4,
      "learning_rate": 0.001,
      "loss": 2.5401,
      "step": 137508
    },
    {
      "epoch": 26.41,
      "learning_rate": 0.001,
      "loss": 2.5433,
      "step": 137520
    },
    {
      "epoch": 26.41,
      "learning_rate": 0.001,
      "loss": 2.5505,
      "step": 137532
    },
    {
      "epoch": 26.41,
      "learning_rate": 0.001,
      "loss": 2.5452,
      "step": 137544
    },
    {
      "epoch": 26.41,
      "learning_rate": 0.001,
      "loss": 2.5488,
      "step": 137556
    },
    {
      "epoch": 26.41,
      "learning_rate": 0.001,
      "loss": 2.5397,
      "step": 137568
    },
    {
      "epoch": 26.42,
      "learning_rate": 0.001,
      "loss": 2.5451,
      "step": 137580
    },
    {
      "epoch": 26.42,
      "learning_rate": 0.001,
      "loss": 2.5449,
      "step": 137592
    },
    {
      "epoch": 26.42,
      "learning_rate": 0.001,
      "loss": 2.549,
      "step": 137604
    },
    {
      "epoch": 26.42,
      "learning_rate": 0.001,
      "loss": 2.5439,
      "step": 137616
    },
    {
      "epoch": 26.43,
      "learning_rate": 0.001,
      "loss": 2.5341,
      "step": 137628
    },
    {
      "epoch": 26.43,
      "learning_rate": 0.001,
      "loss": 2.5488,
      "step": 137640
    },
    {
      "epoch": 26.43,
      "learning_rate": 0.001,
      "loss": 2.5511,
      "step": 137652
    },
    {
      "epoch": 26.43,
      "learning_rate": 0.001,
      "loss": 2.5514,
      "step": 137664
    },
    {
      "epoch": 26.44,
      "learning_rate": 0.001,
      "loss": 2.5403,
      "step": 137676
    },
    {
      "epoch": 26.44,
      "learning_rate": 0.001,
      "loss": 2.5415,
      "step": 137688
    },
    {
      "epoch": 26.44,
      "learning_rate": 0.001,
      "loss": 2.5406,
      "step": 137700
    },
    {
      "epoch": 26.44,
      "learning_rate": 0.001,
      "loss": 2.5468,
      "step": 137712
    },
    {
      "epoch": 26.44,
      "learning_rate": 0.001,
      "loss": 2.5468,
      "step": 137724
    },
    {
      "epoch": 26.45,
      "learning_rate": 0.001,
      "loss": 2.5479,
      "step": 137736
    },
    {
      "epoch": 26.45,
      "learning_rate": 0.001,
      "loss": 2.5518,
      "step": 137748
    },
    {
      "epoch": 26.45,
      "learning_rate": 0.001,
      "loss": 2.552,
      "step": 137760
    },
    {
      "epoch": 26.45,
      "learning_rate": 0.001,
      "loss": 2.55,
      "step": 137772
    },
    {
      "epoch": 26.46,
      "learning_rate": 0.001,
      "loss": 2.5445,
      "step": 137784
    },
    {
      "epoch": 26.46,
      "learning_rate": 0.001,
      "loss": 2.5493,
      "step": 137796
    },
    {
      "epoch": 26.46,
      "learning_rate": 0.001,
      "loss": 2.5629,
      "step": 137808
    },
    {
      "epoch": 26.46,
      "learning_rate": 0.001,
      "loss": 2.547,
      "step": 137820
    },
    {
      "epoch": 26.47,
      "learning_rate": 0.001,
      "loss": 2.5418,
      "step": 137832
    },
    {
      "epoch": 26.47,
      "learning_rate": 0.001,
      "loss": 2.5432,
      "step": 137844
    },
    {
      "epoch": 26.47,
      "learning_rate": 0.001,
      "loss": 2.5512,
      "step": 137856
    },
    {
      "epoch": 26.47,
      "learning_rate": 0.001,
      "loss": 2.552,
      "step": 137868
    },
    {
      "epoch": 26.47,
      "learning_rate": 0.001,
      "loss": 2.5442,
      "step": 137880
    },
    {
      "epoch": 26.48,
      "learning_rate": 0.001,
      "loss": 2.5545,
      "step": 137892
    },
    {
      "epoch": 26.48,
      "learning_rate": 0.001,
      "loss": 2.5446,
      "step": 137904
    },
    {
      "epoch": 26.48,
      "learning_rate": 0.001,
      "loss": 2.5438,
      "step": 137916
    },
    {
      "epoch": 26.48,
      "learning_rate": 0.001,
      "loss": 2.5474,
      "step": 137928
    },
    {
      "epoch": 26.49,
      "learning_rate": 0.001,
      "loss": 2.5498,
      "step": 137940
    },
    {
      "epoch": 26.49,
      "learning_rate": 0.001,
      "loss": 2.5525,
      "step": 137952
    },
    {
      "epoch": 26.49,
      "learning_rate": 0.001,
      "loss": 2.551,
      "step": 137964
    },
    {
      "epoch": 26.49,
      "learning_rate": 0.001,
      "loss": 2.5563,
      "step": 137976
    },
    {
      "epoch": 26.5,
      "learning_rate": 0.001,
      "loss": 2.5478,
      "step": 137988
    },
    {
      "epoch": 26.5,
      "learning_rate": 0.001,
      "loss": 2.5471,
      "step": 138000
    },
    {
      "epoch": 26.5,
      "learning_rate": 0.001,
      "loss": 2.5476,
      "step": 138012
    },
    {
      "epoch": 26.5,
      "learning_rate": 0.001,
      "loss": 2.5538,
      "step": 138024
    },
    {
      "epoch": 26.5,
      "learning_rate": 0.001,
      "loss": 2.5536,
      "step": 138036
    },
    {
      "epoch": 26.51,
      "learning_rate": 0.001,
      "loss": 2.5364,
      "step": 138048
    },
    {
      "epoch": 26.51,
      "learning_rate": 0.001,
      "loss": 2.5464,
      "step": 138060
    },
    {
      "epoch": 26.51,
      "learning_rate": 0.001,
      "loss": 2.5487,
      "step": 138072
    },
    {
      "epoch": 26.51,
      "learning_rate": 0.001,
      "loss": 2.5556,
      "step": 138084
    },
    {
      "epoch": 26.52,
      "learning_rate": 0.001,
      "loss": 2.55,
      "step": 138096
    },
    {
      "epoch": 26.52,
      "learning_rate": 0.001,
      "loss": 2.5457,
      "step": 138108
    },
    {
      "epoch": 26.52,
      "learning_rate": 0.001,
      "loss": 2.547,
      "step": 138120
    },
    {
      "epoch": 26.52,
      "eval_ag_news_accuracy": 0.3253125,
      "eval_ag_news_bleu_score": 4.959226138590679,
      "eval_ag_news_bleu_score_sem": 0.16219360894267384,
      "eval_ag_news_emb_cos_sim": 0.8112995028495789,
      "eval_ag_news_emb_cos_sim_sem": 0.006759624175288049,
      "eval_ag_news_emb_top1_equal": 0.2421875,
      "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5298244953155518,
      "eval_ag_news_n_ngrams_match_1": 14.096,
      "eval_ag_news_n_ngrams_match_2": 3.142,
      "eval_ag_news_n_ngrams_match_3": 0.908,
      "eval_ag_news_num_pred_words": 46.252,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 34.11797922409669,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3511656920225139,
      "eval_ag_news_runtime": 10.2514,
      "eval_ag_news_samples_per_second": 48.774,
      "eval_ag_news_steps_per_second": 0.098,
      "eval_ag_news_token_set_f1": 0.35243850091046586,
      "eval_ag_news_token_set_f1_sem": 0.004429363013131422,
      "eval_ag_news_token_set_precision": 0.3378667578097021,
      "eval_ag_news_token_set_recall": 0.38483197844049744,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 138125
    },
    {
      "epoch": 26.52,
      "eval_anthropic_toxic_prompts_accuracy": 0.11390625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1131375045066556,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11796619077821163,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6754779815673828,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009643804800239898,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.236358880996704,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.246,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.902,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.702,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.7,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.44091948244658,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2153345177363887,
      "eval_anthropic_toxic_prompts_runtime": 9.8146,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.944,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.102,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3587887664654874,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006763941670662588,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.43860774979640527,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3292519758103467,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 138125
    },
    {
      "epoch": 26.52,
      "eval_arxiv_accuracy": 0.34796875,
      "eval_arxiv_bleu_score": 4.402037482769789,
      "eval_arxiv_bleu_score_sem": 0.12520143481700993,
      "eval_arxiv_emb_cos_sim": 0.7637171745300293,
      "eval_arxiv_emb_cos_sim_sem": 0.0067443913822739015,
      "eval_arxiv_emb_top1_equal": 0.2265625,
      "eval_arxiv_emb_top1_equal_sem": 0.03714537682851538,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.388087272644043,
      "eval_arxiv_n_ngrams_match_1": 15.148,
      "eval_arxiv_n_ngrams_match_2": 2.978,
      "eval_arxiv_n_ngrams_match_3": 0.666,
      "eval_arxiv_num_pred_words": 40.444,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.609263624166058,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36246474064846745,
      "eval_arxiv_runtime": 10.7169,
      "eval_arxiv_samples_per_second": 46.655,
      "eval_arxiv_steps_per_second": 0.093,
      "eval_arxiv_token_set_f1": 0.3544349286576337,
      "eval_arxiv_token_set_f1_sem": 0.004211841254079962,
      "eval_arxiv_token_set_precision": 0.30692930093919746,
      "eval_arxiv_token_set_recall": 0.434815790712515,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 138125
    },
    {
      "epoch": 26.52,
      "eval_python_code_alpaca_accuracy": 0.15709375,
      "eval_python_code_alpaca_bleu_score": 4.534078637726143,
      "eval_python_code_alpaca_bleu_score_sem": 0.14233550072533638,
      "eval_python_code_alpaca_emb_cos_sim": 0.7551637291908264,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.00868436933092635,
      "eval_python_code_alpaca_emb_top1_equal": 0.1171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.92767333984375,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.72,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.814,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.918,
      "eval_python_code_alpaca_num_pred_words": 42.468,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.684108315271413,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3377472714490662,
      "eval_python_code_alpaca_runtime": 10.0014,
      "eval_python_code_alpaca_samples_per_second": 49.993,
      "eval_python_code_alpaca_steps_per_second": 0.1,
      "eval_python_code_alpaca_token_set_f1": 0.4701137754043401,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005551559877368672,
      "eval_python_code_alpaca_token_set_precision": 0.5265204169817107,
      "eval_python_code_alpaca_token_set_recall": 0.4461888894310651,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 138125
    },
    {
      "epoch": 26.52,
      "eval_wikibio_accuracy": 0.3255625,
      "eval_wikibio_bleu_score": 6.062646065586861,
      "eval_wikibio_bleu_score_sem": 0.21360257706867236,
      "eval_wikibio_emb_cos_sim": 0.7402997612953186,
      "eval_wikibio_emb_cos_sim_sem": 0.010037151459224453,
      "eval_wikibio_emb_top1_equal": 0.234375,
      "eval_wikibio_emb_top1_equal_sem": 0.03758909358128201,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7152249813079834,
      "eval_wikibio_n_ngrams_match_1": 10.028,
      "eval_wikibio_n_ngrams_match_2": 3.35,
      "eval_wikibio_n_ngrams_match_3": 1.22,
      "eval_wikibio_num_pred_words": 35.242,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 41.06782553821968,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.35530582903784064,
      "eval_wikibio_runtime": 10.1063,
      "eval_wikibio_samples_per_second": 49.474,
      "eval_wikibio_steps_per_second": 0.099,
      "eval_wikibio_token_set_f1": 0.3214020396085377,
      "eval_wikibio_token_set_f1_sem": 0.005528925450837666,
      "eval_wikibio_token_set_precision": 0.3260291403205192,
      "eval_wikibio_token_set_recall": 0.33389809075319343,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 138125
    },
    {
      "epoch": 26.52,
      "eval_nq_accuracy": 0.52846875,
      "eval_nq_bleu_score": 11.711828768129264,
      "eval_nq_bleu_score_sem": 0.4737353012203361,
      "eval_nq_emb_cos_sim": 0.8314865827560425,
      "eval_nq_emb_cos_sim_sem": 0.0072692750627384155,
      "eval_nq_emb_top1_equal": 0.3046875,
      "eval_nq_emb_top1_equal_sem": 0.04084279867618665,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1723036766052246,
      "eval_nq_n_ngrams_match_1": 23.12,
      "eval_nq_n_ngrams_match_2": 8.448,
      "eval_nq_n_ngrams_match_3": 3.892,
      "eval_nq_num_pred_words": 49.106,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.778483552425545,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.44833025644502966,
      "eval_nq_runtime": 10.3517,
      "eval_nq_samples_per_second": 48.301,
      "eval_nq_steps_per_second": 0.097,
      "eval_nq_token_set_f1": 0.4626036655039273,
      "eval_nq_token_set_f1_sem": 0.00496835247379176,
      "eval_nq_token_set_precision": 0.4207643543177327,
      "eval_nq_token_set_recall": 0.5210255985059231,
      "eval_nq_true_num_tokens": 64.0,
      "step": 138125
    },
    {
      "epoch": 26.52,
      "learning_rate": 0.001,
      "loss": 2.5507,
      "step": 138132
    },
    {
      "epoch": 26.53,
      "learning_rate": 0.001,
      "loss": 2.5414,
      "step": 138144
    },
    {
      "epoch": 26.53,
      "learning_rate": 0.001,
      "loss": 2.5539,
      "step": 138156
    },
    {
      "epoch": 26.53,
      "learning_rate": 0.001,
      "loss": 2.5416,
      "step": 138168
    },
    {
      "epoch": 26.53,
      "learning_rate": 0.001,
      "loss": 2.547,
      "step": 138180
    },
    {
      "epoch": 26.53,
      "learning_rate": 0.001,
      "loss": 2.5537,
      "step": 138192
    },
    {
      "epoch": 26.54,
      "learning_rate": 0.001,
      "loss": 2.554,
      "step": 138204
    },
    {
      "epoch": 26.54,
      "learning_rate": 0.001,
      "loss": 2.5485,
      "step": 138216
    },
    {
      "epoch": 26.54,
      "learning_rate": 0.001,
      "loss": 2.5417,
      "step": 138228
    },
    {
      "epoch": 26.54,
      "learning_rate": 0.001,
      "loss": 2.5427,
      "step": 138240
    },
    {
      "epoch": 26.55,
      "learning_rate": 0.001,
      "loss": 2.5496,
      "step": 138252
    },
    {
      "epoch": 26.55,
      "learning_rate": 0.001,
      "loss": 2.5433,
      "step": 138264
    },
    {
      "epoch": 26.55,
      "learning_rate": 0.001,
      "loss": 2.536,
      "step": 138276
    },
    {
      "epoch": 26.55,
      "learning_rate": 0.001,
      "loss": 2.5441,
      "step": 138288
    },
    {
      "epoch": 26.56,
      "learning_rate": 0.001,
      "loss": 2.5376,
      "step": 138300
    },
    {
      "epoch": 26.56,
      "learning_rate": 0.001,
      "loss": 2.5416,
      "step": 138312
    },
    {
      "epoch": 26.56,
      "learning_rate": 0.001,
      "loss": 2.5455,
      "step": 138324
    },
    {
      "epoch": 26.56,
      "learning_rate": 0.001,
      "loss": 2.5445,
      "step": 138336
    },
    {
      "epoch": 26.56,
      "learning_rate": 0.001,
      "loss": 2.5456,
      "step": 138348
    },
    {
      "epoch": 26.57,
      "learning_rate": 0.001,
      "loss": 2.5496,
      "step": 138360
    },
    {
      "epoch": 26.57,
      "learning_rate": 0.001,
      "loss": 2.542,
      "step": 138372
    },
    {
      "epoch": 26.57,
      "learning_rate": 0.001,
      "loss": 2.5491,
      "step": 138384
    },
    {
      "epoch": 26.57,
      "learning_rate": 0.001,
      "loss": 2.5514,
      "step": 138396
    },
    {
      "epoch": 26.58,
      "learning_rate": 0.001,
      "loss": 2.5309,
      "step": 138408
    },
    {
      "epoch": 26.58,
      "learning_rate": 0.001,
      "loss": 2.5515,
      "step": 138420
    },
    {
      "epoch": 26.58,
      "learning_rate": 0.001,
      "loss": 2.5477,
      "step": 138432
    },
    {
      "epoch": 26.58,
      "learning_rate": 0.001,
      "loss": 2.5483,
      "step": 138444
    },
    {
      "epoch": 26.59,
      "learning_rate": 0.001,
      "loss": 2.5506,
      "step": 138456
    },
    {
      "epoch": 26.59,
      "learning_rate": 0.001,
      "loss": 2.5515,
      "step": 138468
    },
    {
      "epoch": 26.59,
      "learning_rate": 0.001,
      "loss": 2.5339,
      "step": 138480
    },
    {
      "epoch": 26.59,
      "learning_rate": 0.001,
      "loss": 2.5483,
      "step": 138492
    },
    {
      "epoch": 26.59,
      "learning_rate": 0.001,
      "loss": 2.5397,
      "step": 138504
    },
    {
      "epoch": 26.6,
      "learning_rate": 0.001,
      "loss": 2.5393,
      "step": 138516
    },
    {
      "epoch": 26.6,
      "learning_rate": 0.001,
      "loss": 2.544,
      "step": 138528
    },
    {
      "epoch": 26.6,
      "learning_rate": 0.001,
      "loss": 2.544,
      "step": 138540
    },
    {
      "epoch": 26.6,
      "learning_rate": 0.001,
      "loss": 2.5533,
      "step": 138552
    },
    {
      "epoch": 26.61,
      "learning_rate": 0.001,
      "loss": 2.5442,
      "step": 138564
    },
    {
      "epoch": 26.61,
      "learning_rate": 0.001,
      "loss": 2.5556,
      "step": 138576
    },
    {
      "epoch": 26.61,
      "learning_rate": 0.001,
      "loss": 2.5411,
      "step": 138588
    },
    {
      "epoch": 26.61,
      "learning_rate": 0.001,
      "loss": 2.5463,
      "step": 138600
    },
    {
      "epoch": 26.62,
      "learning_rate": 0.001,
      "loss": 2.5481,
      "step": 138612
    },
    {
      "epoch": 26.62,
      "learning_rate": 0.001,
      "loss": 2.5403,
      "step": 138624
    },
    {
      "epoch": 26.62,
      "learning_rate": 0.001,
      "loss": 2.5405,
      "step": 138636
    },
    {
      "epoch": 26.62,
      "learning_rate": 0.001,
      "loss": 2.5518,
      "step": 138648
    },
    {
      "epoch": 26.62,
      "learning_rate": 0.001,
      "loss": 2.5555,
      "step": 138660
    },
    {
      "epoch": 26.63,
      "learning_rate": 0.001,
      "loss": 2.5544,
      "step": 138672
    },
    {
      "epoch": 26.63,
      "learning_rate": 0.001,
      "loss": 2.5502,
      "step": 138684
    },
    {
      "epoch": 26.63,
      "learning_rate": 0.001,
      "loss": 2.5447,
      "step": 138696
    },
    {
      "epoch": 26.63,
      "learning_rate": 0.001,
      "loss": 2.5487,
      "step": 138708
    },
    {
      "epoch": 26.64,
      "learning_rate": 0.001,
      "loss": 2.5577,
      "step": 138720
    },
    {
      "epoch": 26.64,
      "learning_rate": 0.001,
      "loss": 2.5487,
      "step": 138732
    },
    {
      "epoch": 26.64,
      "learning_rate": 0.001,
      "loss": 2.5519,
      "step": 138744
    },
    {
      "epoch": 26.64,
      "eval_ag_news_accuracy": 0.3241875,
      "eval_ag_news_bleu_score": 4.965060862419303,
      "eval_ag_news_bleu_score_sem": 0.15438728512738223,
      "eval_ag_news_emb_cos_sim": 0.8131359815597534,
      "eval_ag_news_emb_cos_sim_sem": 0.0063308952369785995,
      "eval_ag_news_emb_top1_equal": 0.234375,
      "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5344045162200928,
      "eval_ag_news_n_ngrams_match_1": 14.184,
      "eval_ag_news_n_ngrams_match_2": 3.186,
      "eval_ag_news_n_ngrams_match_3": 0.916,
      "eval_ag_news_num_pred_words": 47.018,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 34.27459866855017,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.350500314604809,
      "eval_ag_news_runtime": 10.4869,
      "eval_ag_news_samples_per_second": 47.678,
      "eval_ag_news_steps_per_second": 0.095,
      "eval_ag_news_token_set_f1": 0.3532376550712028,
      "eval_ag_news_token_set_f1_sem": 0.004367471002802514,
      "eval_ag_news_token_set_precision": 0.3378850477441128,
      "eval_ag_news_token_set_recall": 0.38653801426449513,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 138750
    },
    {
      "epoch": 26.64,
      "eval_anthropic_toxic_prompts_accuracy": 0.114625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.11163846657245,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11372678728501974,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6804838180541992,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.007905600937566306,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2448742389678955,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.156,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.904,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.7,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.87,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.658483020007733,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2151872767905688,
      "eval_anthropic_toxic_prompts_runtime": 9.8784,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.616,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.101,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3553317607568192,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006635344215942749,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.43286421188331903,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3285166876661079,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 138750
    },
    {
      "epoch": 26.64,
      "eval_arxiv_accuracy": 0.3485625,
      "eval_arxiv_bleu_score": 4.264796320712743,
      "eval_arxiv_bleu_score_sem": 0.12075134889061248,
      "eval_arxiv_emb_cos_sim": 0.771316647529602,
      "eval_arxiv_emb_cos_sim_sem": 0.00700699917572217,
      "eval_arxiv_emb_top1_equal": 0.2578125,
      "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.387031316757202,
      "eval_arxiv_n_ngrams_match_1": 15.254,
      "eval_arxiv_n_ngrams_match_2": 2.882,
      "eval_arxiv_n_ngrams_match_3": 0.61,
      "eval_arxiv_num_pred_words": 41.13,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.578014049926754,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36239528489991546,
      "eval_arxiv_runtime": 10.3113,
      "eval_arxiv_samples_per_second": 48.49,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.3565038852463042,
      "eval_arxiv_token_set_f1_sem": 0.004005371012824581,
      "eval_arxiv_token_set_precision": 0.3072828568172363,
      "eval_arxiv_token_set_recall": 0.4405003398770094,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 138750
    },
    {
      "epoch": 26.64,
      "eval_python_code_alpaca_accuracy": 0.1610625,
      "eval_python_code_alpaca_bleu_score": 4.459944855256856,
      "eval_python_code_alpaca_bleu_score_sem": 0.14383230908246283,
      "eval_python_code_alpaca_emb_cos_sim": 0.7556705474853516,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008313643017246495,
      "eval_python_code_alpaca_emb_top1_equal": 0.125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8856201171875,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.87,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.876,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.942,
      "eval_python_code_alpaca_num_pred_words": 44.04,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.91467334858191,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3329802664418746,
      "eval_python_code_alpaca_runtime": 9.8936,
      "eval_python_code_alpaca_samples_per_second": 50.538,
      "eval_python_code_alpaca_steps_per_second": 0.101,
      "eval_python_code_alpaca_token_set_f1": 0.4779585230603366,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005851969260384139,
      "eval_python_code_alpaca_token_set_precision": 0.537983528265224,
      "eval_python_code_alpaca_token_set_recall": 0.4513912894177452,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 138750
    },
    {
      "epoch": 26.64,
      "eval_wikibio_accuracy": 0.32840625,
      "eval_wikibio_bleu_score": 6.195257570122492,
      "eval_wikibio_bleu_score_sem": 0.22528679215553907,
      "eval_wikibio_emb_cos_sim": 0.7375030517578125,
      "eval_wikibio_emb_cos_sim_sem": 0.00922417299576372,
      "eval_wikibio_emb_top1_equal": 0.2109375,
      "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.700056552886963,
      "eval_wikibio_n_ngrams_match_1": 10.036,
      "eval_wikibio_n_ngrams_match_2": 3.45,
      "eval_wikibio_n_ngrams_match_3": 1.262,
      "eval_wikibio_num_pred_words": 35.798,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 40.44959183657991,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3554960810057535,
      "eval_wikibio_runtime": 9.9643,
      "eval_wikibio_samples_per_second": 50.179,
      "eval_wikibio_steps_per_second": 0.1,
      "eval_wikibio_token_set_f1": 0.3254677870560993,
      "eval_wikibio_token_set_f1_sem": 0.0053984178241856294,
      "eval_wikibio_token_set_precision": 0.330026286269777,
      "eval_wikibio_token_set_recall": 0.3390868022910393,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 138750
    },
    {
      "epoch": 26.64,
      "eval_nq_accuracy": 0.52890625,
      "eval_nq_bleu_score": 11.753755750075214,
      "eval_nq_bleu_score_sem": 0.47813875186557536,
      "eval_nq_emb_cos_sim": 0.8355936408042908,
      "eval_nq_emb_cos_sim_sem": 0.0065716689872647015,
      "eval_nq_emb_top1_equal": 0.2578125,
      "eval_nq_emb_top1_equal_sem": 0.038815656435002115,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1703109741210938,
      "eval_nq_n_ngrams_match_1": 22.952,
      "eval_nq_n_ngrams_match_2": 8.528,
      "eval_nq_n_ngrams_match_3": 3.954,
      "eval_nq_num_pred_words": 48.99,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.761008063951103,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.446778549457096,
      "eval_nq_runtime": 10.6875,
      "eval_nq_samples_per_second": 46.784,
      "eval_nq_steps_per_second": 0.094,
      "eval_nq_token_set_f1": 0.460552629086646,
      "eval_nq_token_set_f1_sem": 0.004977581539049686,
      "eval_nq_token_set_precision": 0.4172179579331786,
      "eval_nq_token_set_recall": 0.5217868775197729,
      "eval_nq_true_num_tokens": 64.0,
      "step": 138750
    },
    {
      "epoch": 26.64,
      "learning_rate": 0.001,
      "loss": 2.5491,
      "step": 138756
    },
    {
      "epoch": 26.65,
      "learning_rate": 0.001,
      "loss": 2.5522,
      "step": 138768
    },
    {
      "epoch": 26.65,
      "learning_rate": 0.001,
      "loss": 2.5506,
      "step": 138780
    },
    {
      "epoch": 26.65,
      "learning_rate": 0.001,
      "loss": 2.5362,
      "step": 138792
    },
    {
      "epoch": 26.65,
      "learning_rate": 0.001,
      "loss": 2.5491,
      "step": 138804
    },
    {
      "epoch": 26.65,
      "learning_rate": 0.001,
      "loss": 2.5567,
      "step": 138816
    },
    {
      "epoch": 26.66,
      "learning_rate": 0.001,
      "loss": 2.5529,
      "step": 138828
    },
    {
      "epoch": 26.66,
      "learning_rate": 0.001,
      "loss": 2.5429,
      "step": 138840
    },
    {
      "epoch": 26.66,
      "learning_rate": 0.001,
      "loss": 2.5486,
      "step": 138852
    },
    {
      "epoch": 26.66,
      "learning_rate": 0.001,
      "loss": 2.5362,
      "step": 138864
    },
    {
      "epoch": 26.67,
      "learning_rate": 0.001,
      "loss": 2.5505,
      "step": 138876
    },
    {
      "epoch": 26.67,
      "learning_rate": 0.001,
      "loss": 2.5541,
      "step": 138888
    },
    {
      "epoch": 26.67,
      "learning_rate": 0.001,
      "loss": 2.5547,
      "step": 138900
    },
    {
      "epoch": 26.67,
      "learning_rate": 0.001,
      "loss": 2.5521,
      "step": 138912
    },
    {
      "epoch": 26.68,
      "learning_rate": 0.001,
      "loss": 2.5515,
      "step": 138924
    },
    {
      "epoch": 26.68,
      "learning_rate": 0.001,
      "loss": 2.555,
      "step": 138936
    },
    {
      "epoch": 26.68,
      "learning_rate": 0.001,
      "loss": 2.5611,
      "step": 138948
    },
    {
      "epoch": 26.68,
      "learning_rate": 0.001,
      "loss": 2.542,
      "step": 138960
    },
    {
      "epoch": 26.68,
      "learning_rate": 0.001,
      "loss": 2.5484,
      "step": 138972
    },
    {
      "epoch": 26.69,
      "learning_rate": 0.001,
      "loss": 2.5556,
      "step": 138984
    },
    {
      "epoch": 26.69,
      "learning_rate": 0.001,
      "loss": 2.5476,
      "step": 138996
    },
    {
      "epoch": 26.69,
      "learning_rate": 0.001,
      "loss": 2.5415,
      "step": 139008
    },
    {
      "epoch": 26.69,
      "learning_rate": 0.001,
      "loss": 2.5627,
      "step": 139020
    },
    {
      "epoch": 26.7,
      "learning_rate": 0.001,
      "loss": 2.5465,
      "step": 139032
    },
    {
      "epoch": 26.7,
      "learning_rate": 0.001,
      "loss": 2.5441,
      "step": 139044
    },
    {
      "epoch": 26.7,
      "learning_rate": 0.001,
      "loss": 2.5523,
      "step": 139056
    },
    {
      "epoch": 26.7,
      "learning_rate": 0.001,
      "loss": 2.5503,
      "step": 139068
    },
    {
      "epoch": 26.71,
      "learning_rate": 0.001,
      "loss": 2.5396,
      "step": 139080
    },
    {
      "epoch": 26.71,
      "learning_rate": 0.001,
      "loss": 2.5561,
      "step": 139092
    },
    {
      "epoch": 26.71,
      "learning_rate": 0.001,
      "loss": 2.5472,
      "step": 139104
    },
    {
      "epoch": 26.71,
      "learning_rate": 0.001,
      "loss": 2.5483,
      "step": 139116
    },
    {
      "epoch": 26.71,
      "learning_rate": 0.001,
      "loss": 2.553,
      "step": 139128
    },
    {
      "epoch": 26.72,
      "learning_rate": 0.001,
      "loss": 2.5593,
      "step": 139140
    },
    {
      "epoch": 26.72,
      "learning_rate": 0.001,
      "loss": 2.56,
      "step": 139152
    },
    {
      "epoch": 26.72,
      "learning_rate": 0.001,
      "loss": 2.5405,
      "step": 139164
    },
    {
      "epoch": 26.72,
      "learning_rate": 0.001,
      "loss": 2.5567,
      "step": 139176
    },
    {
      "epoch": 26.73,
      "learning_rate": 0.001,
      "loss": 2.5497,
      "step": 139188
    },
    {
      "epoch": 26.73,
      "learning_rate": 0.001,
      "loss": 2.5495,
      "step": 139200
    },
    {
      "epoch": 26.73,
      "learning_rate": 0.001,
      "loss": 2.556,
      "step": 139212
    },
    {
      "epoch": 26.73,
      "learning_rate": 0.001,
      "loss": 2.5561,
      "step": 139224
    },
    {
      "epoch": 26.74,
      "learning_rate": 0.001,
      "loss": 2.5497,
      "step": 139236
    },
    {
      "epoch": 26.74,
      "learning_rate": 0.001,
      "loss": 2.5454,
      "step": 139248
    },
    {
      "epoch": 26.74,
      "learning_rate": 0.001,
      "loss": 2.5604,
      "step": 139260
    },
    {
      "epoch": 26.74,
      "learning_rate": 0.001,
      "loss": 2.5453,
      "step": 139272
    },
    {
      "epoch": 26.74,
      "learning_rate": 0.001,
      "loss": 2.5448,
      "step": 139284
    },
    {
      "epoch": 26.75,
      "learning_rate": 0.001,
      "loss": 2.5598,
      "step": 139296
    },
    {
      "epoch": 26.75,
      "learning_rate": 0.001,
      "loss": 2.5642,
      "step": 139308
    },
    {
      "epoch": 26.75,
      "learning_rate": 0.001,
      "loss": 2.5485,
      "step": 139320
    },
    {
      "epoch": 26.75,
      "learning_rate": 0.001,
      "loss": 2.554,
      "step": 139332
    },
    {
      "epoch": 26.76,
      "learning_rate": 0.001,
      "loss": 2.5503,
      "step": 139344
    },
    {
      "epoch": 26.76,
      "learning_rate": 0.001,
      "loss": 2.5579,
      "step": 139356
    },
    {
      "epoch": 26.76,
      "learning_rate": 0.001,
      "loss": 2.5582,
      "step": 139368
    },
    {
      "epoch": 26.76,
      "eval_ag_news_accuracy": 0.32209375,
      "eval_ag_news_bleu_score": 4.883039681182582,
      "eval_ag_news_bleu_score_sem": 0.15028572684274583,
      "eval_ag_news_emb_cos_sim": 0.8151203393936157,
      "eval_ag_news_emb_cos_sim_sem": 0.006459929789328226,
      "eval_ag_news_emb_top1_equal": 0.203125,
      "eval_ag_news_emb_top1_equal_sem": 0.03570055125142555,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5450551509857178,
      "eval_ag_news_n_ngrams_match_1": 14.166,
      "eval_ag_news_n_ngrams_match_2": 3.092,
      "eval_ag_news_n_ngrams_match_3": 0.88,
      "eval_ag_news_num_pred_words": 46.15,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 34.64159580773397,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35509581835061726,
      "eval_ag_news_runtime": 10.3032,
      "eval_ag_news_samples_per_second": 48.529,
      "eval_ag_news_steps_per_second": 0.097,
      "eval_ag_news_token_set_f1": 0.35499297414895553,
      "eval_ag_news_token_set_f1_sem": 0.0044278571931696395,
      "eval_ag_news_token_set_precision": 0.34008887717726405,
      "eval_ag_news_token_set_recall": 0.3870041642876613,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 139375
    },
    {
      "epoch": 26.76,
      "eval_anthropic_toxic_prompts_accuracy": 0.11240625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1405064442608244,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11492515895809967,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6847192049026489,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009212928348289916,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1328125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030114394778901498,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2757725715637207,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.192,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.878,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.658,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.0,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.46366265819675,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.22013370095881896,
      "eval_anthropic_toxic_prompts_runtime": 10.2023,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.008,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.098,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35495125345294976,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00636732151368483,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.440083951460424,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32399824953124007,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 139375
    },
    {
      "epoch": 26.76,
      "eval_arxiv_accuracy": 0.3483125,
      "eval_arxiv_bleu_score": 4.266548561097967,
      "eval_arxiv_bleu_score_sem": 0.11951296949766975,
      "eval_arxiv_emb_cos_sim": 0.7608402967453003,
      "eval_arxiv_emb_cos_sim_sem": 0.008398710504065163,
      "eval_arxiv_emb_top1_equal": 0.2890625,
      "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3979198932647705,
      "eval_arxiv_n_ngrams_match_1": 15.262,
      "eval_arxiv_n_ngrams_match_2": 2.878,
      "eval_arxiv_n_ngrams_match_3": 0.626,
      "eval_arxiv_num_pred_words": 40.288,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.90183630114276,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36535832982275657,
      "eval_arxiv_runtime": 9.9141,
      "eval_arxiv_samples_per_second": 50.433,
      "eval_arxiv_steps_per_second": 0.101,
      "eval_arxiv_token_set_f1": 0.3588403458338021,
      "eval_arxiv_token_set_f1_sem": 0.004079002239419578,
      "eval_arxiv_token_set_precision": 0.3109485366434201,
      "eval_arxiv_token_set_recall": 0.4414263435502644,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 139375
    },
    {
      "epoch": 26.76,
      "eval_python_code_alpaca_accuracy": 0.1605,
      "eval_python_code_alpaca_bleu_score": 4.78814525467256,
      "eval_python_code_alpaca_bleu_score_sem": 0.15972130891421898,
      "eval_python_code_alpaca_emb_cos_sim": 0.7554644346237183,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008445594263221976,
      "eval_python_code_alpaca_emb_top1_equal": 0.109375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.89371919631958,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.002,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.926,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.968,
      "eval_python_code_alpaca_num_pred_words": 42.432,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.060354852339294,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.34485621994811294,
      "eval_python_code_alpaca_runtime": 9.7111,
      "eval_python_code_alpaca_samples_per_second": 51.488,
      "eval_python_code_alpaca_steps_per_second": 0.103,
      "eval_python_code_alpaca_token_set_f1": 0.47755522254118815,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005190540762148067,
      "eval_python_code_alpaca_token_set_precision": 0.5450351378485951,
      "eval_python_code_alpaca_token_set_recall": 0.4466616561824618,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 139375
    },
    {
      "epoch": 26.76,
      "eval_wikibio_accuracy": 0.32546875,
      "eval_wikibio_bleu_score": 6.012222877556776,
      "eval_wikibio_bleu_score_sem": 0.19930999058990118,
      "eval_wikibio_emb_cos_sim": 0.7461388111114502,
      "eval_wikibio_emb_cos_sim_sem": 0.009180695887346702,
      "eval_wikibio_emb_top1_equal": 0.2109375,
      "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7581255435943604,
      "eval_wikibio_n_ngrams_match_1": 10.098,
      "eval_wikibio_n_ngrams_match_2": 3.456,
      "eval_wikibio_n_ngrams_match_3": 1.252,
      "eval_wikibio_num_pred_words": 36.29,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 42.86799643096521,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3561306434892939,
      "eval_wikibio_runtime": 9.7768,
      "eval_wikibio_samples_per_second": 51.142,
      "eval_wikibio_steps_per_second": 0.102,
      "eval_wikibio_token_set_f1": 0.3219031784383085,
      "eval_wikibio_token_set_f1_sem": 0.005376487850117779,
      "eval_wikibio_token_set_precision": 0.3307109163064446,
      "eval_wikibio_token_set_recall": 0.3301599920682775,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 139375
    },
    {
      "epoch": 26.76,
      "eval_nq_accuracy": 0.528625,
      "eval_nq_bleu_score": 11.653447479118352,
      "eval_nq_bleu_score_sem": 0.4857224043780406,
      "eval_nq_emb_cos_sim": 0.8353527784347534,
      "eval_nq_emb_cos_sim_sem": 0.007095706438036458,
      "eval_nq_emb_top1_equal": 0.3125,
      "eval_nq_emb_top1_equal_sem": 0.041130074229814934,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1789746284484863,
      "eval_nq_n_ngrams_match_1": 22.994,
      "eval_nq_n_ngrams_match_2": 8.334,
      "eval_nq_n_ngrams_match_3": 3.854,
      "eval_nq_num_pred_words": 48.58,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.83724015680304,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.44741050847881686,
      "eval_nq_runtime": 10.2127,
      "eval_nq_samples_per_second": 48.959,
      "eval_nq_steps_per_second": 0.098,
      "eval_nq_token_set_f1": 0.46258269156278825,
      "eval_nq_token_set_f1_sem": 0.004992160416557622,
      "eval_nq_token_set_precision": 0.41992210252936485,
      "eval_nq_token_set_recall": 0.5230947265966663,
      "eval_nq_true_num_tokens": 64.0,
      "step": 139375
    },
    {
      "epoch": 26.76,
      "learning_rate": 0.001,
      "loss": 2.5586,
      "step": 139380
    },
    {
      "epoch": 26.76,
      "learning_rate": 0.001,
      "loss": 2.554,
      "step": 139392
    },
    {
      "epoch": 26.77,
      "learning_rate": 0.001,
      "loss": 2.5552,
      "step": 139404
    },
    {
      "epoch": 26.77,
      "learning_rate": 0.001,
      "loss": 2.5342,
      "step": 139416
    },
    {
      "epoch": 26.77,
      "learning_rate": 0.001,
      "loss": 2.5443,
      "step": 139428
    },
    {
      "epoch": 26.77,
      "learning_rate": 0.001,
      "loss": 2.5549,
      "step": 139440
    },
    {
      "epoch": 26.78,
      "learning_rate": 0.001,
      "loss": 2.5427,
      "step": 139452
    },
    {
      "epoch": 26.78,
      "learning_rate": 0.001,
      "loss": 2.5544,
      "step": 139464
    },
    {
      "epoch": 26.78,
      "learning_rate": 0.001,
      "loss": 2.5491,
      "step": 139476
    },
    {
      "epoch": 26.78,
      "learning_rate": 0.001,
      "loss": 2.5588,
      "step": 139488
    },
    {
      "epoch": 26.79,
      "learning_rate": 0.001,
      "loss": 2.5473,
      "step": 139500
    },
    {
      "epoch": 26.79,
      "learning_rate": 0.001,
      "loss": 2.5523,
      "step": 139512
    },
    {
      "epoch": 26.79,
      "learning_rate": 0.001,
      "loss": 2.5438,
      "step": 139524
    },
    {
      "epoch": 26.79,
      "learning_rate": 0.001,
      "loss": 2.5566,
      "step": 139536
    },
    {
      "epoch": 26.79,
      "learning_rate": 0.001,
      "loss": 2.551,
      "step": 139548
    },
    {
      "epoch": 26.8,
      "learning_rate": 0.001,
      "loss": 2.5516,
      "step": 139560
    },
    {
      "epoch": 26.8,
      "learning_rate": 0.001,
      "loss": 2.5496,
      "step": 139572
    },
    {
      "epoch": 26.8,
      "learning_rate": 0.001,
      "loss": 2.5405,
      "step": 139584
    },
    {
      "epoch": 26.8,
      "learning_rate": 0.001,
      "loss": 2.5508,
      "step": 139596
    },
    {
      "epoch": 26.81,
      "learning_rate": 0.001,
      "loss": 2.5543,
      "step": 139608
    },
    {
      "epoch": 26.81,
      "learning_rate": 0.001,
      "loss": 2.5468,
      "step": 139620
    },
    {
      "epoch": 26.81,
      "learning_rate": 0.001,
      "loss": 2.5429,
      "step": 139632
    },
    {
      "epoch": 26.81,
      "learning_rate": 0.001,
      "loss": 2.5558,
      "step": 139644
    },
    {
      "epoch": 26.82,
      "learning_rate": 0.001,
      "loss": 2.5423,
      "step": 139656
    },
    {
      "epoch": 26.82,
      "learning_rate": 0.001,
      "loss": 2.5424,
      "step": 139668
    },
    {
      "epoch": 26.82,
      "learning_rate": 0.001,
      "loss": 2.5461,
      "step": 139680
    },
    {
      "epoch": 26.82,
      "learning_rate": 0.001,
      "loss": 2.5555,
      "step": 139692
    },
    {
      "epoch": 26.82,
      "learning_rate": 0.001,
      "loss": 2.5528,
      "step": 139704
    },
    {
      "epoch": 26.83,
      "learning_rate": 0.001,
      "loss": 2.5493,
      "step": 139716
    },
    {
      "epoch": 26.83,
      "learning_rate": 0.001,
      "loss": 2.5355,
      "step": 139728
    },
    {
      "epoch": 26.83,
      "learning_rate": 0.001,
      "loss": 2.543,
      "step": 139740
    },
    {
      "epoch": 26.83,
      "learning_rate": 0.001,
      "loss": 2.5388,
      "step": 139752
    },
    {
      "epoch": 26.84,
      "learning_rate": 0.001,
      "loss": 2.5504,
      "step": 139764
    },
    {
      "epoch": 26.84,
      "learning_rate": 0.001,
      "loss": 2.5362,
      "step": 139776
    },
    {
      "epoch": 26.84,
      "learning_rate": 0.001,
      "loss": 2.5476,
      "step": 139788
    },
    {
      "epoch": 26.84,
      "learning_rate": 0.001,
      "loss": 2.5398,
      "step": 139800
    },
    {
      "epoch": 26.85,
      "learning_rate": 0.001,
      "loss": 2.5437,
      "step": 139812
    },
    {
      "epoch": 26.85,
      "learning_rate": 0.001,
      "loss": 2.5526,
      "step": 139824
    },
    {
      "epoch": 26.85,
      "learning_rate": 0.001,
      "loss": 2.5568,
      "step": 139836
    },
    {
      "epoch": 26.85,
      "learning_rate": 0.001,
      "loss": 2.5453,
      "step": 139848
    },
    {
      "epoch": 26.85,
      "learning_rate": 0.001,
      "loss": 2.549,
      "step": 139860
    },
    {
      "epoch": 26.86,
      "learning_rate": 0.001,
      "loss": 2.5398,
      "step": 139872
    },
    {
      "epoch": 26.86,
      "learning_rate": 0.001,
      "loss": 2.5418,
      "step": 139884
    },
    {
      "epoch": 26.86,
      "learning_rate": 0.001,
      "loss": 2.543,
      "step": 139896
    },
    {
      "epoch": 26.86,
      "learning_rate": 0.001,
      "loss": 2.5413,
      "step": 139908
    },
    {
      "epoch": 26.87,
      "learning_rate": 0.001,
      "loss": 2.5333,
      "step": 139920
    },
    {
      "epoch": 26.87,
      "learning_rate": 0.001,
      "loss": 2.5454,
      "step": 139932
    },
    {
      "epoch": 26.87,
      "learning_rate": 0.001,
      "loss": 2.541,
      "step": 139944
    },
    {
      "epoch": 26.87,
      "learning_rate": 0.001,
      "loss": 2.5565,
      "step": 139956
    },
    {
      "epoch": 26.88,
      "learning_rate": 0.001,
      "loss": 2.5508,
      "step": 139968
    },
    {
      "epoch": 26.88,
      "learning_rate": 0.001,
      "loss": 2.5456,
      "step": 139980
    },
    {
      "epoch": 26.88,
      "learning_rate": 0.001,
      "loss": 2.5482,
      "step": 139992
    },
    {
      "epoch": 26.88,
      "eval_ag_news_accuracy": 0.32378125,
      "eval_ag_news_bleu_score": 4.859562710872875,
      "eval_ag_news_bleu_score_sem": 0.15112587920874196,
      "eval_ag_news_emb_cos_sim": 0.8180899620056152,
      "eval_ag_news_emb_cos_sim_sem": 0.006294373675654894,
      "eval_ag_news_emb_top1_equal": 0.2421875,
      "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5315823554992676,
      "eval_ag_news_n_ngrams_match_1": 14.054,
      "eval_ag_news_n_ngrams_match_2": 3.044,
      "eval_ag_news_n_ngrams_match_3": 0.906,
      "eval_ag_news_num_pred_words": 46.664,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 34.178006605738226,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35150963551075565,
      "eval_ag_news_runtime": 10.3828,
      "eval_ag_news_samples_per_second": 48.157,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.34922680013912466,
      "eval_ag_news_token_set_f1_sem": 0.0043831397881787856,
      "eval_ag_news_token_set_precision": 0.3355236480530844,
      "eval_ag_news_token_set_recall": 0.3815039976767892,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 140000
    },
    {
      "epoch": 26.88,
      "eval_anthropic_toxic_prompts_accuracy": 0.114875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2388205154238707,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12482345241526657,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6881364583969116,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.007808975336088033,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.246290922164917,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.386,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.014,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.784,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.948,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.6948587221024,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21760573167571967,
      "eval_anthropic_toxic_prompts_runtime": 11.0266,
      "eval_anthropic_toxic_prompts_samples_per_second": 45.345,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.091,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3639004416417083,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006608106759269167,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.45031523922665767,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.33321317066671063,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 140000
    },
    {
      "epoch": 26.88,
      "eval_arxiv_accuracy": 0.349375,
      "eval_arxiv_bleu_score": 4.381076378331517,
      "eval_arxiv_bleu_score_sem": 0.12439305276843189,
      "eval_arxiv_emb_cos_sim": 0.7646592259407043,
      "eval_arxiv_emb_cos_sim_sem": 0.009558706361829355,
      "eval_arxiv_emb_top1_equal": 0.25,
      "eval_arxiv_emb_top1_equal_sem": 0.03842366440207048,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4013900756835938,
      "eval_arxiv_n_ngrams_match_1": 15.042,
      "eval_arxiv_n_ngrams_match_2": 2.982,
      "eval_arxiv_n_ngrams_match_3": 0.668,
      "eval_arxiv_num_pred_words": 40.172,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 30.005781377643714,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3626276623035773,
      "eval_arxiv_runtime": 10.7392,
      "eval_arxiv_samples_per_second": 46.558,
      "eval_arxiv_steps_per_second": 0.093,
      "eval_arxiv_token_set_f1": 0.3564813725332108,
      "eval_arxiv_token_set_f1_sem": 0.004310774975927624,
      "eval_arxiv_token_set_precision": 0.308464528726972,
      "eval_arxiv_token_set_recall": 0.4394898763358817,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 140000
    },
    {
      "epoch": 26.88,
      "eval_python_code_alpaca_accuracy": 0.1616875,
      "eval_python_code_alpaca_bleu_score": 4.819738200854623,
      "eval_python_code_alpaca_bleu_score_sem": 0.14958318953026548,
      "eval_python_code_alpaca_emb_cos_sim": 0.7647327184677124,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008782805929801575,
      "eval_python_code_alpaca_emb_top1_equal": 0.140625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8652584552764893,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.142,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.054,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.074,
      "eval_python_code_alpaca_num_pred_words": 44.41,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.55358943616595,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33987289061179404,
      "eval_python_code_alpaca_runtime": 9.7905,
      "eval_python_code_alpaca_samples_per_second": 51.07,
      "eval_python_code_alpaca_steps_per_second": 0.102,
      "eval_python_code_alpaca_token_set_f1": 0.48364748544866415,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005537410214425081,
      "eval_python_code_alpaca_token_set_precision": 0.5552013014529864,
      "eval_python_code_alpaca_token_set_recall": 0.44523628866971093,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 140000
    },
    {
      "epoch": 26.88,
      "eval_wikibio_accuracy": 0.322875,
      "eval_wikibio_bleu_score": 5.694965861715147,
      "eval_wikibio_bleu_score_sem": 0.2083526008429106,
      "eval_wikibio_emb_cos_sim": 0.749782919883728,
      "eval_wikibio_emb_cos_sim_sem": 0.008007438260280754,
      "eval_wikibio_emb_top1_equal": 0.1953125,
      "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.720547914505005,
      "eval_wikibio_n_ngrams_match_1": 9.652,
      "eval_wikibio_n_ngrams_match_2": 3.232,
      "eval_wikibio_n_ngrams_match_3": 1.164,
      "eval_wikibio_num_pred_words": 35.384,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 41.287009663812725,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.34065599092161936,
      "eval_wikibio_runtime": 12.0605,
      "eval_wikibio_samples_per_second": 41.458,
      "eval_wikibio_steps_per_second": 0.083,
      "eval_wikibio_token_set_f1": 0.3070669243097482,
      "eval_wikibio_token_set_f1_sem": 0.0058401314040885275,
      "eval_wikibio_token_set_precision": 0.3140311115333913,
      "eval_wikibio_token_set_recall": 0.32070890763575616,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 140000
    },
    {
      "epoch": 26.88,
      "eval_nq_accuracy": 0.52925,
      "eval_nq_bleu_score": 11.72532536143017,
      "eval_nq_bleu_score_sem": 0.47604284476268044,
      "eval_nq_emb_cos_sim": 0.8359426259994507,
      "eval_nq_emb_cos_sim_sem": 0.006911591881583665,
      "eval_nq_emb_top1_equal": 0.265625,
      "eval_nq_emb_top1_equal_sem": 0.03919146934646163,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1711294651031494,
      "eval_nq_n_ngrams_match_1": 23.012,
      "eval_nq_n_ngrams_match_2": 8.508,
      "eval_nq_n_ngrams_match_3": 3.934,
      "eval_nq_num_pred_words": 48.744,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.768181805466043,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4488636174950027,
      "eval_nq_runtime": 10.4438,
      "eval_nq_samples_per_second": 47.875,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.4615851902336044,
      "eval_nq_token_set_f1_sem": 0.00505296348586497,
      "eval_nq_token_set_precision": 0.4199677988052723,
      "eval_nq_token_set_recall": 0.5200451270792383,
      "eval_nq_true_num_tokens": 64.0,
      "step": 140000
    },
    {
      "epoch": 26.88,
      "learning_rate": 0.001,
      "loss": 2.5484,
      "step": 140004
    },
    {
      "epoch": 26.88,
      "learning_rate": 0.001,
      "loss": 2.5477,
      "step": 140016
    },
    {
      "epoch": 26.89,
      "learning_rate": 0.001,
      "loss": 2.5517,
      "step": 140028
    },
    {
      "epoch": 26.89,
      "learning_rate": 0.001,
      "loss": 2.5516,
      "step": 140040
    },
    {
      "epoch": 26.89,
      "learning_rate": 0.001,
      "loss": 2.5549,
      "step": 140052
    },
    {
      "epoch": 26.89,
      "learning_rate": 0.001,
      "loss": 2.5489,
      "step": 140064
    },
    {
      "epoch": 26.9,
      "learning_rate": 0.001,
      "loss": 2.5383,
      "step": 140076
    },
    {
      "epoch": 26.9,
      "learning_rate": 0.001,
      "loss": 2.5481,
      "step": 140088
    },
    {
      "epoch": 26.9,
      "learning_rate": 0.001,
      "loss": 2.5492,
      "step": 140100
    },
    {
      "epoch": 26.9,
      "learning_rate": 0.001,
      "loss": 2.5442,
      "step": 140112
    },
    {
      "epoch": 26.91,
      "learning_rate": 0.001,
      "loss": 2.5473,
      "step": 140124
    },
    {
      "epoch": 26.91,
      "learning_rate": 0.001,
      "loss": 2.5434,
      "step": 140136
    },
    {
      "epoch": 26.91,
      "learning_rate": 0.001,
      "loss": 2.5552,
      "step": 140148
    },
    {
      "epoch": 26.91,
      "learning_rate": 0.001,
      "loss": 2.5501,
      "step": 140160
    },
    {
      "epoch": 26.91,
      "learning_rate": 0.001,
      "loss": 2.5493,
      "step": 140172
    },
    {
      "epoch": 26.92,
      "learning_rate": 0.001,
      "loss": 2.5458,
      "step": 140184
    },
    {
      "epoch": 26.92,
      "learning_rate": 0.001,
      "loss": 2.5494,
      "step": 140196
    },
    {
      "epoch": 26.92,
      "learning_rate": 0.001,
      "loss": 2.5465,
      "step": 140208
    },
    {
      "epoch": 26.92,
      "learning_rate": 0.001,
      "loss": 2.5515,
      "step": 140220
    },
    {
      "epoch": 26.93,
      "learning_rate": 0.001,
      "loss": 2.561,
      "step": 140232
    },
    {
      "epoch": 26.93,
      "learning_rate": 0.001,
      "loss": 2.5533,
      "step": 140244
    },
    {
      "epoch": 26.93,
      "learning_rate": 0.001,
      "loss": 2.5517,
      "step": 140256
    },
    {
      "epoch": 26.93,
      "learning_rate": 0.001,
      "loss": 2.5517,
      "step": 140268
    },
    {
      "epoch": 26.94,
      "learning_rate": 0.001,
      "loss": 2.5561,
      "step": 140280
    },
    {
      "epoch": 26.94,
      "learning_rate": 0.001,
      "loss": 2.5512,
      "step": 140292
    },
    {
      "epoch": 26.94,
      "learning_rate": 0.001,
      "loss": 2.5598,
      "step": 140304
    },
    {
      "epoch": 26.94,
      "learning_rate": 0.001,
      "loss": 2.5486,
      "step": 140316
    },
    {
      "epoch": 26.94,
      "learning_rate": 0.001,
      "loss": 2.5558,
      "step": 140328
    },
    {
      "epoch": 26.95,
      "learning_rate": 0.001,
      "loss": 2.5441,
      "step": 140340
    },
    {
      "epoch": 26.95,
      "learning_rate": 0.001,
      "loss": 2.5553,
      "step": 140352
    },
    {
      "epoch": 26.95,
      "learning_rate": 0.001,
      "loss": 2.5537,
      "step": 140364
    },
    {
      "epoch": 26.95,
      "learning_rate": 0.001,
      "loss": 2.55,
      "step": 140376
    },
    {
      "epoch": 26.96,
      "learning_rate": 0.001,
      "loss": 2.5406,
      "step": 140388
    },
    {
      "epoch": 26.96,
      "learning_rate": 0.001,
      "loss": 2.5496,
      "step": 140400
    },
    {
      "epoch": 26.96,
      "learning_rate": 0.001,
      "loss": 2.5626,
      "step": 140412
    },
    {
      "epoch": 26.96,
      "learning_rate": 0.001,
      "loss": 2.5524,
      "step": 140424
    },
    {
      "epoch": 26.97,
      "learning_rate": 0.001,
      "loss": 2.551,
      "step": 140436
    },
    {
      "epoch": 26.97,
      "learning_rate": 0.001,
      "loss": 2.543,
      "step": 140448
    },
    {
      "epoch": 26.97,
      "learning_rate": 0.001,
      "loss": 2.5405,
      "step": 140460
    },
    {
      "epoch": 26.97,
      "learning_rate": 0.001,
      "loss": 2.5509,
      "step": 140472
    },
    {
      "epoch": 26.97,
      "learning_rate": 0.001,
      "loss": 2.5466,
      "step": 140484
    },
    {
      "epoch": 26.98,
      "learning_rate": 0.001,
      "loss": 2.5416,
      "step": 140496
    },
    {
      "epoch": 26.98,
      "learning_rate": 0.001,
      "loss": 2.5553,
      "step": 140508
    },
    {
      "epoch": 26.98,
      "learning_rate": 0.001,
      "loss": 2.5424,
      "step": 140520
    },
    {
      "epoch": 26.98,
      "learning_rate": 0.001,
      "loss": 2.5325,
      "step": 140532
    },
    {
      "epoch": 26.99,
      "learning_rate": 0.001,
      "loss": 2.5561,
      "step": 140544
    },
    {
      "epoch": 26.99,
      "learning_rate": 0.001,
      "loss": 2.5481,
      "step": 140556
    },
    {
      "epoch": 26.99,
      "learning_rate": 0.001,
      "loss": 2.5513,
      "step": 140568
    },
    {
      "epoch": 26.99,
      "learning_rate": 0.001,
      "loss": 2.5445,
      "step": 140580
    },
    {
      "epoch": 27.0,
      "learning_rate": 0.001,
      "loss": 2.5494,
      "step": 140592
    },
    {
      "epoch": 27.0,
      "learning_rate": 0.001,
      "loss": 2.5503,
      "step": 140604
    },
    {
      "epoch": 27.0,
      "learning_rate": 0.001,
      "loss": 2.5365,
      "step": 140616
    },
    {
      "epoch": 27.0,
      "eval_ag_news_accuracy": 0.32353125,
      "eval_ag_news_bleu_score": 4.739044514090154,
      "eval_ag_news_bleu_score_sem": 0.140377611948644,
      "eval_ag_news_emb_cos_sim": 0.8107103705406189,
      "eval_ag_news_emb_cos_sim_sem": 0.006744443407290032,
      "eval_ag_news_emb_top1_equal": 0.2421875,
      "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.527357339859009,
      "eval_ag_news_n_ngrams_match_1": 14.024,
      "eval_ag_news_n_ngrams_match_2": 3.058,
      "eval_ag_news_n_ngrams_match_3": 0.844,
      "eval_ag_news_num_pred_words": 46.328,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 34.03390861576159,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3512726980769755,
      "eval_ag_news_runtime": 12.4669,
      "eval_ag_news_samples_per_second": 40.106,
      "eval_ag_news_steps_per_second": 0.08,
      "eval_ag_news_token_set_f1": 0.3512999233288417,
      "eval_ag_news_token_set_f1_sem": 0.004330093257944045,
      "eval_ag_news_token_set_precision": 0.33668663614916,
      "eval_ag_news_token_set_recall": 0.38277024613918653,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 140625
    },
    {
      "epoch": 27.0,
      "eval_anthropic_toxic_prompts_accuracy": 0.11453125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1500712685853585,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12016290631420022,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6760477423667908,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009180294175196833,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2295303344726562,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.254,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.896,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.734,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.33,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.267786775269617,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21460008106013984,
      "eval_anthropic_toxic_prompts_runtime": 9.7578,
      "eval_anthropic_toxic_prompts_samples_per_second": 51.241,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.102,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3593870896366552,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0066447142719551835,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44027933446874573,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32803647486115683,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 140625
    },
    {
      "epoch": 27.0,
      "eval_arxiv_accuracy": 0.3488125,
      "eval_arxiv_bleu_score": 4.256183700835488,
      "eval_arxiv_bleu_score_sem": 0.12197246067995418,
      "eval_arxiv_emb_cos_sim": 0.7529629468917847,
      "eval_arxiv_emb_cos_sim_sem": 0.009230361997049743,
      "eval_arxiv_emb_top1_equal": 0.25,
      "eval_arxiv_emb_top1_equal_sem": 0.03842366440207048,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4022555351257324,
      "eval_arxiv_n_ngrams_match_1": 14.99,
      "eval_arxiv_n_ngrams_match_2": 2.912,
      "eval_arxiv_n_ngrams_match_3": 0.636,
      "eval_arxiv_num_pred_words": 39.9,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 30.03176140516418,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3601718659795816,
      "eval_arxiv_runtime": 10.5045,
      "eval_arxiv_samples_per_second": 47.599,
      "eval_arxiv_steps_per_second": 0.095,
      "eval_arxiv_token_set_f1": 0.3523661766441768,
      "eval_arxiv_token_set_f1_sem": 0.004168593281648541,
      "eval_arxiv_token_set_precision": 0.30437402121242574,
      "eval_arxiv_token_set_recall": 0.4396984248189482,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 140625
    },
    {
      "epoch": 27.0,
      "eval_python_code_alpaca_accuracy": 0.162,
      "eval_python_code_alpaca_bleu_score": 4.855418194740201,
      "eval_python_code_alpaca_bleu_score_sem": 0.15619118291888343,
      "eval_python_code_alpaca_emb_cos_sim": 0.7572555541992188,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008597182647880994,
      "eval_python_code_alpaca_emb_top1_equal": 0.1484375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.866929531097412,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.92,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.95,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.042,
      "eval_python_code_alpaca_num_pred_words": 43.09,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.58294733785115,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.34126812701357984,
      "eval_python_code_alpaca_runtime": 9.9135,
      "eval_python_code_alpaca_samples_per_second": 50.437,
      "eval_python_code_alpaca_steps_per_second": 0.101,
      "eval_python_code_alpaca_token_set_f1": 0.47922315503559504,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005561840676598813,
      "eval_python_code_alpaca_token_set_precision": 0.5407090048895702,
      "eval_python_code_alpaca_token_set_recall": 0.45118218335335475,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 140625
    },
    {
      "epoch": 27.0,
      "eval_wikibio_accuracy": 0.32640625,
      "eval_wikibio_bleu_score": 5.992813297432066,
      "eval_wikibio_bleu_score_sem": 0.2181657710188778,
      "eval_wikibio_emb_cos_sim": 0.7401313781738281,
      "eval_wikibio_emb_cos_sim_sem": 0.010332932773717843,
      "eval_wikibio_emb_top1_equal": 0.1953125,
      "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.703239679336548,
      "eval_wikibio_n_ngrams_match_1": 9.702,
      "eval_wikibio_n_ngrams_match_2": 3.286,
      "eval_wikibio_n_ngrams_match_3": 1.218,
      "eval_wikibio_num_pred_words": 35.008,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 40.57855314341375,
      "eval_wikibio_pred_num_tokens": 62.84375,
      "eval_wikibio_rouge_score": 0.34762803541341447,
      "eval_wikibio_runtime": 9.8407,
      "eval_wikibio_samples_per_second": 50.809,
      "eval_wikibio_steps_per_second": 0.102,
      "eval_wikibio_token_set_f1": 0.313201020253811,
      "eval_wikibio_token_set_f1_sem": 0.005815097802903207,
      "eval_wikibio_token_set_precision": 0.3179119318721146,
      "eval_wikibio_token_set_recall": 0.3278201567920565,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 140625
    },
    {
      "epoch": 27.0,
      "eval_nq_accuracy": 0.53040625,
      "eval_nq_bleu_score": 12.193885788459726,
      "eval_nq_bleu_score_sem": 0.482405015018087,
      "eval_nq_emb_cos_sim": 0.8364588618278503,
      "eval_nq_emb_cos_sim_sem": 0.007292546708561509,
      "eval_nq_emb_top1_equal": 0.3046875,
      "eval_nq_emb_top1_equal_sem": 0.04084279867618665,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.167343854904175,
      "eval_nq_n_ngrams_match_1": 23.15,
      "eval_nq_n_ngrams_match_2": 8.62,
      "eval_nq_n_ngrams_match_3": 4.106,
      "eval_nq_num_pred_words": 48.85,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.7350516355167,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4494291900578522,
      "eval_nq_runtime": 10.4137,
      "eval_nq_samples_per_second": 48.014,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.464658240093137,
      "eval_nq_token_set_f1_sem": 0.004993405310024307,
      "eval_nq_token_set_precision": 0.4231963435907049,
      "eval_nq_token_set_recall": 0.5223170698252272,
      "eval_nq_true_num_tokens": 64.0,
      "step": 140625
    },
    {
      "epoch": 27.0,
      "learning_rate": 0.001,
      "loss": 2.5311,
      "step": 140628
    },
    {
      "epoch": 27.0,
      "learning_rate": 0.001,
      "loss": 2.529,
      "step": 140640
    },
    {
      "epoch": 27.01,
      "learning_rate": 0.001,
      "loss": 2.5286,
      "step": 140652
    },
    {
      "epoch": 27.01,
      "learning_rate": 0.001,
      "loss": 2.5384,
      "step": 140664
    },
    {
      "epoch": 27.01,
      "learning_rate": 0.001,
      "loss": 2.536,
      "step": 140676
    },
    {
      "epoch": 27.01,
      "learning_rate": 0.001,
      "loss": 2.5431,
      "step": 140688
    },
    {
      "epoch": 27.02,
      "learning_rate": 0.001,
      "loss": 2.5391,
      "step": 140700
    },
    {
      "epoch": 27.02,
      "learning_rate": 0.001,
      "loss": 2.5378,
      "step": 140712
    },
    {
      "epoch": 27.02,
      "learning_rate": 0.001,
      "loss": 2.5335,
      "step": 140724
    },
    {
      "epoch": 27.02,
      "learning_rate": 0.001,
      "loss": 2.5409,
      "step": 140736
    },
    {
      "epoch": 27.03,
      "learning_rate": 0.001,
      "loss": 2.5218,
      "step": 140748
    },
    {
      "epoch": 27.03,
      "learning_rate": 0.001,
      "loss": 2.5328,
      "step": 140760
    },
    {
      "epoch": 27.03,
      "learning_rate": 0.001,
      "loss": 2.5367,
      "step": 140772
    },
    {
      "epoch": 27.03,
      "learning_rate": 0.001,
      "loss": 2.5361,
      "step": 140784
    },
    {
      "epoch": 27.03,
      "learning_rate": 0.001,
      "loss": 2.5281,
      "step": 140796
    },
    {
      "epoch": 27.04,
      "learning_rate": 0.001,
      "loss": 2.528,
      "step": 140808
    },
    {
      "epoch": 27.04,
      "learning_rate": 0.001,
      "loss": 2.5363,
      "step": 140820
    },
    {
      "epoch": 27.04,
      "learning_rate": 0.001,
      "loss": 2.5208,
      "step": 140832
    },
    {
      "epoch": 27.04,
      "learning_rate": 0.001,
      "loss": 2.5442,
      "step": 140844
    },
    {
      "epoch": 27.05,
      "learning_rate": 0.001,
      "loss": 2.5363,
      "step": 140856
    },
    {
      "epoch": 27.05,
      "learning_rate": 0.001,
      "loss": 2.5303,
      "step": 140868
    },
    {
      "epoch": 27.05,
      "learning_rate": 0.001,
      "loss": 2.5349,
      "step": 140880
    },
    {
      "epoch": 27.05,
      "learning_rate": 0.001,
      "loss": 2.5238,
      "step": 140892
    },
    {
      "epoch": 27.06,
      "learning_rate": 0.001,
      "loss": 2.5325,
      "step": 140904
    },
    {
      "epoch": 27.06,
      "learning_rate": 0.001,
      "loss": 2.5304,
      "step": 140916
    },
    {
      "epoch": 27.06,
      "learning_rate": 0.001,
      "loss": 2.5328,
      "step": 140928
    },
    {
      "epoch": 27.06,
      "learning_rate": 0.001,
      "loss": 2.5279,
      "step": 140940
    },
    {
      "epoch": 27.06,
      "learning_rate": 0.001,
      "loss": 2.5339,
      "step": 140952
    },
    {
      "epoch": 27.07,
      "learning_rate": 0.001,
      "loss": 2.5324,
      "step": 140964
    },
    {
      "epoch": 27.07,
      "learning_rate": 0.001,
      "loss": 2.5311,
      "step": 140976
    },
    {
      "epoch": 27.07,
      "learning_rate": 0.001,
      "loss": 2.532,
      "step": 140988
    },
    {
      "epoch": 27.07,
      "learning_rate": 0.001,
      "loss": 2.5345,
      "step": 141000
    },
    {
      "epoch": 27.08,
      "learning_rate": 0.001,
      "loss": 2.539,
      "step": 141012
    },
    {
      "epoch": 27.08,
      "learning_rate": 0.001,
      "loss": 2.5322,
      "step": 141024
    },
    {
      "epoch": 27.08,
      "learning_rate": 0.001,
      "loss": 2.5354,
      "step": 141036
    },
    {
      "epoch": 27.08,
      "learning_rate": 0.001,
      "loss": 2.5352,
      "step": 141048
    },
    {
      "epoch": 27.09,
      "learning_rate": 0.001,
      "loss": 2.5369,
      "step": 141060
    },
    {
      "epoch": 27.09,
      "learning_rate": 0.001,
      "loss": 2.5339,
      "step": 141072
    },
    {
      "epoch": 27.09,
      "learning_rate": 0.001,
      "loss": 2.5358,
      "step": 141084
    },
    {
      "epoch": 27.09,
      "learning_rate": 0.001,
      "loss": 2.5207,
      "step": 141096
    },
    {
      "epoch": 27.09,
      "learning_rate": 0.001,
      "loss": 2.5478,
      "step": 141108
    },
    {
      "epoch": 27.1,
      "learning_rate": 0.001,
      "loss": 2.5338,
      "step": 141120
    },
    {
      "epoch": 27.1,
      "learning_rate": 0.001,
      "loss": 2.5389,
      "step": 141132
    },
    {
      "epoch": 27.1,
      "learning_rate": 0.001,
      "loss": 2.5428,
      "step": 141144
    },
    {
      "epoch": 27.1,
      "learning_rate": 0.001,
      "loss": 2.5366,
      "step": 141156
    },
    {
      "epoch": 27.11,
      "learning_rate": 0.001,
      "loss": 2.5397,
      "step": 141168
    },
    {
      "epoch": 27.11,
      "learning_rate": 0.001,
      "loss": 2.5494,
      "step": 141180
    },
    {
      "epoch": 27.11,
      "learning_rate": 0.001,
      "loss": 2.5334,
      "step": 141192
    },
    {
      "epoch": 27.11,
      "learning_rate": 0.001,
      "loss": 2.5343,
      "step": 141204
    },
    {
      "epoch": 27.12,
      "learning_rate": 0.001,
      "loss": 2.532,
      "step": 141216
    },
    {
      "epoch": 27.12,
      "learning_rate": 0.001,
      "loss": 2.5393,
      "step": 141228
    },
    {
      "epoch": 27.12,
      "learning_rate": 0.001,
      "loss": 2.5289,
      "step": 141240
    },
    {
      "epoch": 27.12,
      "eval_ag_news_accuracy": 0.3264375,
      "eval_ag_news_bleu_score": 4.9004347070764664,
      "eval_ag_news_bleu_score_sem": 0.14755183483400694,
      "eval_ag_news_emb_cos_sim": 0.8155641555786133,
      "eval_ag_news_emb_cos_sim_sem": 0.0073370346827982815,
      "eval_ag_news_emb_top1_equal": 0.203125,
      "eval_ag_news_emb_top1_equal_sem": 0.03570055125142555,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5504798889160156,
      "eval_ag_news_n_ngrams_match_1": 14.286,
      "eval_ag_news_n_ngrams_match_2": 3.17,
      "eval_ag_news_n_ngrams_match_3": 0.932,
      "eval_ag_news_num_pred_words": 46.842,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 34.8300280220735,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3535168361662875,
      "eval_ag_news_runtime": 10.0954,
      "eval_ag_news_samples_per_second": 49.528,
      "eval_ag_news_steps_per_second": 0.099,
      "eval_ag_news_token_set_f1": 0.3573181011436177,
      "eval_ag_news_token_set_f1_sem": 0.004470164752863457,
      "eval_ag_news_token_set_precision": 0.3391237020509248,
      "eval_ag_news_token_set_recall": 0.3935938386315709,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 141250
    },
    {
      "epoch": 27.12,
      "eval_anthropic_toxic_prompts_accuracy": 0.11478125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.0150833923399283,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10955506768456669,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6785062551498413,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008186659172494727,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.249908685684204,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.178,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.892,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.714,
      "eval_anthropic_toxic_prompts_num_pred_words": 48.276,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.78798499746996,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21101062662901304,
      "eval_anthropic_toxic_prompts_runtime": 9.9024,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.493,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.101,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3707925278993287,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006482853018165835,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4461795370414037,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.34683668237564347,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 141250
    },
    {
      "epoch": 27.12,
      "eval_arxiv_accuracy": 0.34790625,
      "eval_arxiv_bleu_score": 4.461709294954279,
      "eval_arxiv_bleu_score_sem": 0.1344275111919054,
      "eval_arxiv_emb_cos_sim": 0.7651487588882446,
      "eval_arxiv_emb_cos_sim_sem": 0.008997062068384737,
      "eval_arxiv_emb_top1_equal": 0.2578125,
      "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.398071765899658,
      "eval_arxiv_n_ngrams_match_1": 15.362,
      "eval_arxiv_n_ngrams_match_2": 3.046,
      "eval_arxiv_n_ngrams_match_3": 0.684,
      "eval_arxiv_num_pred_words": 41.014,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.90637791667463,
      "eval_arxiv_pred_num_tokens": 62.96875,
      "eval_arxiv_rouge_score": 0.3651099171339527,
      "eval_arxiv_runtime": 10.1248,
      "eval_arxiv_samples_per_second": 49.384,
      "eval_arxiv_steps_per_second": 0.099,
      "eval_arxiv_token_set_f1": 0.359213955285717,
      "eval_arxiv_token_set_f1_sem": 0.004244709962988462,
      "eval_arxiv_token_set_precision": 0.308466804437646,
      "eval_arxiv_token_set_recall": 0.44681578820475754,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 141250
    },
    {
      "epoch": 27.12,
      "eval_python_code_alpaca_accuracy": 0.16103125,
      "eval_python_code_alpaca_bleu_score": 4.446975211810466,
      "eval_python_code_alpaca_bleu_score_sem": 0.1370287258997162,
      "eval_python_code_alpaca_emb_cos_sim": 0.7647826671600342,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007976102078412903,
      "eval_python_code_alpaca_emb_top1_equal": 0.1640625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8925230503082275,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.074,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.006,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.998,
      "eval_python_code_alpaca_num_pred_words": 46.34,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.038764945833506,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.32902236652320827,
      "eval_python_code_alpaca_runtime": 9.8522,
      "eval_python_code_alpaca_samples_per_second": 50.75,
      "eval_python_code_alpaca_steps_per_second": 0.101,
      "eval_python_code_alpaca_token_set_f1": 0.48857035639300544,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005201786981010137,
      "eval_python_code_alpaca_token_set_precision": 0.5519706306998449,
      "eval_python_code_alpaca_token_set_recall": 0.45941360942084103,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 141250
    },
    {
      "epoch": 27.12,
      "eval_wikibio_accuracy": 0.3249375,
      "eval_wikibio_bleu_score": 5.880058079662076,
      "eval_wikibio_bleu_score_sem": 0.21368764081563563,
      "eval_wikibio_emb_cos_sim": 0.7586035132408142,
      "eval_wikibio_emb_cos_sim_sem": 0.007264186489325233,
      "eval_wikibio_emb_top1_equal": 0.15625,
      "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7385756969451904,
      "eval_wikibio_n_ngrams_match_1": 10.236,
      "eval_wikibio_n_ngrams_match_2": 3.372,
      "eval_wikibio_n_ngrams_match_3": 1.204,
      "eval_wikibio_num_pred_words": 37.052,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 42.03807254955418,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3581604096255913,
      "eval_wikibio_runtime": 9.8967,
      "eval_wikibio_samples_per_second": 50.522,
      "eval_wikibio_steps_per_second": 0.101,
      "eval_wikibio_token_set_f1": 0.32323460095912443,
      "eval_wikibio_token_set_f1_sem": 0.005248446062747598,
      "eval_wikibio_token_set_precision": 0.331230550101497,
      "eval_wikibio_token_set_recall": 0.3317681584772462,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 141250
    },
    {
      "epoch": 27.12,
      "eval_nq_accuracy": 0.5293125,
      "eval_nq_bleu_score": 11.888426097655694,
      "eval_nq_bleu_score_sem": 0.4823268204876117,
      "eval_nq_emb_cos_sim": 0.8352838158607483,
      "eval_nq_emb_cos_sim_sem": 0.006663987049116259,
      "eval_nq_emb_top1_equal": 0.3046875,
      "eval_nq_emb_top1_equal_sem": 0.04084279867618665,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1758615970611572,
      "eval_nq_n_ngrams_match_1": 23.2,
      "eval_nq_n_ngrams_match_2": 8.608,
      "eval_nq_n_ngrams_match_3": 3.964,
      "eval_nq_num_pred_words": 49.152,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.809772327108014,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.44937964306180467,
      "eval_nq_runtime": 10.6536,
      "eval_nq_samples_per_second": 46.933,
      "eval_nq_steps_per_second": 0.094,
      "eval_nq_token_set_f1": 0.46292291547340003,
      "eval_nq_token_set_f1_sem": 0.004760199512869909,
      "eval_nq_token_set_precision": 0.41947397455665075,
      "eval_nq_token_set_recall": 0.5252281568202145,
      "eval_nq_true_num_tokens": 64.0,
      "step": 141250
    },
    {
      "epoch": 27.12,
      "learning_rate": 0.001,
      "loss": 2.5459,
      "step": 141252
    },
    {
      "epoch": 27.12,
      "learning_rate": 0.001,
      "loss": 2.5398,
      "step": 141264
    },
    {
      "epoch": 27.13,
      "learning_rate": 0.001,
      "loss": 2.5328,
      "step": 141276
    },
    {
      "epoch": 27.13,
      "learning_rate": 0.001,
      "loss": 2.5429,
      "step": 141288
    },
    {
      "epoch": 27.13,
      "learning_rate": 0.001,
      "loss": 2.5328,
      "step": 141300
    },
    {
      "epoch": 27.13,
      "learning_rate": 0.001,
      "loss": 2.5354,
      "step": 141312
    },
    {
      "epoch": 27.14,
      "learning_rate": 0.001,
      "loss": 2.5403,
      "step": 141324
    },
    {
      "epoch": 27.14,
      "learning_rate": 0.001,
      "loss": 2.5366,
      "step": 141336
    },
    {
      "epoch": 27.14,
      "learning_rate": 0.001,
      "loss": 2.5456,
      "step": 141348
    },
    {
      "epoch": 27.14,
      "learning_rate": 0.001,
      "loss": 2.5396,
      "step": 141360
    },
    {
      "epoch": 27.15,
      "learning_rate": 0.001,
      "loss": 2.5388,
      "step": 141372
    },
    {
      "epoch": 27.15,
      "learning_rate": 0.001,
      "loss": 2.5489,
      "step": 141384
    },
    {
      "epoch": 27.15,
      "learning_rate": 0.001,
      "loss": 2.5427,
      "step": 141396
    },
    {
      "epoch": 27.15,
      "learning_rate": 0.001,
      "loss": 2.5496,
      "step": 141408
    },
    {
      "epoch": 27.15,
      "learning_rate": 0.001,
      "loss": 2.5454,
      "step": 141420
    },
    {
      "epoch": 27.16,
      "learning_rate": 0.001,
      "loss": 2.541,
      "step": 141432
    },
    {
      "epoch": 27.16,
      "learning_rate": 0.001,
      "loss": 2.5397,
      "step": 141444
    },
    {
      "epoch": 27.16,
      "learning_rate": 0.001,
      "loss": 2.5312,
      "step": 141456
    },
    {
      "epoch": 27.16,
      "learning_rate": 0.001,
      "loss": 2.5406,
      "step": 141468
    },
    {
      "epoch": 27.17,
      "learning_rate": 0.001,
      "loss": 2.5426,
      "step": 141480
    },
    {
      "epoch": 27.17,
      "learning_rate": 0.001,
      "loss": 2.5333,
      "step": 141492
    },
    {
      "epoch": 27.17,
      "learning_rate": 0.001,
      "loss": 2.5457,
      "step": 141504
    },
    {
      "epoch": 27.17,
      "learning_rate": 0.001,
      "loss": 2.5288,
      "step": 141516
    },
    {
      "epoch": 27.18,
      "learning_rate": 0.001,
      "loss": 2.5406,
      "step": 141528
    },
    {
      "epoch": 27.18,
      "learning_rate": 0.001,
      "loss": 2.5397,
      "step": 141540
    },
    {
      "epoch": 27.18,
      "learning_rate": 0.001,
      "loss": 2.5436,
      "step": 141552
    },
    {
      "epoch": 27.18,
      "learning_rate": 0.001,
      "loss": 2.5539,
      "step": 141564
    },
    {
      "epoch": 27.18,
      "learning_rate": 0.001,
      "loss": 2.5377,
      "step": 141576
    },
    {
      "epoch": 27.19,
      "learning_rate": 0.001,
      "loss": 2.531,
      "step": 141588
    },
    {
      "epoch": 27.19,
      "learning_rate": 0.001,
      "loss": 2.547,
      "step": 141600
    },
    {
      "epoch": 27.19,
      "learning_rate": 0.001,
      "loss": 2.5361,
      "step": 141612
    },
    {
      "epoch": 27.19,
      "learning_rate": 0.001,
      "loss": 2.5449,
      "step": 141624
    },
    {
      "epoch": 27.2,
      "learning_rate": 0.001,
      "loss": 2.5374,
      "step": 141636
    },
    {
      "epoch": 27.2,
      "learning_rate": 0.001,
      "loss": 2.5378,
      "step": 141648
    },
    {
      "epoch": 27.2,
      "learning_rate": 0.001,
      "loss": 2.5437,
      "step": 141660
    },
    {
      "epoch": 27.2,
      "learning_rate": 0.001,
      "loss": 2.5397,
      "step": 141672
    },
    {
      "epoch": 27.21,
      "learning_rate": 0.001,
      "loss": 2.5319,
      "step": 141684
    },
    {
      "epoch": 27.21,
      "learning_rate": 0.001,
      "loss": 2.5341,
      "step": 141696
    },
    {
      "epoch": 27.21,
      "learning_rate": 0.001,
      "loss": 2.5356,
      "step": 141708
    },
    {
      "epoch": 27.21,
      "learning_rate": 0.001,
      "loss": 2.5367,
      "step": 141720
    },
    {
      "epoch": 27.21,
      "learning_rate": 0.001,
      "loss": 2.5418,
      "step": 141732
    },
    {
      "epoch": 27.22,
      "learning_rate": 0.001,
      "loss": 2.54,
      "step": 141744
    },
    {
      "epoch": 27.22,
      "learning_rate": 0.001,
      "loss": 2.5363,
      "step": 141756
    },
    {
      "epoch": 27.22,
      "learning_rate": 0.001,
      "loss": 2.5414,
      "step": 141768
    },
    {
      "epoch": 27.22,
      "learning_rate": 0.001,
      "loss": 2.5373,
      "step": 141780
    },
    {
      "epoch": 27.23,
      "learning_rate": 0.001,
      "loss": 2.5378,
      "step": 141792
    },
    {
      "epoch": 27.23,
      "learning_rate": 0.001,
      "loss": 2.5368,
      "step": 141804
    },
    {
      "epoch": 27.23,
      "learning_rate": 0.001,
      "loss": 2.5447,
      "step": 141816
    },
    {
      "epoch": 27.23,
      "learning_rate": 0.001,
      "loss": 2.5422,
      "step": 141828
    },
    {
      "epoch": 27.24,
      "learning_rate": 0.001,
      "loss": 2.5388,
      "step": 141840
    },
    {
      "epoch": 27.24,
      "learning_rate": 0.001,
      "loss": 2.545,
      "step": 141852
    },
    {
      "epoch": 27.24,
      "learning_rate": 0.001,
      "loss": 2.5404,
      "step": 141864
    },
    {
      "epoch": 27.24,
      "eval_ag_news_accuracy": 0.3230625,
      "eval_ag_news_bleu_score": 4.695236421046426,
      "eval_ag_news_bleu_score_sem": 0.14337753510925322,
      "eval_ag_news_emb_cos_sim": 0.8126242160797119,
      "eval_ag_news_emb_cos_sim_sem": 0.007243090016012007,
      "eval_ag_news_emb_top1_equal": 0.25,
      "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5421786308288574,
      "eval_ag_news_n_ngrams_match_1": 14.102,
      "eval_ag_news_n_ngrams_match_2": 3.044,
      "eval_ag_news_n_ngrams_match_3": 0.852,
      "eval_ag_news_num_pred_words": 46.33,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 34.54209174046591,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3511028068599481,
      "eval_ag_news_runtime": 10.5234,
      "eval_ag_news_samples_per_second": 47.513,
      "eval_ag_news_steps_per_second": 0.095,
      "eval_ag_news_token_set_f1": 0.35126550267062095,
      "eval_ag_news_token_set_f1_sem": 0.004349084425878091,
      "eval_ag_news_token_set_precision": 0.3368724276109881,
      "eval_ag_news_token_set_recall": 0.3818385208659725,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 141875
    },
    {
      "epoch": 27.24,
      "eval_anthropic_toxic_prompts_accuracy": 0.1156875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1471792051203704,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11996522175800078,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6792980432510376,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009361244425288769,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2164149284362793,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.282,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.956,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.746,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.69,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.93855322728402,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21490718146133306,
      "eval_anthropic_toxic_prompts_runtime": 9.8643,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.688,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.101,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35637220708491674,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00661980135967938,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4446733590317007,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32503633152903283,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 141875
    },
    {
      "epoch": 27.24,
      "eval_arxiv_accuracy": 0.34909375,
      "eval_arxiv_bleu_score": 4.461642500518708,
      "eval_arxiv_bleu_score_sem": 0.13195831530701474,
      "eval_arxiv_emb_cos_sim": 0.7710940837860107,
      "eval_arxiv_emb_cos_sim_sem": 0.007245903976694617,
      "eval_arxiv_emb_top1_equal": 0.2578125,
      "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3970530033111572,
      "eval_arxiv_n_ngrams_match_1": 15.342,
      "eval_arxiv_n_ngrams_match_2": 2.986,
      "eval_arxiv_n_ngrams_match_3": 0.674,
      "eval_arxiv_num_pred_words": 40.674,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.875925932000698,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.366844827069081,
      "eval_arxiv_runtime": 10.5053,
      "eval_arxiv_samples_per_second": 47.595,
      "eval_arxiv_steps_per_second": 0.095,
      "eval_arxiv_token_set_f1": 0.35957249144156844,
      "eval_arxiv_token_set_f1_sem": 0.004022660989498987,
      "eval_arxiv_token_set_precision": 0.31175704851952657,
      "eval_arxiv_token_set_recall": 0.43951956309984946,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 141875
    },
    {
      "epoch": 27.24,
      "eval_python_code_alpaca_accuracy": 0.16125,
      "eval_python_code_alpaca_bleu_score": 4.800524728411466,
      "eval_python_code_alpaca_bleu_score_sem": 0.15556858641471405,
      "eval_python_code_alpaca_emb_cos_sim": 0.7585030794143677,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.0076178196217558895,
      "eval_python_code_alpaca_emb_top1_equal": 0.1328125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.879707098007202,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.124,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.06,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.052,
      "eval_python_code_alpaca_num_pred_words": 44.164,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.80905610758034,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3399763958064745,
      "eval_python_code_alpaca_runtime": 9.9431,
      "eval_python_code_alpaca_samples_per_second": 50.286,
      "eval_python_code_alpaca_steps_per_second": 0.101,
      "eval_python_code_alpaca_token_set_f1": 0.48190383905050427,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0055441938121901495,
      "eval_python_code_alpaca_token_set_precision": 0.5517781748117877,
      "eval_python_code_alpaca_token_set_recall": 0.4495225154600577,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 141875
    },
    {
      "epoch": 27.24,
      "eval_wikibio_accuracy": 0.3265,
      "eval_wikibio_bleu_score": 5.8613962982352295,
      "eval_wikibio_bleu_score_sem": 0.20624154148742285,
      "eval_wikibio_emb_cos_sim": 0.7403441667556763,
      "eval_wikibio_emb_cos_sim_sem": 0.009352147291455495,
      "eval_wikibio_emb_top1_equal": 0.1015625,
      "eval_wikibio_emb_top1_equal_sem": 0.026804565886848545,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7235610485076904,
      "eval_wikibio_n_ngrams_match_1": 10.048,
      "eval_wikibio_n_ngrams_match_2": 3.336,
      "eval_wikibio_n_ngrams_match_3": 1.202,
      "eval_wikibio_num_pred_words": 35.714,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 41.411600566779896,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3533449039304873,
      "eval_wikibio_runtime": 9.8714,
      "eval_wikibio_samples_per_second": 50.652,
      "eval_wikibio_steps_per_second": 0.101,
      "eval_wikibio_token_set_f1": 0.31918143246822217,
      "eval_wikibio_token_set_f1_sem": 0.005600280279077625,
      "eval_wikibio_token_set_precision": 0.32590203181550037,
      "eval_wikibio_token_set_recall": 0.33026026306714884,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 141875
    },
    {
      "epoch": 27.24,
      "eval_nq_accuracy": 0.530625,
      "eval_nq_bleu_score": 11.923652479195313,
      "eval_nq_bleu_score_sem": 0.48769293250735835,
      "eval_nq_emb_cos_sim": 0.8363159894943237,
      "eval_nq_emb_cos_sim_sem": 0.006774035104756235,
      "eval_nq_emb_top1_equal": 0.3046875,
      "eval_nq_emb_top1_equal_sem": 0.04084279867618665,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.17130708694458,
      "eval_nq_n_ngrams_match_1": 23.072,
      "eval_nq_n_ngrams_match_2": 8.488,
      "eval_nq_n_ngrams_match_3": 3.974,
      "eval_nq_num_pred_words": 48.922,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.769739364388474,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4477909038090314,
      "eval_nq_runtime": 10.318,
      "eval_nq_samples_per_second": 48.459,
      "eval_nq_steps_per_second": 0.097,
      "eval_nq_token_set_f1": 0.46216718630629605,
      "eval_nq_token_set_f1_sem": 0.004894449845837533,
      "eval_nq_token_set_precision": 0.4203633866577919,
      "eval_nq_token_set_recall": 0.5202700453028507,
      "eval_nq_true_num_tokens": 64.0,
      "step": 141875
    },
    {
      "epoch": 27.24,
      "learning_rate": 0.001,
      "loss": 2.5375,
      "step": 141876
    },
    {
      "epoch": 27.24,
      "learning_rate": 0.001,
      "loss": 2.5352,
      "step": 141888
    },
    {
      "epoch": 27.25,
      "learning_rate": 0.001,
      "loss": 2.5383,
      "step": 141900
    },
    {
      "epoch": 27.25,
      "learning_rate": 0.001,
      "loss": 2.5336,
      "step": 141912
    },
    {
      "epoch": 27.25,
      "learning_rate": 0.001,
      "loss": 2.5414,
      "step": 141924
    },
    {
      "epoch": 27.25,
      "learning_rate": 0.001,
      "loss": 2.5426,
      "step": 141936
    },
    {
      "epoch": 27.26,
      "learning_rate": 0.001,
      "loss": 2.5395,
      "step": 141948
    },
    {
      "epoch": 27.26,
      "learning_rate": 0.001,
      "loss": 2.5364,
      "step": 141960
    },
    {
      "epoch": 27.26,
      "learning_rate": 0.001,
      "loss": 2.5461,
      "step": 141972
    },
    {
      "epoch": 27.26,
      "learning_rate": 0.001,
      "loss": 2.5383,
      "step": 141984
    },
    {
      "epoch": 27.26,
      "learning_rate": 0.001,
      "loss": 2.5307,
      "step": 141996
    },
    {
      "epoch": 27.27,
      "learning_rate": 0.001,
      "loss": 2.5458,
      "step": 142008
    },
    {
      "epoch": 27.27,
      "learning_rate": 0.001,
      "loss": 2.5281,
      "step": 142020
    },
    {
      "epoch": 27.27,
      "learning_rate": 0.001,
      "loss": 2.5347,
      "step": 142032
    },
    {
      "epoch": 27.27,
      "learning_rate": 0.001,
      "loss": 2.5357,
      "step": 142044
    },
    {
      "epoch": 27.28,
      "learning_rate": 0.001,
      "loss": 2.5377,
      "step": 142056
    },
    {
      "epoch": 27.28,
      "learning_rate": 0.001,
      "loss": 2.5383,
      "step": 142068
    },
    {
      "epoch": 27.28,
      "learning_rate": 0.001,
      "loss": 2.5344,
      "step": 142080
    },
    {
      "epoch": 27.28,
      "learning_rate": 0.001,
      "loss": 2.5379,
      "step": 142092
    },
    {
      "epoch": 27.29,
      "learning_rate": 0.001,
      "loss": 2.531,
      "step": 142104
    },
    {
      "epoch": 27.29,
      "learning_rate": 0.001,
      "loss": 2.5433,
      "step": 142116
    },
    {
      "epoch": 27.29,
      "learning_rate": 0.001,
      "loss": 2.5452,
      "step": 142128
    },
    {
      "epoch": 27.29,
      "learning_rate": 0.001,
      "loss": 2.5241,
      "step": 142140
    },
    {
      "epoch": 27.29,
      "learning_rate": 0.001,
      "loss": 2.5477,
      "step": 142152
    },
    {
      "epoch": 27.3,
      "learning_rate": 0.001,
      "loss": 2.5381,
      "step": 142164
    },
    {
      "epoch": 27.3,
      "learning_rate": 0.001,
      "loss": 2.5334,
      "step": 142176
    },
    {
      "epoch": 27.3,
      "learning_rate": 0.001,
      "loss": 2.5365,
      "step": 142188
    },
    {
      "epoch": 27.3,
      "learning_rate": 0.001,
      "loss": 2.5414,
      "step": 142200
    },
    {
      "epoch": 27.31,
      "learning_rate": 0.001,
      "loss": 2.548,
      "step": 142212
    },
    {
      "epoch": 27.31,
      "learning_rate": 0.001,
      "loss": 2.5367,
      "step": 142224
    },
    {
      "epoch": 27.31,
      "learning_rate": 0.001,
      "loss": 2.5334,
      "step": 142236
    },
    {
      "epoch": 27.31,
      "learning_rate": 0.001,
      "loss": 2.5359,
      "step": 142248
    },
    {
      "epoch": 27.32,
      "learning_rate": 0.001,
      "loss": 2.5442,
      "step": 142260
    },
    {
      "epoch": 27.32,
      "learning_rate": 0.001,
      "loss": 2.5478,
      "step": 142272
    },
    {
      "epoch": 27.32,
      "learning_rate": 0.001,
      "loss": 2.5485,
      "step": 142284
    },
    {
      "epoch": 27.32,
      "learning_rate": 0.001,
      "loss": 2.5442,
      "step": 142296
    },
    {
      "epoch": 27.32,
      "learning_rate": 0.001,
      "loss": 2.5523,
      "step": 142308
    },
    {
      "epoch": 27.33,
      "learning_rate": 0.001,
      "loss": 2.5393,
      "step": 142320
    },
    {
      "epoch": 27.33,
      "learning_rate": 0.001,
      "loss": 2.5376,
      "step": 142332
    },
    {
      "epoch": 27.33,
      "learning_rate": 0.001,
      "loss": 2.5326,
      "step": 142344
    },
    {
      "epoch": 27.33,
      "learning_rate": 0.001,
      "loss": 2.5416,
      "step": 142356
    },
    {
      "epoch": 27.34,
      "learning_rate": 0.001,
      "loss": 2.5306,
      "step": 142368
    },
    {
      "epoch": 27.34,
      "learning_rate": 0.001,
      "loss": 2.5396,
      "step": 142380
    },
    {
      "epoch": 27.34,
      "learning_rate": 0.001,
      "loss": 2.5414,
      "step": 142392
    },
    {
      "epoch": 27.34,
      "learning_rate": 0.001,
      "loss": 2.5432,
      "step": 142404
    },
    {
      "epoch": 27.35,
      "learning_rate": 0.001,
      "loss": 2.538,
      "step": 142416
    },
    {
      "epoch": 27.35,
      "learning_rate": 0.001,
      "loss": 2.5303,
      "step": 142428
    },
    {
      "epoch": 27.35,
      "learning_rate": 0.001,
      "loss": 2.5378,
      "step": 142440
    },
    {
      "epoch": 27.35,
      "learning_rate": 0.001,
      "loss": 2.5344,
      "step": 142452
    },
    {
      "epoch": 27.35,
      "learning_rate": 0.001,
      "loss": 2.543,
      "step": 142464
    },
    {
      "epoch": 27.36,
      "learning_rate": 0.001,
      "loss": 2.5343,
      "step": 142476
    },
    {
      "epoch": 27.36,
      "learning_rate": 0.001,
      "loss": 2.5376,
      "step": 142488
    },
    {
      "epoch": 27.36,
      "learning_rate": 0.001,
      "loss": 2.5423,
      "step": 142500
    },
    {
      "epoch": 27.36,
      "eval_ag_news_accuracy": 0.3239375,
      "eval_ag_news_bleu_score": 4.830598132312196,
      "eval_ag_news_bleu_score_sem": 0.1573441424417308,
      "eval_ag_news_emb_cos_sim": 0.8136233687400818,
      "eval_ag_news_emb_cos_sim_sem": 0.006709981773187296,
      "eval_ag_news_emb_top1_equal": 0.2109375,
      "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5366628170013428,
      "eval_ag_news_n_ngrams_match_1": 14.158,
      "eval_ag_news_n_ngrams_match_2": 3.12,
      "eval_ag_news_n_ngrams_match_3": 0.92,
      "eval_ag_news_num_pred_words": 47.28,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 34.35208848622561,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.34865583719920046,
      "eval_ag_news_runtime": 10.5531,
      "eval_ag_news_samples_per_second": 47.379,
      "eval_ag_news_steps_per_second": 0.095,
      "eval_ag_news_token_set_f1": 0.3506578007699237,
      "eval_ag_news_token_set_f1_sem": 0.00432896121733213,
      "eval_ag_news_token_set_precision": 0.33785937442762887,
      "eval_ag_news_token_set_recall": 0.3771866626859236,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 142500
    },
    {
      "epoch": 27.36,
      "eval_anthropic_toxic_prompts_accuracy": 0.1156875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.018429416369582,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11312213146117575,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.677958607673645,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008252589355911416,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.21828031539917,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.144,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.838,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.67,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.396,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.985116695288514,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21155234687896496,
      "eval_anthropic_toxic_prompts_runtime": 10.4133,
      "eval_anthropic_toxic_prompts_samples_per_second": 48.016,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.096,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3547790678122964,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00643582847758279,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.43369618688291867,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3269438007734628,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 142500
    },
    {
      "epoch": 27.36,
      "eval_arxiv_accuracy": 0.34953125,
      "eval_arxiv_bleu_score": 4.32187604679201,
      "eval_arxiv_bleu_score_sem": 0.1283524302047555,
      "eval_arxiv_emb_cos_sim": 0.7498331665992737,
      "eval_arxiv_emb_cos_sim_sem": 0.009119076536279268,
      "eval_arxiv_emb_top1_equal": 0.21875,
      "eval_arxiv_emb_top1_equal_sem": 0.03668319712192295,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.39444637298584,
      "eval_arxiv_n_ngrams_match_1": 14.798,
      "eval_arxiv_n_ngrams_match_2": 2.97,
      "eval_arxiv_n_ngrams_match_3": 0.698,
      "eval_arxiv_num_pred_words": 39.894,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.79815184565188,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.35563091088651877,
      "eval_arxiv_runtime": 10.1858,
      "eval_arxiv_samples_per_second": 49.088,
      "eval_arxiv_steps_per_second": 0.098,
      "eval_arxiv_token_set_f1": 0.34819284414634094,
      "eval_arxiv_token_set_f1_sem": 0.004589054415412313,
      "eval_arxiv_token_set_precision": 0.2984303754212457,
      "eval_arxiv_token_set_recall": 0.4399873959763242,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 142500
    },
    {
      "epoch": 27.36,
      "eval_python_code_alpaca_accuracy": 0.160625,
      "eval_python_code_alpaca_bleu_score": 4.589698055543078,
      "eval_python_code_alpaca_bleu_score_sem": 0.14378166776701212,
      "eval_python_code_alpaca_emb_cos_sim": 0.7577734589576721,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.00971669711619498,
      "eval_python_code_alpaca_emb_top1_equal": 0.1796875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.034068008879424266,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.88792085647583,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.822,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.89,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.99,
      "eval_python_code_alpaca_num_pred_words": 43.624,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.955937792553886,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3282297908130535,
      "eval_python_code_alpaca_runtime": 10.3509,
      "eval_python_code_alpaca_samples_per_second": 48.305,
      "eval_python_code_alpaca_steps_per_second": 0.097,
      "eval_python_code_alpaca_token_set_f1": 0.47276119537101813,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005933106140040877,
      "eval_python_code_alpaca_token_set_precision": 0.5343343202297457,
      "eval_python_code_alpaca_token_set_recall": 0.44413219104114415,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 142500
    },
    {
      "epoch": 27.36,
      "eval_wikibio_accuracy": 0.32396875,
      "eval_wikibio_bleu_score": 6.13173766393367,
      "eval_wikibio_bleu_score_sem": 0.21518832836323656,
      "eval_wikibio_emb_cos_sim": 0.7459526062011719,
      "eval_wikibio_emb_cos_sim_sem": 0.008536965338071054,
      "eval_wikibio_emb_top1_equal": 0.203125,
      "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.712768077850342,
      "eval_wikibio_n_ngrams_match_1": 10.32,
      "eval_wikibio_n_ngrams_match_2": 3.526,
      "eval_wikibio_n_ngrams_match_3": 1.28,
      "eval_wikibio_num_pred_words": 36.85,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 40.96704970459743,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36472456853502216,
      "eval_wikibio_runtime": 10.0988,
      "eval_wikibio_samples_per_second": 49.511,
      "eval_wikibio_steps_per_second": 0.099,
      "eval_wikibio_token_set_f1": 0.3260278336768941,
      "eval_wikibio_token_set_f1_sem": 0.005138854716074928,
      "eval_wikibio_token_set_precision": 0.33401035182185435,
      "eval_wikibio_token_set_recall": 0.33341173815211955,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 142500
    },
    {
      "epoch": 27.36,
      "eval_nq_accuracy": 0.529,
      "eval_nq_bleu_score": 11.780356328999263,
      "eval_nq_bleu_score_sem": 0.47903628345571003,
      "eval_nq_emb_cos_sim": 0.8332801461219788,
      "eval_nq_emb_cos_sim_sem": 0.007031384420308262,
      "eval_nq_emb_top1_equal": 0.2890625,
      "eval_nq_emb_top1_equal_sem": 0.04022626667363519,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.173349618911743,
      "eval_nq_n_ngrams_match_1": 23.45,
      "eval_nq_n_ngrams_match_2": 8.572,
      "eval_nq_n_ngrams_match_3": 3.876,
      "eval_nq_num_pred_words": 49.566,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.787670143244592,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4521968124155398,
      "eval_nq_runtime": 10.4245,
      "eval_nq_samples_per_second": 47.964,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.46548585375671925,
      "eval_nq_token_set_f1_sem": 0.0049039555675616095,
      "eval_nq_token_set_precision": 0.42552332036072815,
      "eval_nq_token_set_recall": 0.520016810258735,
      "eval_nq_true_num_tokens": 64.0,
      "step": 142500
    },
    {
      "epoch": 27.36,
      "learning_rate": 0.001,
      "loss": 2.5432,
      "step": 142512
    },
    {
      "epoch": 27.37,
      "learning_rate": 0.001,
      "loss": 2.5471,
      "step": 142524
    },
    {
      "epoch": 27.37,
      "learning_rate": 0.001,
      "loss": 2.5467,
      "step": 142536
    },
    {
      "epoch": 27.37,
      "learning_rate": 0.001,
      "loss": 2.5475,
      "step": 142548
    },
    {
      "epoch": 27.37,
      "learning_rate": 0.001,
      "loss": 2.5434,
      "step": 142560
    },
    {
      "epoch": 27.38,
      "learning_rate": 0.001,
      "loss": 2.5276,
      "step": 142572
    },
    {
      "epoch": 27.38,
      "learning_rate": 0.001,
      "loss": 2.5458,
      "step": 142584
    },
    {
      "epoch": 27.38,
      "learning_rate": 0.001,
      "loss": 2.5434,
      "step": 142596
    },
    {
      "epoch": 27.38,
      "learning_rate": 0.001,
      "loss": 2.5444,
      "step": 142608
    },
    {
      "epoch": 27.38,
      "learning_rate": 0.001,
      "loss": 2.5483,
      "step": 142620
    },
    {
      "epoch": 27.39,
      "learning_rate": 0.001,
      "loss": 2.5413,
      "step": 142632
    },
    {
      "epoch": 27.39,
      "learning_rate": 0.001,
      "loss": 2.5465,
      "step": 142644
    },
    {
      "epoch": 27.39,
      "learning_rate": 0.001,
      "loss": 2.5417,
      "step": 142656
    },
    {
      "epoch": 27.39,
      "learning_rate": 0.001,
      "loss": 2.5567,
      "step": 142668
    },
    {
      "epoch": 27.4,
      "learning_rate": 0.001,
      "loss": 2.5329,
      "step": 142680
    },
    {
      "epoch": 27.4,
      "learning_rate": 0.001,
      "loss": 2.5459,
      "step": 142692
    },
    {
      "epoch": 27.4,
      "learning_rate": 0.001,
      "loss": 2.5459,
      "step": 142704
    },
    {
      "epoch": 27.4,
      "learning_rate": 0.001,
      "loss": 2.5435,
      "step": 142716
    },
    {
      "epoch": 27.41,
      "learning_rate": 0.001,
      "loss": 2.5444,
      "step": 142728
    },
    {
      "epoch": 27.41,
      "learning_rate": 0.001,
      "loss": 2.5432,
      "step": 142740
    },
    {
      "epoch": 27.41,
      "learning_rate": 0.001,
      "loss": 2.5471,
      "step": 142752
    },
    {
      "epoch": 27.41,
      "learning_rate": 0.001,
      "loss": 2.5399,
      "step": 142764
    },
    {
      "epoch": 27.41,
      "learning_rate": 0.001,
      "loss": 2.5522,
      "step": 142776
    },
    {
      "epoch": 27.42,
      "learning_rate": 0.001,
      "loss": 2.5441,
      "step": 142788
    },
    {
      "epoch": 27.42,
      "learning_rate": 0.001,
      "loss": 2.5372,
      "step": 142800
    },
    {
      "epoch": 27.42,
      "learning_rate": 0.001,
      "loss": 2.5418,
      "step": 142812
    },
    {
      "epoch": 27.42,
      "learning_rate": 0.001,
      "loss": 2.552,
      "step": 142824
    },
    {
      "epoch": 27.43,
      "learning_rate": 0.001,
      "loss": 2.5378,
      "step": 142836
    },
    {
      "epoch": 27.43,
      "learning_rate": 0.001,
      "loss": 2.5502,
      "step": 142848
    },
    {
      "epoch": 27.43,
      "learning_rate": 0.001,
      "loss": 2.5404,
      "step": 142860
    },
    {
      "epoch": 27.43,
      "learning_rate": 0.001,
      "loss": 2.536,
      "step": 142872
    },
    {
      "epoch": 27.44,
      "learning_rate": 0.001,
      "loss": 2.5402,
      "step": 142884
    },
    {
      "epoch": 27.44,
      "learning_rate": 0.001,
      "loss": 2.5466,
      "step": 142896
    },
    {
      "epoch": 27.44,
      "learning_rate": 0.001,
      "loss": 2.544,
      "step": 142908
    },
    {
      "epoch": 27.44,
      "learning_rate": 0.001,
      "loss": 2.5493,
      "step": 142920
    },
    {
      "epoch": 27.44,
      "learning_rate": 0.001,
      "loss": 2.5458,
      "step": 142932
    },
    {
      "epoch": 27.45,
      "learning_rate": 0.001,
      "loss": 2.5425,
      "step": 142944
    },
    {
      "epoch": 27.45,
      "learning_rate": 0.001,
      "loss": 2.5369,
      "step": 142956
    },
    {
      "epoch": 27.45,
      "learning_rate": 0.001,
      "loss": 2.5441,
      "step": 142968
    },
    {
      "epoch": 27.45,
      "learning_rate": 0.001,
      "loss": 2.5474,
      "step": 142980
    },
    {
      "epoch": 27.46,
      "learning_rate": 0.001,
      "loss": 2.5406,
      "step": 142992
    },
    {
      "epoch": 27.46,
      "learning_rate": 0.001,
      "loss": 2.5343,
      "step": 143004
    },
    {
      "epoch": 27.46,
      "learning_rate": 0.001,
      "loss": 2.5335,
      "step": 143016
    },
    {
      "epoch": 27.46,
      "learning_rate": 0.001,
      "loss": 2.5485,
      "step": 143028
    },
    {
      "epoch": 27.47,
      "learning_rate": 0.001,
      "loss": 2.5369,
      "step": 143040
    },
    {
      "epoch": 27.47,
      "learning_rate": 0.001,
      "loss": 2.5428,
      "step": 143052
    },
    {
      "epoch": 27.47,
      "learning_rate": 0.001,
      "loss": 2.5492,
      "step": 143064
    },
    {
      "epoch": 27.47,
      "learning_rate": 0.001,
      "loss": 2.5481,
      "step": 143076
    },
    {
      "epoch": 27.47,
      "learning_rate": 0.001,
      "loss": 2.5423,
      "step": 143088
    },
    {
      "epoch": 27.48,
      "learning_rate": 0.001,
      "loss": 2.5421,
      "step": 143100
    },
    {
      "epoch": 27.48,
      "learning_rate": 0.001,
      "loss": 2.5367,
      "step": 143112
    },
    {
      "epoch": 27.48,
      "learning_rate": 0.001,
      "loss": 2.5417,
      "step": 143124
    },
    {
      "epoch": 27.48,
      "eval_ag_news_accuracy": 0.324125,
      "eval_ag_news_bleu_score": 5.039687093948509,
      "eval_ag_news_bleu_score_sem": 0.1665901840628385,
      "eval_ag_news_emb_cos_sim": 0.8166662454605103,
      "eval_ag_news_emb_cos_sim_sem": 0.0068301160965115355,
      "eval_ag_news_emb_top1_equal": 0.1953125,
      "eval_ag_news_emb_top1_equal_sem": 0.035178457165496856,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.525768518447876,
      "eval_ag_news_n_ngrams_match_1": 14.082,
      "eval_ag_news_n_ngrams_match_2": 3.12,
      "eval_ag_news_n_ngrams_match_3": 0.928,
      "eval_ag_news_num_pred_words": 46.436,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.9798777471149,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3510902997573453,
      "eval_ag_news_runtime": 10.6257,
      "eval_ag_news_samples_per_second": 47.056,
      "eval_ag_news_steps_per_second": 0.094,
      "eval_ag_news_token_set_f1": 0.35099022534293467,
      "eval_ag_news_token_set_f1_sem": 0.0044793457169557665,
      "eval_ag_news_token_set_precision": 0.33560578149662795,
      "eval_ag_news_token_set_recall": 0.3897147862700805,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 143125
    },
    {
      "epoch": 27.48,
      "eval_anthropic_toxic_prompts_accuracy": 0.115375,
      "eval_anthropic_toxic_prompts_bleu_score": 3.236108825922928,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11807013059932496,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6814109086990356,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008242399038194893,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2264301776885986,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.362,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.006,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.764,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.144,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.189573973296206,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2188053723440092,
      "eval_anthropic_toxic_prompts_runtime": 10.2191,
      "eval_anthropic_toxic_prompts_samples_per_second": 48.928,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.098,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36225645245563226,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006332418695622373,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44929653816519577,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3296031274263888,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 143125
    },
    {
      "epoch": 27.48,
      "eval_arxiv_accuracy": 0.351125,
      "eval_arxiv_bleu_score": 4.4092280661335534,
      "eval_arxiv_bleu_score_sem": 0.12279747690110586,
      "eval_arxiv_emb_cos_sim": 0.7718652486801147,
      "eval_arxiv_emb_cos_sim_sem": 0.006698258363856188,
      "eval_arxiv_emb_top1_equal": 0.296875,
      "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3722763061523438,
      "eval_arxiv_n_ngrams_match_1": 15.386,
      "eval_arxiv_n_ngrams_match_2": 3.036,
      "eval_arxiv_n_ngrams_match_3": 0.666,
      "eval_arxiv_num_pred_words": 40.212,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.144794081242484,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36884531866145703,
      "eval_arxiv_runtime": 10.7393,
      "eval_arxiv_samples_per_second": 46.558,
      "eval_arxiv_steps_per_second": 0.093,
      "eval_arxiv_token_set_f1": 0.36054283884661736,
      "eval_arxiv_token_set_f1_sem": 0.004120284513604176,
      "eval_arxiv_token_set_precision": 0.3101980172639564,
      "eval_arxiv_token_set_recall": 0.4501447148878183,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 143125
    },
    {
      "epoch": 27.48,
      "eval_python_code_alpaca_accuracy": 0.1613125,
      "eval_python_code_alpaca_bleu_score": 4.506491867995508,
      "eval_python_code_alpaca_bleu_score_sem": 0.15751610811373762,
      "eval_python_code_alpaca_emb_cos_sim": 0.764640748500824,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008484346973338728,
      "eval_python_code_alpaca_emb_top1_equal": 0.1484375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8803963661193848,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.812,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.838,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.882,
      "eval_python_code_alpaca_num_pred_words": 43.264,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.821335553492464,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33476666746035166,
      "eval_python_code_alpaca_runtime": 10.3,
      "eval_python_code_alpaca_samples_per_second": 48.544,
      "eval_python_code_alpaca_steps_per_second": 0.097,
      "eval_python_code_alpaca_token_set_f1": 0.4777867146158203,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005835641091555561,
      "eval_python_code_alpaca_token_set_precision": 0.5386932988730029,
      "eval_python_code_alpaca_token_set_recall": 0.45061103365992106,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 143125
    },
    {
      "epoch": 27.48,
      "eval_wikibio_accuracy": 0.325125,
      "eval_wikibio_bleu_score": 5.578659342138037,
      "eval_wikibio_bleu_score_sem": 0.20152464322979105,
      "eval_wikibio_emb_cos_sim": 0.738686203956604,
      "eval_wikibio_emb_cos_sim_sem": 0.009736943408548401,
      "eval_wikibio_emb_top1_equal": 0.1875,
      "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.676901340484619,
      "eval_wikibio_n_ngrams_match_1": 9.53,
      "eval_wikibio_n_ngrams_match_2": 3.15,
      "eval_wikibio_n_ngrams_match_3": 1.124,
      "eval_wikibio_num_pred_words": 35.248,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 39.523733536094035,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3384667298775853,
      "eval_wikibio_runtime": 10.3067,
      "eval_wikibio_samples_per_second": 48.512,
      "eval_wikibio_steps_per_second": 0.097,
      "eval_wikibio_token_set_f1": 0.30206023185653375,
      "eval_wikibio_token_set_f1_sem": 0.005679007063202068,
      "eval_wikibio_token_set_precision": 0.3088813719711778,
      "eval_wikibio_token_set_recall": 0.31404185933962747,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 143125
    },
    {
      "epoch": 27.48,
      "eval_nq_accuracy": 0.52903125,
      "eval_nq_bleu_score": 11.79704300367307,
      "eval_nq_bleu_score_sem": 0.48651102349765474,
      "eval_nq_emb_cos_sim": 0.827208936214447,
      "eval_nq_emb_cos_sim_sem": 0.00765775441926447,
      "eval_nq_emb_top1_equal": 0.2734375,
      "eval_nq_emb_top1_equal_sem": 0.03955156411760461,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.168199300765991,
      "eval_nq_n_ngrams_match_1": 23.006,
      "eval_nq_n_ngrams_match_2": 8.494,
      "eval_nq_n_ngrams_match_3": 3.974,
      "eval_nq_num_pred_words": 49.126,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.742527196303948,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4446213040074076,
      "eval_nq_runtime": 11.7566,
      "eval_nq_samples_per_second": 42.529,
      "eval_nq_steps_per_second": 0.085,
      "eval_nq_token_set_f1": 0.45899188065685237,
      "eval_nq_token_set_f1_sem": 0.004989788793268598,
      "eval_nq_token_set_precision": 0.41753366194454644,
      "eval_nq_token_set_recall": 0.5174536315242882,
      "eval_nq_true_num_tokens": 64.0,
      "step": 143125
    },
    {
      "epoch": 27.48,
      "learning_rate": 0.001,
      "loss": 2.5418,
      "step": 143136
    },
    {
      "epoch": 27.49,
      "learning_rate": 0.001,
      "loss": 2.5372,
      "step": 143148
    },
    {
      "epoch": 27.49,
      "learning_rate": 0.001,
      "loss": 2.5391,
      "step": 143160
    },
    {
      "epoch": 27.49,
      "learning_rate": 0.001,
      "loss": 2.5489,
      "step": 143172
    },
    {
      "epoch": 27.49,
      "learning_rate": 0.001,
      "loss": 2.5509,
      "step": 143184
    },
    {
      "epoch": 27.5,
      "learning_rate": 0.001,
      "loss": 2.5443,
      "step": 143196
    },
    {
      "epoch": 27.5,
      "learning_rate": 0.001,
      "loss": 2.5378,
      "step": 143208
    },
    {
      "epoch": 27.5,
      "learning_rate": 0.001,
      "loss": 2.5377,
      "step": 143220
    },
    {
      "epoch": 27.5,
      "learning_rate": 0.001,
      "loss": 2.536,
      "step": 143232
    },
    {
      "epoch": 27.5,
      "learning_rate": 0.001,
      "loss": 2.5349,
      "step": 143244
    },
    {
      "epoch": 27.51,
      "learning_rate": 0.001,
      "loss": 2.5462,
      "step": 143256
    },
    {
      "epoch": 27.51,
      "learning_rate": 0.001,
      "loss": 2.5391,
      "step": 143268
    },
    {
      "epoch": 27.51,
      "learning_rate": 0.001,
      "loss": 2.5503,
      "step": 143280
    },
    {
      "epoch": 27.51,
      "learning_rate": 0.001,
      "loss": 2.5478,
      "step": 143292
    },
    {
      "epoch": 27.52,
      "learning_rate": 0.001,
      "loss": 2.5254,
      "step": 143304
    },
    {
      "epoch": 27.52,
      "learning_rate": 0.001,
      "loss": 2.5476,
      "step": 143316
    },
    {
      "epoch": 27.52,
      "learning_rate": 0.001,
      "loss": 2.5332,
      "step": 143328
    },
    {
      "epoch": 27.52,
      "learning_rate": 0.001,
      "loss": 2.5392,
      "step": 143340
    },
    {
      "epoch": 27.53,
      "learning_rate": 0.001,
      "loss": 2.5393,
      "step": 143352
    },
    {
      "epoch": 27.53,
      "learning_rate": 0.001,
      "loss": 2.539,
      "step": 143364
    },
    {
      "epoch": 27.53,
      "learning_rate": 0.001,
      "loss": 2.5401,
      "step": 143376
    },
    {
      "epoch": 27.53,
      "learning_rate": 0.001,
      "loss": 2.5432,
      "step": 143388
    },
    {
      "epoch": 27.53,
      "learning_rate": 0.001,
      "loss": 2.5411,
      "step": 143400
    },
    {
      "epoch": 27.54,
      "learning_rate": 0.001,
      "loss": 2.5366,
      "step": 143412
    },
    {
      "epoch": 27.54,
      "learning_rate": 0.001,
      "loss": 2.5463,
      "step": 143424
    },
    {
      "epoch": 27.54,
      "learning_rate": 0.001,
      "loss": 2.5313,
      "step": 143436
    },
    {
      "epoch": 27.54,
      "learning_rate": 0.001,
      "loss": 2.5372,
      "step": 143448
    },
    {
      "epoch": 27.55,
      "learning_rate": 0.001,
      "loss": 2.5501,
      "step": 143460
    },
    {
      "epoch": 27.55,
      "learning_rate": 0.001,
      "loss": 2.5462,
      "step": 143472
    },
    {
      "epoch": 27.55,
      "learning_rate": 0.001,
      "loss": 2.5481,
      "step": 143484
    },
    {
      "epoch": 27.55,
      "learning_rate": 0.001,
      "loss": 2.5377,
      "step": 143496
    },
    {
      "epoch": 27.56,
      "learning_rate": 0.001,
      "loss": 2.5563,
      "step": 143508
    },
    {
      "epoch": 27.56,
      "learning_rate": 0.001,
      "loss": 2.5382,
      "step": 143520
    },
    {
      "epoch": 27.56,
      "learning_rate": 0.001,
      "loss": 2.5444,
      "step": 143532
    },
    {
      "epoch": 27.56,
      "learning_rate": 0.001,
      "loss": 2.5429,
      "step": 143544
    },
    {
      "epoch": 27.56,
      "learning_rate": 0.001,
      "loss": 2.535,
      "step": 143556
    },
    {
      "epoch": 27.57,
      "learning_rate": 0.001,
      "loss": 2.5466,
      "step": 143568
    },
    {
      "epoch": 27.57,
      "learning_rate": 0.001,
      "loss": 2.5409,
      "step": 143580
    },
    {
      "epoch": 27.57,
      "learning_rate": 0.001,
      "loss": 2.5429,
      "step": 143592
    },
    {
      "epoch": 27.57,
      "learning_rate": 0.001,
      "loss": 2.5399,
      "step": 143604
    },
    {
      "epoch": 27.58,
      "learning_rate": 0.001,
      "loss": 2.5443,
      "step": 143616
    },
    {
      "epoch": 27.58,
      "learning_rate": 0.001,
      "loss": 2.5405,
      "step": 143628
    },
    {
      "epoch": 27.58,
      "learning_rate": 0.001,
      "loss": 2.5458,
      "step": 143640
    },
    {
      "epoch": 27.58,
      "learning_rate": 0.001,
      "loss": 2.5409,
      "step": 143652
    },
    {
      "epoch": 27.59,
      "learning_rate": 0.001,
      "loss": 2.5409,
      "step": 143664
    },
    {
      "epoch": 27.59,
      "learning_rate": 0.001,
      "loss": 2.5388,
      "step": 143676
    },
    {
      "epoch": 27.59,
      "learning_rate": 0.001,
      "loss": 2.5496,
      "step": 143688
    },
    {
      "epoch": 27.59,
      "learning_rate": 0.001,
      "loss": 2.5429,
      "step": 143700
    },
    {
      "epoch": 27.59,
      "learning_rate": 0.001,
      "loss": 2.5368,
      "step": 143712
    },
    {
      "epoch": 27.6,
      "learning_rate": 0.001,
      "loss": 2.5428,
      "step": 143724
    },
    {
      "epoch": 27.6,
      "learning_rate": 0.001,
      "loss": 2.539,
      "step": 143736
    },
    {
      "epoch": 27.6,
      "learning_rate": 0.001,
      "loss": 2.5431,
      "step": 143748
    },
    {
      "epoch": 27.6,
      "eval_ag_news_accuracy": 0.325125,
      "eval_ag_news_bleu_score": 4.9060576397579005,
      "eval_ag_news_bleu_score_sem": 0.15229328835492612,
      "eval_ag_news_emb_cos_sim": 0.8132540583610535,
      "eval_ag_news_emb_cos_sim_sem": 0.008341442815106476,
      "eval_ag_news_emb_top1_equal": 0.25,
      "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5347824096679688,
      "eval_ag_news_n_ngrams_match_1": 14.248,
      "eval_ag_news_n_ngrams_match_2": 3.162,
      "eval_ag_news_n_ngrams_match_3": 0.884,
      "eval_ag_news_num_pred_words": 46.202,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 34.287553262389494,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3531805767359434,
      "eval_ag_news_runtime": 10.3162,
      "eval_ag_news_samples_per_second": 48.467,
      "eval_ag_news_steps_per_second": 0.097,
      "eval_ag_news_token_set_f1": 0.3534782397664274,
      "eval_ag_news_token_set_f1_sem": 0.004460881976462208,
      "eval_ag_news_token_set_precision": 0.34088337323922935,
      "eval_ag_news_token_set_recall": 0.38200145698709126,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 143750
    },
    {
      "epoch": 27.6,
      "eval_anthropic_toxic_prompts_accuracy": 0.11678125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1628702078780115,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12169665074305872,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6807167530059814,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00788797433526564,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2000010013580322,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.202,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.924,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.704,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.57,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.532554762967813,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2158291149747981,
      "eval_anthropic_toxic_prompts_runtime": 9.9051,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.479,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.101,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35263688369545504,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006466724667783063,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.43821858309757283,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3228368894232848,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 143750
    },
    {
      "epoch": 27.6,
      "eval_arxiv_accuracy": 0.34753125,
      "eval_arxiv_bleu_score": 4.35764468534656,
      "eval_arxiv_bleu_score_sem": 0.12481431895670242,
      "eval_arxiv_emb_cos_sim": 0.7613619565963745,
      "eval_arxiv_emb_cos_sim_sem": 0.00844892583988786,
      "eval_arxiv_emb_top1_equal": 0.2265625,
      "eval_arxiv_emb_top1_equal_sem": 0.03714537682851538,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3958537578582764,
      "eval_arxiv_n_ngrams_match_1": 15.274,
      "eval_arxiv_n_ngrams_match_2": 3.006,
      "eval_arxiv_n_ngrams_match_3": 0.658,
      "eval_arxiv_num_pred_words": 40.622,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.840118838714492,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36452885880810904,
      "eval_arxiv_runtime": 10.4698,
      "eval_arxiv_samples_per_second": 47.756,
      "eval_arxiv_steps_per_second": 0.096,
      "eval_arxiv_token_set_f1": 0.35962154370328314,
      "eval_arxiv_token_set_f1_sem": 0.0041119309525474105,
      "eval_arxiv_token_set_precision": 0.3111581583150548,
      "eval_arxiv_token_set_recall": 0.44568854702110317,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 143750
    },
    {
      "epoch": 27.6,
      "eval_python_code_alpaca_accuracy": 0.1625,
      "eval_python_code_alpaca_bleu_score": 4.567988418321479,
      "eval_python_code_alpaca_bleu_score_sem": 0.13992635271135734,
      "eval_python_code_alpaca_emb_cos_sim": 0.7611854672431946,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007823779416627485,
      "eval_python_code_alpaca_emb_top1_equal": 0.125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.891040563583374,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.022,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.864,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.938,
      "eval_python_code_alpaca_num_pred_words": 43.834,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.012042528976817,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3414438237031079,
      "eval_python_code_alpaca_runtime": 10.2062,
      "eval_python_code_alpaca_samples_per_second": 48.99,
      "eval_python_code_alpaca_steps_per_second": 0.098,
      "eval_python_code_alpaca_token_set_f1": 0.4802944043537252,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005362698658037156,
      "eval_python_code_alpaca_token_set_precision": 0.5526999807207397,
      "eval_python_code_alpaca_token_set_recall": 0.44429027037711927,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 143750
    },
    {
      "epoch": 27.6,
      "eval_wikibio_accuracy": 0.32371875,
      "eval_wikibio_bleu_score": 6.038606194427294,
      "eval_wikibio_bleu_score_sem": 0.20993551884146786,
      "eval_wikibio_emb_cos_sim": 0.7467552423477173,
      "eval_wikibio_emb_cos_sim_sem": 0.009053170060464865,
      "eval_wikibio_emb_top1_equal": 0.1953125,
      "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7513339519500732,
      "eval_wikibio_n_ngrams_match_1": 10.056,
      "eval_wikibio_n_ngrams_match_2": 3.352,
      "eval_wikibio_n_ngrams_match_3": 1.204,
      "eval_wikibio_num_pred_words": 35.754,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 42.57784092874125,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.35741638556115785,
      "eval_wikibio_runtime": 11.0308,
      "eval_wikibio_samples_per_second": 45.328,
      "eval_wikibio_steps_per_second": 0.091,
      "eval_wikibio_token_set_f1": 0.32068604088086433,
      "eval_wikibio_token_set_f1_sem": 0.005247016802490674,
      "eval_wikibio_token_set_precision": 0.3285627299052317,
      "eval_wikibio_token_set_recall": 0.3292274279664372,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 143750
    },
    {
      "epoch": 27.6,
      "eval_nq_accuracy": 0.5283125,
      "eval_nq_bleu_score": 11.54522568293295,
      "eval_nq_bleu_score_sem": 0.4623561804624445,
      "eval_nq_emb_cos_sim": 0.8352663516998291,
      "eval_nq_emb_cos_sim_sem": 0.007021329104215874,
      "eval_nq_emb_top1_equal": 0.2890625,
      "eval_nq_emb_top1_equal_sem": 0.04022626667363519,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1706504821777344,
      "eval_nq_n_ngrams_match_1": 22.98,
      "eval_nq_n_ngrams_match_2": 8.36,
      "eval_nq_n_ngrams_match_3": 3.832,
      "eval_nq_num_pred_words": 48.572,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.763983001752207,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4483094144303781,
      "eval_nq_runtime": 10.2471,
      "eval_nq_samples_per_second": 48.794,
      "eval_nq_steps_per_second": 0.098,
      "eval_nq_token_set_f1": 0.4625401152356082,
      "eval_nq_token_set_f1_sem": 0.0049427924543661165,
      "eval_nq_token_set_precision": 0.42163948046897626,
      "eval_nq_token_set_recall": 0.5201009496547064,
      "eval_nq_true_num_tokens": 64.0,
      "step": 143750
    },
    {
      "epoch": 27.6,
      "learning_rate": 0.001,
      "loss": 2.535,
      "step": 143760
    },
    {
      "epoch": 27.61,
      "learning_rate": 0.001,
      "loss": 2.5446,
      "step": 143772
    },
    {
      "epoch": 27.61,
      "learning_rate": 0.001,
      "loss": 2.5501,
      "step": 143784
    },
    {
      "epoch": 27.61,
      "learning_rate": 0.001,
      "loss": 2.5471,
      "step": 143796
    },
    {
      "epoch": 27.61,
      "learning_rate": 0.001,
      "loss": 2.5464,
      "step": 143808
    },
    {
      "epoch": 27.62,
      "learning_rate": 0.001,
      "loss": 2.5411,
      "step": 143820
    },
    {
      "epoch": 27.62,
      "learning_rate": 0.001,
      "loss": 2.5473,
      "step": 143832
    },
    {
      "epoch": 27.62,
      "learning_rate": 0.001,
      "loss": 2.5378,
      "step": 143844
    },
    {
      "epoch": 27.62,
      "learning_rate": 0.001,
      "loss": 2.5439,
      "step": 143856
    },
    {
      "epoch": 27.62,
      "learning_rate": 0.001,
      "loss": 2.5431,
      "step": 143868
    },
    {
      "epoch": 27.63,
      "learning_rate": 0.001,
      "loss": 2.5472,
      "step": 143880
    },
    {
      "epoch": 27.63,
      "learning_rate": 0.001,
      "loss": 2.549,
      "step": 143892
    },
    {
      "epoch": 27.63,
      "learning_rate": 0.001,
      "loss": 2.5568,
      "step": 143904
    },
    {
      "epoch": 27.63,
      "learning_rate": 0.001,
      "loss": 2.5526,
      "step": 143916
    },
    {
      "epoch": 27.64,
      "learning_rate": 0.001,
      "loss": 2.5417,
      "step": 143928
    },
    {
      "epoch": 27.64,
      "learning_rate": 0.001,
      "loss": 2.5536,
      "step": 143940
    },
    {
      "epoch": 27.64,
      "learning_rate": 0.001,
      "loss": 2.5366,
      "step": 143952
    },
    {
      "epoch": 27.64,
      "learning_rate": 0.001,
      "loss": 2.5537,
      "step": 143964
    },
    {
      "epoch": 27.65,
      "learning_rate": 0.001,
      "loss": 2.5499,
      "step": 143976
    },
    {
      "epoch": 27.65,
      "learning_rate": 0.001,
      "loss": 2.5472,
      "step": 143988
    },
    {
      "epoch": 27.65,
      "learning_rate": 0.001,
      "loss": 2.5448,
      "step": 144000
    },
    {
      "epoch": 27.65,
      "learning_rate": 0.001,
      "loss": 2.5532,
      "step": 144012
    },
    {
      "epoch": 27.65,
      "learning_rate": 0.001,
      "loss": 2.552,
      "step": 144024
    },
    {
      "epoch": 27.66,
      "learning_rate": 0.001,
      "loss": 2.5466,
      "step": 144036
    },
    {
      "epoch": 27.66,
      "learning_rate": 0.001,
      "loss": 2.5372,
      "step": 144048
    },
    {
      "epoch": 27.66,
      "learning_rate": 0.001,
      "loss": 2.557,
      "step": 144060
    },
    {
      "epoch": 27.66,
      "learning_rate": 0.001,
      "loss": 2.5376,
      "step": 144072
    },
    {
      "epoch": 27.67,
      "learning_rate": 0.001,
      "loss": 2.5435,
      "step": 144084
    },
    {
      "epoch": 27.67,
      "learning_rate": 0.001,
      "loss": 2.5445,
      "step": 144096
    },
    {
      "epoch": 27.67,
      "learning_rate": 0.001,
      "loss": 2.549,
      "step": 144108
    },
    {
      "epoch": 27.67,
      "learning_rate": 0.001,
      "loss": 2.5519,
      "step": 144120
    },
    {
      "epoch": 27.68,
      "learning_rate": 0.001,
      "loss": 2.5433,
      "step": 144132
    },
    {
      "epoch": 27.68,
      "learning_rate": 0.001,
      "loss": 2.5425,
      "step": 144144
    },
    {
      "epoch": 27.68,
      "learning_rate": 0.001,
      "loss": 2.5447,
      "step": 144156
    },
    {
      "epoch": 27.68,
      "learning_rate": 0.001,
      "loss": 2.5457,
      "step": 144168
    },
    {
      "epoch": 27.68,
      "learning_rate": 0.001,
      "loss": 2.5481,
      "step": 144180
    },
    {
      "epoch": 27.69,
      "learning_rate": 0.001,
      "loss": 2.5447,
      "step": 144192
    },
    {
      "epoch": 27.69,
      "learning_rate": 0.001,
      "loss": 2.5478,
      "step": 144204
    },
    {
      "epoch": 27.69,
      "learning_rate": 0.001,
      "loss": 2.5503,
      "step": 144216
    },
    {
      "epoch": 27.69,
      "learning_rate": 0.001,
      "loss": 2.5407,
      "step": 144228
    },
    {
      "epoch": 27.7,
      "learning_rate": 0.001,
      "loss": 2.5409,
      "step": 144240
    },
    {
      "epoch": 27.7,
      "learning_rate": 0.001,
      "loss": 2.5457,
      "step": 144252
    },
    {
      "epoch": 27.7,
      "learning_rate": 0.001,
      "loss": 2.5453,
      "step": 144264
    },
    {
      "epoch": 27.7,
      "learning_rate": 0.001,
      "loss": 2.5446,
      "step": 144276
    },
    {
      "epoch": 27.71,
      "learning_rate": 0.001,
      "loss": 2.5502,
      "step": 144288
    },
    {
      "epoch": 27.71,
      "learning_rate": 0.001,
      "loss": 2.5403,
      "step": 144300
    },
    {
      "epoch": 27.71,
      "learning_rate": 0.001,
      "loss": 2.55,
      "step": 144312
    },
    {
      "epoch": 27.71,
      "learning_rate": 0.001,
      "loss": 2.5395,
      "step": 144324
    },
    {
      "epoch": 27.71,
      "learning_rate": 0.001,
      "loss": 2.5391,
      "step": 144336
    },
    {
      "epoch": 27.72,
      "learning_rate": 0.001,
      "loss": 2.5477,
      "step": 144348
    },
    {
      "epoch": 27.72,
      "learning_rate": 0.001,
      "loss": 2.5414,
      "step": 144360
    },
    {
      "epoch": 27.72,
      "learning_rate": 0.001,
      "loss": 2.5524,
      "step": 144372
    },
    {
      "epoch": 27.72,
      "eval_ag_news_accuracy": 0.3240625,
      "eval_ag_news_bleu_score": 4.970414400051082,
      "eval_ag_news_bleu_score_sem": 0.15894019644248084,
      "eval_ag_news_emb_cos_sim": 0.8165663480758667,
      "eval_ag_news_emb_cos_sim_sem": 0.006890783192220428,
      "eval_ag_news_emb_top1_equal": 0.2109375,
      "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5300419330596924,
      "eval_ag_news_n_ngrams_match_1": 14.202,
      "eval_ag_news_n_ngrams_match_2": 3.2,
      "eval_ag_news_n_ngrams_match_3": 0.94,
      "eval_ag_news_num_pred_words": 46.626,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 34.12539856712719,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3527152335552278,
      "eval_ag_news_runtime": 11.2613,
      "eval_ag_news_samples_per_second": 44.4,
      "eval_ag_news_steps_per_second": 0.089,
      "eval_ag_news_token_set_f1": 0.3547670915677933,
      "eval_ag_news_token_set_f1_sem": 0.004412122266916088,
      "eval_ag_news_token_set_precision": 0.3410934263404353,
      "eval_ag_news_token_set_recall": 0.38717581638007587,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 144375
    },
    {
      "epoch": 27.72,
      "eval_anthropic_toxic_prompts_accuracy": 0.11428125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1798131297428873,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12461979207105056,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6775497198104858,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009008732135610698,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.219355583190918,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.148,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.942,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.744,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.154,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.011996835628693,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21389804969470883,
      "eval_anthropic_toxic_prompts_runtime": 9.8543,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.739,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.101,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35475397955300003,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006658995972486581,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.43114369295925986,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3278570045263657,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 144375
    },
    {
      "epoch": 27.72,
      "eval_arxiv_accuracy": 0.34809375,
      "eval_arxiv_bleu_score": 4.291331107247671,
      "eval_arxiv_bleu_score_sem": 0.12796598973892936,
      "eval_arxiv_emb_cos_sim": 0.7707942128181458,
      "eval_arxiv_emb_cos_sim_sem": 0.007130301098129807,
      "eval_arxiv_emb_top1_equal": 0.265625,
      "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.397416830062866,
      "eval_arxiv_n_ngrams_match_1": 15.254,
      "eval_arxiv_n_ngrams_match_2": 2.892,
      "eval_arxiv_n_ngrams_match_3": 0.628,
      "eval_arxiv_num_pred_words": 41.08,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.886797570663404,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36530406305732177,
      "eval_arxiv_runtime": 11.3173,
      "eval_arxiv_samples_per_second": 44.18,
      "eval_arxiv_steps_per_second": 0.088,
      "eval_arxiv_token_set_f1": 0.35487308078448215,
      "eval_arxiv_token_set_f1_sem": 0.004195730407037573,
      "eval_arxiv_token_set_precision": 0.3079749247150917,
      "eval_arxiv_token_set_recall": 0.43428864442882037,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 144375
    },
    {
      "epoch": 27.72,
      "eval_python_code_alpaca_accuracy": 0.1631875,
      "eval_python_code_alpaca_bleu_score": 4.609435844335552,
      "eval_python_code_alpaca_bleu_score_sem": 0.1432049999141692,
      "eval_python_code_alpaca_emb_cos_sim": 0.7601396441459656,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008817652153896974,
      "eval_python_code_alpaca_emb_top1_equal": 0.15625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8603568077087402,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.976,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.898,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.008,
      "eval_python_code_alpaca_num_pred_words": 44.25,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.46775845565825,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33343501326855096,
      "eval_python_code_alpaca_runtime": 10.0348,
      "eval_python_code_alpaca_samples_per_second": 49.827,
      "eval_python_code_alpaca_steps_per_second": 0.1,
      "eval_python_code_alpaca_token_set_f1": 0.478446577186757,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005448696921159396,
      "eval_python_code_alpaca_token_set_precision": 0.5468609620776019,
      "eval_python_code_alpaca_token_set_recall": 0.44585232783812784,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 144375
    },
    {
      "epoch": 27.72,
      "eval_wikibio_accuracy": 0.324375,
      "eval_wikibio_bleu_score": 6.127609120138676,
      "eval_wikibio_bleu_score_sem": 0.2135397458310362,
      "eval_wikibio_emb_cos_sim": 0.7458769679069519,
      "eval_wikibio_emb_cos_sim_sem": 0.0093512154509767,
      "eval_wikibio_emb_top1_equal": 0.1953125,
      "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7127797603607178,
      "eval_wikibio_n_ngrams_match_1": 10.226,
      "eval_wikibio_n_ngrams_match_2": 3.474,
      "eval_wikibio_n_ngrams_match_3": 1.292,
      "eval_wikibio_num_pred_words": 36.576,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 40.9675283053763,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3624596865412507,
      "eval_wikibio_runtime": 10.1097,
      "eval_wikibio_samples_per_second": 49.457,
      "eval_wikibio_steps_per_second": 0.099,
      "eval_wikibio_token_set_f1": 0.3232188252718927,
      "eval_wikibio_token_set_f1_sem": 0.005172646829244083,
      "eval_wikibio_token_set_precision": 0.33374889848493866,
      "eval_wikibio_token_set_recall": 0.3280943778877188,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 144375
    },
    {
      "epoch": 27.72,
      "eval_nq_accuracy": 0.53040625,
      "eval_nq_bleu_score": 11.528264571578472,
      "eval_nq_bleu_score_sem": 0.4655984014245001,
      "eval_nq_emb_cos_sim": 0.8405819535255432,
      "eval_nq_emb_cos_sim_sem": 0.006863966601310863,
      "eval_nq_emb_top1_equal": 0.2734375,
      "eval_nq_emb_top1_equal_sem": 0.03955156411760461,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.166982412338257,
      "eval_nq_n_ngrams_match_1": 23.074,
      "eval_nq_n_ngrams_match_2": 8.422,
      "eval_nq_n_ngrams_match_3": 3.838,
      "eval_nq_num_pred_words": 48.786,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.73189498654815,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.44792931955147647,
      "eval_nq_runtime": 10.5458,
      "eval_nq_samples_per_second": 47.412,
      "eval_nq_steps_per_second": 0.095,
      "eval_nq_token_set_f1": 0.4588423175001224,
      "eval_nq_token_set_f1_sem": 0.004830897197932589,
      "eval_nq_token_set_precision": 0.4166175122565744,
      "eval_nq_token_set_recall": 0.5182190175123669,
      "eval_nq_true_num_tokens": 64.0,
      "step": 144375
    },
    {
      "epoch": 27.72,
      "learning_rate": 0.001,
      "loss": 2.5558,
      "step": 144384
    },
    {
      "epoch": 27.73,
      "learning_rate": 0.001,
      "loss": 2.5497,
      "step": 144396
    },
    {
      "epoch": 27.73,
      "learning_rate": 0.001,
      "loss": 2.5448,
      "step": 144408
    },
    {
      "epoch": 27.73,
      "learning_rate": 0.001,
      "loss": 2.5426,
      "step": 144420
    },
    {
      "epoch": 27.73,
      "learning_rate": 0.001,
      "loss": 2.5486,
      "step": 144432
    },
    {
      "epoch": 27.74,
      "learning_rate": 0.001,
      "loss": 2.548,
      "step": 144444
    },
    {
      "epoch": 27.74,
      "learning_rate": 0.001,
      "loss": 2.5431,
      "step": 144456
    },
    {
      "epoch": 27.74,
      "learning_rate": 0.001,
      "loss": 2.5446,
      "step": 144468
    },
    {
      "epoch": 27.74,
      "learning_rate": 0.001,
      "loss": 2.5458,
      "step": 144480
    },
    {
      "epoch": 27.74,
      "learning_rate": 0.001,
      "loss": 2.5628,
      "step": 144492
    },
    {
      "epoch": 27.75,
      "learning_rate": 0.001,
      "loss": 2.5385,
      "step": 144504
    },
    {
      "epoch": 27.75,
      "learning_rate": 0.001,
      "loss": 2.5497,
      "step": 144516
    },
    {
      "epoch": 27.75,
      "learning_rate": 0.001,
      "loss": 2.5527,
      "step": 144528
    },
    {
      "epoch": 27.75,
      "learning_rate": 0.001,
      "loss": 2.55,
      "step": 144540
    },
    {
      "epoch": 27.76,
      "learning_rate": 0.001,
      "loss": 2.5433,
      "step": 144552
    },
    {
      "epoch": 27.76,
      "learning_rate": 0.001,
      "loss": 2.534,
      "step": 144564
    },
    {
      "epoch": 27.76,
      "learning_rate": 0.001,
      "loss": 2.5473,
      "step": 144576
    },
    {
      "epoch": 27.76,
      "learning_rate": 0.001,
      "loss": 2.5568,
      "step": 144588
    },
    {
      "epoch": 27.76,
      "learning_rate": 0.001,
      "loss": 2.5396,
      "step": 144600
    },
    {
      "epoch": 27.77,
      "learning_rate": 0.001,
      "loss": 2.5632,
      "step": 144612
    },
    {
      "epoch": 27.77,
      "learning_rate": 0.001,
      "loss": 2.5476,
      "step": 144624
    },
    {
      "epoch": 27.77,
      "learning_rate": 0.001,
      "loss": 2.5569,
      "step": 144636
    },
    {
      "epoch": 27.77,
      "learning_rate": 0.001,
      "loss": 2.5536,
      "step": 144648
    },
    {
      "epoch": 27.78,
      "learning_rate": 0.001,
      "loss": 2.5563,
      "step": 144660
    },
    {
      "epoch": 27.78,
      "learning_rate": 0.001,
      "loss": 2.5506,
      "step": 144672
    },
    {
      "epoch": 27.78,
      "learning_rate": 0.001,
      "loss": 2.5571,
      "step": 144684
    },
    {
      "epoch": 27.78,
      "learning_rate": 0.001,
      "loss": 2.5578,
      "step": 144696
    },
    {
      "epoch": 27.79,
      "learning_rate": 0.001,
      "loss": 2.5462,
      "step": 144708
    },
    {
      "epoch": 27.79,
      "learning_rate": 0.001,
      "loss": 2.5524,
      "step": 144720
    },
    {
      "epoch": 27.79,
      "learning_rate": 0.001,
      "loss": 2.5441,
      "step": 144732
    },
    {
      "epoch": 27.79,
      "learning_rate": 0.001,
      "loss": 2.5481,
      "step": 144744
    },
    {
      "epoch": 27.79,
      "learning_rate": 0.001,
      "loss": 2.5484,
      "step": 144756
    },
    {
      "epoch": 27.8,
      "learning_rate": 0.001,
      "loss": 2.5462,
      "step": 144768
    },
    {
      "epoch": 27.8,
      "learning_rate": 0.001,
      "loss": 2.5529,
      "step": 144780
    },
    {
      "epoch": 27.8,
      "learning_rate": 0.001,
      "loss": 2.5551,
      "step": 144792
    },
    {
      "epoch": 27.8,
      "learning_rate": 0.001,
      "loss": 2.5428,
      "step": 144804
    },
    {
      "epoch": 27.81,
      "learning_rate": 0.001,
      "loss": 2.5544,
      "step": 144816
    },
    {
      "epoch": 27.81,
      "learning_rate": 0.001,
      "loss": 2.5495,
      "step": 144828
    },
    {
      "epoch": 27.81,
      "learning_rate": 0.001,
      "loss": 2.5511,
      "step": 144840
    },
    {
      "epoch": 27.81,
      "learning_rate": 0.001,
      "loss": 2.55,
      "step": 144852
    },
    {
      "epoch": 27.82,
      "learning_rate": 0.001,
      "loss": 2.5478,
      "step": 144864
    },
    {
      "epoch": 27.82,
      "learning_rate": 0.001,
      "loss": 2.5523,
      "step": 144876
    },
    {
      "epoch": 27.82,
      "learning_rate": 0.001,
      "loss": 2.5635,
      "step": 144888
    },
    {
      "epoch": 27.82,
      "learning_rate": 0.001,
      "loss": 2.5531,
      "step": 144900
    },
    {
      "epoch": 27.82,
      "learning_rate": 0.001,
      "loss": 2.5481,
      "step": 144912
    },
    {
      "epoch": 27.83,
      "learning_rate": 0.001,
      "loss": 2.5564,
      "step": 144924
    },
    {
      "epoch": 27.83,
      "learning_rate": 0.001,
      "loss": 2.5549,
      "step": 144936
    },
    {
      "epoch": 27.83,
      "learning_rate": 0.001,
      "loss": 2.5576,
      "step": 144948
    },
    {
      "epoch": 27.83,
      "learning_rate": 0.001,
      "loss": 2.5456,
      "step": 144960
    },
    {
      "epoch": 27.84,
      "learning_rate": 0.001,
      "loss": 2.5504,
      "step": 144972
    },
    {
      "epoch": 27.84,
      "learning_rate": 0.001,
      "loss": 2.5446,
      "step": 144984
    },
    {
      "epoch": 27.84,
      "learning_rate": 0.001,
      "loss": 2.5603,
      "step": 144996
    },
    {
      "epoch": 27.84,
      "eval_ag_news_accuracy": 0.32534375,
      "eval_ag_news_bleu_score": 4.8675052779507695,
      "eval_ag_news_bleu_score_sem": 0.15423374050962127,
      "eval_ag_news_emb_cos_sim": 0.8129054307937622,
      "eval_ag_news_emb_cos_sim_sem": 0.0077852795876018,
      "eval_ag_news_emb_top1_equal": 0.2109375,
      "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5247530937194824,
      "eval_ag_news_n_ngrams_match_1": 14.156,
      "eval_ag_news_n_ngrams_match_2": 3.094,
      "eval_ag_news_n_ngrams_match_3": 0.882,
      "eval_ag_news_num_pred_words": 46.428,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.94539125116629,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35092431823047665,
      "eval_ag_news_runtime": 9.9119,
      "eval_ag_news_samples_per_second": 50.445,
      "eval_ag_news_steps_per_second": 0.101,
      "eval_ag_news_token_set_f1": 0.35503893435791334,
      "eval_ag_news_token_set_f1_sem": 0.004356091461463944,
      "eval_ag_news_token_set_precision": 0.3387076756136274,
      "eval_ag_news_token_set_recall": 0.39054112356255016,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 145000
    },
    {
      "epoch": 27.84,
      "eval_anthropic_toxic_prompts_accuracy": 0.11528125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.035062237310198,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11474366489905395,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6745525598526001,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008941784500929176,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2177090644836426,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.108,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.866,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.696,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.24,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.970848000392305,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21239538917101328,
      "eval_anthropic_toxic_prompts_runtime": 9.97,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.151,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.1,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3601126237014914,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006721048003872662,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4323960264713501,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3362558132085867,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 145000
    },
    {
      "epoch": 27.84,
      "eval_arxiv_accuracy": 0.35090625,
      "eval_arxiv_bleu_score": 4.385563568867093,
      "eval_arxiv_bleu_score_sem": 0.12942509155446114,
      "eval_arxiv_emb_cos_sim": 0.7661959528923035,
      "eval_arxiv_emb_cos_sim_sem": 0.007445631964887172,
      "eval_arxiv_emb_top1_equal": 0.2578125,
      "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.37373948097229,
      "eval_arxiv_n_ngrams_match_1": 15.196,
      "eval_arxiv_n_ngrams_match_2": 2.962,
      "eval_arxiv_n_ngrams_match_3": 0.664,
      "eval_arxiv_num_pred_words": 40.242,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.187469223057615,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36519591969325826,
      "eval_arxiv_runtime": 10.3585,
      "eval_arxiv_samples_per_second": 48.27,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.359672278920851,
      "eval_arxiv_token_set_f1_sem": 0.004138312262634471,
      "eval_arxiv_token_set_precision": 0.30804023179031814,
      "eval_arxiv_token_set_recall": 0.4478689257163039,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 145000
    },
    {
      "epoch": 27.84,
      "eval_python_code_alpaca_accuracy": 0.16115625,
      "eval_python_code_alpaca_bleu_score": 4.675873061055396,
      "eval_python_code_alpaca_bleu_score_sem": 0.14521065862146887,
      "eval_python_code_alpaca_emb_cos_sim": 0.7455906867980957,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010485603147003477,
      "eval_python_code_alpaca_emb_top1_equal": 0.109375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8650741577148438,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.81,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.906,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.992,
      "eval_python_code_alpaca_num_pred_words": 43.32,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.550354650525446,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3352814709123943,
      "eval_python_code_alpaca_runtime": 9.9699,
      "eval_python_code_alpaca_samples_per_second": 50.151,
      "eval_python_code_alpaca_steps_per_second": 0.1,
      "eval_python_code_alpaca_token_set_f1": 0.4718053567366586,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005538700436273628,
      "eval_python_code_alpaca_token_set_precision": 0.5345477910229891,
      "eval_python_code_alpaca_token_set_recall": 0.4471907717852849,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 145000
    },
    {
      "epoch": 27.84,
      "eval_wikibio_accuracy": 0.3265,
      "eval_wikibio_bleu_score": 5.9709247254810505,
      "eval_wikibio_bleu_score_sem": 0.22313585042195538,
      "eval_wikibio_emb_cos_sim": 0.735073447227478,
      "eval_wikibio_emb_cos_sim_sem": 0.00943841991472377,
      "eval_wikibio_emb_top1_equal": 0.15625,
      "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6820902824401855,
      "eval_wikibio_n_ngrams_match_1": 9.778,
      "eval_wikibio_n_ngrams_match_2": 3.342,
      "eval_wikibio_n_ngrams_match_3": 1.254,
      "eval_wikibio_num_pred_words": 35.844,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 39.72935290741116,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.34520965329401354,
      "eval_wikibio_runtime": 9.779,
      "eval_wikibio_samples_per_second": 51.13,
      "eval_wikibio_steps_per_second": 0.102,
      "eval_wikibio_token_set_f1": 0.31398602630878913,
      "eval_wikibio_token_set_f1_sem": 0.005793615057936962,
      "eval_wikibio_token_set_precision": 0.31938491935469054,
      "eval_wikibio_token_set_recall": 0.3278386988221626,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 145000
    },
    {
      "epoch": 27.84,
      "eval_nq_accuracy": 0.52971875,
      "eval_nq_bleu_score": 12.093362890213317,
      "eval_nq_bleu_score_sem": 0.5002878423445387,
      "eval_nq_emb_cos_sim": 0.8364195227622986,
      "eval_nq_emb_cos_sim_sem": 0.006576766121756488,
      "eval_nq_emb_top1_equal": 0.2734375,
      "eval_nq_emb_top1_equal_sem": 0.03955156411760461,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1689302921295166,
      "eval_nq_n_ngrams_match_1": 23.426,
      "eval_nq_n_ngrams_match_2": 8.724,
      "eval_nq_n_ngrams_match_3": 4.03,
      "eval_nq_num_pred_words": 49.194,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.748920244526674,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4542114617646773,
      "eval_nq_runtime": 10.3945,
      "eval_nq_samples_per_second": 48.102,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.4674915179947641,
      "eval_nq_token_set_f1_sem": 0.005029040902590511,
      "eval_nq_token_set_precision": 0.42419503837681494,
      "eval_nq_token_set_recall": 0.5281744958888587,
      "eval_nq_true_num_tokens": 64.0,
      "step": 145000
    },
    {
      "epoch": 27.84,
      "learning_rate": 0.001,
      "loss": 2.5498,
      "step": 145008
    },
    {
      "epoch": 27.85,
      "learning_rate": 0.001,
      "loss": 2.5562,
      "step": 145020
    },
    {
      "epoch": 27.85,
      "learning_rate": 0.001,
      "loss": 2.5473,
      "step": 145032
    },
    {
      "epoch": 27.85,
      "learning_rate": 0.001,
      "loss": 2.5466,
      "step": 145044
    },
    {
      "epoch": 27.85,
      "learning_rate": 0.001,
      "loss": 2.5402,
      "step": 145056
    },
    {
      "epoch": 27.85,
      "learning_rate": 0.001,
      "loss": 2.5547,
      "step": 145068
    },
    {
      "epoch": 27.86,
      "learning_rate": 0.001,
      "loss": 2.5569,
      "step": 145080
    },
    {
      "epoch": 27.86,
      "learning_rate": 0.001,
      "loss": 2.5506,
      "step": 145092
    },
    {
      "epoch": 27.86,
      "learning_rate": 0.001,
      "loss": 2.5449,
      "step": 145104
    },
    {
      "epoch": 27.86,
      "learning_rate": 0.001,
      "loss": 2.5555,
      "step": 145116
    },
    {
      "epoch": 27.87,
      "learning_rate": 0.001,
      "loss": 2.5586,
      "step": 145128
    },
    {
      "epoch": 27.87,
      "learning_rate": 0.001,
      "loss": 2.5503,
      "step": 145140
    },
    {
      "epoch": 27.87,
      "learning_rate": 0.001,
      "loss": 2.5577,
      "step": 145152
    },
    {
      "epoch": 27.87,
      "learning_rate": 0.001,
      "loss": 2.5683,
      "step": 145164
    },
    {
      "epoch": 27.88,
      "learning_rate": 0.001,
      "loss": 2.5496,
      "step": 145176
    },
    {
      "epoch": 27.88,
      "learning_rate": 0.001,
      "loss": 2.5467,
      "step": 145188
    },
    {
      "epoch": 27.88,
      "learning_rate": 0.001,
      "loss": 2.5582,
      "step": 145200
    },
    {
      "epoch": 27.88,
      "learning_rate": 0.001,
      "loss": 2.5515,
      "step": 145212
    },
    {
      "epoch": 27.88,
      "learning_rate": 0.001,
      "loss": 2.5403,
      "step": 145224
    },
    {
      "epoch": 27.89,
      "learning_rate": 0.001,
      "loss": 2.5563,
      "step": 145236
    },
    {
      "epoch": 27.89,
      "learning_rate": 0.001,
      "loss": 2.5419,
      "step": 145248
    },
    {
      "epoch": 27.89,
      "learning_rate": 0.001,
      "loss": 2.558,
      "step": 145260
    },
    {
      "epoch": 27.89,
      "learning_rate": 0.001,
      "loss": 2.5546,
      "step": 145272
    },
    {
      "epoch": 27.9,
      "learning_rate": 0.001,
      "loss": 2.5451,
      "step": 145284
    },
    {
      "epoch": 27.9,
      "learning_rate": 0.001,
      "loss": 2.5605,
      "step": 145296
    },
    {
      "epoch": 27.9,
      "learning_rate": 0.001,
      "loss": 2.5536,
      "step": 145308
    },
    {
      "epoch": 27.9,
      "learning_rate": 0.001,
      "loss": 2.5607,
      "step": 145320
    },
    {
      "epoch": 27.91,
      "learning_rate": 0.001,
      "loss": 2.5505,
      "step": 145332
    },
    {
      "epoch": 27.91,
      "learning_rate": 0.001,
      "loss": 2.5539,
      "step": 145344
    },
    {
      "epoch": 27.91,
      "learning_rate": 0.001,
      "loss": 2.549,
      "step": 145356
    },
    {
      "epoch": 27.91,
      "learning_rate": 0.001,
      "loss": 2.5487,
      "step": 145368
    },
    {
      "epoch": 27.91,
      "learning_rate": 0.001,
      "loss": 2.5566,
      "step": 145380
    },
    {
      "epoch": 27.92,
      "learning_rate": 0.001,
      "loss": 2.5503,
      "step": 145392
    },
    {
      "epoch": 27.92,
      "learning_rate": 0.001,
      "loss": 2.544,
      "step": 145404
    },
    {
      "epoch": 27.92,
      "learning_rate": 0.001,
      "loss": 2.5478,
      "step": 145416
    },
    {
      "epoch": 27.92,
      "learning_rate": 0.001,
      "loss": 2.5549,
      "step": 145428
    },
    {
      "epoch": 27.93,
      "learning_rate": 0.001,
      "loss": 2.5468,
      "step": 145440
    },
    {
      "epoch": 27.93,
      "learning_rate": 0.001,
      "loss": 2.5576,
      "step": 145452
    },
    {
      "epoch": 27.93,
      "learning_rate": 0.001,
      "loss": 2.5525,
      "step": 145464
    },
    {
      "epoch": 27.93,
      "learning_rate": 0.001,
      "loss": 2.5562,
      "step": 145476
    },
    {
      "epoch": 27.94,
      "learning_rate": 0.001,
      "loss": 2.5503,
      "step": 145488
    },
    {
      "epoch": 27.94,
      "learning_rate": 0.001,
      "loss": 2.5526,
      "step": 145500
    },
    {
      "epoch": 27.94,
      "learning_rate": 0.001,
      "loss": 2.5619,
      "step": 145512
    },
    {
      "epoch": 27.94,
      "learning_rate": 0.001,
      "loss": 2.5492,
      "step": 145524
    },
    {
      "epoch": 27.94,
      "learning_rate": 0.001,
      "loss": 2.5502,
      "step": 145536
    },
    {
      "epoch": 27.95,
      "learning_rate": 0.001,
      "loss": 2.5514,
      "step": 145548
    },
    {
      "epoch": 27.95,
      "learning_rate": 0.001,
      "loss": 2.5534,
      "step": 145560
    },
    {
      "epoch": 27.95,
      "learning_rate": 0.001,
      "loss": 2.5483,
      "step": 145572
    },
    {
      "epoch": 27.95,
      "learning_rate": 0.001,
      "loss": 2.544,
      "step": 145584
    },
    {
      "epoch": 27.96,
      "learning_rate": 0.001,
      "loss": 2.5498,
      "step": 145596
    },
    {
      "epoch": 27.96,
      "learning_rate": 0.001,
      "loss": 2.5485,
      "step": 145608
    },
    {
      "epoch": 27.96,
      "learning_rate": 0.001,
      "loss": 2.5429,
      "step": 145620
    },
    {
      "epoch": 27.96,
      "eval_ag_news_accuracy": 0.323875,
      "eval_ag_news_bleu_score": 4.910617404866825,
      "eval_ag_news_bleu_score_sem": 0.1573348701568156,
      "eval_ag_news_emb_cos_sim": 0.8168294429779053,
      "eval_ag_news_emb_cos_sim_sem": 0.006366818181344549,
      "eval_ag_news_emb_top1_equal": 0.2109375,
      "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.532271146774292,
      "eval_ag_news_n_ngrams_match_1": 14.134,
      "eval_ag_news_n_ngrams_match_2": 3.24,
      "eval_ag_news_n_ngrams_match_3": 0.926,
      "eval_ag_news_num_pred_words": 46.722,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 34.20155622794208,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35125415506204893,
      "eval_ag_news_runtime": 10.3222,
      "eval_ag_news_samples_per_second": 48.439,
      "eval_ag_news_steps_per_second": 0.097,
      "eval_ag_news_token_set_f1": 0.3545952564019915,
      "eval_ag_news_token_set_f1_sem": 0.004309820244551049,
      "eval_ag_news_token_set_precision": 0.34017337087885013,
      "eval_ag_news_token_set_recall": 0.3882196094332846,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 145625
    },
    {
      "epoch": 27.96,
      "eval_anthropic_toxic_prompts_accuracy": 0.11434375,
      "eval_anthropic_toxic_prompts_bleu_score": 3.075192452458553,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11800515664346292,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6746152639389038,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009187364967579043,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2376859188079834,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.146,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.904,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.7,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.318,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.474702955568805,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21287619423670326,
      "eval_anthropic_toxic_prompts_runtime": 10.041,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.796,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.1,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3576538157638634,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0065782782685566655,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.43505866930394155,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32891197715308995,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 145625
    },
    {
      "epoch": 27.96,
      "eval_arxiv_accuracy": 0.34896875,
      "eval_arxiv_bleu_score": 4.506671232722476,
      "eval_arxiv_bleu_score_sem": 0.12858575161834154,
      "eval_arxiv_emb_cos_sim": 0.7645954489707947,
      "eval_arxiv_emb_cos_sim_sem": 0.006850017970087281,
      "eval_arxiv_emb_top1_equal": 0.28125,
      "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3764231204986572,
      "eval_arxiv_n_ngrams_match_1": 15.198,
      "eval_arxiv_n_ngrams_match_2": 3.082,
      "eval_arxiv_n_ngrams_match_3": 0.696,
      "eval_arxiv_num_pred_words": 40.662,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.265903066147143,
      "eval_arxiv_pred_num_tokens": 62.9140625,
      "eval_arxiv_rouge_score": 0.3612463677716442,
      "eval_arxiv_runtime": 10.4782,
      "eval_arxiv_samples_per_second": 47.718,
      "eval_arxiv_steps_per_second": 0.095,
      "eval_arxiv_token_set_f1": 0.3600539066541921,
      "eval_arxiv_token_set_f1_sem": 0.004119097251032281,
      "eval_arxiv_token_set_precision": 0.308028916206553,
      "eval_arxiv_token_set_recall": 0.4500104806797574,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 145625
    },
    {
      "epoch": 27.96,
      "eval_python_code_alpaca_accuracy": 0.16025,
      "eval_python_code_alpaca_bleu_score": 4.658713891463431,
      "eval_python_code_alpaca_bleu_score_sem": 0.14818277100256289,
      "eval_python_code_alpaca_emb_cos_sim": 0.7642186284065247,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007257160477969565,
      "eval_python_code_alpaca_emb_top1_equal": 0.109375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8961870670318604,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.036,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.868,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.938,
      "eval_python_code_alpaca_num_pred_words": 43.378,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.104980515667588,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3390717072900312,
      "eval_python_code_alpaca_runtime": 10.0471,
      "eval_python_code_alpaca_samples_per_second": 49.766,
      "eval_python_code_alpaca_steps_per_second": 0.1,
      "eval_python_code_alpaca_token_set_f1": 0.4754388621366275,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005108182121670698,
      "eval_python_code_alpaca_token_set_precision": 0.5448649434657044,
      "eval_python_code_alpaca_token_set_recall": 0.4427471484073749,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 145625
    },
    {
      "epoch": 27.96,
      "eval_wikibio_accuracy": 0.32403125,
      "eval_wikibio_bleu_score": 5.8016650883714735,
      "eval_wikibio_bleu_score_sem": 0.20469397383022217,
      "eval_wikibio_emb_cos_sim": 0.7484632134437561,
      "eval_wikibio_emb_cos_sim_sem": 0.00851143488458558,
      "eval_wikibio_emb_top1_equal": 0.171875,
      "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7147724628448486,
      "eval_wikibio_n_ngrams_match_1": 10.152,
      "eval_wikibio_n_ngrams_match_2": 3.31,
      "eval_wikibio_n_ngrams_match_3": 1.172,
      "eval_wikibio_num_pred_words": 36.812,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 41.049245793078754,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3549690485532082,
      "eval_wikibio_runtime": 9.9814,
      "eval_wikibio_samples_per_second": 50.093,
      "eval_wikibio_steps_per_second": 0.1,
      "eval_wikibio_token_set_f1": 0.32202872024201085,
      "eval_wikibio_token_set_f1_sem": 0.005184770073454325,
      "eval_wikibio_token_set_precision": 0.32994717393404416,
      "eval_wikibio_token_set_recall": 0.32988277249828724,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 145625
    },
    {
      "epoch": 27.96,
      "eval_nq_accuracy": 0.53121875,
      "eval_nq_bleu_score": 11.808683773069896,
      "eval_nq_bleu_score_sem": 0.4838169537347454,
      "eval_nq_emb_cos_sim": 0.8334431648254395,
      "eval_nq_emb_cos_sim_sem": 0.007487980328017509,
      "eval_nq_emb_top1_equal": 0.296875,
      "eval_nq_emb_top1_equal_sem": 0.04054163310179599,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1664879322052,
      "eval_nq_n_ngrams_match_1": 23.118,
      "eval_nq_n_ngrams_match_2": 8.606,
      "eval_nq_n_ngrams_match_3": 3.964,
      "eval_nq_num_pred_words": 49.156,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.72757830529788,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4468582117136011,
      "eval_nq_runtime": 10.9956,
      "eval_nq_samples_per_second": 45.473,
      "eval_nq_steps_per_second": 0.091,
      "eval_nq_token_set_f1": 0.46168992517648466,
      "eval_nq_token_set_f1_sem": 0.005032645703840807,
      "eval_nq_token_set_precision": 0.4185919917686395,
      "eval_nq_token_set_recall": 0.525047861840401,
      "eval_nq_true_num_tokens": 64.0,
      "step": 145625
    },
    {
      "epoch": 27.96,
      "learning_rate": 0.001,
      "loss": 2.5481,
      "step": 145632
    },
    {
      "epoch": 27.97,
      "learning_rate": 0.001,
      "loss": 2.552,
      "step": 145644
    },
    {
      "epoch": 27.97,
      "learning_rate": 0.001,
      "loss": 2.5541,
      "step": 145656
    },
    {
      "epoch": 27.97,
      "learning_rate": 0.001,
      "loss": 2.547,
      "step": 145668
    },
    {
      "epoch": 27.97,
      "learning_rate": 0.001,
      "loss": 2.552,
      "step": 145680
    },
    {
      "epoch": 27.97,
      "learning_rate": 0.001,
      "loss": 2.5469,
      "step": 145692
    },
    {
      "epoch": 27.98,
      "learning_rate": 0.001,
      "loss": 2.5358,
      "step": 145704
    },
    {
      "epoch": 27.98,
      "learning_rate": 0.001,
      "loss": 2.5487,
      "step": 145716
    },
    {
      "epoch": 27.98,
      "learning_rate": 0.001,
      "loss": 2.5577,
      "step": 145728
    },
    {
      "epoch": 27.98,
      "learning_rate": 0.001,
      "loss": 2.5438,
      "step": 145740
    },
    {
      "epoch": 27.99,
      "learning_rate": 0.001,
      "loss": 2.5437,
      "step": 145752
    },
    {
      "epoch": 27.99,
      "learning_rate": 0.001,
      "loss": 2.5538,
      "step": 145764
    },
    {
      "epoch": 27.99,
      "learning_rate": 0.001,
      "loss": 2.5525,
      "step": 145776
    },
    {
      "epoch": 27.99,
      "learning_rate": 0.001,
      "loss": 2.5567,
      "step": 145788
    },
    {
      "epoch": 28.0,
      "learning_rate": 0.001,
      "loss": 2.5466,
      "step": 145800
    },
    {
      "epoch": 28.0,
      "learning_rate": 0.001,
      "loss": 2.5444,
      "step": 145812
    },
    {
      "epoch": 28.0,
      "learning_rate": 0.001,
      "loss": 2.542,
      "step": 145824
    },
    {
      "epoch": 28.0,
      "learning_rate": 0.001,
      "loss": 2.5376,
      "step": 145836
    },
    {
      "epoch": 28.0,
      "learning_rate": 0.001,
      "loss": 2.543,
      "step": 145848
    },
    {
      "epoch": 28.01,
      "learning_rate": 0.001,
      "loss": 2.5368,
      "step": 145860
    },
    {
      "epoch": 28.01,
      "learning_rate": 0.001,
      "loss": 2.5326,
      "step": 145872
    },
    {
      "epoch": 28.01,
      "learning_rate": 0.001,
      "loss": 2.5253,
      "step": 145884
    },
    {
      "epoch": 28.01,
      "learning_rate": 0.001,
      "loss": 2.5445,
      "step": 145896
    },
    {
      "epoch": 28.02,
      "learning_rate": 0.001,
      "loss": 2.5289,
      "step": 145908
    },
    {
      "epoch": 28.02,
      "learning_rate": 0.001,
      "loss": 2.5287,
      "step": 145920
    },
    {
      "epoch": 28.02,
      "learning_rate": 0.001,
      "loss": 2.5278,
      "step": 145932
    },
    {
      "epoch": 28.02,
      "learning_rate": 0.001,
      "loss": 2.5389,
      "step": 145944
    },
    {
      "epoch": 28.03,
      "learning_rate": 0.001,
      "loss": 2.5286,
      "step": 145956
    },
    {
      "epoch": 28.03,
      "learning_rate": 0.001,
      "loss": 2.5295,
      "step": 145968
    },
    {
      "epoch": 28.03,
      "learning_rate": 0.001,
      "loss": 2.5298,
      "step": 145980
    },
    {
      "epoch": 28.03,
      "learning_rate": 0.001,
      "loss": 2.5454,
      "step": 145992
    },
    {
      "epoch": 28.03,
      "learning_rate": 0.001,
      "loss": 2.5301,
      "step": 146004
    },
    {
      "epoch": 28.04,
      "learning_rate": 0.001,
      "loss": 2.5394,
      "step": 146016
    },
    {
      "epoch": 28.04,
      "learning_rate": 0.001,
      "loss": 2.5387,
      "step": 146028
    },
    {
      "epoch": 28.04,
      "learning_rate": 0.001,
      "loss": 2.5298,
      "step": 146040
    },
    {
      "epoch": 28.04,
      "learning_rate": 0.001,
      "loss": 2.5337,
      "step": 146052
    },
    {
      "epoch": 28.05,
      "learning_rate": 0.001,
      "loss": 2.5358,
      "step": 146064
    },
    {
      "epoch": 28.05,
      "learning_rate": 0.001,
      "loss": 2.5263,
      "step": 146076
    },
    {
      "epoch": 28.05,
      "learning_rate": 0.001,
      "loss": 2.5469,
      "step": 146088
    },
    {
      "epoch": 28.05,
      "learning_rate": 0.001,
      "loss": 2.5361,
      "step": 146100
    },
    {
      "epoch": 28.06,
      "learning_rate": 0.001,
      "loss": 2.5337,
      "step": 146112
    },
    {
      "epoch": 28.06,
      "learning_rate": 0.001,
      "loss": 2.5439,
      "step": 146124
    },
    {
      "epoch": 28.06,
      "learning_rate": 0.001,
      "loss": 2.5221,
      "step": 146136
    },
    {
      "epoch": 28.06,
      "learning_rate": 0.001,
      "loss": 2.531,
      "step": 146148
    },
    {
      "epoch": 28.06,
      "learning_rate": 0.001,
      "loss": 2.5382,
      "step": 146160
    },
    {
      "epoch": 28.07,
      "learning_rate": 0.001,
      "loss": 2.5395,
      "step": 146172
    },
    {
      "epoch": 28.07,
      "learning_rate": 0.001,
      "loss": 2.5294,
      "step": 146184
    },
    {
      "epoch": 28.07,
      "learning_rate": 0.001,
      "loss": 2.5306,
      "step": 146196
    },
    {
      "epoch": 28.07,
      "learning_rate": 0.001,
      "loss": 2.5396,
      "step": 146208
    },
    {
      "epoch": 28.08,
      "learning_rate": 0.001,
      "loss": 2.5389,
      "step": 146220
    },
    {
      "epoch": 28.08,
      "learning_rate": 0.001,
      "loss": 2.5365,
      "step": 146232
    },
    {
      "epoch": 28.08,
      "learning_rate": 0.001,
      "loss": 2.5433,
      "step": 146244
    },
    {
      "epoch": 28.08,
      "eval_ag_news_accuracy": 0.32346875,
      "eval_ag_news_bleu_score": 4.88933358549054,
      "eval_ag_news_bleu_score_sem": 0.1495385200295116,
      "eval_ag_news_emb_cos_sim": 0.8185994625091553,
      "eval_ag_news_emb_cos_sim_sem": 0.005962279558446829,
      "eval_ag_news_emb_top1_equal": 0.2734375,
      "eval_ag_news_emb_top1_equal_sem": 0.03955156411760461,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.529163122177124,
      "eval_ag_news_n_ngrams_match_1": 14.17,
      "eval_ag_news_n_ngrams_match_2": 3.152,
      "eval_ag_news_n_ngrams_match_3": 0.91,
      "eval_ag_news_num_pred_words": 46.824,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 34.09542196930388,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35328557903614377,
      "eval_ag_news_runtime": 10.2614,
      "eval_ag_news_samples_per_second": 48.726,
      "eval_ag_news_steps_per_second": 0.097,
      "eval_ag_news_token_set_f1": 0.35381324579829265,
      "eval_ag_news_token_set_f1_sem": 0.004506894948881247,
      "eval_ag_news_token_set_precision": 0.338694606243144,
      "eval_ag_news_token_set_recall": 0.38574194389262617,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 146250
    },
    {
      "epoch": 28.08,
      "eval_anthropic_toxic_prompts_accuracy": 0.11525,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1025764388193147,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11673357009430338,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.688366174697876,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00839087316587566,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1328125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030114394778901498,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.250960111618042,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.202,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.9,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.696,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.728,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.815113412938306,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2157249287222241,
      "eval_anthropic_toxic_prompts_runtime": 10.0509,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.747,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.099,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3592458009222363,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00643331194361511,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44069401049663676,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3308920603658305,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 146250
    },
    {
      "epoch": 28.08,
      "eval_arxiv_accuracy": 0.349375,
      "eval_arxiv_bleu_score": 4.388583997874758,
      "eval_arxiv_bleu_score_sem": 0.123463011787757,
      "eval_arxiv_emb_cos_sim": 0.7765886187553406,
      "eval_arxiv_emb_cos_sim_sem": 0.006319269950779181,
      "eval_arxiv_emb_top1_equal": 0.2578125,
      "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3801686763763428,
      "eval_arxiv_n_ngrams_match_1": 15.38,
      "eval_arxiv_n_ngrams_match_2": 3.004,
      "eval_arxiv_n_ngrams_match_3": 0.626,
      "eval_arxiv_num_pred_words": 40.648,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.37572568637919,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3679883820724149,
      "eval_arxiv_runtime": 10.3047,
      "eval_arxiv_samples_per_second": 48.521,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.36123990977720516,
      "eval_arxiv_token_set_f1_sem": 0.004122756118564047,
      "eval_arxiv_token_set_precision": 0.31281050019102363,
      "eval_arxiv_token_set_recall": 0.440686418235979,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 146250
    },
    {
      "epoch": 28.08,
      "eval_python_code_alpaca_accuracy": 0.15978125,
      "eval_python_code_alpaca_bleu_score": 4.743050812663442,
      "eval_python_code_alpaca_bleu_score_sem": 0.14493420575119303,
      "eval_python_code_alpaca_emb_cos_sim": 0.7607376575469971,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008173871555238719,
      "eval_python_code_alpaca_emb_top1_equal": 0.109375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.891065835952759,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.95,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.03,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.046,
      "eval_python_code_alpaca_num_pred_words": 43.792,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.01249774172111,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33852505803455246,
      "eval_python_code_alpaca_runtime": 16.3432,
      "eval_python_code_alpaca_samples_per_second": 30.594,
      "eval_python_code_alpaca_steps_per_second": 0.061,
      "eval_python_code_alpaca_token_set_f1": 0.4800213088356204,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005468617484744028,
      "eval_python_code_alpaca_token_set_precision": 0.5458710736376982,
      "eval_python_code_alpaca_token_set_recall": 0.4488237114317898,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 146250
    },
    {
      "epoch": 28.08,
      "eval_wikibio_accuracy": 0.327625,
      "eval_wikibio_bleu_score": 5.935470071713013,
      "eval_wikibio_bleu_score_sem": 0.20353583005875175,
      "eval_wikibio_emb_cos_sim": 0.7535380721092224,
      "eval_wikibio_emb_cos_sim_sem": 0.008793624498978716,
      "eval_wikibio_emb_top1_equal": 0.1875,
      "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.669752836227417,
      "eval_wikibio_n_ngrams_match_1": 10.038,
      "eval_wikibio_n_ngrams_match_2": 3.384,
      "eval_wikibio_n_ngrams_match_3": 1.246,
      "eval_wikibio_num_pred_words": 35.97,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 39.24220541002032,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3543242286273901,
      "eval_wikibio_runtime": 10.1275,
      "eval_wikibio_samples_per_second": 49.371,
      "eval_wikibio_steps_per_second": 0.099,
      "eval_wikibio_token_set_f1": 0.3184323590068187,
      "eval_wikibio_token_set_f1_sem": 0.005480312902743177,
      "eval_wikibio_token_set_precision": 0.3255292548701312,
      "eval_wikibio_token_set_recall": 0.331237265513698,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 146250
    },
    {
      "epoch": 28.08,
      "eval_nq_accuracy": 0.531375,
      "eval_nq_bleu_score": 11.423716282248757,
      "eval_nq_bleu_score_sem": 0.4814025057720387,
      "eval_nq_emb_cos_sim": 0.8324013352394104,
      "eval_nq_emb_cos_sim_sem": 0.006954593520865921,
      "eval_nq_emb_top1_equal": 0.25,
      "eval_nq_emb_top1_equal_sem": 0.03842366440207048,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.164207696914673,
      "eval_nq_n_ngrams_match_1": 23.062,
      "eval_nq_n_ngrams_match_2": 8.38,
      "eval_nq_n_ngrams_match_3": 3.776,
      "eval_nq_num_pred_words": 48.936,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.70770004541319,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.44874031225443733,
      "eval_nq_runtime": 10.539,
      "eval_nq_samples_per_second": 47.443,
      "eval_nq_steps_per_second": 0.095,
      "eval_nq_token_set_f1": 0.4615451423733168,
      "eval_nq_token_set_f1_sem": 0.004997725983139397,
      "eval_nq_token_set_precision": 0.41974679645714585,
      "eval_nq_token_set_recall": 0.5226533822086087,
      "eval_nq_true_num_tokens": 64.0,
      "step": 146250
    },
    {
      "epoch": 28.08,
      "learning_rate": 0.001,
      "loss": 2.53,
      "step": 146256
    },
    {
      "epoch": 28.09,
      "learning_rate": 0.001,
      "loss": 2.5372,
      "step": 146268
    },
    {
      "epoch": 28.09,
      "learning_rate": 0.001,
      "loss": 2.5275,
      "step": 146280
    },
    {
      "epoch": 28.09,
      "learning_rate": 0.001,
      "loss": 2.5271,
      "step": 146292
    },
    {
      "epoch": 28.09,
      "learning_rate": 0.001,
      "loss": 2.5388,
      "step": 146304
    },
    {
      "epoch": 28.09,
      "learning_rate": 0.001,
      "loss": 2.5279,
      "step": 146316
    },
    {
      "epoch": 28.1,
      "learning_rate": 0.001,
      "loss": 2.5382,
      "step": 146328
    },
    {
      "epoch": 28.1,
      "learning_rate": 0.001,
      "loss": 2.5349,
      "step": 146340
    },
    {
      "epoch": 28.1,
      "learning_rate": 0.001,
      "loss": 2.5375,
      "step": 146352
    },
    {
      "epoch": 28.1,
      "learning_rate": 0.001,
      "loss": 2.5324,
      "step": 146364
    },
    {
      "epoch": 28.11,
      "learning_rate": 0.001,
      "loss": 2.5394,
      "step": 146376
    },
    {
      "epoch": 28.11,
      "learning_rate": 0.001,
      "loss": 2.549,
      "step": 146388
    },
    {
      "epoch": 28.11,
      "learning_rate": 0.001,
      "loss": 2.5452,
      "step": 146400
    },
    {
      "epoch": 28.11,
      "learning_rate": 0.001,
      "loss": 2.5375,
      "step": 146412
    },
    {
      "epoch": 28.12,
      "learning_rate": 0.001,
      "loss": 2.5402,
      "step": 146424
    },
    {
      "epoch": 28.12,
      "learning_rate": 0.001,
      "loss": 2.5434,
      "step": 146436
    },
    {
      "epoch": 28.12,
      "learning_rate": 0.001,
      "loss": 2.5311,
      "step": 146448
    },
    {
      "epoch": 28.12,
      "learning_rate": 0.001,
      "loss": 2.5248,
      "step": 146460
    },
    {
      "epoch": 28.12,
      "learning_rate": 0.001,
      "loss": 2.5379,
      "step": 146472
    },
    {
      "epoch": 28.13,
      "learning_rate": 0.001,
      "loss": 2.5291,
      "step": 146484
    },
    {
      "epoch": 28.13,
      "learning_rate": 0.001,
      "loss": 2.5298,
      "step": 146496
    },
    {
      "epoch": 28.13,
      "learning_rate": 0.001,
      "loss": 2.5357,
      "step": 146508
    },
    {
      "epoch": 28.13,
      "learning_rate": 0.001,
      "loss": 2.5321,
      "step": 146520
    },
    {
      "epoch": 28.14,
      "learning_rate": 0.001,
      "loss": 2.543,
      "step": 146532
    },
    {
      "epoch": 28.14,
      "learning_rate": 0.001,
      "loss": 2.5471,
      "step": 146544
    },
    {
      "epoch": 28.14,
      "learning_rate": 0.001,
      "loss": 2.5394,
      "step": 146556
    },
    {
      "epoch": 28.14,
      "learning_rate": 0.001,
      "loss": 2.5376,
      "step": 146568
    },
    {
      "epoch": 28.15,
      "learning_rate": 0.001,
      "loss": 2.547,
      "step": 146580
    },
    {
      "epoch": 28.15,
      "learning_rate": 0.001,
      "loss": 2.5304,
      "step": 146592
    },
    {
      "epoch": 28.15,
      "learning_rate": 0.001,
      "loss": 2.5442,
      "step": 146604
    },
    {
      "epoch": 28.15,
      "learning_rate": 0.001,
      "loss": 2.5446,
      "step": 146616
    },
    {
      "epoch": 28.15,
      "learning_rate": 0.001,
      "loss": 2.5458,
      "step": 146628
    },
    {
      "epoch": 28.16,
      "learning_rate": 0.001,
      "loss": 2.5415,
      "step": 146640
    },
    {
      "epoch": 28.16,
      "learning_rate": 0.001,
      "loss": 2.5301,
      "step": 146652
    },
    {
      "epoch": 28.16,
      "learning_rate": 0.001,
      "loss": 2.5321,
      "step": 146664
    },
    {
      "epoch": 28.16,
      "learning_rate": 0.001,
      "loss": 2.5389,
      "step": 146676
    },
    {
      "epoch": 28.17,
      "learning_rate": 0.001,
      "loss": 2.5379,
      "step": 146688
    },
    {
      "epoch": 28.17,
      "learning_rate": 0.001,
      "loss": 2.5372,
      "step": 146700
    },
    {
      "epoch": 28.17,
      "learning_rate": 0.001,
      "loss": 2.5258,
      "step": 146712
    },
    {
      "epoch": 28.17,
      "learning_rate": 0.001,
      "loss": 2.538,
      "step": 146724
    },
    {
      "epoch": 28.18,
      "learning_rate": 0.001,
      "loss": 2.5345,
      "step": 146736
    },
    {
      "epoch": 28.18,
      "learning_rate": 0.001,
      "loss": 2.54,
      "step": 146748
    },
    {
      "epoch": 28.18,
      "learning_rate": 0.001,
      "loss": 2.5353,
      "step": 146760
    },
    {
      "epoch": 28.18,
      "learning_rate": 0.001,
      "loss": 2.5389,
      "step": 146772
    },
    {
      "epoch": 28.18,
      "learning_rate": 0.001,
      "loss": 2.5454,
      "step": 146784
    },
    {
      "epoch": 28.19,
      "learning_rate": 0.001,
      "loss": 2.5266,
      "step": 146796
    },
    {
      "epoch": 28.19,
      "learning_rate": 0.001,
      "loss": 2.5461,
      "step": 146808
    },
    {
      "epoch": 28.19,
      "learning_rate": 0.001,
      "loss": 2.5432,
      "step": 146820
    },
    {
      "epoch": 28.19,
      "learning_rate": 0.001,
      "loss": 2.5368,
      "step": 146832
    },
    {
      "epoch": 28.2,
      "learning_rate": 0.001,
      "loss": 2.5422,
      "step": 146844
    },
    {
      "epoch": 28.2,
      "learning_rate": 0.001,
      "loss": 2.5406,
      "step": 146856
    },
    {
      "epoch": 28.2,
      "learning_rate": 0.001,
      "loss": 2.5384,
      "step": 146868
    },
    {
      "epoch": 28.2,
      "eval_ag_news_accuracy": 0.32515625,
      "eval_ag_news_bleu_score": 4.727902891925582,
      "eval_ag_news_bleu_score_sem": 0.13861072140099853,
      "eval_ag_news_emb_cos_sim": 0.8126072883605957,
      "eval_ag_news_emb_cos_sim_sem": 0.007029657058063824,
      "eval_ag_news_emb_top1_equal": 0.265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.525362730026245,
      "eval_ag_news_n_ngrams_match_1": 14.072,
      "eval_ag_news_n_ngrams_match_2": 3.024,
      "eval_ag_news_n_ngrams_match_3": 0.85,
      "eval_ag_news_num_pred_words": 46.598,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.966091903413734,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35073214171794964,
      "eval_ag_news_runtime": 10.7626,
      "eval_ag_news_samples_per_second": 46.457,
      "eval_ag_news_steps_per_second": 0.093,
      "eval_ag_news_token_set_f1": 0.35152040309285,
      "eval_ag_news_token_set_f1_sem": 0.0041023464680715,
      "eval_ag_news_token_set_precision": 0.33792657206851634,
      "eval_ag_news_token_set_recall": 0.38003136150867717,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 146875
    },
    {
      "epoch": 28.2,
      "eval_anthropic_toxic_prompts_accuracy": 0.11578125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.070931347439393,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1132417458825351,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6800554394721985,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008519421712378395,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1328125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030114394778901498,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2209484577178955,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.27,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.892,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.694,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.286,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.05186955594668,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21533283902667258,
      "eval_anthropic_toxic_prompts_runtime": 10.4803,
      "eval_anthropic_toxic_prompts_samples_per_second": 47.709,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.095,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.359983194227909,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006546728399224668,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44333651063025303,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32925044382815694,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 146875
    },
    {
      "epoch": 28.2,
      "eval_arxiv_accuracy": 0.35015625,
      "eval_arxiv_bleu_score": 4.2366391027743555,
      "eval_arxiv_bleu_score_sem": 0.11839382999027176,
      "eval_arxiv_emb_cos_sim": 0.7724995017051697,
      "eval_arxiv_emb_cos_sim_sem": 0.007262037000051178,
      "eval_arxiv_emb_top1_equal": 0.2578125,
      "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.378502130508423,
      "eval_arxiv_n_ngrams_match_1": 15.242,
      "eval_arxiv_n_ngrams_match_2": 2.908,
      "eval_arxiv_n_ngrams_match_3": 0.62,
      "eval_arxiv_num_pred_words": 40.18,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.32681046317229,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3657916958413393,
      "eval_arxiv_runtime": 11.1698,
      "eval_arxiv_samples_per_second": 44.764,
      "eval_arxiv_steps_per_second": 0.09,
      "eval_arxiv_token_set_f1": 0.3607956551808795,
      "eval_arxiv_token_set_f1_sem": 0.0042121987330928585,
      "eval_arxiv_token_set_precision": 0.31093707825153855,
      "eval_arxiv_token_set_recall": 0.4471758994531477,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 146875
    },
    {
      "epoch": 28.2,
      "eval_python_code_alpaca_accuracy": 0.161125,
      "eval_python_code_alpaca_bleu_score": 4.484428089151906,
      "eval_python_code_alpaca_bleu_score_sem": 0.139098361350417,
      "eval_python_code_alpaca_emb_cos_sim": 0.7598724365234375,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008038805393740479,
      "eval_python_code_alpaca_emb_top1_equal": 0.109375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.881675958633423,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.95,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.842,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.908,
      "eval_python_code_alpaca_num_pred_words": 43.848,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.844154197226203,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33786432646369235,
      "eval_python_code_alpaca_runtime": 9.8973,
      "eval_python_code_alpaca_samples_per_second": 50.519,
      "eval_python_code_alpaca_steps_per_second": 0.101,
      "eval_python_code_alpaca_token_set_f1": 0.47817807628207076,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005347730504199243,
      "eval_python_code_alpaca_token_set_precision": 0.5428783909201385,
      "eval_python_code_alpaca_token_set_recall": 0.4472716463987637,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 146875
    },
    {
      "epoch": 28.2,
      "eval_wikibio_accuracy": 0.32734375,
      "eval_wikibio_bleu_score": 6.081810368842,
      "eval_wikibio_bleu_score_sem": 0.2254834839098503,
      "eval_wikibio_emb_cos_sim": 0.7580106258392334,
      "eval_wikibio_emb_cos_sim_sem": 0.007873820896700075,
      "eval_wikibio_emb_top1_equal": 0.2109375,
      "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6655948162078857,
      "eval_wikibio_n_ngrams_match_1": 10.034,
      "eval_wikibio_n_ngrams_match_2": 3.394,
      "eval_wikibio_n_ngrams_match_3": 1.258,
      "eval_wikibio_num_pred_words": 35.724,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 39.079374296430586,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3571696117970976,
      "eval_wikibio_runtime": 9.9333,
      "eval_wikibio_samples_per_second": 50.336,
      "eval_wikibio_steps_per_second": 0.101,
      "eval_wikibio_token_set_f1": 0.3173829153408167,
      "eval_wikibio_token_set_f1_sem": 0.005628348895596099,
      "eval_wikibio_token_set_precision": 0.3238723275626845,
      "eval_wikibio_token_set_recall": 0.32692515325967575,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 146875
    },
    {
      "epoch": 28.2,
      "eval_nq_accuracy": 0.53075,
      "eval_nq_bleu_score": 11.848784726225416,
      "eval_nq_bleu_score_sem": 0.49204298155525433,
      "eval_nq_emb_cos_sim": 0.8347103595733643,
      "eval_nq_emb_cos_sim_sem": 0.007278560540300972,
      "eval_nq_emb_top1_equal": 0.2734375,
      "eval_nq_emb_top1_equal_sem": 0.03955156411760461,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1631009578704834,
      "eval_nq_n_ngrams_match_1": 23.256,
      "eval_nq_n_ngrams_match_2": 8.48,
      "eval_nq_n_ngrams_match_3": 3.942,
      "eval_nq_num_pred_words": 49.258,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.698068224726995,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4507502952551629,
      "eval_nq_runtime": 10.5148,
      "eval_nq_samples_per_second": 47.552,
      "eval_nq_steps_per_second": 0.095,
      "eval_nq_token_set_f1": 0.4648648082013177,
      "eval_nq_token_set_f1_sem": 0.004916229352266828,
      "eval_nq_token_set_precision": 0.42294593970219174,
      "eval_nq_token_set_recall": 0.5240538609663946,
      "eval_nq_true_num_tokens": 64.0,
      "step": 146875
    },
    {
      "epoch": 28.2,
      "learning_rate": 0.001,
      "loss": 2.5305,
      "step": 146880
    },
    {
      "epoch": 28.21,
      "learning_rate": 0.001,
      "loss": 2.5389,
      "step": 146892
    },
    {
      "epoch": 28.21,
      "learning_rate": 0.001,
      "loss": 2.5447,
      "step": 146904
    },
    {
      "epoch": 28.21,
      "learning_rate": 0.001,
      "loss": 2.5401,
      "step": 146916
    },
    {
      "epoch": 28.21,
      "learning_rate": 0.001,
      "loss": 2.5379,
      "step": 146928
    },
    {
      "epoch": 28.21,
      "learning_rate": 0.001,
      "loss": 2.537,
      "step": 146940
    },
    {
      "epoch": 28.22,
      "learning_rate": 0.001,
      "loss": 2.5296,
      "step": 146952
    },
    {
      "epoch": 28.22,
      "learning_rate": 0.001,
      "loss": 2.5388,
      "step": 146964
    },
    {
      "epoch": 28.22,
      "learning_rate": 0.001,
      "loss": 2.541,
      "step": 146976
    },
    {
      "epoch": 28.22,
      "learning_rate": 0.001,
      "loss": 2.5427,
      "step": 146988
    },
    {
      "epoch": 28.23,
      "learning_rate": 0.001,
      "loss": 2.5379,
      "step": 147000
    },
    {
      "epoch": 28.23,
      "learning_rate": 0.001,
      "loss": 2.535,
      "step": 147012
    },
    {
      "epoch": 28.23,
      "learning_rate": 0.001,
      "loss": 2.5426,
      "step": 147024
    },
    {
      "epoch": 28.23,
      "learning_rate": 0.001,
      "loss": 2.5405,
      "step": 147036
    },
    {
      "epoch": 28.24,
      "learning_rate": 0.001,
      "loss": 2.542,
      "step": 147048
    },
    {
      "epoch": 28.24,
      "learning_rate": 0.001,
      "loss": 2.5392,
      "step": 147060
    },
    {
      "epoch": 28.24,
      "learning_rate": 0.001,
      "loss": 2.5337,
      "step": 147072
    },
    {
      "epoch": 28.24,
      "learning_rate": 0.001,
      "loss": 2.5346,
      "step": 147084
    },
    {
      "epoch": 28.24,
      "learning_rate": 0.001,
      "loss": 2.5416,
      "step": 147096
    },
    {
      "epoch": 28.25,
      "learning_rate": 0.001,
      "loss": 2.5402,
      "step": 147108
    },
    {
      "epoch": 28.25,
      "learning_rate": 0.001,
      "loss": 2.5394,
      "step": 147120
    },
    {
      "epoch": 28.25,
      "learning_rate": 0.001,
      "loss": 2.5312,
      "step": 147132
    },
    {
      "epoch": 28.25,
      "learning_rate": 0.001,
      "loss": 2.5417,
      "step": 147144
    },
    {
      "epoch": 28.26,
      "learning_rate": 0.001,
      "loss": 2.5439,
      "step": 147156
    },
    {
      "epoch": 28.26,
      "learning_rate": 0.001,
      "loss": 2.5317,
      "step": 147168
    },
    {
      "epoch": 28.26,
      "learning_rate": 0.001,
      "loss": 2.5401,
      "step": 147180
    },
    {
      "epoch": 28.26,
      "learning_rate": 0.001,
      "loss": 2.5455,
      "step": 147192
    },
    {
      "epoch": 28.26,
      "learning_rate": 0.001,
      "loss": 2.5398,
      "step": 147204
    },
    {
      "epoch": 28.27,
      "learning_rate": 0.001,
      "loss": 2.5279,
      "step": 147216
    },
    {
      "epoch": 28.27,
      "learning_rate": 0.001,
      "loss": 2.5394,
      "step": 147228
    },
    {
      "epoch": 28.27,
      "learning_rate": 0.001,
      "loss": 2.5392,
      "step": 147240
    },
    {
      "epoch": 28.27,
      "learning_rate": 0.001,
      "loss": 2.5431,
      "step": 147252
    },
    {
      "epoch": 28.28,
      "learning_rate": 0.001,
      "loss": 2.5358,
      "step": 147264
    },
    {
      "epoch": 28.28,
      "learning_rate": 0.001,
      "loss": 2.5477,
      "step": 147276
    },
    {
      "epoch": 28.28,
      "learning_rate": 0.001,
      "loss": 2.5349,
      "step": 147288
    },
    {
      "epoch": 28.28,
      "learning_rate": 0.001,
      "loss": 2.5373,
      "step": 147300
    },
    {
      "epoch": 28.29,
      "learning_rate": 0.001,
      "loss": 2.548,
      "step": 147312
    },
    {
      "epoch": 28.29,
      "learning_rate": 0.001,
      "loss": 2.5362,
      "step": 147324
    },
    {
      "epoch": 28.29,
      "learning_rate": 0.001,
      "loss": 2.5404,
      "step": 147336
    },
    {
      "epoch": 28.29,
      "learning_rate": 0.001,
      "loss": 2.5391,
      "step": 147348
    },
    {
      "epoch": 28.29,
      "learning_rate": 0.001,
      "loss": 2.536,
      "step": 147360
    },
    {
      "epoch": 28.3,
      "learning_rate": 0.001,
      "loss": 2.5408,
      "step": 147372
    },
    {
      "epoch": 28.3,
      "learning_rate": 0.001,
      "loss": 2.5478,
      "step": 147384
    },
    {
      "epoch": 28.3,
      "learning_rate": 0.001,
      "loss": 2.5416,
      "step": 147396
    },
    {
      "epoch": 28.3,
      "learning_rate": 0.001,
      "loss": 2.5438,
      "step": 147408
    },
    {
      "epoch": 28.31,
      "learning_rate": 0.001,
      "loss": 2.5457,
      "step": 147420
    },
    {
      "epoch": 28.31,
      "learning_rate": 0.001,
      "loss": 2.5439,
      "step": 147432
    },
    {
      "epoch": 28.31,
      "learning_rate": 0.001,
      "loss": 2.5456,
      "step": 147444
    },
    {
      "epoch": 28.31,
      "learning_rate": 0.001,
      "loss": 2.5409,
      "step": 147456
    },
    {
      "epoch": 28.32,
      "learning_rate": 0.001,
      "loss": 2.5323,
      "step": 147468
    },
    {
      "epoch": 28.32,
      "learning_rate": 0.001,
      "loss": 2.5328,
      "step": 147480
    },
    {
      "epoch": 28.32,
      "learning_rate": 0.001,
      "loss": 2.5508,
      "step": 147492
    },
    {
      "epoch": 28.32,
      "eval_ag_news_accuracy": 0.32496875,
      "eval_ag_news_bleu_score": 5.058627120325173,
      "eval_ag_news_bleu_score_sem": 0.161816408985397,
      "eval_ag_news_emb_cos_sim": 0.8181792497634888,
      "eval_ag_news_emb_cos_sim_sem": 0.005908350690143616,
      "eval_ag_news_emb_top1_equal": 0.265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5404701232910156,
      "eval_ag_news_n_ngrams_match_1": 14.55,
      "eval_ag_news_n_ngrams_match_2": 3.222,
      "eval_ag_news_n_ngrams_match_3": 0.952,
      "eval_ag_news_num_pred_words": 47.02,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 34.483126701804245,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3596947141738803,
      "eval_ag_news_runtime": 10.327,
      "eval_ag_news_samples_per_second": 48.417,
      "eval_ag_news_steps_per_second": 0.097,
      "eval_ag_news_token_set_f1": 0.36122328818709387,
      "eval_ag_news_token_set_f1_sem": 0.004252778338673336,
      "eval_ag_news_token_set_precision": 0.3481848570783673,
      "eval_ag_news_token_set_recall": 0.3883082182604708,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 147500
    },
    {
      "epoch": 28.32,
      "eval_anthropic_toxic_prompts_accuracy": 0.11565625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2617294868236386,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12156621925491215,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6849709749221802,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008578819134275896,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2211756706237793,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.338,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.008,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.79,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.082,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.0575623107355,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.22034475016228383,
      "eval_anthropic_toxic_prompts_runtime": 10.3566,
      "eval_anthropic_toxic_prompts_samples_per_second": 48.278,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.097,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3636098196278902,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006576000253160794,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44755966639836103,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3314902285564594,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 147500
    },
    {
      "epoch": 28.32,
      "eval_arxiv_accuracy": 0.34671875,
      "eval_arxiv_bleu_score": 4.491195571076645,
      "eval_arxiv_bleu_score_sem": 0.13996277436818558,
      "eval_arxiv_emb_cos_sim": 0.7671834230422974,
      "eval_arxiv_emb_cos_sim_sem": 0.007037962621398501,
      "eval_arxiv_emb_top1_equal": 0.3125,
      "eval_arxiv_emb_top1_equal_sem": 0.041130074229814934,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.393876314163208,
      "eval_arxiv_n_ngrams_match_1": 15.268,
      "eval_arxiv_n_ngrams_match_2": 3.05,
      "eval_arxiv_n_ngrams_match_3": 0.72,
      "eval_arxiv_num_pred_words": 41.004,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.781169987083175,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.364461733862252,
      "eval_arxiv_runtime": 10.216,
      "eval_arxiv_samples_per_second": 48.943,
      "eval_arxiv_steps_per_second": 0.098,
      "eval_arxiv_token_set_f1": 0.35881319233874137,
      "eval_arxiv_token_set_f1_sem": 0.004121462831889142,
      "eval_arxiv_token_set_precision": 0.31067587081492953,
      "eval_arxiv_token_set_recall": 0.44142980851469676,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 147500
    },
    {
      "epoch": 28.32,
      "eval_python_code_alpaca_accuracy": 0.160375,
      "eval_python_code_alpaca_bleu_score": 4.6133436284216325,
      "eval_python_code_alpaca_bleu_score_sem": 0.14620112970310353,
      "eval_python_code_alpaca_emb_cos_sim": 0.7637086510658264,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007533973076455021,
      "eval_python_code_alpaca_emb_top1_equal": 0.1171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8811099529266357,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.016,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.938,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.972,
      "eval_python_code_alpaca_num_pred_words": 43.944,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.83405716187716,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33797355622952535,
      "eval_python_code_alpaca_runtime": 9.9459,
      "eval_python_code_alpaca_samples_per_second": 50.272,
      "eval_python_code_alpaca_steps_per_second": 0.101,
      "eval_python_code_alpaca_token_set_f1": 0.478446919358761,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005270713999190445,
      "eval_python_code_alpaca_token_set_precision": 0.5429764903453214,
      "eval_python_code_alpaca_token_set_recall": 0.44789353173329033,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 147500
    },
    {
      "epoch": 28.32,
      "eval_wikibio_accuracy": 0.31890625,
      "eval_wikibio_bleu_score": 6.062976786744554,
      "eval_wikibio_bleu_score_sem": 0.2172282879443045,
      "eval_wikibio_emb_cos_sim": 0.7468626499176025,
      "eval_wikibio_emb_cos_sim_sem": 0.009506253291768532,
      "eval_wikibio_emb_top1_equal": 0.171875,
      "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.749068260192871,
      "eval_wikibio_n_ngrams_match_1": 10.34,
      "eval_wikibio_n_ngrams_match_2": 3.472,
      "eval_wikibio_n_ngrams_match_3": 1.292,
      "eval_wikibio_num_pred_words": 36.772,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 42.48148186669631,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3615766848430457,
      "eval_wikibio_runtime": 9.8401,
      "eval_wikibio_samples_per_second": 50.813,
      "eval_wikibio_steps_per_second": 0.102,
      "eval_wikibio_token_set_f1": 0.3238601821085116,
      "eval_wikibio_token_set_f1_sem": 0.005327155933330291,
      "eval_wikibio_token_set_precision": 0.33480334333418027,
      "eval_wikibio_token_set_recall": 0.32762355793268383,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 147500
    },
    {
      "epoch": 28.32,
      "eval_nq_accuracy": 0.52946875,
      "eval_nq_bleu_score": 11.701758716992579,
      "eval_nq_bleu_score_sem": 0.4791316807894547,
      "eval_nq_emb_cos_sim": 0.8338937759399414,
      "eval_nq_emb_cos_sim_sem": 0.0072380205403895555,
      "eval_nq_emb_top1_equal": 0.2265625,
      "eval_nq_emb_top1_equal_sem": 0.03714537682851538,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1693382263183594,
      "eval_nq_n_ngrams_match_1": 23.416,
      "eval_nq_n_ngrams_match_2": 8.568,
      "eval_nq_n_ngrams_match_3": 3.916,
      "eval_nq_num_pred_words": 49.158,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.752489956264105,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4502296560816097,
      "eval_nq_runtime": 10.6182,
      "eval_nq_samples_per_second": 47.089,
      "eval_nq_steps_per_second": 0.094,
      "eval_nq_token_set_f1": 0.4661927852454617,
      "eval_nq_token_set_f1_sem": 0.004862939505677727,
      "eval_nq_token_set_precision": 0.42371133283314993,
      "eval_nq_token_set_recall": 0.5258215610804338,
      "eval_nq_true_num_tokens": 64.0,
      "step": 147500
    },
    {
      "epoch": 28.32,
      "learning_rate": 0.001,
      "loss": 2.5327,
      "step": 147504
    },
    {
      "epoch": 28.32,
      "learning_rate": 0.001,
      "loss": 2.5434,
      "step": 147516
    },
    {
      "epoch": 28.33,
      "learning_rate": 0.001,
      "loss": 2.5489,
      "step": 147528
    },
    {
      "epoch": 28.33,
      "learning_rate": 0.001,
      "loss": 2.5376,
      "step": 147540
    },
    {
      "epoch": 28.33,
      "learning_rate": 0.001,
      "loss": 2.5326,
      "step": 147552
    },
    {
      "epoch": 28.33,
      "learning_rate": 0.001,
      "loss": 2.5401,
      "step": 147564
    },
    {
      "epoch": 28.34,
      "learning_rate": 0.001,
      "loss": 2.5413,
      "step": 147576
    },
    {
      "epoch": 28.34,
      "learning_rate": 0.001,
      "loss": 2.5393,
      "step": 147588
    },
    {
      "epoch": 28.34,
      "learning_rate": 0.001,
      "loss": 2.5355,
      "step": 147600
    },
    {
      "epoch": 28.34,
      "learning_rate": 0.001,
      "loss": 2.5463,
      "step": 147612
    },
    {
      "epoch": 28.35,
      "learning_rate": 0.001,
      "loss": 2.5451,
      "step": 147624
    },
    {
      "epoch": 28.35,
      "learning_rate": 0.001,
      "loss": 2.5466,
      "step": 147636
    },
    {
      "epoch": 28.35,
      "learning_rate": 0.001,
      "loss": 2.5499,
      "step": 147648
    },
    {
      "epoch": 28.35,
      "learning_rate": 0.001,
      "loss": 2.5414,
      "step": 147660
    },
    {
      "epoch": 28.35,
      "learning_rate": 0.001,
      "loss": 2.5481,
      "step": 147672
    },
    {
      "epoch": 28.36,
      "learning_rate": 0.001,
      "loss": 2.543,
      "step": 147684
    },
    {
      "epoch": 28.36,
      "learning_rate": 0.001,
      "loss": 2.5489,
      "step": 147696
    },
    {
      "epoch": 28.36,
      "learning_rate": 0.001,
      "loss": 2.5492,
      "step": 147708
    },
    {
      "epoch": 28.36,
      "learning_rate": 0.001,
      "loss": 2.5478,
      "step": 147720
    },
    {
      "epoch": 28.37,
      "learning_rate": 0.001,
      "loss": 2.5397,
      "step": 147732
    },
    {
      "epoch": 28.37,
      "learning_rate": 0.001,
      "loss": 2.5483,
      "step": 147744
    },
    {
      "epoch": 28.37,
      "learning_rate": 0.001,
      "loss": 2.5402,
      "step": 147756
    },
    {
      "epoch": 28.37,
      "learning_rate": 0.001,
      "loss": 2.553,
      "step": 147768
    },
    {
      "epoch": 28.38,
      "learning_rate": 0.001,
      "loss": 2.542,
      "step": 147780
    },
    {
      "epoch": 28.38,
      "learning_rate": 0.001,
      "loss": 2.5446,
      "step": 147792
    },
    {
      "epoch": 28.38,
      "learning_rate": 0.001,
      "loss": 2.5512,
      "step": 147804
    },
    {
      "epoch": 28.38,
      "learning_rate": 0.001,
      "loss": 2.5449,
      "step": 147816
    },
    {
      "epoch": 28.38,
      "learning_rate": 0.001,
      "loss": 2.5411,
      "step": 147828
    },
    {
      "epoch": 28.39,
      "learning_rate": 0.001,
      "loss": 2.5535,
      "step": 147840
    },
    {
      "epoch": 28.39,
      "learning_rate": 0.001,
      "loss": 2.5429,
      "step": 147852
    },
    {
      "epoch": 28.39,
      "learning_rate": 0.001,
      "loss": 2.5491,
      "step": 147864
    },
    {
      "epoch": 28.39,
      "learning_rate": 0.001,
      "loss": 2.5379,
      "step": 147876
    },
    {
      "epoch": 28.4,
      "learning_rate": 0.001,
      "loss": 2.5465,
      "step": 147888
    },
    {
      "epoch": 28.4,
      "learning_rate": 0.001,
      "loss": 2.5456,
      "step": 147900
    },
    {
      "epoch": 28.4,
      "learning_rate": 0.001,
      "loss": 2.5445,
      "step": 147912
    },
    {
      "epoch": 28.4,
      "learning_rate": 0.001,
      "loss": 2.5362,
      "step": 147924
    },
    {
      "epoch": 28.41,
      "learning_rate": 0.001,
      "loss": 2.5347,
      "step": 147936
    },
    {
      "epoch": 28.41,
      "learning_rate": 0.001,
      "loss": 2.5442,
      "step": 147948
    },
    {
      "epoch": 28.41,
      "learning_rate": 0.001,
      "loss": 2.5378,
      "step": 147960
    },
    {
      "epoch": 28.41,
      "learning_rate": 0.001,
      "loss": 2.5426,
      "step": 147972
    },
    {
      "epoch": 28.41,
      "learning_rate": 0.001,
      "loss": 2.5436,
      "step": 147984
    },
    {
      "epoch": 28.42,
      "learning_rate": 0.001,
      "loss": 2.537,
      "step": 147996
    },
    {
      "epoch": 28.42,
      "learning_rate": 0.001,
      "loss": 2.5419,
      "step": 148008
    },
    {
      "epoch": 28.42,
      "learning_rate": 0.001,
      "loss": 2.5344,
      "step": 148020
    },
    {
      "epoch": 28.42,
      "learning_rate": 0.001,
      "loss": 2.5437,
      "step": 148032
    },
    {
      "epoch": 28.43,
      "learning_rate": 0.001,
      "loss": 2.5447,
      "step": 148044
    },
    {
      "epoch": 28.43,
      "learning_rate": 0.001,
      "loss": 2.5472,
      "step": 148056
    },
    {
      "epoch": 28.43,
      "learning_rate": 0.001,
      "loss": 2.5486,
      "step": 148068
    },
    {
      "epoch": 28.43,
      "learning_rate": 0.001,
      "loss": 2.5423,
      "step": 148080
    },
    {
      "epoch": 28.44,
      "learning_rate": 0.001,
      "loss": 2.543,
      "step": 148092
    },
    {
      "epoch": 28.44,
      "learning_rate": 0.001,
      "loss": 2.5481,
      "step": 148104
    },
    {
      "epoch": 28.44,
      "learning_rate": 0.001,
      "loss": 2.5514,
      "step": 148116
    },
    {
      "epoch": 28.44,
      "eval_ag_news_accuracy": 0.32521875,
      "eval_ag_news_bleu_score": 4.919500148084001,
      "eval_ag_news_bleu_score_sem": 0.15261339154140596,
      "eval_ag_news_emb_cos_sim": 0.8202201128005981,
      "eval_ag_news_emb_cos_sim_sem": 0.007474579605824569,
      "eval_ag_news_emb_top1_equal": 0.25,
      "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5227763652801514,
      "eval_ag_news_n_ngrams_match_1": 14.246,
      "eval_ag_news_n_ngrams_match_2": 3.286,
      "eval_ag_news_n_ngrams_match_3": 0.95,
      "eval_ag_news_num_pred_words": 46.996,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.87835670726844,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35314929493138925,
      "eval_ag_news_runtime": 10.5201,
      "eval_ag_news_samples_per_second": 47.528,
      "eval_ag_news_steps_per_second": 0.095,
      "eval_ag_news_token_set_f1": 0.35612438703738875,
      "eval_ag_news_token_set_f1_sem": 0.004623295864016999,
      "eval_ag_news_token_set_precision": 0.3404142533605893,
      "eval_ag_news_token_set_recall": 0.391822754950611,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 148125
    },
    {
      "epoch": 28.44,
      "eval_anthropic_toxic_prompts_accuracy": 0.115625,
      "eval_anthropic_toxic_prompts_bleu_score": 2.975170619532323,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1126888361912147,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6649253368377686,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009802338195280102,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2226529121398926,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.172,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.862,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.668,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.712,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.094605736380164,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2105401992924088,
      "eval_anthropic_toxic_prompts_runtime": 11.181,
      "eval_anthropic_toxic_prompts_samples_per_second": 44.719,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.089,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3511539435015556,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006604996312961682,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.43285931990358306,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32560668529177567,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 148125
    },
    {
      "epoch": 28.44,
      "eval_arxiv_accuracy": 0.35065625,
      "eval_arxiv_bleu_score": 4.563628620853197,
      "eval_arxiv_bleu_score_sem": 0.1373218933249007,
      "eval_arxiv_emb_cos_sim": 0.7701166868209839,
      "eval_arxiv_emb_cos_sim_sem": 0.007064328109320098,
      "eval_arxiv_emb_top1_equal": 0.296875,
      "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.383981466293335,
      "eval_arxiv_n_ngrams_match_1": 15.32,
      "eval_arxiv_n_ngrams_match_2": 3.11,
      "eval_arxiv_n_ngrams_match_3": 0.732,
      "eval_arxiv_num_pred_words": 40.696,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.487942951563884,
      "eval_arxiv_pred_num_tokens": 62.9609375,
      "eval_arxiv_rouge_score": 0.36804509052626416,
      "eval_arxiv_runtime": 10.4142,
      "eval_arxiv_samples_per_second": 48.011,
      "eval_arxiv_steps_per_second": 0.096,
      "eval_arxiv_token_set_f1": 0.3592781073925174,
      "eval_arxiv_token_set_f1_sem": 0.004238921442030327,
      "eval_arxiv_token_set_precision": 0.3095947030549729,
      "eval_arxiv_token_set_recall": 0.4413900551762349,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 148125
    },
    {
      "epoch": 28.44,
      "eval_python_code_alpaca_accuracy": 0.16259375,
      "eval_python_code_alpaca_bleu_score": 4.484850193966426,
      "eval_python_code_alpaca_bleu_score_sem": 0.13808648249959496,
      "eval_python_code_alpaca_emb_cos_sim": 0.7501258850097656,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010587072343337962,
      "eval_python_code_alpaca_emb_top1_equal": 0.125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.891721248626709,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.844,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.808,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.93,
      "eval_python_code_alpaca_num_pred_words": 43.712,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.02430723065215,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33241039651790105,
      "eval_python_code_alpaca_runtime": 11.3281,
      "eval_python_code_alpaca_samples_per_second": 44.138,
      "eval_python_code_alpaca_steps_per_second": 0.088,
      "eval_python_code_alpaca_token_set_f1": 0.4681634642353719,
      "eval_python_code_alpaca_token_set_f1_sem": 0.00567608342590527,
      "eval_python_code_alpaca_token_set_precision": 0.539047968387537,
      "eval_python_code_alpaca_token_set_recall": 0.43849770529127396,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 148125
    },
    {
      "epoch": 28.44,
      "eval_wikibio_accuracy": 0.32546875,
      "eval_wikibio_bleu_score": 5.936747296165914,
      "eval_wikibio_bleu_score_sem": 0.20803172398243366,
      "eval_wikibio_emb_cos_sim": 0.7442708015441895,
      "eval_wikibio_emb_cos_sim_sem": 0.009910900574510068,
      "eval_wikibio_emb_top1_equal": 0.203125,
      "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6966350078582764,
      "eval_wikibio_n_ngrams_match_1": 10.166,
      "eval_wikibio_n_ngrams_match_2": 3.476,
      "eval_wikibio_n_ngrams_match_3": 1.278,
      "eval_wikibio_num_pred_words": 36.714,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 40.31142823799574,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.35653502731892006,
      "eval_wikibio_runtime": 10.1759,
      "eval_wikibio_samples_per_second": 49.136,
      "eval_wikibio_steps_per_second": 0.098,
      "eval_wikibio_token_set_f1": 0.32140197713135743,
      "eval_wikibio_token_set_f1_sem": 0.005499881518304417,
      "eval_wikibio_token_set_precision": 0.32982016300060796,
      "eval_wikibio_token_set_recall": 0.3313464747933582,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 148125
    },
    {
      "epoch": 28.44,
      "eval_nq_accuracy": 0.530375,
      "eval_nq_bleu_score": 11.866695437786367,
      "eval_nq_bleu_score_sem": 0.48121408499178814,
      "eval_nq_emb_cos_sim": 0.8286600708961487,
      "eval_nq_emb_cos_sim_sem": 0.007795574613895093,
      "eval_nq_emb_top1_equal": 0.2578125,
      "eval_nq_emb_top1_equal_sem": 0.038815656435002115,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.168030023574829,
      "eval_nq_n_ngrams_match_1": 23.178,
      "eval_nq_n_ngrams_match_2": 8.496,
      "eval_nq_n_ngrams_match_3": 3.924,
      "eval_nq_num_pred_words": 49.13,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.741047411106974,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4485457505447752,
      "eval_nq_runtime": 10.858,
      "eval_nq_samples_per_second": 46.049,
      "eval_nq_steps_per_second": 0.092,
      "eval_nq_token_set_f1": 0.462470080542473,
      "eval_nq_token_set_f1_sem": 0.005146965443933411,
      "eval_nq_token_set_precision": 0.421093461301019,
      "eval_nq_token_set_recall": 0.5228317180517345,
      "eval_nq_true_num_tokens": 64.0,
      "step": 148125
    },
    {
      "epoch": 28.44,
      "learning_rate": 0.001,
      "loss": 2.5368,
      "step": 148128
    },
    {
      "epoch": 28.44,
      "learning_rate": 0.001,
      "loss": 2.5323,
      "step": 148140
    },
    {
      "epoch": 28.45,
      "learning_rate": 0.001,
      "loss": 2.5416,
      "step": 148152
    },
    {
      "epoch": 28.45,
      "learning_rate": 0.001,
      "loss": 2.5441,
      "step": 148164
    },
    {
      "epoch": 28.45,
      "learning_rate": 0.001,
      "loss": 2.5449,
      "step": 148176
    },
    {
      "epoch": 28.45,
      "learning_rate": 0.001,
      "loss": 2.5378,
      "step": 148188
    },
    {
      "epoch": 28.46,
      "learning_rate": 0.001,
      "loss": 2.5418,
      "step": 148200
    },
    {
      "epoch": 28.46,
      "learning_rate": 0.001,
      "loss": 2.5387,
      "step": 148212
    },
    {
      "epoch": 28.46,
      "learning_rate": 0.001,
      "loss": 2.5472,
      "step": 148224
    },
    {
      "epoch": 28.46,
      "learning_rate": 0.001,
      "loss": 2.5487,
      "step": 148236
    },
    {
      "epoch": 28.47,
      "learning_rate": 0.001,
      "loss": 2.5369,
      "step": 148248
    },
    {
      "epoch": 28.47,
      "learning_rate": 0.001,
      "loss": 2.5505,
      "step": 148260
    },
    {
      "epoch": 28.47,
      "learning_rate": 0.001,
      "loss": 2.5413,
      "step": 148272
    },
    {
      "epoch": 28.47,
      "learning_rate": 0.001,
      "loss": 2.54,
      "step": 148284
    },
    {
      "epoch": 28.47,
      "learning_rate": 0.001,
      "loss": 2.5385,
      "step": 148296
    },
    {
      "epoch": 28.48,
      "learning_rate": 0.001,
      "loss": 2.5384,
      "step": 148308
    },
    {
      "epoch": 28.48,
      "learning_rate": 0.001,
      "loss": 2.5443,
      "step": 148320
    },
    {
      "epoch": 28.48,
      "learning_rate": 0.001,
      "loss": 2.5455,
      "step": 148332
    },
    {
      "epoch": 28.48,
      "learning_rate": 0.001,
      "loss": 2.5455,
      "step": 148344
    },
    {
      "epoch": 28.49,
      "learning_rate": 0.001,
      "loss": 2.5448,
      "step": 148356
    },
    {
      "epoch": 28.49,
      "learning_rate": 0.001,
      "loss": 2.543,
      "step": 148368
    },
    {
      "epoch": 28.49,
      "learning_rate": 0.001,
      "loss": 2.5431,
      "step": 148380
    },
    {
      "epoch": 28.49,
      "learning_rate": 0.001,
      "loss": 2.5455,
      "step": 148392
    },
    {
      "epoch": 28.5,
      "learning_rate": 0.001,
      "loss": 2.5331,
      "step": 148404
    },
    {
      "epoch": 28.5,
      "learning_rate": 0.001,
      "loss": 2.5472,
      "step": 148416
    },
    {
      "epoch": 28.5,
      "learning_rate": 0.001,
      "loss": 2.5414,
      "step": 148428
    },
    {
      "epoch": 28.5,
      "learning_rate": 0.001,
      "loss": 2.5378,
      "step": 148440
    },
    {
      "epoch": 28.5,
      "learning_rate": 0.001,
      "loss": 2.5417,
      "step": 148452
    },
    {
      "epoch": 28.51,
      "learning_rate": 0.001,
      "loss": 2.5471,
      "step": 148464
    },
    {
      "epoch": 28.51,
      "learning_rate": 0.001,
      "loss": 2.5436,
      "step": 148476
    },
    {
      "epoch": 28.51,
      "learning_rate": 0.001,
      "loss": 2.5466,
      "step": 148488
    },
    {
      "epoch": 28.51,
      "learning_rate": 0.001,
      "loss": 2.5328,
      "step": 148500
    },
    {
      "epoch": 28.52,
      "learning_rate": 0.001,
      "loss": 2.5387,
      "step": 148512
    },
    {
      "epoch": 28.52,
      "learning_rate": 0.001,
      "loss": 2.5368,
      "step": 148524
    },
    {
      "epoch": 28.52,
      "learning_rate": 0.001,
      "loss": 2.548,
      "step": 148536
    },
    {
      "epoch": 28.52,
      "learning_rate": 0.001,
      "loss": 2.5377,
      "step": 148548
    },
    {
      "epoch": 28.53,
      "learning_rate": 0.001,
      "loss": 2.5434,
      "step": 148560
    },
    {
      "epoch": 28.53,
      "learning_rate": 0.001,
      "loss": 2.5413,
      "step": 148572
    },
    {
      "epoch": 28.53,
      "learning_rate": 0.001,
      "loss": 2.5483,
      "step": 148584
    },
    {
      "epoch": 28.53,
      "learning_rate": 0.001,
      "loss": 2.5425,
      "step": 148596
    },
    {
      "epoch": 28.53,
      "learning_rate": 0.001,
      "loss": 2.5338,
      "step": 148608
    },
    {
      "epoch": 28.54,
      "learning_rate": 0.001,
      "loss": 2.5347,
      "step": 148620
    },
    {
      "epoch": 28.54,
      "learning_rate": 0.001,
      "loss": 2.5462,
      "step": 148632
    },
    {
      "epoch": 28.54,
      "learning_rate": 0.001,
      "loss": 2.5384,
      "step": 148644
    },
    {
      "epoch": 28.54,
      "learning_rate": 0.001,
      "loss": 2.53,
      "step": 148656
    },
    {
      "epoch": 28.55,
      "learning_rate": 0.001,
      "loss": 2.5483,
      "step": 148668
    },
    {
      "epoch": 28.55,
      "learning_rate": 0.001,
      "loss": 2.5364,
      "step": 148680
    },
    {
      "epoch": 28.55,
      "learning_rate": 0.001,
      "loss": 2.5357,
      "step": 148692
    },
    {
      "epoch": 28.55,
      "learning_rate": 0.001,
      "loss": 2.5418,
      "step": 148704
    },
    {
      "epoch": 28.56,
      "learning_rate": 0.001,
      "loss": 2.5398,
      "step": 148716
    },
    {
      "epoch": 28.56,
      "learning_rate": 0.001,
      "loss": 2.541,
      "step": 148728
    },
    {
      "epoch": 28.56,
      "learning_rate": 0.001,
      "loss": 2.5538,
      "step": 148740
    },
    {
      "epoch": 28.56,
      "eval_ag_news_accuracy": 0.3243125,
      "eval_ag_news_bleu_score": 4.756171917009729,
      "eval_ag_news_bleu_score_sem": 0.14941271223160832,
      "eval_ag_news_emb_cos_sim": 0.8182030916213989,
      "eval_ag_news_emb_cos_sim_sem": 0.006345165237922077,
      "eval_ag_news_emb_top1_equal": 0.2421875,
      "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5314078330993652,
      "eval_ag_news_n_ngrams_match_1": 14.328,
      "eval_ag_news_n_ngrams_match_2": 3.112,
      "eval_ag_news_n_ngrams_match_3": 0.87,
      "eval_ag_news_num_pred_words": 46.954,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 34.17204229846926,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3534847715901825,
      "eval_ag_news_runtime": 10.8852,
      "eval_ag_news_samples_per_second": 45.934,
      "eval_ag_news_steps_per_second": 0.092,
      "eval_ag_news_token_set_f1": 0.3555322727312971,
      "eval_ag_news_token_set_f1_sem": 0.00428780652135488,
      "eval_ag_news_token_set_precision": 0.3410526111294004,
      "eval_ag_news_token_set_recall": 0.3853175974998572,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 148750
    },
    {
      "epoch": 28.56,
      "eval_anthropic_toxic_prompts_accuracy": 0.11396875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.0643367565056243,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1201213060777016,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.675471305847168,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009189501944507447,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.234440803527832,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.156,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.898,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.722,
      "eval_anthropic_toxic_prompts_num_pred_words": 48.598,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.39216859693375,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.208450634279371,
      "eval_anthropic_toxic_prompts_runtime": 9.8553,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.734,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.101,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35818510019077876,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006421029992889609,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.428584827742149,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3348244185071268,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 148750
    },
    {
      "epoch": 28.56,
      "eval_arxiv_accuracy": 0.34975,
      "eval_arxiv_bleu_score": 4.367905795039898,
      "eval_arxiv_bleu_score_sem": 0.12993739532055507,
      "eval_arxiv_emb_cos_sim": 0.7781081199645996,
      "eval_arxiv_emb_cos_sim_sem": 0.0067955978275811135,
      "eval_arxiv_emb_top1_equal": 0.3203125,
      "eval_arxiv_emb_top1_equal_sem": 0.041403754790620424,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3815231323242188,
      "eval_arxiv_n_ngrams_match_1": 15.204,
      "eval_arxiv_n_ngrams_match_2": 3.012,
      "eval_arxiv_n_ngrams_match_3": 0.68,
      "eval_arxiv_num_pred_words": 40.834,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.41554077056017,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3625433074967942,
      "eval_arxiv_runtime": 10.2468,
      "eval_arxiv_samples_per_second": 48.796,
      "eval_arxiv_steps_per_second": 0.098,
      "eval_arxiv_token_set_f1": 0.35502807828030514,
      "eval_arxiv_token_set_f1_sem": 0.004236078488294291,
      "eval_arxiv_token_set_precision": 0.3060704591108825,
      "eval_arxiv_token_set_recall": 0.4425632761727061,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 148750
    },
    {
      "epoch": 28.56,
      "eval_python_code_alpaca_accuracy": 0.1613125,
      "eval_python_code_alpaca_bleu_score": 4.502380345139768,
      "eval_python_code_alpaca_bleu_score_sem": 0.14058627237128596,
      "eval_python_code_alpaca_emb_cos_sim": 0.7614978551864624,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007785972376424197,
      "eval_python_code_alpaca_emb_top1_equal": 0.1484375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8783135414123535,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.054,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.958,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.934,
      "eval_python_code_alpaca_num_pred_words": 44.118,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.784255464556985,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33412447209432394,
      "eval_python_code_alpaca_runtime": 10.0555,
      "eval_python_code_alpaca_samples_per_second": 49.724,
      "eval_python_code_alpaca_steps_per_second": 0.099,
      "eval_python_code_alpaca_token_set_f1": 0.47905262775355023,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005433762017092308,
      "eval_python_code_alpaca_token_set_precision": 0.5464928925229299,
      "eval_python_code_alpaca_token_set_recall": 0.4470709755220311,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 148750
    },
    {
      "epoch": 28.56,
      "eval_wikibio_accuracy": 0.32434375,
      "eval_wikibio_bleu_score": 6.028966990076317,
      "eval_wikibio_bleu_score_sem": 0.2219807337520827,
      "eval_wikibio_emb_cos_sim": 0.7519832849502563,
      "eval_wikibio_emb_cos_sim_sem": 0.009022705132981076,
      "eval_wikibio_emb_top1_equal": 0.21875,
      "eval_wikibio_emb_top1_equal_sem": 0.03668319712192295,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.739767551422119,
      "eval_wikibio_n_ngrams_match_1": 10.144,
      "eval_wikibio_n_ngrams_match_2": 3.454,
      "eval_wikibio_n_ngrams_match_3": 1.284,
      "eval_wikibio_num_pred_words": 36.188,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 42.088205684289775,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3566398404420736,
      "eval_wikibio_runtime": 10.1372,
      "eval_wikibio_samples_per_second": 49.323,
      "eval_wikibio_steps_per_second": 0.099,
      "eval_wikibio_token_set_f1": 0.32181054103821344,
      "eval_wikibio_token_set_f1_sem": 0.005481928979619786,
      "eval_wikibio_token_set_precision": 0.3305455895231806,
      "eval_wikibio_token_set_recall": 0.3323655225855936,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 148750
    },
    {
      "epoch": 28.56,
      "eval_nq_accuracy": 0.531625,
      "eval_nq_bleu_score": 11.733330475134023,
      "eval_nq_bleu_score_sem": 0.4758405920195655,
      "eval_nq_emb_cos_sim": 0.8343696594238281,
      "eval_nq_emb_cos_sim_sem": 0.006893065707485096,
      "eval_nq_emb_top1_equal": 0.28125,
      "eval_nq_emb_top1_equal_sem": 0.039896367485272234,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.166505813598633,
      "eval_nq_n_ngrams_match_1": 23.33,
      "eval_nq_n_ngrams_match_2": 8.486,
      "eval_nq_n_ngrams_match_3": 3.882,
      "eval_nq_num_pred_words": 49.434,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.727734367954577,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45111523021649913,
      "eval_nq_runtime": 10.4337,
      "eval_nq_samples_per_second": 47.921,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.4658619928953663,
      "eval_nq_token_set_f1_sem": 0.004827877671905447,
      "eval_nq_token_set_precision": 0.4241522771524402,
      "eval_nq_token_set_recall": 0.5240705834766768,
      "eval_nq_true_num_tokens": 64.0,
      "step": 148750
    },
    {
      "epoch": 28.56,
      "learning_rate": 0.001,
      "loss": 2.5392,
      "step": 148752
    },
    {
      "epoch": 28.56,
      "learning_rate": 0.001,
      "loss": 2.5486,
      "step": 148764
    },
    {
      "epoch": 28.57,
      "learning_rate": 0.001,
      "loss": 2.5454,
      "step": 148776
    },
    {
      "epoch": 28.57,
      "learning_rate": 0.001,
      "loss": 2.5447,
      "step": 148788
    },
    {
      "epoch": 28.57,
      "learning_rate": 0.001,
      "loss": 2.5368,
      "step": 148800
    },
    {
      "epoch": 28.57,
      "learning_rate": 0.001,
      "loss": 2.5411,
      "step": 148812
    },
    {
      "epoch": 28.58,
      "learning_rate": 0.001,
      "loss": 2.5465,
      "step": 148824
    },
    {
      "epoch": 28.58,
      "learning_rate": 0.001,
      "loss": 2.541,
      "step": 148836
    },
    {
      "epoch": 28.58,
      "learning_rate": 0.001,
      "loss": 2.5267,
      "step": 148848
    },
    {
      "epoch": 28.58,
      "learning_rate": 0.001,
      "loss": 2.5353,
      "step": 148860
    },
    {
      "epoch": 28.59,
      "learning_rate": 0.001,
      "loss": 2.5268,
      "step": 148872
    },
    {
      "epoch": 28.59,
      "learning_rate": 0.001,
      "loss": 2.5306,
      "step": 148884
    },
    {
      "epoch": 28.59,
      "learning_rate": 0.001,
      "loss": 2.543,
      "step": 148896
    },
    {
      "epoch": 28.59,
      "learning_rate": 0.001,
      "loss": 2.5368,
      "step": 148908
    },
    {
      "epoch": 28.59,
      "learning_rate": 0.001,
      "loss": 2.5424,
      "step": 148920
    },
    {
      "epoch": 28.6,
      "learning_rate": 0.001,
      "loss": 2.5368,
      "step": 148932
    },
    {
      "epoch": 28.6,
      "learning_rate": 0.001,
      "loss": 2.5331,
      "step": 148944
    },
    {
      "epoch": 28.6,
      "learning_rate": 0.001,
      "loss": 2.5392,
      "step": 148956
    },
    {
      "epoch": 28.6,
      "learning_rate": 0.001,
      "loss": 2.5366,
      "step": 148968
    },
    {
      "epoch": 28.61,
      "learning_rate": 0.001,
      "loss": 2.5442,
      "step": 148980
    },
    {
      "epoch": 28.61,
      "learning_rate": 0.001,
      "loss": 2.537,
      "step": 148992
    },
    {
      "epoch": 28.61,
      "learning_rate": 0.001,
      "loss": 2.5363,
      "step": 149004
    },
    {
      "epoch": 28.61,
      "learning_rate": 0.001,
      "loss": 2.5472,
      "step": 149016
    },
    {
      "epoch": 28.62,
      "learning_rate": 0.001,
      "loss": 2.5449,
      "step": 149028
    },
    {
      "epoch": 28.62,
      "learning_rate": 0.001,
      "loss": 2.5426,
      "step": 149040
    },
    {
      "epoch": 28.62,
      "learning_rate": 0.001,
      "loss": 2.5408,
      "step": 149052
    },
    {
      "epoch": 28.62,
      "learning_rate": 0.001,
      "loss": 2.5407,
      "step": 149064
    },
    {
      "epoch": 28.62,
      "learning_rate": 0.001,
      "loss": 2.5335,
      "step": 149076
    },
    {
      "epoch": 28.63,
      "learning_rate": 0.001,
      "loss": 2.5365,
      "step": 149088
    },
    {
      "epoch": 28.63,
      "learning_rate": 0.001,
      "loss": 2.535,
      "step": 149100
    },
    {
      "epoch": 28.63,
      "learning_rate": 0.001,
      "loss": 2.5454,
      "step": 149112
    },
    {
      "epoch": 28.63,
      "learning_rate": 0.001,
      "loss": 2.5414,
      "step": 149124
    },
    {
      "epoch": 28.64,
      "learning_rate": 0.001,
      "loss": 2.5452,
      "step": 149136
    },
    {
      "epoch": 28.64,
      "learning_rate": 0.001,
      "loss": 2.5364,
      "step": 149148
    },
    {
      "epoch": 28.64,
      "learning_rate": 0.001,
      "loss": 2.5458,
      "step": 149160
    },
    {
      "epoch": 28.64,
      "learning_rate": 0.001,
      "loss": 2.5461,
      "step": 149172
    },
    {
      "epoch": 28.65,
      "learning_rate": 0.001,
      "loss": 2.5335,
      "step": 149184
    },
    {
      "epoch": 28.65,
      "learning_rate": 0.001,
      "loss": 2.5478,
      "step": 149196
    },
    {
      "epoch": 28.65,
      "learning_rate": 0.001,
      "loss": 2.534,
      "step": 149208
    },
    {
      "epoch": 28.65,
      "learning_rate": 0.001,
      "loss": 2.5436,
      "step": 149220
    },
    {
      "epoch": 28.65,
      "learning_rate": 0.001,
      "loss": 2.5416,
      "step": 149232
    },
    {
      "epoch": 28.66,
      "learning_rate": 0.001,
      "loss": 2.5459,
      "step": 149244
    },
    {
      "epoch": 28.66,
      "learning_rate": 0.001,
      "loss": 2.5324,
      "step": 149256
    },
    {
      "epoch": 28.66,
      "learning_rate": 0.001,
      "loss": 2.535,
      "step": 149268
    },
    {
      "epoch": 28.66,
      "learning_rate": 0.001,
      "loss": 2.5418,
      "step": 149280
    },
    {
      "epoch": 28.67,
      "learning_rate": 0.001,
      "loss": 2.5517,
      "step": 149292
    },
    {
      "epoch": 28.67,
      "learning_rate": 0.001,
      "loss": 2.5507,
      "step": 149304
    },
    {
      "epoch": 28.67,
      "learning_rate": 0.001,
      "loss": 2.5345,
      "step": 149316
    },
    {
      "epoch": 28.67,
      "learning_rate": 0.001,
      "loss": 2.541,
      "step": 149328
    },
    {
      "epoch": 28.68,
      "learning_rate": 0.001,
      "loss": 2.5482,
      "step": 149340
    },
    {
      "epoch": 28.68,
      "learning_rate": 0.001,
      "loss": 2.5434,
      "step": 149352
    },
    {
      "epoch": 28.68,
      "learning_rate": 0.001,
      "loss": 2.5459,
      "step": 149364
    },
    {
      "epoch": 28.68,
      "eval_ag_news_accuracy": 0.3219375,
      "eval_ag_news_bleu_score": 4.772041790436677,
      "eval_ag_news_bleu_score_sem": 0.1537271497890346,
      "eval_ag_news_emb_cos_sim": 0.8114557266235352,
      "eval_ag_news_emb_cos_sim_sem": 0.006865690670832762,
      "eval_ag_news_emb_top1_equal": 0.265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5282657146453857,
      "eval_ag_news_n_ngrams_match_1": 13.99,
      "eval_ag_news_n_ngrams_match_2": 3.124,
      "eval_ag_news_n_ngrams_match_3": 0.918,
      "eval_ag_news_num_pred_words": 46.602,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 34.06483820593316,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3483905258532579,
      "eval_ag_news_runtime": 10.4876,
      "eval_ag_news_samples_per_second": 47.675,
      "eval_ag_news_steps_per_second": 0.095,
      "eval_ag_news_token_set_f1": 0.34885650810860663,
      "eval_ag_news_token_set_f1_sem": 0.004584215635663528,
      "eval_ag_news_token_set_precision": 0.3342390967439693,
      "eval_ag_news_token_set_recall": 0.37885604416096513,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 149375
    },
    {
      "epoch": 28.68,
      "eval_anthropic_toxic_prompts_accuracy": 0.11434375,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1904436102936296,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12088460606534683,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6796077489852905,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008200283141275874,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.243507146835327,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.268,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.938,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.72,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.372,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.623429475909035,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21502481066848333,
      "eval_anthropic_toxic_prompts_runtime": 10.5231,
      "eval_anthropic_toxic_prompts_samples_per_second": 47.515,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.095,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36275511208515815,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006592997696022939,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44340855886704533,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3353735231735486,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 149375
    },
    {
      "epoch": 28.68,
      "eval_arxiv_accuracy": 0.3488125,
      "eval_arxiv_bleu_score": 4.269698737808313,
      "eval_arxiv_bleu_score_sem": 0.12778169579667845,
      "eval_arxiv_emb_cos_sim": 0.7690363526344299,
      "eval_arxiv_emb_cos_sim_sem": 0.007080500645347138,
      "eval_arxiv_emb_top1_equal": 0.3125,
      "eval_arxiv_emb_top1_equal_sem": 0.041130074229814934,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.370055675506592,
      "eval_arxiv_n_ngrams_match_1": 15.046,
      "eval_arxiv_n_ngrams_match_2": 2.928,
      "eval_arxiv_n_ngrams_match_3": 0.63,
      "eval_arxiv_num_pred_words": 40.24,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.08014606459105,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3631524136507738,
      "eval_arxiv_runtime": 10.2498,
      "eval_arxiv_samples_per_second": 48.782,
      "eval_arxiv_steps_per_second": 0.098,
      "eval_arxiv_token_set_f1": 0.3552734211734464,
      "eval_arxiv_token_set_f1_sem": 0.004199874670979993,
      "eval_arxiv_token_set_precision": 0.30494772927429825,
      "eval_arxiv_token_set_recall": 0.4447780181514975,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 149375
    },
    {
      "epoch": 28.68,
      "eval_python_code_alpaca_accuracy": 0.161875,
      "eval_python_code_alpaca_bleu_score": 4.576457482748722,
      "eval_python_code_alpaca_bleu_score_sem": 0.14659049201287674,
      "eval_python_code_alpaca_emb_cos_sim": 0.7466869354248047,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009250980367050174,
      "eval_python_code_alpaca_emb_top1_equal": 0.0859375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02487009666300537,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8794357776641846,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.732,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.852,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.964,
      "eval_python_code_alpaca_num_pred_words": 43.494,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.804224803813547,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3290592140381472,
      "eval_python_code_alpaca_runtime": 10.0296,
      "eval_python_code_alpaca_samples_per_second": 49.853,
      "eval_python_code_alpaca_steps_per_second": 0.1,
      "eval_python_code_alpaca_token_set_f1": 0.4722881605359816,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005548355310737996,
      "eval_python_code_alpaca_token_set_precision": 0.5336534711735468,
      "eval_python_code_alpaca_token_set_recall": 0.4473906501637632,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 149375
    },
    {
      "epoch": 28.68,
      "eval_wikibio_accuracy": 0.32565625,
      "eval_wikibio_bleu_score": 5.77750789569979,
      "eval_wikibio_bleu_score_sem": 0.20954370616634016,
      "eval_wikibio_emb_cos_sim": 0.7381787896156311,
      "eval_wikibio_emb_cos_sim_sem": 0.009751466949127452,
      "eval_wikibio_emb_top1_equal": 0.2265625,
      "eval_wikibio_emb_top1_equal_sem": 0.03714537682851538,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7005228996276855,
      "eval_wikibio_n_ngrams_match_1": 9.866,
      "eval_wikibio_n_ngrams_match_2": 3.284,
      "eval_wikibio_n_ngrams_match_3": 1.164,
      "eval_wikibio_num_pred_words": 35.906,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 40.468459771054384,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.34995863214615996,
      "eval_wikibio_runtime": 10.0672,
      "eval_wikibio_samples_per_second": 49.666,
      "eval_wikibio_steps_per_second": 0.099,
      "eval_wikibio_token_set_f1": 0.31661482798084417,
      "eval_wikibio_token_set_f1_sem": 0.005622156353982404,
      "eval_wikibio_token_set_precision": 0.3233563077941803,
      "eval_wikibio_token_set_recall": 0.3284257556419153,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 149375
    },
    {
      "epoch": 28.68,
      "eval_nq_accuracy": 0.531,
      "eval_nq_bleu_score": 11.43955904137557,
      "eval_nq_bleu_score_sem": 0.4714544899456285,
      "eval_nq_emb_cos_sim": 0.8228099346160889,
      "eval_nq_emb_cos_sim_sem": 0.007503157803292987,
      "eval_nq_emb_top1_equal": 0.265625,
      "eval_nq_emb_top1_equal_sem": 0.03919146934646163,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.166867733001709,
      "eval_nq_n_ngrams_match_1": 22.882,
      "eval_nq_n_ngrams_match_2": 8.356,
      "eval_nq_n_ngrams_match_3": 3.762,
      "eval_nq_num_pred_words": 49.19,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.730893676040196,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4448080950548505,
      "eval_nq_runtime": 10.5262,
      "eval_nq_samples_per_second": 47.501,
      "eval_nq_steps_per_second": 0.095,
      "eval_nq_token_set_f1": 0.45650527234887706,
      "eval_nq_token_set_f1_sem": 0.004953077512897897,
      "eval_nq_token_set_precision": 0.4146863164485548,
      "eval_nq_token_set_recall": 0.5178328184864912,
      "eval_nq_true_num_tokens": 64.0,
      "step": 149375
    },
    {
      "epoch": 28.68,
      "learning_rate": 0.001,
      "loss": 2.5446,
      "step": 149376
    },
    {
      "epoch": 28.68,
      "learning_rate": 0.001,
      "loss": 2.5464,
      "step": 149388
    },
    {
      "epoch": 28.69,
      "learning_rate": 0.001,
      "loss": 2.5383,
      "step": 149400
    },
    {
      "epoch": 28.69,
      "learning_rate": 0.001,
      "loss": 2.5397,
      "step": 149412
    },
    {
      "epoch": 28.69,
      "learning_rate": 0.001,
      "loss": 2.5404,
      "step": 149424
    },
    {
      "epoch": 28.69,
      "learning_rate": 0.001,
      "loss": 2.5507,
      "step": 149436
    },
    {
      "epoch": 28.7,
      "learning_rate": 0.001,
      "loss": 2.5444,
      "step": 149448
    },
    {
      "epoch": 28.7,
      "learning_rate": 0.001,
      "loss": 2.5377,
      "step": 149460
    },
    {
      "epoch": 28.7,
      "learning_rate": 0.001,
      "loss": 2.5518,
      "step": 149472
    },
    {
      "epoch": 28.7,
      "learning_rate": 0.001,
      "loss": 2.5384,
      "step": 149484
    },
    {
      "epoch": 28.71,
      "learning_rate": 0.001,
      "loss": 2.541,
      "step": 149496
    },
    {
      "epoch": 28.71,
      "learning_rate": 0.001,
      "loss": 2.5485,
      "step": 149508
    },
    {
      "epoch": 28.71,
      "learning_rate": 0.001,
      "loss": 2.5407,
      "step": 149520
    },
    {
      "epoch": 28.71,
      "learning_rate": 0.001,
      "loss": 2.5374,
      "step": 149532
    },
    {
      "epoch": 28.71,
      "learning_rate": 0.001,
      "loss": 2.5352,
      "step": 149544
    },
    {
      "epoch": 28.72,
      "learning_rate": 0.001,
      "loss": 2.5467,
      "step": 149556
    },
    {
      "epoch": 28.72,
      "learning_rate": 0.001,
      "loss": 2.542,
      "step": 149568
    },
    {
      "epoch": 28.72,
      "learning_rate": 0.001,
      "loss": 2.5446,
      "step": 149580
    },
    {
      "epoch": 28.72,
      "learning_rate": 0.001,
      "loss": 2.5481,
      "step": 149592
    },
    {
      "epoch": 28.73,
      "learning_rate": 0.001,
      "loss": 2.5442,
      "step": 149604
    },
    {
      "epoch": 28.73,
      "learning_rate": 0.001,
      "loss": 2.5387,
      "step": 149616
    },
    {
      "epoch": 28.73,
      "learning_rate": 0.001,
      "loss": 2.5386,
      "step": 149628
    },
    {
      "epoch": 28.73,
      "learning_rate": 0.001,
      "loss": 2.547,
      "step": 149640
    },
    {
      "epoch": 28.74,
      "learning_rate": 0.001,
      "loss": 2.5442,
      "step": 149652
    },
    {
      "epoch": 28.74,
      "learning_rate": 0.001,
      "loss": 2.5485,
      "step": 149664
    },
    {
      "epoch": 28.74,
      "learning_rate": 0.001,
      "loss": 2.5503,
      "step": 149676
    },
    {
      "epoch": 28.74,
      "learning_rate": 0.001,
      "loss": 2.5428,
      "step": 149688
    },
    {
      "epoch": 28.74,
      "learning_rate": 0.001,
      "loss": 2.542,
      "step": 149700
    },
    {
      "epoch": 28.75,
      "learning_rate": 0.001,
      "loss": 2.5415,
      "step": 149712
    },
    {
      "epoch": 28.75,
      "learning_rate": 0.001,
      "loss": 2.5432,
      "step": 149724
    },
    {
      "epoch": 28.75,
      "learning_rate": 0.001,
      "loss": 2.5424,
      "step": 149736
    },
    {
      "epoch": 28.75,
      "learning_rate": 0.001,
      "loss": 2.5472,
      "step": 149748
    },
    {
      "epoch": 28.76,
      "learning_rate": 0.001,
      "loss": 2.5367,
      "step": 149760
    },
    {
      "epoch": 28.76,
      "learning_rate": 0.001,
      "loss": 2.543,
      "step": 149772
    },
    {
      "epoch": 28.76,
      "learning_rate": 0.001,
      "loss": 2.5492,
      "step": 149784
    },
    {
      "epoch": 28.76,
      "learning_rate": 0.001,
      "loss": 2.5427,
      "step": 149796
    },
    {
      "epoch": 28.76,
      "learning_rate": 0.001,
      "loss": 2.543,
      "step": 149808
    },
    {
      "epoch": 28.77,
      "learning_rate": 0.001,
      "loss": 2.5424,
      "step": 149820
    },
    {
      "epoch": 28.77,
      "learning_rate": 0.001,
      "loss": 2.5536,
      "step": 149832
    },
    {
      "epoch": 28.77,
      "learning_rate": 0.001,
      "loss": 2.5424,
      "step": 149844
    },
    {
      "epoch": 28.77,
      "learning_rate": 0.001,
      "loss": 2.5382,
      "step": 149856
    },
    {
      "epoch": 28.78,
      "learning_rate": 0.001,
      "loss": 2.5386,
      "step": 149868
    },
    {
      "epoch": 28.78,
      "learning_rate": 0.001,
      "loss": 2.5411,
      "step": 149880
    },
    {
      "epoch": 28.78,
      "learning_rate": 0.001,
      "loss": 2.5523,
      "step": 149892
    },
    {
      "epoch": 28.78,
      "learning_rate": 0.001,
      "loss": 2.5488,
      "step": 149904
    },
    {
      "epoch": 28.79,
      "learning_rate": 0.001,
      "loss": 2.5406,
      "step": 149916
    },
    {
      "epoch": 28.79,
      "learning_rate": 0.001,
      "loss": 2.5398,
      "step": 149928
    },
    {
      "epoch": 28.79,
      "learning_rate": 0.001,
      "loss": 2.5542,
      "step": 149940
    },
    {
      "epoch": 28.79,
      "learning_rate": 0.001,
      "loss": 2.5376,
      "step": 149952
    },
    {
      "epoch": 28.79,
      "learning_rate": 0.001,
      "loss": 2.5408,
      "step": 149964
    },
    {
      "epoch": 28.8,
      "learning_rate": 0.001,
      "loss": 2.5466,
      "step": 149976
    },
    {
      "epoch": 28.8,
      "learning_rate": 0.001,
      "loss": 2.5436,
      "step": 149988
    },
    {
      "epoch": 28.8,
      "learning_rate": 0.001,
      "loss": 2.543,
      "step": 150000
    },
    {
      "epoch": 28.8,
      "eval_ag_news_accuracy": 0.324375,
      "eval_ag_news_bleu_score": 4.7056634866851565,
      "eval_ag_news_bleu_score_sem": 0.14283871622014702,
      "eval_ag_news_emb_cos_sim": 0.8181965351104736,
      "eval_ag_news_emb_cos_sim_sem": 0.006325943640823077,
      "eval_ag_news_emb_top1_equal": 0.21875,
      "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5157887935638428,
      "eval_ag_news_n_ngrams_match_1": 14.222,
      "eval_ag_news_n_ngrams_match_2": 3.108,
      "eval_ag_news_n_ngrams_match_3": 0.84,
      "eval_ag_news_num_pred_words": 46.81,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.64245441209795,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3520721827994989,
      "eval_ag_news_runtime": 10.4502,
      "eval_ag_news_samples_per_second": 47.846,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.3556641217079547,
      "eval_ag_news_token_set_f1_sem": 0.004486645585032673,
      "eval_ag_news_token_set_precision": 0.34048718632930713,
      "eval_ag_news_token_set_recall": 0.38683471936444186,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 150000
    },
    {
      "epoch": 28.8,
      "eval_anthropic_toxic_prompts_accuracy": 0.11628125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2923909119651347,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.13009757907748862,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6801949739456177,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009209527624450693,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.215897798538208,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.38,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.03,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.818,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.63,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.925660089796313,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2180218869785031,
      "eval_anthropic_toxic_prompts_runtime": 10.4481,
      "eval_anthropic_toxic_prompts_samples_per_second": 47.856,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.096,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35917874076071804,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006374282602473499,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44973359141432817,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3244919054013823,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 150000
    },
    {
      "epoch": 28.8,
      "eval_arxiv_accuracy": 0.34865625,
      "eval_arxiv_bleu_score": 4.365896166913111,
      "eval_arxiv_bleu_score_sem": 0.13988424476600514,
      "eval_arxiv_emb_cos_sim": 0.7636216282844543,
      "eval_arxiv_emb_cos_sim_sem": 0.0078008080671000315,
      "eval_arxiv_emb_top1_equal": 0.296875,
      "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3648488521575928,
      "eval_arxiv_n_ngrams_match_1": 14.968,
      "eval_arxiv_n_ngrams_match_2": 2.932,
      "eval_arxiv_n_ngrams_match_3": 0.678,
      "eval_arxiv_num_pred_words": 39.714,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.92912439384603,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36020671358803785,
      "eval_arxiv_runtime": 10.3931,
      "eval_arxiv_samples_per_second": 48.109,
      "eval_arxiv_steps_per_second": 0.096,
      "eval_arxiv_token_set_f1": 0.35432192957842007,
      "eval_arxiv_token_set_f1_sem": 0.004564245948690358,
      "eval_arxiv_token_set_precision": 0.3035574178537733,
      "eval_arxiv_token_set_recall": 0.44815358109370773,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 150000
    },
    {
      "epoch": 28.8,
      "eval_python_code_alpaca_accuracy": 0.161125,
      "eval_python_code_alpaca_bleu_score": 4.755795493449977,
      "eval_python_code_alpaca_bleu_score_sem": 0.14549366900201757,
      "eval_python_code_alpaca_emb_cos_sim": 0.7626192569732666,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008396583405304378,
      "eval_python_code_alpaca_emb_top1_equal": 0.1328125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8663179874420166,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.08,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.046,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.056,
      "eval_python_code_alpaca_num_pred_words": 44.274,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.572197885178333,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33843861801528086,
      "eval_python_code_alpaca_runtime": 10.0438,
      "eval_python_code_alpaca_samples_per_second": 49.782,
      "eval_python_code_alpaca_steps_per_second": 0.1,
      "eval_python_code_alpaca_token_set_f1": 0.48493233617981535,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005579080368697369,
      "eval_python_code_alpaca_token_set_precision": 0.5509247502750639,
      "eval_python_code_alpaca_token_set_recall": 0.45493196296414046,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 150000
    },
    {
      "epoch": 28.8,
      "eval_wikibio_accuracy": 0.32121875,
      "eval_wikibio_bleu_score": 6.007782878271135,
      "eval_wikibio_bleu_score_sem": 0.21203780962119811,
      "eval_wikibio_emb_cos_sim": 0.7495360970497131,
      "eval_wikibio_emb_cos_sim_sem": 0.009200777543573128,
      "eval_wikibio_emb_top1_equal": 0.2109375,
      "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.74615740776062,
      "eval_wikibio_n_ngrams_match_1": 10.188,
      "eval_wikibio_n_ngrams_match_2": 3.494,
      "eval_wikibio_n_ngrams_match_3": 1.278,
      "eval_wikibio_num_pred_words": 36.532,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 42.35800434149235,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3581564612049972,
      "eval_wikibio_runtime": 9.8952,
      "eval_wikibio_samples_per_second": 50.529,
      "eval_wikibio_steps_per_second": 0.101,
      "eval_wikibio_token_set_f1": 0.32024840846497205,
      "eval_wikibio_token_set_f1_sem": 0.00550340127863008,
      "eval_wikibio_token_set_precision": 0.32988643236768794,
      "eval_wikibio_token_set_recall": 0.32797115482364864,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 150000
    },
    {
      "epoch": 28.8,
      "eval_nq_accuracy": 0.52965625,
      "eval_nq_bleu_score": 11.836088103151667,
      "eval_nq_bleu_score_sem": 0.481278945993792,
      "eval_nq_emb_cos_sim": 0.8382047414779663,
      "eval_nq_emb_cos_sim_sem": 0.007092566497822397,
      "eval_nq_emb_top1_equal": 0.296875,
      "eval_nq_emb_top1_equal_sem": 0.04054163310179599,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1648871898651123,
      "eval_nq_n_ngrams_match_1": 23.172,
      "eval_nq_n_ngrams_match_2": 8.476,
      "eval_nq_n_ngrams_match_3": 3.916,
      "eval_nq_num_pred_words": 49.04,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.713618876882986,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4507962657434307,
      "eval_nq_runtime": 10.7526,
      "eval_nq_samples_per_second": 46.501,
      "eval_nq_steps_per_second": 0.093,
      "eval_nq_token_set_f1": 0.46396604445030754,
      "eval_nq_token_set_f1_sem": 0.004935143530120637,
      "eval_nq_token_set_precision": 0.42081194801954014,
      "eval_nq_token_set_recall": 0.5241043656630668,
      "eval_nq_true_num_tokens": 64.0,
      "step": 150000
    },
    {
      "epoch": 28.8,
      "learning_rate": 0.001,
      "loss": 2.5461,
      "step": 150012
    },
    {
      "epoch": 28.81,
      "learning_rate": 0.001,
      "loss": 2.5439,
      "step": 150024
    },
    {
      "epoch": 28.81,
      "learning_rate": 0.001,
      "loss": 2.5472,
      "step": 150036
    },
    {
      "epoch": 28.81,
      "learning_rate": 0.001,
      "loss": 2.5376,
      "step": 150048
    },
    {
      "epoch": 28.81,
      "learning_rate": 0.001,
      "loss": 2.5433,
      "step": 150060
    },
    {
      "epoch": 28.82,
      "learning_rate": 0.001,
      "loss": 2.5485,
      "step": 150072
    },
    {
      "epoch": 28.82,
      "learning_rate": 0.001,
      "loss": 2.5437,
      "step": 150084
    },
    {
      "epoch": 28.82,
      "learning_rate": 0.001,
      "loss": 2.5572,
      "step": 150096
    },
    {
      "epoch": 28.82,
      "learning_rate": 0.001,
      "loss": 2.5442,
      "step": 150108
    },
    {
      "epoch": 28.82,
      "learning_rate": 0.001,
      "loss": 2.5472,
      "step": 150120
    },
    {
      "epoch": 28.83,
      "learning_rate": 0.001,
      "loss": 2.5411,
      "step": 150132
    },
    {
      "epoch": 28.83,
      "learning_rate": 0.001,
      "loss": 2.5568,
      "step": 150144
    },
    {
      "epoch": 28.83,
      "learning_rate": 0.001,
      "loss": 2.5467,
      "step": 150156
    },
    {
      "epoch": 28.83,
      "learning_rate": 0.001,
      "loss": 2.5437,
      "step": 150168
    },
    {
      "epoch": 28.84,
      "learning_rate": 0.001,
      "loss": 2.5416,
      "step": 150180
    },
    {
      "epoch": 28.84,
      "learning_rate": 0.001,
      "loss": 2.5459,
      "step": 150192
    },
    {
      "epoch": 28.84,
      "learning_rate": 0.001,
      "loss": 2.5442,
      "step": 150204
    },
    {
      "epoch": 28.84,
      "learning_rate": 0.001,
      "loss": 2.545,
      "step": 150216
    },
    {
      "epoch": 28.85,
      "learning_rate": 0.001,
      "loss": 2.555,
      "step": 150228
    },
    {
      "epoch": 28.85,
      "learning_rate": 0.001,
      "loss": 2.5443,
      "step": 150240
    },
    {
      "epoch": 28.85,
      "learning_rate": 0.001,
      "loss": 2.55,
      "step": 150252
    },
    {
      "epoch": 28.85,
      "learning_rate": 0.001,
      "loss": 2.5463,
      "step": 150264
    },
    {
      "epoch": 28.85,
      "learning_rate": 0.001,
      "loss": 2.5438,
      "step": 150276
    },
    {
      "epoch": 28.86,
      "learning_rate": 0.001,
      "loss": 2.5421,
      "step": 150288
    },
    {
      "epoch": 28.86,
      "learning_rate": 0.001,
      "loss": 2.5512,
      "step": 150300
    },
    {
      "epoch": 28.86,
      "learning_rate": 0.001,
      "loss": 2.5415,
      "step": 150312
    },
    {
      "epoch": 28.86,
      "learning_rate": 0.001,
      "loss": 2.544,
      "step": 150324
    },
    {
      "epoch": 28.87,
      "learning_rate": 0.001,
      "loss": 2.5449,
      "step": 150336
    },
    {
      "epoch": 28.87,
      "learning_rate": 0.001,
      "loss": 2.5379,
      "step": 150348
    },
    {
      "epoch": 28.87,
      "learning_rate": 0.001,
      "loss": 2.5422,
      "step": 150360
    },
    {
      "epoch": 28.87,
      "learning_rate": 0.001,
      "loss": 2.5499,
      "step": 150372
    },
    {
      "epoch": 28.88,
      "learning_rate": 0.001,
      "loss": 2.5383,
      "step": 150384
    },
    {
      "epoch": 28.88,
      "learning_rate": 0.001,
      "loss": 2.5522,
      "step": 150396
    },
    {
      "epoch": 28.88,
      "learning_rate": 0.001,
      "loss": 2.5513,
      "step": 150408
    },
    {
      "epoch": 28.88,
      "learning_rate": 0.001,
      "loss": 2.5473,
      "step": 150420
    },
    {
      "epoch": 28.88,
      "learning_rate": 0.001,
      "loss": 2.5456,
      "step": 150432
    },
    {
      "epoch": 28.89,
      "learning_rate": 0.001,
      "loss": 2.5417,
      "step": 150444
    },
    {
      "epoch": 28.89,
      "learning_rate": 0.001,
      "loss": 2.5496,
      "step": 150456
    },
    {
      "epoch": 28.89,
      "learning_rate": 0.001,
      "loss": 2.5448,
      "step": 150468
    },
    {
      "epoch": 28.89,
      "learning_rate": 0.001,
      "loss": 2.5503,
      "step": 150480
    },
    {
      "epoch": 28.9,
      "learning_rate": 0.001,
      "loss": 2.5497,
      "step": 150492
    },
    {
      "epoch": 28.9,
      "learning_rate": 0.001,
      "loss": 2.549,
      "step": 150504
    },
    {
      "epoch": 28.9,
      "learning_rate": 0.001,
      "loss": 2.5487,
      "step": 150516
    },
    {
      "epoch": 28.9,
      "learning_rate": 0.001,
      "loss": 2.5435,
      "step": 150528
    },
    {
      "epoch": 28.91,
      "learning_rate": 0.001,
      "loss": 2.5413,
      "step": 150540
    },
    {
      "epoch": 28.91,
      "learning_rate": 0.001,
      "loss": 2.5405,
      "step": 150552
    },
    {
      "epoch": 28.91,
      "learning_rate": 0.001,
      "loss": 2.5461,
      "step": 150564
    },
    {
      "epoch": 28.91,
      "learning_rate": 0.001,
      "loss": 2.5458,
      "step": 150576
    },
    {
      "epoch": 28.91,
      "learning_rate": 0.001,
      "loss": 2.5521,
      "step": 150588
    },
    {
      "epoch": 28.92,
      "learning_rate": 0.001,
      "loss": 2.5476,
      "step": 150600
    },
    {
      "epoch": 28.92,
      "learning_rate": 0.001,
      "loss": 2.5472,
      "step": 150612
    },
    {
      "epoch": 28.92,
      "learning_rate": 0.001,
      "loss": 2.5565,
      "step": 150624
    },
    {
      "epoch": 28.92,
      "eval_ag_news_accuracy": 0.32484375,
      "eval_ag_news_bleu_score": 5.032894214158689,
      "eval_ag_news_bleu_score_sem": 0.15396842624079832,
      "eval_ag_news_emb_cos_sim": 0.8207932710647583,
      "eval_ag_news_emb_cos_sim_sem": 0.007046064035935705,
      "eval_ag_news_emb_top1_equal": 0.2578125,
      "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5192196369171143,
      "eval_ag_news_n_ngrams_match_1": 14.448,
      "eval_ag_news_n_ngrams_match_2": 3.286,
      "eval_ag_news_n_ngrams_match_3": 0.918,
      "eval_ag_news_num_pred_words": 46.55,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.75807462721796,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3584486115297454,
      "eval_ag_news_runtime": 10.6909,
      "eval_ag_news_samples_per_second": 46.769,
      "eval_ag_news_steps_per_second": 0.094,
      "eval_ag_news_token_set_f1": 0.36137190070270975,
      "eval_ag_news_token_set_f1_sem": 0.004324235428228524,
      "eval_ag_news_token_set_precision": 0.34654550122910605,
      "eval_ag_news_token_set_recall": 0.3927851697798266,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 150625
    },
    {
      "epoch": 28.92,
      "eval_anthropic_toxic_prompts_accuracy": 0.1140625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2254266591380185,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12147323885363766,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.67835533618927,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008829004802670086,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.219359874725342,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.382,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.058,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.76,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.796,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.012104175704447,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21628296184474827,
      "eval_anthropic_toxic_prompts_runtime": 9.9529,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.237,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.1,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3604679689408342,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006571871501617973,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4525535627850996,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3241484427790535,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 150625
    },
    {
      "epoch": 28.92,
      "eval_arxiv_accuracy": 0.35090625,
      "eval_arxiv_bleu_score": 4.613563488489597,
      "eval_arxiv_bleu_score_sem": 0.13203620394768947,
      "eval_arxiv_emb_cos_sim": 0.7815990447998047,
      "eval_arxiv_emb_cos_sim_sem": 0.006599197464787395,
      "eval_arxiv_emb_top1_equal": 0.3046875,
      "eval_arxiv_emb_top1_equal_sem": 0.04084279867618665,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3649280071258545,
      "eval_arxiv_n_ngrams_match_1": 15.636,
      "eval_arxiv_n_ngrams_match_2": 3.14,
      "eval_arxiv_n_ngrams_match_3": 0.748,
      "eval_arxiv_num_pred_words": 41.43,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.9314143683995,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36972113661200434,
      "eval_arxiv_runtime": 10.4743,
      "eval_arxiv_samples_per_second": 47.736,
      "eval_arxiv_steps_per_second": 0.095,
      "eval_arxiv_token_set_f1": 0.36380014420776124,
      "eval_arxiv_token_set_f1_sem": 0.004212665916970724,
      "eval_arxiv_token_set_precision": 0.31665380363810286,
      "eval_arxiv_token_set_recall": 0.44348842155375345,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 150625
    },
    {
      "epoch": 28.92,
      "eval_python_code_alpaca_accuracy": 0.16103125,
      "eval_python_code_alpaca_bleu_score": 4.778076875730435,
      "eval_python_code_alpaca_bleu_score_sem": 0.1524799074416788,
      "eval_python_code_alpaca_emb_cos_sim": 0.7549819946289062,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.00910292902494353,
      "eval_python_code_alpaca_emb_top1_equal": 0.1640625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.882640838623047,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.906,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.056,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.056,
      "eval_python_code_alpaca_num_pred_words": 43.728,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.861379973609917,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3354974984194967,
      "eval_python_code_alpaca_runtime": 10.0086,
      "eval_python_code_alpaca_samples_per_second": 49.957,
      "eval_python_code_alpaca_steps_per_second": 0.1,
      "eval_python_code_alpaca_token_set_f1": 0.47508127806752826,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0054941455858405425,
      "eval_python_code_alpaca_token_set_precision": 0.5439744361355182,
      "eval_python_code_alpaca_token_set_recall": 0.441518002428255,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 150625
    },
    {
      "epoch": 28.92,
      "eval_wikibio_accuracy": 0.32446875,
      "eval_wikibio_bleu_score": 5.6012696989931285,
      "eval_wikibio_bleu_score_sem": 0.19973431937002056,
      "eval_wikibio_emb_cos_sim": 0.7597984075546265,
      "eval_wikibio_emb_cos_sim_sem": 0.008000335199217652,
      "eval_wikibio_emb_top1_equal": 0.1875,
      "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7035703659057617,
      "eval_wikibio_n_ngrams_match_1": 9.932,
      "eval_wikibio_n_ngrams_match_2": 3.232,
      "eval_wikibio_n_ngrams_match_3": 1.158,
      "eval_wikibio_num_pred_words": 36.442,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 40.59197414488657,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3504889782888023,
      "eval_wikibio_runtime": 10.0745,
      "eval_wikibio_samples_per_second": 49.63,
      "eval_wikibio_steps_per_second": 0.099,
      "eval_wikibio_token_set_f1": 0.31785949312849954,
      "eval_wikibio_token_set_f1_sem": 0.0053684517034468734,
      "eval_wikibio_token_set_precision": 0.324917014408283,
      "eval_wikibio_token_set_recall": 0.32865650845956945,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 150625
    },
    {
      "epoch": 28.92,
      "eval_nq_accuracy": 0.52971875,
      "eval_nq_bleu_score": 11.767434355091405,
      "eval_nq_bleu_score_sem": 0.4767528394721157,
      "eval_nq_emb_cos_sim": 0.8367584943771362,
      "eval_nq_emb_cos_sim_sem": 0.00808737436829322,
      "eval_nq_emb_top1_equal": 0.265625,
      "eval_nq_emb_top1_equal_sem": 0.03919146934646163,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1646652221679688,
      "eval_nq_n_ngrams_match_1": 23.14,
      "eval_nq_n_ngrams_match_2": 8.458,
      "eval_nq_n_ngrams_match_3": 3.94,
      "eval_nq_num_pred_words": 49.108,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.71168494960973,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4493522713913332,
      "eval_nq_runtime": 10.8307,
      "eval_nq_samples_per_second": 46.165,
      "eval_nq_steps_per_second": 0.092,
      "eval_nq_token_set_f1": 0.46213798915874227,
      "eval_nq_token_set_f1_sem": 0.0048903057913209326,
      "eval_nq_token_set_precision": 0.41949699954193087,
      "eval_nq_token_set_recall": 0.5234541896359106,
      "eval_nq_true_num_tokens": 64.0,
      "step": 150625
    },
    {
      "epoch": 28.92,
      "learning_rate": 0.001,
      "loss": 2.5482,
      "step": 150636
    },
    {
      "epoch": 28.93,
      "learning_rate": 0.001,
      "loss": 2.5355,
      "step": 150648
    },
    {
      "epoch": 28.93,
      "learning_rate": 0.001,
      "loss": 2.5374,
      "step": 150660
    },
    {
      "epoch": 28.93,
      "learning_rate": 0.001,
      "loss": 2.5314,
      "step": 150672
    },
    {
      "epoch": 28.93,
      "learning_rate": 0.001,
      "loss": 2.5472,
      "step": 150684
    },
    {
      "epoch": 28.94,
      "learning_rate": 0.001,
      "loss": 2.5384,
      "step": 150696
    },
    {
      "epoch": 28.94,
      "learning_rate": 0.001,
      "loss": 2.5426,
      "step": 150708
    },
    {
      "epoch": 28.94,
      "learning_rate": 0.001,
      "loss": 2.5484,
      "step": 150720
    },
    {
      "epoch": 28.94,
      "learning_rate": 0.001,
      "loss": 2.5528,
      "step": 150732
    },
    {
      "epoch": 28.94,
      "learning_rate": 0.001,
      "loss": 2.5344,
      "step": 150744
    },
    {
      "epoch": 28.95,
      "learning_rate": 0.001,
      "loss": 2.5384,
      "step": 150756
    },
    {
      "epoch": 28.95,
      "learning_rate": 0.001,
      "loss": 2.5318,
      "step": 150768
    },
    {
      "epoch": 28.95,
      "learning_rate": 0.001,
      "loss": 2.5415,
      "step": 150780
    },
    {
      "epoch": 28.95,
      "learning_rate": 0.001,
      "loss": 2.5446,
      "step": 150792
    },
    {
      "epoch": 28.96,
      "learning_rate": 0.001,
      "loss": 2.5438,
      "step": 150804
    },
    {
      "epoch": 28.96,
      "learning_rate": 0.001,
      "loss": 2.5379,
      "step": 150816
    },
    {
      "epoch": 28.96,
      "learning_rate": 0.001,
      "loss": 2.544,
      "step": 150828
    },
    {
      "epoch": 28.96,
      "learning_rate": 0.001,
      "loss": 2.5372,
      "step": 150840
    },
    {
      "epoch": 28.97,
      "learning_rate": 0.001,
      "loss": 2.5396,
      "step": 150852
    },
    {
      "epoch": 28.97,
      "learning_rate": 0.001,
      "loss": 2.5463,
      "step": 150864
    },
    {
      "epoch": 28.97,
      "learning_rate": 0.001,
      "loss": 2.5496,
      "step": 150876
    },
    {
      "epoch": 28.97,
      "learning_rate": 0.001,
      "loss": 2.5415,
      "step": 150888
    },
    {
      "epoch": 28.97,
      "learning_rate": 0.001,
      "loss": 2.544,
      "step": 150900
    },
    {
      "epoch": 28.98,
      "learning_rate": 0.001,
      "loss": 2.5448,
      "step": 150912
    },
    {
      "epoch": 28.98,
      "learning_rate": 0.001,
      "loss": 2.5476,
      "step": 150924
    },
    {
      "epoch": 28.98,
      "learning_rate": 0.001,
      "loss": 2.5536,
      "step": 150936
    },
    {
      "epoch": 28.98,
      "learning_rate": 0.001,
      "loss": 2.55,
      "step": 150948
    },
    {
      "epoch": 28.99,
      "learning_rate": 0.001,
      "loss": 2.5567,
      "step": 150960
    },
    {
      "epoch": 28.99,
      "learning_rate": 0.001,
      "loss": 2.5452,
      "step": 150972
    },
    {
      "epoch": 28.99,
      "learning_rate": 0.001,
      "loss": 2.5576,
      "step": 150984
    },
    {
      "epoch": 28.99,
      "learning_rate": 0.001,
      "loss": 2.5472,
      "step": 150996
    },
    {
      "epoch": 29.0,
      "learning_rate": 0.001,
      "loss": 2.5341,
      "step": 151008
    },
    {
      "epoch": 29.0,
      "learning_rate": 0.001,
      "loss": 2.5476,
      "step": 151020
    },
    {
      "epoch": 29.0,
      "learning_rate": 0.001,
      "loss": 2.5569,
      "step": 151032
    },
    {
      "epoch": 29.0,
      "learning_rate": 0.001,
      "loss": 2.5311,
      "step": 151044
    },
    {
      "epoch": 29.0,
      "learning_rate": 0.001,
      "loss": 2.5384,
      "step": 151056
    },
    {
      "epoch": 29.01,
      "learning_rate": 0.001,
      "loss": 2.5299,
      "step": 151068
    },
    {
      "epoch": 29.01,
      "learning_rate": 0.001,
      "loss": 2.5156,
      "step": 151080
    },
    {
      "epoch": 29.01,
      "learning_rate": 0.001,
      "loss": 2.5281,
      "step": 151092
    },
    {
      "epoch": 29.01,
      "learning_rate": 0.001,
      "loss": 2.5237,
      "step": 151104
    },
    {
      "epoch": 29.02,
      "learning_rate": 0.001,
      "loss": 2.5177,
      "step": 151116
    },
    {
      "epoch": 29.02,
      "learning_rate": 0.001,
      "loss": 2.5295,
      "step": 151128
    },
    {
      "epoch": 29.02,
      "learning_rate": 0.001,
      "loss": 2.5181,
      "step": 151140
    },
    {
      "epoch": 29.02,
      "learning_rate": 0.001,
      "loss": 2.5274,
      "step": 151152
    },
    {
      "epoch": 29.03,
      "learning_rate": 0.001,
      "loss": 2.529,
      "step": 151164
    },
    {
      "epoch": 29.03,
      "learning_rate": 0.001,
      "loss": 2.5299,
      "step": 151176
    },
    {
      "epoch": 29.03,
      "learning_rate": 0.001,
      "loss": 2.5282,
      "step": 151188
    },
    {
      "epoch": 29.03,
      "learning_rate": 0.001,
      "loss": 2.5195,
      "step": 151200
    },
    {
      "epoch": 29.03,
      "learning_rate": 0.001,
      "loss": 2.534,
      "step": 151212
    },
    {
      "epoch": 29.04,
      "learning_rate": 0.001,
      "loss": 2.5374,
      "step": 151224
    },
    {
      "epoch": 29.04,
      "learning_rate": 0.001,
      "loss": 2.5287,
      "step": 151236
    },
    {
      "epoch": 29.04,
      "learning_rate": 0.001,
      "loss": 2.5347,
      "step": 151248
    },
    {
      "epoch": 29.04,
      "eval_ag_news_accuracy": 0.3245,
      "eval_ag_news_bleu_score": 4.844542290952034,
      "eval_ag_news_bleu_score_sem": 0.1438908421436427,
      "eval_ag_news_emb_cos_sim": 0.8165936470031738,
      "eval_ag_news_emb_cos_sim_sem": 0.007086061395172543,
      "eval_ag_news_emb_top1_equal": 0.21875,
      "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.522404670715332,
      "eval_ag_news_n_ngrams_match_1": 14.45,
      "eval_ag_news_n_ngrams_match_2": 3.27,
      "eval_ag_news_n_ngrams_match_3": 0.912,
      "eval_ag_news_num_pred_words": 46.952,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.86576664618892,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3578247568636002,
      "eval_ag_news_runtime": 10.6168,
      "eval_ag_news_samples_per_second": 47.095,
      "eval_ag_news_steps_per_second": 0.094,
      "eval_ag_news_token_set_f1": 0.3582750992665711,
      "eval_ag_news_token_set_f1_sem": 0.004282706102249708,
      "eval_ag_news_token_set_precision": 0.34616911542423856,
      "eval_ag_news_token_set_recall": 0.3862545126528324,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 151250
    },
    {
      "epoch": 29.04,
      "eval_anthropic_toxic_prompts_accuracy": 0.11459375,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2409256335003986,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12571358938033889,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6790622472763062,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009037667264202444,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2250733375549316,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.348,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.044,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.786,
      "eval_anthropic_toxic_prompts_num_pred_words": 48.104,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.15541892508935,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21603219162426923,
      "eval_anthropic_toxic_prompts_runtime": 10.2764,
      "eval_anthropic_toxic_prompts_samples_per_second": 48.655,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.097,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3552492838968309,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006414730604567983,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44905100122164215,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3194184182004095,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 151250
    },
    {
      "epoch": 29.04,
      "eval_arxiv_accuracy": 0.35028125,
      "eval_arxiv_bleu_score": 4.420792034312053,
      "eval_arxiv_bleu_score_sem": 0.12908881180779902,
      "eval_arxiv_emb_cos_sim": 0.7626398205757141,
      "eval_arxiv_emb_cos_sim_sem": 0.009779892364270019,
      "eval_arxiv_emb_top1_equal": 0.2421875,
      "eval_arxiv_emb_top1_equal_sem": 0.038014990119662626,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3740620613098145,
      "eval_arxiv_n_ngrams_match_1": 15.22,
      "eval_arxiv_n_ngrams_match_2": 3.034,
      "eval_arxiv_n_ngrams_match_3": 0.696,
      "eval_arxiv_num_pred_words": 40.434,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.196886045490295,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36513158991505124,
      "eval_arxiv_runtime": 10.3092,
      "eval_arxiv_samples_per_second": 48.501,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.35699568139393784,
      "eval_arxiv_token_set_f1_sem": 0.004389136280561084,
      "eval_arxiv_token_set_precision": 0.3102832813633865,
      "eval_arxiv_token_set_recall": 0.43479062949989744,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 151250
    },
    {
      "epoch": 29.04,
      "eval_python_code_alpaca_accuracy": 0.161,
      "eval_python_code_alpaca_bleu_score": 4.604428890700413,
      "eval_python_code_alpaca_bleu_score_sem": 0.1443318173906819,
      "eval_python_code_alpaca_emb_cos_sim": 0.7585429549217224,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008154448443520276,
      "eval_python_code_alpaca_emb_top1_equal": 0.1484375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8876655101776123,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.912,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.892,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.982,
      "eval_python_code_alpaca_num_pred_words": 43.624,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.95135339563683,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3371788273305638,
      "eval_python_code_alpaca_runtime": 10.0568,
      "eval_python_code_alpaca_samples_per_second": 49.718,
      "eval_python_code_alpaca_steps_per_second": 0.099,
      "eval_python_code_alpaca_token_set_f1": 0.4795367750842602,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005504445672513359,
      "eval_python_code_alpaca_token_set_precision": 0.5424712281321773,
      "eval_python_code_alpaca_token_set_recall": 0.44800150958865204,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 151250
    },
    {
      "epoch": 29.04,
      "eval_wikibio_accuracy": 0.3276875,
      "eval_wikibio_bleu_score": 6.104190922202895,
      "eval_wikibio_bleu_score_sem": 0.2098002540460547,
      "eval_wikibio_emb_cos_sim": 0.7561072111129761,
      "eval_wikibio_emb_cos_sim_sem": 0.007763181468091931,
      "eval_wikibio_emb_top1_equal": 0.203125,
      "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7275009155273438,
      "eval_wikibio_n_ngrams_match_1": 10.422,
      "eval_wikibio_n_ngrams_match_2": 3.576,
      "eval_wikibio_n_ngrams_match_3": 1.276,
      "eval_wikibio_num_pred_words": 36.608,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 41.57507859546454,
      "eval_wikibio_pred_num_tokens": 62.9609375,
      "eval_wikibio_rouge_score": 0.36571155693257246,
      "eval_wikibio_runtime": 10.1138,
      "eval_wikibio_samples_per_second": 49.437,
      "eval_wikibio_steps_per_second": 0.099,
      "eval_wikibio_token_set_f1": 0.32794561751383144,
      "eval_wikibio_token_set_f1_sem": 0.005204679436361614,
      "eval_wikibio_token_set_precision": 0.3378503939148683,
      "eval_wikibio_token_set_recall": 0.33316220151351056,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 151250
    },
    {
      "epoch": 29.04,
      "eval_nq_accuracy": 0.5314375,
      "eval_nq_bleu_score": 11.71077779044643,
      "eval_nq_bleu_score_sem": 0.47619231100809106,
      "eval_nq_emb_cos_sim": 0.8266245722770691,
      "eval_nq_emb_cos_sim_sem": 0.007954045447207039,
      "eval_nq_emb_top1_equal": 0.21875,
      "eval_nq_emb_top1_equal_sem": 0.03668319712192295,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.164546489715576,
      "eval_nq_n_ngrams_match_1": 23.03,
      "eval_nq_n_ngrams_match_2": 8.458,
      "eval_nq_n_ngrams_match_3": 3.884,
      "eval_nq_num_pred_words": 49.078,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.710650651294795,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.44635495070588516,
      "eval_nq_runtime": 10.4427,
      "eval_nq_samples_per_second": 47.88,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.4605788623881391,
      "eval_nq_token_set_f1_sem": 0.004967753861172408,
      "eval_nq_token_set_precision": 0.4196247129328556,
      "eval_nq_token_set_recall": 0.518356159964923,
      "eval_nq_true_num_tokens": 64.0,
      "step": 151250
    },
    {
      "epoch": 29.04,
      "learning_rate": 0.001,
      "loss": 2.5244,
      "step": 151260
    },
    {
      "epoch": 29.05,
      "learning_rate": 0.001,
      "loss": 2.5245,
      "step": 151272
    },
    {
      "epoch": 29.05,
      "learning_rate": 0.001,
      "loss": 2.5217,
      "step": 151284
    },
    {
      "epoch": 29.05,
      "learning_rate": 0.001,
      "loss": 2.5337,
      "step": 151296
    },
    {
      "epoch": 29.05,
      "learning_rate": 0.001,
      "loss": 2.5301,
      "step": 151308
    },
    {
      "epoch": 29.06,
      "learning_rate": 0.001,
      "loss": 2.5179,
      "step": 151320
    },
    {
      "epoch": 29.06,
      "learning_rate": 0.001,
      "loss": 2.5264,
      "step": 151332
    },
    {
      "epoch": 29.06,
      "learning_rate": 0.001,
      "loss": 2.5273,
      "step": 151344
    },
    {
      "epoch": 29.06,
      "learning_rate": 0.001,
      "loss": 2.5169,
      "step": 151356
    },
    {
      "epoch": 29.06,
      "learning_rate": 0.001,
      "loss": 2.5283,
      "step": 151368
    },
    {
      "epoch": 29.07,
      "learning_rate": 0.001,
      "loss": 2.5361,
      "step": 151380
    },
    {
      "epoch": 29.07,
      "learning_rate": 0.001,
      "loss": 2.5184,
      "step": 151392
    },
    {
      "epoch": 29.07,
      "learning_rate": 0.001,
      "loss": 2.5266,
      "step": 151404
    },
    {
      "epoch": 29.07,
      "learning_rate": 0.001,
      "loss": 2.5361,
      "step": 151416
    },
    {
      "epoch": 29.08,
      "learning_rate": 0.001,
      "loss": 2.5266,
      "step": 151428
    },
    {
      "epoch": 29.08,
      "learning_rate": 0.001,
      "loss": 2.5206,
      "step": 151440
    },
    {
      "epoch": 29.08,
      "learning_rate": 0.001,
      "loss": 2.5217,
      "step": 151452
    },
    {
      "epoch": 29.08,
      "learning_rate": 0.001,
      "loss": 2.53,
      "step": 151464
    },
    {
      "epoch": 29.09,
      "learning_rate": 0.001,
      "loss": 2.535,
      "step": 151476
    },
    {
      "epoch": 29.09,
      "learning_rate": 0.001,
      "loss": 2.5392,
      "step": 151488
    },
    {
      "epoch": 29.09,
      "learning_rate": 0.001,
      "loss": 2.5296,
      "step": 151500
    },
    {
      "epoch": 29.09,
      "learning_rate": 0.001,
      "loss": 2.5331,
      "step": 151512
    },
    {
      "epoch": 29.09,
      "learning_rate": 0.001,
      "loss": 2.5319,
      "step": 151524
    },
    {
      "epoch": 29.1,
      "learning_rate": 0.001,
      "loss": 2.5273,
      "step": 151536
    },
    {
      "epoch": 29.1,
      "learning_rate": 0.001,
      "loss": 2.5283,
      "step": 151548
    },
    {
      "epoch": 29.1,
      "learning_rate": 0.001,
      "loss": 2.5343,
      "step": 151560
    },
    {
      "epoch": 29.1,
      "learning_rate": 0.001,
      "loss": 2.5331,
      "step": 151572
    },
    {
      "epoch": 29.11,
      "learning_rate": 0.001,
      "loss": 2.5268,
      "step": 151584
    },
    {
      "epoch": 29.11,
      "learning_rate": 0.001,
      "loss": 2.5263,
      "step": 151596
    },
    {
      "epoch": 29.11,
      "learning_rate": 0.001,
      "loss": 2.5348,
      "step": 151608
    },
    {
      "epoch": 29.11,
      "learning_rate": 0.001,
      "loss": 2.5278,
      "step": 151620
    },
    {
      "epoch": 29.12,
      "learning_rate": 0.001,
      "loss": 2.529,
      "step": 151632
    },
    {
      "epoch": 29.12,
      "learning_rate": 0.001,
      "loss": 2.5368,
      "step": 151644
    },
    {
      "epoch": 29.12,
      "learning_rate": 0.001,
      "loss": 2.5346,
      "step": 151656
    },
    {
      "epoch": 29.12,
      "learning_rate": 0.001,
      "loss": 2.5388,
      "step": 151668
    },
    {
      "epoch": 29.12,
      "learning_rate": 0.001,
      "loss": 2.5271,
      "step": 151680
    },
    {
      "epoch": 29.13,
      "learning_rate": 0.001,
      "loss": 2.528,
      "step": 151692
    },
    {
      "epoch": 29.13,
      "learning_rate": 0.001,
      "loss": 2.5302,
      "step": 151704
    },
    {
      "epoch": 29.13,
      "learning_rate": 0.001,
      "loss": 2.5436,
      "step": 151716
    },
    {
      "epoch": 29.13,
      "learning_rate": 0.001,
      "loss": 2.5347,
      "step": 151728
    },
    {
      "epoch": 29.14,
      "learning_rate": 0.001,
      "loss": 2.5237,
      "step": 151740
    },
    {
      "epoch": 29.14,
      "learning_rate": 0.001,
      "loss": 2.5271,
      "step": 151752
    },
    {
      "epoch": 29.14,
      "learning_rate": 0.001,
      "loss": 2.5312,
      "step": 151764
    },
    {
      "epoch": 29.14,
      "learning_rate": 0.001,
      "loss": 2.5425,
      "step": 151776
    },
    {
      "epoch": 29.15,
      "learning_rate": 0.001,
      "loss": 2.5345,
      "step": 151788
    },
    {
      "epoch": 29.15,
      "learning_rate": 0.001,
      "loss": 2.5285,
      "step": 151800
    },
    {
      "epoch": 29.15,
      "learning_rate": 0.001,
      "loss": 2.5365,
      "step": 151812
    },
    {
      "epoch": 29.15,
      "learning_rate": 0.001,
      "loss": 2.5308,
      "step": 151824
    },
    {
      "epoch": 29.15,
      "learning_rate": 0.001,
      "loss": 2.5356,
      "step": 151836
    },
    {
      "epoch": 29.16,
      "learning_rate": 0.001,
      "loss": 2.524,
      "step": 151848
    },
    {
      "epoch": 29.16,
      "learning_rate": 0.001,
      "loss": 2.5281,
      "step": 151860
    },
    {
      "epoch": 29.16,
      "learning_rate": 0.001,
      "loss": 2.5337,
      "step": 151872
    },
    {
      "epoch": 29.16,
      "eval_ag_news_accuracy": 0.3241875,
      "eval_ag_news_bleu_score": 5.000084158728007,
      "eval_ag_news_bleu_score_sem": 0.1589416555240998,
      "eval_ag_news_emb_cos_sim": 0.817887008190155,
      "eval_ag_news_emb_cos_sim_sem": 0.006153123122682188,
      "eval_ag_news_emb_top1_equal": 0.2421875,
      "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.51975154876709,
      "eval_ag_news_n_ngrams_match_1": 14.144,
      "eval_ag_news_n_ngrams_match_2": 3.234,
      "eval_ag_news_n_ngrams_match_3": 0.95,
      "eval_ag_news_num_pred_words": 46.55,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.77603572358105,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3541474022600284,
      "eval_ag_news_runtime": 10.589,
      "eval_ag_news_samples_per_second": 47.219,
      "eval_ag_news_steps_per_second": 0.094,
      "eval_ag_news_token_set_f1": 0.35428814364454886,
      "eval_ag_news_token_set_f1_sem": 0.004478389152801508,
      "eval_ag_news_token_set_precision": 0.3374124046608325,
      "eval_ag_news_token_set_recall": 0.3881005435868758,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 151875
    },
    {
      "epoch": 29.16,
      "eval_anthropic_toxic_prompts_accuracy": 0.1148125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1510851672400513,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11870211895287368,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6789723634719849,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009058141412955627,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2354660034179688,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.226,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.992,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.758,
      "eval_anthropic_toxic_prompts_num_pred_words": 48.186,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.418213993977535,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21109292456923273,
      "eval_anthropic_toxic_prompts_runtime": 10.1236,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.39,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.099,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3611426957589993,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006721470213182206,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44139786066338826,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3322797000601225,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 151875
    },
    {
      "epoch": 29.16,
      "eval_arxiv_accuracy": 0.35125,
      "eval_arxiv_bleu_score": 4.538700267590577,
      "eval_arxiv_bleu_score_sem": 0.13352372088967007,
      "eval_arxiv_emb_cos_sim": 0.7694689035415649,
      "eval_arxiv_emb_cos_sim_sem": 0.009277152242886423,
      "eval_arxiv_emb_top1_equal": 0.2890625,
      "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3711049556732178,
      "eval_arxiv_n_ngrams_match_1": 15.302,
      "eval_arxiv_n_ngrams_match_2": 3.124,
      "eval_arxiv_n_ngrams_match_3": 0.75,
      "eval_arxiv_num_pred_words": 40.328,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.110675299158352,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36464202466603113,
      "eval_arxiv_runtime": 10.899,
      "eval_arxiv_samples_per_second": 45.876,
      "eval_arxiv_steps_per_second": 0.092,
      "eval_arxiv_token_set_f1": 0.357238884925682,
      "eval_arxiv_token_set_f1_sem": 0.004458090399046446,
      "eval_arxiv_token_set_precision": 0.3092882860631226,
      "eval_arxiv_token_set_recall": 0.4381859232065277,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 151875
    },
    {
      "epoch": 29.16,
      "eval_python_code_alpaca_accuracy": 0.160625,
      "eval_python_code_alpaca_bleu_score": 4.545048138710379,
      "eval_python_code_alpaca_bleu_score_sem": 0.1453256164865113,
      "eval_python_code_alpaca_emb_cos_sim": 0.7475377917289734,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009837034271227679,
      "eval_python_code_alpaca_emb_top1_equal": 0.1328125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8917899131774902,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.738,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.862,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.986,
      "eval_python_code_alpaca_num_pred_words": 43.544,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.025544904102944,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33105729840874065,
      "eval_python_code_alpaca_runtime": 9.9601,
      "eval_python_code_alpaca_samples_per_second": 50.2,
      "eval_python_code_alpaca_steps_per_second": 0.1,
      "eval_python_code_alpaca_token_set_f1": 0.4723053265610159,
      "eval_python_code_alpaca_token_set_f1_sem": 0.006014967568017886,
      "eval_python_code_alpaca_token_set_precision": 0.5321521219672057,
      "eval_python_code_alpaca_token_set_recall": 0.4538138564617063,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 151875
    },
    {
      "epoch": 29.16,
      "eval_wikibio_accuracy": 0.3276875,
      "eval_wikibio_bleu_score": 6.086745486686603,
      "eval_wikibio_bleu_score_sem": 0.2122527282361847,
      "eval_wikibio_emb_cos_sim": 0.7429611682891846,
      "eval_wikibio_emb_cos_sim_sem": 0.008319091814505544,
      "eval_wikibio_emb_top1_equal": 0.1953125,
      "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7398264408111572,
      "eval_wikibio_n_ngrams_match_1": 10.28,
      "eval_wikibio_n_ngrams_match_2": 3.468,
      "eval_wikibio_n_ngrams_match_3": 1.268,
      "eval_wikibio_num_pred_words": 36.462,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 42.090684305989775,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36243806599945533,
      "eval_wikibio_runtime": 10.2282,
      "eval_wikibio_samples_per_second": 48.885,
      "eval_wikibio_steps_per_second": 0.098,
      "eval_wikibio_token_set_f1": 0.3258695637764661,
      "eval_wikibio_token_set_f1_sem": 0.005100899580146746,
      "eval_wikibio_token_set_precision": 0.335925905970325,
      "eval_wikibio_token_set_recall": 0.3308497906325442,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 151875
    },
    {
      "epoch": 29.16,
      "eval_nq_accuracy": 0.53078125,
      "eval_nq_bleu_score": 11.680687711048577,
      "eval_nq_bleu_score_sem": 0.47933077030978827,
      "eval_nq_emb_cos_sim": 0.8354138731956482,
      "eval_nq_emb_cos_sim_sem": 0.0065807114619038124,
      "eval_nq_emb_top1_equal": 0.3046875,
      "eval_nq_emb_top1_equal_sem": 0.04084279867618665,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.163125991821289,
      "eval_nq_n_ngrams_match_1": 23.024,
      "eval_nq_n_ngrams_match_2": 8.48,
      "eval_nq_n_ngrams_match_3": 3.9,
      "eval_nq_num_pred_words": 48.906,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.698285974464595,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4476638171724451,
      "eval_nq_runtime": 10.3472,
      "eval_nq_samples_per_second": 48.322,
      "eval_nq_steps_per_second": 0.097,
      "eval_nq_token_set_f1": 0.46071239013233994,
      "eval_nq_token_set_f1_sem": 0.005077971257057672,
      "eval_nq_token_set_precision": 0.4185660630895155,
      "eval_nq_token_set_recall": 0.5199926249950194,
      "eval_nq_true_num_tokens": 64.0,
      "step": 151875
    },
    {
      "epoch": 29.16,
      "learning_rate": 0.001,
      "loss": 2.5317,
      "step": 151884
    },
    {
      "epoch": 29.17,
      "learning_rate": 0.001,
      "loss": 2.52,
      "step": 151896
    },
    {
      "epoch": 29.17,
      "learning_rate": 0.001,
      "loss": 2.5266,
      "step": 151908
    },
    {
      "epoch": 29.17,
      "learning_rate": 0.001,
      "loss": 2.5393,
      "step": 151920
    },
    {
      "epoch": 29.17,
      "learning_rate": 0.001,
      "loss": 2.5375,
      "step": 151932
    },
    {
      "epoch": 29.18,
      "learning_rate": 0.001,
      "loss": 2.5261,
      "step": 151944
    },
    {
      "epoch": 29.18,
      "learning_rate": 0.001,
      "loss": 2.5321,
      "step": 151956
    },
    {
      "epoch": 29.18,
      "learning_rate": 0.001,
      "loss": 2.5309,
      "step": 151968
    },
    {
      "epoch": 29.18,
      "learning_rate": 0.001,
      "loss": 2.5254,
      "step": 151980
    },
    {
      "epoch": 29.18,
      "learning_rate": 0.001,
      "loss": 2.5432,
      "step": 151992
    },
    {
      "epoch": 29.19,
      "learning_rate": 0.001,
      "loss": 2.5323,
      "step": 152004
    },
    {
      "epoch": 29.19,
      "learning_rate": 0.001,
      "loss": 2.5386,
      "step": 152016
    },
    {
      "epoch": 29.19,
      "learning_rate": 0.001,
      "loss": 2.5255,
      "step": 152028
    },
    {
      "epoch": 29.19,
      "learning_rate": 0.001,
      "loss": 2.5317,
      "step": 152040
    },
    {
      "epoch": 29.2,
      "learning_rate": 0.001,
      "loss": 2.5431,
      "step": 152052
    },
    {
      "epoch": 29.2,
      "learning_rate": 0.001,
      "loss": 2.5364,
      "step": 152064
    },
    {
      "epoch": 29.2,
      "learning_rate": 0.001,
      "loss": 2.5277,
      "step": 152076
    },
    {
      "epoch": 29.2,
      "learning_rate": 0.001,
      "loss": 2.5306,
      "step": 152088
    },
    {
      "epoch": 29.21,
      "learning_rate": 0.001,
      "loss": 2.5384,
      "step": 152100
    },
    {
      "epoch": 29.21,
      "learning_rate": 0.001,
      "loss": 2.5227,
      "step": 152112
    },
    {
      "epoch": 29.21,
      "learning_rate": 0.001,
      "loss": 2.532,
      "step": 152124
    },
    {
      "epoch": 29.21,
      "learning_rate": 0.001,
      "loss": 2.5341,
      "step": 152136
    },
    {
      "epoch": 29.21,
      "learning_rate": 0.001,
      "loss": 2.5299,
      "step": 152148
    },
    {
      "epoch": 29.22,
      "learning_rate": 0.001,
      "loss": 2.5327,
      "step": 152160
    },
    {
      "epoch": 29.22,
      "learning_rate": 0.001,
      "loss": 2.5329,
      "step": 152172
    },
    {
      "epoch": 29.22,
      "learning_rate": 0.001,
      "loss": 2.5378,
      "step": 152184
    },
    {
      "epoch": 29.22,
      "learning_rate": 0.001,
      "loss": 2.5376,
      "step": 152196
    },
    {
      "epoch": 29.23,
      "learning_rate": 0.001,
      "loss": 2.5288,
      "step": 152208
    },
    {
      "epoch": 29.23,
      "learning_rate": 0.001,
      "loss": 2.5301,
      "step": 152220
    },
    {
      "epoch": 29.23,
      "learning_rate": 0.001,
      "loss": 2.5331,
      "step": 152232
    },
    {
      "epoch": 29.23,
      "learning_rate": 0.001,
      "loss": 2.5395,
      "step": 152244
    },
    {
      "epoch": 29.24,
      "learning_rate": 0.001,
      "loss": 2.5368,
      "step": 152256
    },
    {
      "epoch": 29.24,
      "learning_rate": 0.001,
      "loss": 2.5318,
      "step": 152268
    },
    {
      "epoch": 29.24,
      "learning_rate": 0.001,
      "loss": 2.5376,
      "step": 152280
    },
    {
      "epoch": 29.24,
      "learning_rate": 0.001,
      "loss": 2.5348,
      "step": 152292
    },
    {
      "epoch": 29.24,
      "learning_rate": 0.001,
      "loss": 2.5332,
      "step": 152304
    },
    {
      "epoch": 29.25,
      "learning_rate": 0.001,
      "loss": 2.5256,
      "step": 152316
    },
    {
      "epoch": 29.25,
      "learning_rate": 0.001,
      "loss": 2.5351,
      "step": 152328
    },
    {
      "epoch": 29.25,
      "learning_rate": 0.001,
      "loss": 2.544,
      "step": 152340
    },
    {
      "epoch": 29.25,
      "learning_rate": 0.001,
      "loss": 2.5265,
      "step": 152352
    },
    {
      "epoch": 29.26,
      "learning_rate": 0.001,
      "loss": 2.5325,
      "step": 152364
    },
    {
      "epoch": 29.26,
      "learning_rate": 0.001,
      "loss": 2.5382,
      "step": 152376
    },
    {
      "epoch": 29.26,
      "learning_rate": 0.001,
      "loss": 2.5398,
      "step": 152388
    },
    {
      "epoch": 29.26,
      "learning_rate": 0.001,
      "loss": 2.524,
      "step": 152400
    },
    {
      "epoch": 29.26,
      "learning_rate": 0.001,
      "loss": 2.5398,
      "step": 152412
    },
    {
      "epoch": 29.27,
      "learning_rate": 0.001,
      "loss": 2.5366,
      "step": 152424
    },
    {
      "epoch": 29.27,
      "learning_rate": 0.001,
      "loss": 2.5282,
      "step": 152436
    },
    {
      "epoch": 29.27,
      "learning_rate": 0.001,
      "loss": 2.5317,
      "step": 152448
    },
    {
      "epoch": 29.27,
      "learning_rate": 0.001,
      "loss": 2.5351,
      "step": 152460
    },
    {
      "epoch": 29.28,
      "learning_rate": 0.001,
      "loss": 2.5366,
      "step": 152472
    },
    {
      "epoch": 29.28,
      "learning_rate": 0.001,
      "loss": 2.5367,
      "step": 152484
    },
    {
      "epoch": 29.28,
      "learning_rate": 0.001,
      "loss": 2.5276,
      "step": 152496
    },
    {
      "epoch": 29.28,
      "eval_ag_news_accuracy": 0.32571875,
      "eval_ag_news_bleu_score": 4.8357952399709845,
      "eval_ag_news_bleu_score_sem": 0.15108849088612306,
      "eval_ag_news_emb_cos_sim": 0.8196853995323181,
      "eval_ag_news_emb_cos_sim_sem": 0.006709633403142574,
      "eval_ag_news_emb_top1_equal": 0.25,
      "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.519531488418579,
      "eval_ag_news_n_ngrams_match_1": 14.172,
      "eval_ag_news_n_ngrams_match_2": 3.216,
      "eval_ag_news_n_ngrams_match_3": 0.914,
      "eval_ag_news_num_pred_words": 46.554,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.76860377515698,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3521433167175206,
      "eval_ag_news_runtime": 10.3514,
      "eval_ag_news_samples_per_second": 48.302,
      "eval_ag_news_steps_per_second": 0.097,
      "eval_ag_news_token_set_f1": 0.3526650207991813,
      "eval_ag_news_token_set_f1_sem": 0.004478019878389956,
      "eval_ag_news_token_set_precision": 0.33673514573567986,
      "eval_ag_news_token_set_recall": 0.38482114364734127,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 152500
    },
    {
      "epoch": 29.28,
      "eval_anthropic_toxic_prompts_accuracy": 0.11440625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2041181389940774,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12345400482851772,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6702789068222046,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009755586145024885,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2476284503936768,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.302,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.98,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.73,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.616,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.729249315044623,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2155460628838602,
      "eval_anthropic_toxic_prompts_runtime": 9.836,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.833,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.102,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36339760380715835,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006638544596447055,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4448925712023457,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.33426795248847874,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 152500
    },
    {
      "epoch": 29.28,
      "eval_arxiv_accuracy": 0.34853125,
      "eval_arxiv_bleu_score": 4.392240454264218,
      "eval_arxiv_bleu_score_sem": 0.12801832934646346,
      "eval_arxiv_emb_cos_sim": 0.7635323405265808,
      "eval_arxiv_emb_cos_sim_sem": 0.007354411038185914,
      "eval_arxiv_emb_top1_equal": 0.296875,
      "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3708224296569824,
      "eval_arxiv_n_ngrams_match_1": 15.086,
      "eval_arxiv_n_ngrams_match_2": 2.958,
      "eval_arxiv_n_ngrams_match_3": 0.684,
      "eval_arxiv_num_pred_words": 40.34,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.10245193774763,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.359936562999149,
      "eval_arxiv_runtime": 10.218,
      "eval_arxiv_samples_per_second": 48.933,
      "eval_arxiv_steps_per_second": 0.098,
      "eval_arxiv_token_set_f1": 0.3514872651538017,
      "eval_arxiv_token_set_f1_sem": 0.00444384618200801,
      "eval_arxiv_token_set_precision": 0.305201980315453,
      "eval_arxiv_token_set_recall": 0.429694570079846,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 152500
    },
    {
      "epoch": 29.28,
      "eval_python_code_alpaca_accuracy": 0.16125,
      "eval_python_code_alpaca_bleu_score": 4.708651767662293,
      "eval_python_code_alpaca_bleu_score_sem": 0.1450505840872041,
      "eval_python_code_alpaca_emb_cos_sim": 0.7512420415878296,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009261219417060288,
      "eval_python_code_alpaca_emb_top1_equal": 0.140625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.871154308319092,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.822,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.946,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.01,
      "eval_python_code_alpaca_num_pred_words": 43.238,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.65738851121748,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3345757801778281,
      "eval_python_code_alpaca_runtime": 9.7852,
      "eval_python_code_alpaca_samples_per_second": 51.098,
      "eval_python_code_alpaca_steps_per_second": 0.102,
      "eval_python_code_alpaca_token_set_f1": 0.4749499215819101,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005438912566046437,
      "eval_python_code_alpaca_token_set_precision": 0.5363635997315532,
      "eval_python_code_alpaca_token_set_recall": 0.4467668670965634,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 152500
    },
    {
      "epoch": 29.28,
      "eval_wikibio_accuracy": 0.3243125,
      "eval_wikibio_bleu_score": 6.170355489342777,
      "eval_wikibio_bleu_score_sem": 0.21626342709264484,
      "eval_wikibio_emb_cos_sim": 0.7447891235351562,
      "eval_wikibio_emb_cos_sim_sem": 0.00958461481986241,
      "eval_wikibio_emb_top1_equal": 0.1796875,
      "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7073071002960205,
      "eval_wikibio_n_ngrams_match_1": 10.262,
      "eval_wikibio_n_ngrams_match_2": 3.528,
      "eval_wikibio_n_ngrams_match_3": 1.276,
      "eval_wikibio_num_pred_words": 36.562,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 40.74393932056487,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3619102206975344,
      "eval_wikibio_runtime": 9.8393,
      "eval_wikibio_samples_per_second": 50.816,
      "eval_wikibio_steps_per_second": 0.102,
      "eval_wikibio_token_set_f1": 0.32288781745287093,
      "eval_wikibio_token_set_f1_sem": 0.005292243994634853,
      "eval_wikibio_token_set_precision": 0.3318492022677943,
      "eval_wikibio_token_set_recall": 0.33011026734187837,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 152500
    },
    {
      "epoch": 29.28,
      "eval_nq_accuracy": 0.5313125,
      "eval_nq_bleu_score": 11.604526963423785,
      "eval_nq_bleu_score_sem": 0.47233919225509874,
      "eval_nq_emb_cos_sim": 0.8328354358673096,
      "eval_nq_emb_cos_sim_sem": 0.006959335041323397,
      "eval_nq_emb_top1_equal": 0.265625,
      "eval_nq_emb_top1_equal_sem": 0.03919146934646163,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1629464626312256,
      "eval_nq_n_ngrams_match_1": 23.1,
      "eval_nq_n_ngrams_match_2": 8.412,
      "eval_nq_n_ngrams_match_3": 3.856,
      "eval_nq_num_pred_words": 48.816,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.696724518396325,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4484148132987383,
      "eval_nq_runtime": 10.8999,
      "eval_nq_samples_per_second": 45.872,
      "eval_nq_steps_per_second": 0.092,
      "eval_nq_token_set_f1": 0.4622161706914341,
      "eval_nq_token_set_f1_sem": 0.004978477992915881,
      "eval_nq_token_set_precision": 0.4203228620083173,
      "eval_nq_token_set_recall": 0.5211142140551582,
      "eval_nq_true_num_tokens": 64.0,
      "step": 152500
    },
    {
      "epoch": 29.28,
      "learning_rate": 0.001,
      "loss": 2.5281,
      "step": 152508
    },
    {
      "epoch": 29.29,
      "learning_rate": 0.001,
      "loss": 2.531,
      "step": 152520
    },
    {
      "epoch": 29.29,
      "learning_rate": 0.001,
      "loss": 2.5323,
      "step": 152532
    },
    {
      "epoch": 29.29,
      "learning_rate": 0.001,
      "loss": 2.5331,
      "step": 152544
    },
    {
      "epoch": 29.29,
      "learning_rate": 0.001,
      "loss": 2.5316,
      "step": 152556
    },
    {
      "epoch": 29.29,
      "learning_rate": 0.001,
      "loss": 2.5349,
      "step": 152568
    },
    {
      "epoch": 29.3,
      "learning_rate": 0.001,
      "loss": 2.535,
      "step": 152580
    },
    {
      "epoch": 29.3,
      "learning_rate": 0.001,
      "loss": 2.5324,
      "step": 152592
    },
    {
      "epoch": 29.3,
      "learning_rate": 0.001,
      "loss": 2.5388,
      "step": 152604
    },
    {
      "epoch": 29.3,
      "learning_rate": 0.001,
      "loss": 2.5255,
      "step": 152616
    },
    {
      "epoch": 29.31,
      "learning_rate": 0.001,
      "loss": 2.5367,
      "step": 152628
    },
    {
      "epoch": 29.31,
      "learning_rate": 0.001,
      "loss": 2.5344,
      "step": 152640
    },
    {
      "epoch": 29.31,
      "learning_rate": 0.001,
      "loss": 2.5522,
      "step": 152652
    },
    {
      "epoch": 29.31,
      "learning_rate": 0.001,
      "loss": 2.5355,
      "step": 152664
    },
    {
      "epoch": 29.32,
      "learning_rate": 0.001,
      "loss": 2.5306,
      "step": 152676
    },
    {
      "epoch": 29.32,
      "learning_rate": 0.001,
      "loss": 2.5464,
      "step": 152688
    },
    {
      "epoch": 29.32,
      "learning_rate": 0.001,
      "loss": 2.5358,
      "step": 152700
    },
    {
      "epoch": 29.32,
      "learning_rate": 0.001,
      "loss": 2.5378,
      "step": 152712
    },
    {
      "epoch": 29.32,
      "learning_rate": 0.001,
      "loss": 2.5367,
      "step": 152724
    },
    {
      "epoch": 29.33,
      "learning_rate": 0.001,
      "loss": 2.5264,
      "step": 152736
    },
    {
      "epoch": 29.33,
      "learning_rate": 0.001,
      "loss": 2.5321,
      "step": 152748
    },
    {
      "epoch": 29.33,
      "learning_rate": 0.001,
      "loss": 2.5294,
      "step": 152760
    },
    {
      "epoch": 29.33,
      "learning_rate": 0.001,
      "loss": 2.5343,
      "step": 152772
    },
    {
      "epoch": 29.34,
      "learning_rate": 0.001,
      "loss": 2.5322,
      "step": 152784
    },
    {
      "epoch": 29.34,
      "learning_rate": 0.001,
      "loss": 2.5321,
      "step": 152796
    },
    {
      "epoch": 29.34,
      "learning_rate": 0.001,
      "loss": 2.5308,
      "step": 152808
    },
    {
      "epoch": 29.34,
      "learning_rate": 0.001,
      "loss": 2.5414,
      "step": 152820
    },
    {
      "epoch": 29.35,
      "learning_rate": 0.001,
      "loss": 2.53,
      "step": 152832
    },
    {
      "epoch": 29.35,
      "learning_rate": 0.001,
      "loss": 2.533,
      "step": 152844
    },
    {
      "epoch": 29.35,
      "learning_rate": 0.001,
      "loss": 2.5316,
      "step": 152856
    },
    {
      "epoch": 29.35,
      "learning_rate": 0.001,
      "loss": 2.5324,
      "step": 152868
    },
    {
      "epoch": 29.35,
      "learning_rate": 0.001,
      "loss": 2.5387,
      "step": 152880
    },
    {
      "epoch": 29.36,
      "learning_rate": 0.001,
      "loss": 2.5392,
      "step": 152892
    },
    {
      "epoch": 29.36,
      "learning_rate": 0.001,
      "loss": 2.545,
      "step": 152904
    },
    {
      "epoch": 29.36,
      "learning_rate": 0.001,
      "loss": 2.538,
      "step": 152916
    },
    {
      "epoch": 29.36,
      "learning_rate": 0.001,
      "loss": 2.539,
      "step": 152928
    },
    {
      "epoch": 29.37,
      "learning_rate": 0.001,
      "loss": 2.5393,
      "step": 152940
    },
    {
      "epoch": 29.37,
      "learning_rate": 0.001,
      "loss": 2.5368,
      "step": 152952
    },
    {
      "epoch": 29.37,
      "learning_rate": 0.001,
      "loss": 2.5489,
      "step": 152964
    },
    {
      "epoch": 29.37,
      "learning_rate": 0.001,
      "loss": 2.545,
      "step": 152976
    },
    {
      "epoch": 29.38,
      "learning_rate": 0.001,
      "loss": 2.5348,
      "step": 152988
    },
    {
      "epoch": 29.38,
      "learning_rate": 0.001,
      "loss": 2.5412,
      "step": 153000
    },
    {
      "epoch": 29.38,
      "learning_rate": 0.001,
      "loss": 2.5434,
      "step": 153012
    },
    {
      "epoch": 29.38,
      "learning_rate": 0.001,
      "loss": 2.5352,
      "step": 153024
    },
    {
      "epoch": 29.38,
      "learning_rate": 0.001,
      "loss": 2.5353,
      "step": 153036
    },
    {
      "epoch": 29.39,
      "learning_rate": 0.001,
      "loss": 2.5333,
      "step": 153048
    },
    {
      "epoch": 29.39,
      "learning_rate": 0.001,
      "loss": 2.5254,
      "step": 153060
    },
    {
      "epoch": 29.39,
      "learning_rate": 0.001,
      "loss": 2.5296,
      "step": 153072
    },
    {
      "epoch": 29.39,
      "learning_rate": 0.001,
      "loss": 2.5423,
      "step": 153084
    },
    {
      "epoch": 29.4,
      "learning_rate": 0.001,
      "loss": 2.5332,
      "step": 153096
    },
    {
      "epoch": 29.4,
      "learning_rate": 0.001,
      "loss": 2.5306,
      "step": 153108
    },
    {
      "epoch": 29.4,
      "learning_rate": 0.001,
      "loss": 2.5286,
      "step": 153120
    },
    {
      "epoch": 29.4,
      "eval_ag_news_accuracy": 0.32275,
      "eval_ag_news_bleu_score": 4.727131132424163,
      "eval_ag_news_bleu_score_sem": 0.14846541570604924,
      "eval_ag_news_emb_cos_sim": 0.8130539059638977,
      "eval_ag_news_emb_cos_sim_sem": 0.0067402537471302456,
      "eval_ag_news_emb_top1_equal": 0.265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.531450033187866,
      "eval_ag_news_n_ngrams_match_1": 14.238,
      "eval_ag_news_n_ngrams_match_2": 3.11,
      "eval_ag_news_n_ngrams_match_3": 0.834,
      "eval_ag_news_num_pred_words": 46.772,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 34.17348439210654,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35234964592223067,
      "eval_ag_news_runtime": 10.4878,
      "eval_ag_news_samples_per_second": 47.675,
      "eval_ag_news_steps_per_second": 0.095,
      "eval_ag_news_token_set_f1": 0.35323183667683966,
      "eval_ag_news_token_set_f1_sem": 0.0045276964664471234,
      "eval_ag_news_token_set_precision": 0.3391260723421015,
      "eval_ag_news_token_set_recall": 0.3812286779843616,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 153125
    },
    {
      "epoch": 29.4,
      "eval_anthropic_toxic_prompts_accuracy": 0.1146875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.094749298177269,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11965242742021427,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6682591438293457,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009238541119838888,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2619822025299072,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.202,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.928,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.732,
      "eval_anthropic_toxic_prompts_num_pred_words": 48.024,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 26.10122381074379,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21201649055170305,
      "eval_anthropic_toxic_prompts_runtime": 10.3113,
      "eval_anthropic_toxic_prompts_samples_per_second": 48.49,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.097,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35659793556420943,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0064802144593629225,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4370085658737159,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32842114557386587,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 153125
    },
    {
      "epoch": 29.4,
      "eval_arxiv_accuracy": 0.3475625,
      "eval_arxiv_bleu_score": 4.436679851921937,
      "eval_arxiv_bleu_score_sem": 0.12367766371593797,
      "eval_arxiv_emb_cos_sim": 0.7703551650047302,
      "eval_arxiv_emb_cos_sim_sem": 0.00799319986947366,
      "eval_arxiv_emb_top1_equal": 0.28125,
      "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.4008877277374268,
      "eval_arxiv_n_ngrams_match_1": 15.274,
      "eval_arxiv_n_ngrams_match_2": 3.024,
      "eval_arxiv_n_ngrams_match_3": 0.7,
      "eval_arxiv_num_pred_words": 40.638,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.990711820392992,
      "eval_arxiv_pred_num_tokens": 62.9609375,
      "eval_arxiv_rouge_score": 0.36658715684338206,
      "eval_arxiv_runtime": 10.4344,
      "eval_arxiv_samples_per_second": 47.918,
      "eval_arxiv_steps_per_second": 0.096,
      "eval_arxiv_token_set_f1": 0.35909541712914766,
      "eval_arxiv_token_set_f1_sem": 0.00411386885948219,
      "eval_arxiv_token_set_precision": 0.3112874522779814,
      "eval_arxiv_token_set_recall": 0.44162401115407596,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 153125
    },
    {
      "epoch": 29.4,
      "eval_python_code_alpaca_accuracy": 0.161,
      "eval_python_code_alpaca_bleu_score": 4.639786625479209,
      "eval_python_code_alpaca_bleu_score_sem": 0.149060509814988,
      "eval_python_code_alpaca_emb_cos_sim": 0.7475690841674805,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.01049772300012839,
      "eval_python_code_alpaca_emb_top1_equal": 0.1484375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.892303228378296,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.796,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.876,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.006,
      "eval_python_code_alpaca_num_pred_words": 43.538,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.03480006550932,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33247830362276876,
      "eval_python_code_alpaca_runtime": 10.2052,
      "eval_python_code_alpaca_samples_per_second": 48.995,
      "eval_python_code_alpaca_steps_per_second": 0.098,
      "eval_python_code_alpaca_token_set_f1": 0.46703583762286777,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005649303604768681,
      "eval_python_code_alpaca_token_set_precision": 0.5346446187826586,
      "eval_python_code_alpaca_token_set_recall": 0.43804529421209365,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 153125
    },
    {
      "epoch": 29.4,
      "eval_wikibio_accuracy": 0.3265625,
      "eval_wikibio_bleu_score": 6.161298010183409,
      "eval_wikibio_bleu_score_sem": 0.2305446938264135,
      "eval_wikibio_emb_cos_sim": 0.7415398955345154,
      "eval_wikibio_emb_cos_sim_sem": 0.007896822539274897,
      "eval_wikibio_emb_top1_equal": 0.15625,
      "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.717679262161255,
      "eval_wikibio_n_ngrams_match_1": 10.18,
      "eval_wikibio_n_ngrams_match_2": 3.45,
      "eval_wikibio_n_ngrams_match_3": 1.276,
      "eval_wikibio_num_pred_words": 36.048,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 41.16874130328316,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.363731288311951,
      "eval_wikibio_runtime": 10.5334,
      "eval_wikibio_samples_per_second": 47.468,
      "eval_wikibio_steps_per_second": 0.095,
      "eval_wikibio_token_set_f1": 0.32439116384983174,
      "eval_wikibio_token_set_f1_sem": 0.005311041508510277,
      "eval_wikibio_token_set_precision": 0.3323911169049795,
      "eval_wikibio_token_set_recall": 0.33159097463536835,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 153125
    },
    {
      "epoch": 29.4,
      "eval_nq_accuracy": 0.53084375,
      "eval_nq_bleu_score": 11.9056362080981,
      "eval_nq_bleu_score_sem": 0.48633410526354454,
      "eval_nq_emb_cos_sim": 0.8390674591064453,
      "eval_nq_emb_cos_sim_sem": 0.006772789138547132,
      "eval_nq_emb_top1_equal": 0.2734375,
      "eval_nq_emb_top1_equal_sem": 0.03955156411760461,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1620209217071533,
      "eval_nq_n_ngrams_match_1": 23.348,
      "eval_nq_n_ngrams_match_2": 8.512,
      "eval_nq_n_ngrams_match_3": 3.964,
      "eval_nq_num_pred_words": 48.924,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.688679067720422,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4525557854168135,
      "eval_nq_runtime": 11.1957,
      "eval_nq_samples_per_second": 44.66,
      "eval_nq_steps_per_second": 0.089,
      "eval_nq_token_set_f1": 0.46648311505069967,
      "eval_nq_token_set_f1_sem": 0.004912789983806017,
      "eval_nq_token_set_precision": 0.425312631514177,
      "eval_nq_token_set_recall": 0.5239662201959386,
      "eval_nq_true_num_tokens": 64.0,
      "step": 153125
    },
    {
      "epoch": 29.4,
      "learning_rate": 0.001,
      "loss": 2.5397,
      "step": 153132
    },
    {
      "epoch": 29.41,
      "learning_rate": 0.001,
      "loss": 2.5299,
      "step": 153144
    },
    {
      "epoch": 29.41,
      "learning_rate": 0.001,
      "loss": 2.531,
      "step": 153156
    },
    {
      "epoch": 29.41,
      "learning_rate": 0.001,
      "loss": 2.5342,
      "step": 153168
    },
    {
      "epoch": 29.41,
      "learning_rate": 0.001,
      "loss": 2.5286,
      "step": 153180
    },
    {
      "epoch": 29.41,
      "learning_rate": 0.001,
      "loss": 2.5324,
      "step": 153192
    },
    {
      "epoch": 29.42,
      "learning_rate": 0.001,
      "loss": 2.5322,
      "step": 153204
    },
    {
      "epoch": 29.42,
      "learning_rate": 0.001,
      "loss": 2.5272,
      "step": 153216
    },
    {
      "epoch": 29.42,
      "learning_rate": 0.001,
      "loss": 2.528,
      "step": 153228
    },
    {
      "epoch": 29.42,
      "learning_rate": 0.001,
      "loss": 2.5373,
      "step": 153240
    },
    {
      "epoch": 29.43,
      "learning_rate": 0.001,
      "loss": 2.5352,
      "step": 153252
    },
    {
      "epoch": 29.43,
      "learning_rate": 0.001,
      "loss": 2.5306,
      "step": 153264
    },
    {
      "epoch": 29.43,
      "learning_rate": 0.001,
      "loss": 2.5351,
      "step": 153276
    },
    {
      "epoch": 29.43,
      "learning_rate": 0.001,
      "loss": 2.533,
      "step": 153288
    },
    {
      "epoch": 29.44,
      "learning_rate": 0.001,
      "loss": 2.5429,
      "step": 153300
    },
    {
      "epoch": 29.44,
      "learning_rate": 0.001,
      "loss": 2.5458,
      "step": 153312
    },
    {
      "epoch": 29.44,
      "learning_rate": 0.001,
      "loss": 2.5435,
      "step": 153324
    },
    {
      "epoch": 29.44,
      "learning_rate": 0.001,
      "loss": 2.5391,
      "step": 153336
    },
    {
      "epoch": 29.44,
      "learning_rate": 0.001,
      "loss": 2.5436,
      "step": 153348
    },
    {
      "epoch": 29.45,
      "learning_rate": 0.001,
      "loss": 2.5316,
      "step": 153360
    },
    {
      "epoch": 29.45,
      "learning_rate": 0.001,
      "loss": 2.5336,
      "step": 153372
    },
    {
      "epoch": 29.45,
      "learning_rate": 0.001,
      "loss": 2.5353,
      "step": 153384
    },
    {
      "epoch": 29.45,
      "learning_rate": 0.001,
      "loss": 2.5409,
      "step": 153396
    },
    {
      "epoch": 29.46,
      "learning_rate": 0.001,
      "loss": 2.5419,
      "step": 153408
    },
    {
      "epoch": 29.46,
      "learning_rate": 0.001,
      "loss": 2.5335,
      "step": 153420
    },
    {
      "epoch": 29.46,
      "learning_rate": 0.001,
      "loss": 2.5325,
      "step": 153432
    },
    {
      "epoch": 29.46,
      "learning_rate": 0.001,
      "loss": 2.5327,
      "step": 153444
    },
    {
      "epoch": 29.47,
      "learning_rate": 0.001,
      "loss": 2.5374,
      "step": 153456
    },
    {
      "epoch": 29.47,
      "learning_rate": 0.001,
      "loss": 2.534,
      "step": 153468
    },
    {
      "epoch": 29.47,
      "learning_rate": 0.001,
      "loss": 2.5386,
      "step": 153480
    },
    {
      "epoch": 29.47,
      "learning_rate": 0.001,
      "loss": 2.5335,
      "step": 153492
    },
    {
      "epoch": 29.47,
      "learning_rate": 0.001,
      "loss": 2.5389,
      "step": 153504
    },
    {
      "epoch": 29.48,
      "learning_rate": 0.001,
      "loss": 2.5401,
      "step": 153516
    },
    {
      "epoch": 29.48,
      "learning_rate": 0.001,
      "loss": 2.5292,
      "step": 153528
    },
    {
      "epoch": 29.48,
      "learning_rate": 0.001,
      "loss": 2.5329,
      "step": 153540
    },
    {
      "epoch": 29.48,
      "learning_rate": 0.001,
      "loss": 2.5361,
      "step": 153552
    },
    {
      "epoch": 29.49,
      "learning_rate": 0.001,
      "loss": 2.5382,
      "step": 153564
    },
    {
      "epoch": 29.49,
      "learning_rate": 0.001,
      "loss": 2.536,
      "step": 153576
    },
    {
      "epoch": 29.49,
      "learning_rate": 0.001,
      "loss": 2.5355,
      "step": 153588
    },
    {
      "epoch": 29.49,
      "learning_rate": 0.001,
      "loss": 2.5324,
      "step": 153600
    },
    {
      "epoch": 29.5,
      "learning_rate": 0.001,
      "loss": 2.5272,
      "step": 153612
    },
    {
      "epoch": 29.5,
      "learning_rate": 0.001,
      "loss": 2.531,
      "step": 153624
    },
    {
      "epoch": 29.5,
      "learning_rate": 0.001,
      "loss": 2.5316,
      "step": 153636
    },
    {
      "epoch": 29.5,
      "learning_rate": 0.001,
      "loss": 2.5429,
      "step": 153648
    },
    {
      "epoch": 29.5,
      "learning_rate": 0.001,
      "loss": 2.5388,
      "step": 153660
    },
    {
      "epoch": 29.51,
      "learning_rate": 0.001,
      "loss": 2.5315,
      "step": 153672
    },
    {
      "epoch": 29.51,
      "learning_rate": 0.001,
      "loss": 2.532,
      "step": 153684
    },
    {
      "epoch": 29.51,
      "learning_rate": 0.001,
      "loss": 2.5375,
      "step": 153696
    },
    {
      "epoch": 29.51,
      "learning_rate": 0.001,
      "loss": 2.5311,
      "step": 153708
    },
    {
      "epoch": 29.52,
      "learning_rate": 0.001,
      "loss": 2.5375,
      "step": 153720
    },
    {
      "epoch": 29.52,
      "learning_rate": 0.001,
      "loss": 2.547,
      "step": 153732
    },
    {
      "epoch": 29.52,
      "learning_rate": 0.001,
      "loss": 2.5382,
      "step": 153744
    },
    {
      "epoch": 29.52,
      "eval_ag_news_accuracy": 0.32546875,
      "eval_ag_news_bleu_score": 5.0884880683787665,
      "eval_ag_news_bleu_score_sem": 0.16128621691349004,
      "eval_ag_news_emb_cos_sim": 0.8166429400444031,
      "eval_ag_news_emb_cos_sim_sem": 0.006777681465696936,
      "eval_ag_news_emb_top1_equal": 0.1875,
      "eval_ag_news_emb_top1_equal_sem": 0.034634623208270626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5160975456237793,
      "eval_ag_news_n_ngrams_match_1": 14.314,
      "eval_ag_news_n_ngrams_match_2": 3.178,
      "eval_ag_news_n_ngrams_match_3": 0.938,
      "eval_ag_news_num_pred_words": 47.0,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.65284319289521,
      "eval_ag_news_pred_num_tokens": 62.9609375,
      "eval_ag_news_rouge_score": 0.35385473180337534,
      "eval_ag_news_runtime": 10.4679,
      "eval_ag_news_samples_per_second": 47.765,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.35552203181937475,
      "eval_ag_news_token_set_f1_sem": 0.004324311263915896,
      "eval_ag_news_token_set_precision": 0.3411261544436801,
      "eval_ag_news_token_set_recall": 0.3864101046198372,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 153750
    },
    {
      "epoch": 29.52,
      "eval_anthropic_toxic_prompts_accuracy": 0.11478125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.166570391996507,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11826813855791353,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6783945560455322,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009271410393321322,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2225420475006104,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.348,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.97,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.75,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.762,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.091823786180072,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.215857727280069,
      "eval_anthropic_toxic_prompts_runtime": 10.0795,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.605,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.099,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36667837988495133,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0065007062463194,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4559403292944089,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3345886790171598,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 153750
    },
    {
      "epoch": 29.52,
      "eval_arxiv_accuracy": 0.3509375,
      "eval_arxiv_bleu_score": 4.526329104388431,
      "eval_arxiv_bleu_score_sem": 0.13274732339726175,
      "eval_arxiv_emb_cos_sim": 0.7760580778121948,
      "eval_arxiv_emb_cos_sim_sem": 0.006906002155799908,
      "eval_arxiv_emb_top1_equal": 0.28125,
      "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3723182678222656,
      "eval_arxiv_n_ngrams_match_1": 15.508,
      "eval_arxiv_n_ngrams_match_2": 3.076,
      "eval_arxiv_n_ngrams_match_3": 0.72,
      "eval_arxiv_num_pred_words": 41.262,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.14601707113083,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3684909686343052,
      "eval_arxiv_runtime": 10.2667,
      "eval_arxiv_samples_per_second": 48.701,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.3627758262701893,
      "eval_arxiv_token_set_f1_sem": 0.0039809920895349255,
      "eval_arxiv_token_set_precision": 0.31505173557214594,
      "eval_arxiv_token_set_recall": 0.4446297743682353,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 153750
    },
    {
      "epoch": 29.52,
      "eval_python_code_alpaca_accuracy": 0.16209375,
      "eval_python_code_alpaca_bleu_score": 4.722444137748518,
      "eval_python_code_alpaca_bleu_score_sem": 0.14652267465499919,
      "eval_python_code_alpaca_emb_cos_sim": 0.7655680775642395,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007991520580978304,
      "eval_python_code_alpaca_emb_top1_equal": 0.15625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8765552043914795,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.954,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.026,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.046,
      "eval_python_code_alpaca_num_pred_words": 44.66,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.753012225906247,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3331539217992684,
      "eval_python_code_alpaca_runtime": 11.0122,
      "eval_python_code_alpaca_samples_per_second": 45.404,
      "eval_python_code_alpaca_steps_per_second": 0.091,
      "eval_python_code_alpaca_token_set_f1": 0.48638780011528077,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005668090953059059,
      "eval_python_code_alpaca_token_set_precision": 0.5490494054444741,
      "eval_python_code_alpaca_token_set_recall": 0.4584435720869273,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 153750
    },
    {
      "epoch": 29.52,
      "eval_wikibio_accuracy": 0.32459375,
      "eval_wikibio_bleu_score": 6.0356667548973855,
      "eval_wikibio_bleu_score_sem": 0.20725233836470902,
      "eval_wikibio_emb_cos_sim": 0.7549325227737427,
      "eval_wikibio_emb_cos_sim_sem": 0.008114191617747292,
      "eval_wikibio_emb_top1_equal": 0.2421875,
      "eval_wikibio_emb_top1_equal_sem": 0.038014990119662626,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6997523307800293,
      "eval_wikibio_n_ngrams_match_1": 10.192,
      "eval_wikibio_n_ngrams_match_2": 3.478,
      "eval_wikibio_n_ngrams_match_3": 1.25,
      "eval_wikibio_num_pred_words": 35.878,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 40.43728804816389,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3636025086830905,
      "eval_wikibio_runtime": 10.5374,
      "eval_wikibio_samples_per_second": 47.45,
      "eval_wikibio_steps_per_second": 0.095,
      "eval_wikibio_token_set_f1": 0.32287164328214785,
      "eval_wikibio_token_set_f1_sem": 0.005350142309823111,
      "eval_wikibio_token_set_precision": 0.33072022461373424,
      "eval_wikibio_token_set_recall": 0.33270149809465766,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 153750
    },
    {
      "epoch": 29.52,
      "eval_nq_accuracy": 0.5319375,
      "eval_nq_bleu_score": 11.81763805158241,
      "eval_nq_bleu_score_sem": 0.4656285352602,
      "eval_nq_emb_cos_sim": 0.8430206775665283,
      "eval_nq_emb_cos_sim_sem": 0.006386959106576692,
      "eval_nq_emb_top1_equal": 0.265625,
      "eval_nq_emb_top1_equal_sem": 0.03919146934646163,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.162168502807617,
      "eval_nq_n_ngrams_match_1": 23.154,
      "eval_nq_n_ngrams_match_2": 8.568,
      "eval_nq_n_ngrams_match_3": 3.944,
      "eval_nq_num_pred_words": 49.124,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.68996144716397,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.44998185161445425,
      "eval_nq_runtime": 10.5108,
      "eval_nq_samples_per_second": 47.57,
      "eval_nq_steps_per_second": 0.095,
      "eval_nq_token_set_f1": 0.4656747856974989,
      "eval_nq_token_set_f1_sem": 0.0049369660674267474,
      "eval_nq_token_set_precision": 0.4225186523974833,
      "eval_nq_token_set_recall": 0.527291210402914,
      "eval_nq_true_num_tokens": 64.0,
      "step": 153750
    },
    {
      "epoch": 29.52,
      "learning_rate": 0.001,
      "loss": 2.5372,
      "step": 153756
    },
    {
      "epoch": 29.53,
      "learning_rate": 0.001,
      "loss": 2.5395,
      "step": 153768
    },
    {
      "epoch": 29.53,
      "learning_rate": 0.001,
      "loss": 2.5414,
      "step": 153780
    },
    {
      "epoch": 29.53,
      "learning_rate": 0.001,
      "loss": 2.5342,
      "step": 153792
    },
    {
      "epoch": 29.53,
      "learning_rate": 0.001,
      "loss": 2.5437,
      "step": 153804
    },
    {
      "epoch": 29.53,
      "learning_rate": 0.001,
      "loss": 2.5409,
      "step": 153816
    },
    {
      "epoch": 29.54,
      "learning_rate": 0.001,
      "loss": 2.5365,
      "step": 153828
    },
    {
      "epoch": 29.54,
      "learning_rate": 0.001,
      "loss": 2.5424,
      "step": 153840
    },
    {
      "epoch": 29.54,
      "learning_rate": 0.001,
      "loss": 2.5415,
      "step": 153852
    },
    {
      "epoch": 29.54,
      "learning_rate": 0.001,
      "loss": 2.5482,
      "step": 153864
    },
    {
      "epoch": 29.55,
      "learning_rate": 0.001,
      "loss": 2.5371,
      "step": 153876
    },
    {
      "epoch": 29.55,
      "learning_rate": 0.001,
      "loss": 2.5317,
      "step": 153888
    },
    {
      "epoch": 29.55,
      "learning_rate": 0.001,
      "loss": 2.5439,
      "step": 153900
    },
    {
      "epoch": 29.55,
      "learning_rate": 0.001,
      "loss": 2.5437,
      "step": 153912
    },
    {
      "epoch": 29.56,
      "learning_rate": 0.001,
      "loss": 2.5383,
      "step": 153924
    },
    {
      "epoch": 29.56,
      "learning_rate": 0.001,
      "loss": 2.5421,
      "step": 153936
    },
    {
      "epoch": 29.56,
      "learning_rate": 0.001,
      "loss": 2.5366,
      "step": 153948
    },
    {
      "epoch": 29.56,
      "learning_rate": 0.001,
      "loss": 2.5327,
      "step": 153960
    },
    {
      "epoch": 29.56,
      "learning_rate": 0.001,
      "loss": 2.5432,
      "step": 153972
    },
    {
      "epoch": 29.57,
      "learning_rate": 0.001,
      "loss": 2.5473,
      "step": 153984
    },
    {
      "epoch": 29.57,
      "learning_rate": 0.001,
      "loss": 2.5444,
      "step": 153996
    },
    {
      "epoch": 29.57,
      "learning_rate": 0.001,
      "loss": 2.5287,
      "step": 154008
    },
    {
      "epoch": 29.57,
      "learning_rate": 0.001,
      "loss": 2.5401,
      "step": 154020
    },
    {
      "epoch": 29.58,
      "learning_rate": 0.001,
      "loss": 2.5391,
      "step": 154032
    },
    {
      "epoch": 29.58,
      "learning_rate": 0.001,
      "loss": 2.5428,
      "step": 154044
    },
    {
      "epoch": 29.58,
      "learning_rate": 0.001,
      "loss": 2.5381,
      "step": 154056
    },
    {
      "epoch": 29.58,
      "learning_rate": 0.001,
      "loss": 2.5426,
      "step": 154068
    },
    {
      "epoch": 29.59,
      "learning_rate": 0.001,
      "loss": 2.5325,
      "step": 154080
    },
    {
      "epoch": 29.59,
      "learning_rate": 0.001,
      "loss": 2.5352,
      "step": 154092
    },
    {
      "epoch": 29.59,
      "learning_rate": 0.001,
      "loss": 2.5409,
      "step": 154104
    },
    {
      "epoch": 29.59,
      "learning_rate": 0.001,
      "loss": 2.5392,
      "step": 154116
    },
    {
      "epoch": 29.59,
      "learning_rate": 0.001,
      "loss": 2.5373,
      "step": 154128
    },
    {
      "epoch": 29.6,
      "learning_rate": 0.001,
      "loss": 2.5287,
      "step": 154140
    },
    {
      "epoch": 29.6,
      "learning_rate": 0.001,
      "loss": 2.5492,
      "step": 154152
    },
    {
      "epoch": 29.6,
      "learning_rate": 0.001,
      "loss": 2.5339,
      "step": 154164
    },
    {
      "epoch": 29.6,
      "learning_rate": 0.001,
      "loss": 2.5329,
      "step": 154176
    },
    {
      "epoch": 29.61,
      "learning_rate": 0.001,
      "loss": 2.5308,
      "step": 154188
    },
    {
      "epoch": 29.61,
      "learning_rate": 0.001,
      "loss": 2.5303,
      "step": 154200
    },
    {
      "epoch": 29.61,
      "learning_rate": 0.001,
      "loss": 2.541,
      "step": 154212
    },
    {
      "epoch": 29.61,
      "learning_rate": 0.001,
      "loss": 2.5209,
      "step": 154224
    },
    {
      "epoch": 29.62,
      "learning_rate": 0.001,
      "loss": 2.5381,
      "step": 154236
    },
    {
      "epoch": 29.62,
      "learning_rate": 0.001,
      "loss": 2.5296,
      "step": 154248
    },
    {
      "epoch": 29.62,
      "learning_rate": 0.001,
      "loss": 2.5415,
      "step": 154260
    },
    {
      "epoch": 29.62,
      "learning_rate": 0.001,
      "loss": 2.524,
      "step": 154272
    },
    {
      "epoch": 29.62,
      "learning_rate": 0.001,
      "loss": 2.5279,
      "step": 154284
    },
    {
      "epoch": 29.63,
      "learning_rate": 0.001,
      "loss": 2.5356,
      "step": 154296
    },
    {
      "epoch": 29.63,
      "learning_rate": 0.001,
      "loss": 2.537,
      "step": 154308
    },
    {
      "epoch": 29.63,
      "learning_rate": 0.001,
      "loss": 2.5246,
      "step": 154320
    },
    {
      "epoch": 29.63,
      "learning_rate": 0.001,
      "loss": 2.534,
      "step": 154332
    },
    {
      "epoch": 29.64,
      "learning_rate": 0.001,
      "loss": 2.532,
      "step": 154344
    },
    {
      "epoch": 29.64,
      "learning_rate": 0.001,
      "loss": 2.5316,
      "step": 154356
    },
    {
      "epoch": 29.64,
      "learning_rate": 0.001,
      "loss": 2.5409,
      "step": 154368
    },
    {
      "epoch": 29.64,
      "eval_ag_news_accuracy": 0.32521875,
      "eval_ag_news_bleu_score": 5.012902685449532,
      "eval_ag_news_bleu_score_sem": 0.15705874915875578,
      "eval_ag_news_emb_cos_sim": 0.8178128004074097,
      "eval_ag_news_emb_cos_sim_sem": 0.006340916308756573,
      "eval_ag_news_emb_top1_equal": 0.3125,
      "eval_ag_news_emb_top1_equal_sem": 0.041130074229814934,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5275087356567383,
      "eval_ag_news_n_ngrams_match_1": 14.258,
      "eval_ag_news_n_ngrams_match_2": 3.152,
      "eval_ag_news_n_ngrams_match_3": 0.932,
      "eval_ag_news_num_pred_words": 46.564,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 34.039061596566306,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35532693660651615,
      "eval_ag_news_runtime": 10.4252,
      "eval_ag_news_samples_per_second": 47.961,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.35746113313108385,
      "eval_ag_news_token_set_f1_sem": 0.004430115283958541,
      "eval_ag_news_token_set_precision": 0.34257999315790344,
      "eval_ag_news_token_set_recall": 0.3886728227187792,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 154375
    },
    {
      "epoch": 29.64,
      "eval_anthropic_toxic_prompts_accuracy": 0.114125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1582854501084268,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12222832786168704,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6750933527946472,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009194914521818557,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.237285614013672,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.338,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.932,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.734,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.39,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.464507350652944,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21633668119458238,
      "eval_anthropic_toxic_prompts_runtime": 9.8646,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.686,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.101,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.360695358838236,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0064656241270708344,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4470211767403358,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32739231824678594,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 154375
    },
    {
      "epoch": 29.64,
      "eval_arxiv_accuracy": 0.35221875,
      "eval_arxiv_bleu_score": 4.326964203076714,
      "eval_arxiv_bleu_score_sem": 0.125337949054851,
      "eval_arxiv_emb_cos_sim": 0.7610146999359131,
      "eval_arxiv_emb_cos_sim_sem": 0.008225729959608833,
      "eval_arxiv_emb_top1_equal": 0.25,
      "eval_arxiv_emb_top1_equal_sem": 0.03842366440207048,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.363086462020874,
      "eval_arxiv_n_ngrams_match_1": 15.21,
      "eval_arxiv_n_ngrams_match_2": 2.942,
      "eval_arxiv_n_ngrams_match_3": 0.67,
      "eval_arxiv_num_pred_words": 40.348,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.87818489117387,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3622590678362871,
      "eval_arxiv_runtime": 10.699,
      "eval_arxiv_samples_per_second": 46.733,
      "eval_arxiv_steps_per_second": 0.093,
      "eval_arxiv_token_set_f1": 0.3565557168678842,
      "eval_arxiv_token_set_f1_sem": 0.0043836824778191496,
      "eval_arxiv_token_set_precision": 0.3072107301135902,
      "eval_arxiv_token_set_recall": 0.4450525329518359,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 154375
    },
    {
      "epoch": 29.64,
      "eval_python_code_alpaca_accuracy": 0.161375,
      "eval_python_code_alpaca_bleu_score": 4.4499177339602936,
      "eval_python_code_alpaca_bleu_score_sem": 0.1412257028668921,
      "eval_python_code_alpaca_emb_cos_sim": 0.7586034536361694,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008925268863213476,
      "eval_python_code_alpaca_emb_top1_equal": 0.140625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.9138526916503906,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.738,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.808,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.93,
      "eval_python_code_alpaca_num_pred_words": 43.544,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.427658063832194,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.32914943647812855,
      "eval_python_code_alpaca_runtime": 10.0116,
      "eval_python_code_alpaca_samples_per_second": 49.942,
      "eval_python_code_alpaca_steps_per_second": 0.1,
      "eval_python_code_alpaca_token_set_f1": 0.471186095054125,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005914170156224076,
      "eval_python_code_alpaca_token_set_precision": 0.5312704058706971,
      "eval_python_code_alpaca_token_set_recall": 0.45339734146655575,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 154375
    },
    {
      "epoch": 29.64,
      "eval_wikibio_accuracy": 0.324625,
      "eval_wikibio_bleu_score": 6.207945334710554,
      "eval_wikibio_bleu_score_sem": 0.2215587848145654,
      "eval_wikibio_emb_cos_sim": 0.752561092376709,
      "eval_wikibio_emb_cos_sim_sem": 0.007941769519033732,
      "eval_wikibio_emb_top1_equal": 0.2265625,
      "eval_wikibio_emb_top1_equal_sem": 0.03714537682851538,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7271616458892822,
      "eval_wikibio_n_ngrams_match_1": 10.392,
      "eval_wikibio_n_ngrams_match_2": 3.554,
      "eval_wikibio_n_ngrams_match_3": 1.326,
      "eval_wikibio_num_pred_words": 37.038,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 41.56097582605309,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.365882819838572,
      "eval_wikibio_runtime": 9.9368,
      "eval_wikibio_samples_per_second": 50.318,
      "eval_wikibio_steps_per_second": 0.101,
      "eval_wikibio_token_set_f1": 0.3266248816004787,
      "eval_wikibio_token_set_f1_sem": 0.005258264579000425,
      "eval_wikibio_token_set_precision": 0.3389986657301919,
      "eval_wikibio_token_set_recall": 0.3298875186680289,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 154375
    },
    {
      "epoch": 29.64,
      "eval_nq_accuracy": 0.53115625,
      "eval_nq_bleu_score": 11.722697724335703,
      "eval_nq_bleu_score_sem": 0.4757861681300113,
      "eval_nq_emb_cos_sim": 0.838560163974762,
      "eval_nq_emb_cos_sim_sem": 0.006934290593684814,
      "eval_nq_emb_top1_equal": 0.265625,
      "eval_nq_emb_top1_equal_sem": 0.03919146934646163,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.157773971557617,
      "eval_nq_n_ngrams_match_1": 23.288,
      "eval_nq_n_ngrams_match_2": 8.554,
      "eval_nq_n_ngrams_match_3": 3.888,
      "eval_nq_num_pred_words": 49.286,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.651856927097764,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45020831639411363,
      "eval_nq_runtime": 10.5843,
      "eval_nq_samples_per_second": 47.24,
      "eval_nq_steps_per_second": 0.094,
      "eval_nq_token_set_f1": 0.465704194545563,
      "eval_nq_token_set_f1_sem": 0.005065087151021448,
      "eval_nq_token_set_precision": 0.42392459616411754,
      "eval_nq_token_set_recall": 0.5243949527129446,
      "eval_nq_true_num_tokens": 64.0,
      "step": 154375
    },
    {
      "epoch": 29.64,
      "learning_rate": 0.001,
      "loss": 2.5393,
      "step": 154380
    },
    {
      "epoch": 29.65,
      "learning_rate": 0.001,
      "loss": 2.5322,
      "step": 154392
    },
    {
      "epoch": 29.65,
      "learning_rate": 0.001,
      "loss": 2.5369,
      "step": 154404
    },
    {
      "epoch": 29.65,
      "learning_rate": 0.001,
      "loss": 2.5263,
      "step": 154416
    },
    {
      "epoch": 29.65,
      "learning_rate": 0.001,
      "loss": 2.5388,
      "step": 154428
    },
    {
      "epoch": 29.65,
      "learning_rate": 0.001,
      "loss": 2.5386,
      "step": 154440
    },
    {
      "epoch": 29.66,
      "learning_rate": 0.001,
      "loss": 2.5373,
      "step": 154452
    },
    {
      "epoch": 29.66,
      "learning_rate": 0.001,
      "loss": 2.5367,
      "step": 154464
    },
    {
      "epoch": 29.66,
      "learning_rate": 0.001,
      "loss": 2.5397,
      "step": 154476
    },
    {
      "epoch": 29.66,
      "learning_rate": 0.001,
      "loss": 2.5388,
      "step": 154488
    },
    {
      "epoch": 29.67,
      "learning_rate": 0.001,
      "loss": 2.5446,
      "step": 154500
    },
    {
      "epoch": 29.67,
      "learning_rate": 0.001,
      "loss": 2.5427,
      "step": 154512
    },
    {
      "epoch": 29.67,
      "learning_rate": 0.001,
      "loss": 2.5391,
      "step": 154524
    },
    {
      "epoch": 29.67,
      "learning_rate": 0.001,
      "loss": 2.5307,
      "step": 154536
    },
    {
      "epoch": 29.68,
      "learning_rate": 0.001,
      "loss": 2.5419,
      "step": 154548
    },
    {
      "epoch": 29.68,
      "learning_rate": 0.001,
      "loss": 2.5365,
      "step": 154560
    },
    {
      "epoch": 29.68,
      "learning_rate": 0.001,
      "loss": 2.5408,
      "step": 154572
    },
    {
      "epoch": 29.68,
      "learning_rate": 0.001,
      "loss": 2.5391,
      "step": 154584
    },
    {
      "epoch": 29.68,
      "learning_rate": 0.001,
      "loss": 2.5328,
      "step": 154596
    },
    {
      "epoch": 29.69,
      "learning_rate": 0.001,
      "loss": 2.5328,
      "step": 154608
    },
    {
      "epoch": 29.69,
      "learning_rate": 0.001,
      "loss": 2.5427,
      "step": 154620
    },
    {
      "epoch": 29.69,
      "learning_rate": 0.001,
      "loss": 2.5443,
      "step": 154632
    },
    {
      "epoch": 29.69,
      "learning_rate": 0.001,
      "loss": 2.5353,
      "step": 154644
    },
    {
      "epoch": 29.7,
      "learning_rate": 0.001,
      "loss": 2.5362,
      "step": 154656
    },
    {
      "epoch": 29.7,
      "learning_rate": 0.001,
      "loss": 2.5344,
      "step": 154668
    },
    {
      "epoch": 29.7,
      "learning_rate": 0.001,
      "loss": 2.535,
      "step": 154680
    },
    {
      "epoch": 29.7,
      "learning_rate": 0.001,
      "loss": 2.5438,
      "step": 154692
    },
    {
      "epoch": 29.71,
      "learning_rate": 0.001,
      "loss": 2.5333,
      "step": 154704
    },
    {
      "epoch": 29.71,
      "learning_rate": 0.001,
      "loss": 2.5362,
      "step": 154716
    },
    {
      "epoch": 29.71,
      "learning_rate": 0.001,
      "loss": 2.5382,
      "step": 154728
    },
    {
      "epoch": 29.71,
      "learning_rate": 0.001,
      "loss": 2.5372,
      "step": 154740
    },
    {
      "epoch": 29.71,
      "learning_rate": 0.001,
      "loss": 2.5384,
      "step": 154752
    },
    {
      "epoch": 29.72,
      "learning_rate": 0.001,
      "loss": 2.5362,
      "step": 154764
    },
    {
      "epoch": 29.72,
      "learning_rate": 0.001,
      "loss": 2.5407,
      "step": 154776
    },
    {
      "epoch": 29.72,
      "learning_rate": 0.001,
      "loss": 2.5448,
      "step": 154788
    },
    {
      "epoch": 29.72,
      "learning_rate": 0.001,
      "loss": 2.528,
      "step": 154800
    },
    {
      "epoch": 29.73,
      "learning_rate": 0.001,
      "loss": 2.5406,
      "step": 154812
    },
    {
      "epoch": 29.73,
      "learning_rate": 0.001,
      "loss": 2.5488,
      "step": 154824
    },
    {
      "epoch": 29.73,
      "learning_rate": 0.001,
      "loss": 2.5457,
      "step": 154836
    },
    {
      "epoch": 29.73,
      "learning_rate": 0.001,
      "loss": 2.5464,
      "step": 154848
    },
    {
      "epoch": 29.74,
      "learning_rate": 0.001,
      "loss": 2.5497,
      "step": 154860
    },
    {
      "epoch": 29.74,
      "learning_rate": 0.001,
      "loss": 2.5425,
      "step": 154872
    },
    {
      "epoch": 29.74,
      "learning_rate": 0.001,
      "loss": 2.532,
      "step": 154884
    },
    {
      "epoch": 29.74,
      "learning_rate": 0.001,
      "loss": 2.5423,
      "step": 154896
    },
    {
      "epoch": 29.74,
      "learning_rate": 0.001,
      "loss": 2.5519,
      "step": 154908
    },
    {
      "epoch": 29.75,
      "learning_rate": 0.001,
      "loss": 2.5458,
      "step": 154920
    },
    {
      "epoch": 29.75,
      "learning_rate": 0.001,
      "loss": 2.5374,
      "step": 154932
    },
    {
      "epoch": 29.75,
      "learning_rate": 0.001,
      "loss": 2.5383,
      "step": 154944
    },
    {
      "epoch": 29.75,
      "learning_rate": 0.001,
      "loss": 2.545,
      "step": 154956
    },
    {
      "epoch": 29.76,
      "learning_rate": 0.001,
      "loss": 2.539,
      "step": 154968
    },
    {
      "epoch": 29.76,
      "learning_rate": 0.001,
      "loss": 2.5463,
      "step": 154980
    },
    {
      "epoch": 29.76,
      "learning_rate": 0.001,
      "loss": 2.5435,
      "step": 154992
    },
    {
      "epoch": 29.76,
      "eval_ag_news_accuracy": 0.32440625,
      "eval_ag_news_bleu_score": 4.918723393516643,
      "eval_ag_news_bleu_score_sem": 0.15008339969346499,
      "eval_ag_news_emb_cos_sim": 0.8142803311347961,
      "eval_ag_news_emb_cos_sim_sem": 0.006617780273713652,
      "eval_ag_news_emb_top1_equal": 0.2734375,
      "eval_ag_news_emb_top1_equal_sem": 0.03955156411760461,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5221900939941406,
      "eval_ag_news_n_ngrams_match_1": 14.318,
      "eval_ag_news_n_ngrams_match_2": 3.164,
      "eval_ag_news_n_ngrams_match_3": 0.906,
      "eval_ag_news_num_pred_words": 46.874,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.8585006206092,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35295604619885224,
      "eval_ag_news_runtime": 10.456,
      "eval_ag_news_samples_per_second": 47.819,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.35509576487982475,
      "eval_ag_news_token_set_f1_sem": 0.004293523174751427,
      "eval_ag_news_token_set_precision": 0.3408242759432546,
      "eval_ag_news_token_set_recall": 0.38406877769672304,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 155000
    },
    {
      "epoch": 29.76,
      "eval_anthropic_toxic_prompts_accuracy": 0.1141875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.222160102080987,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1268910601411968,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6764065027236938,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008927243179648409,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2388124465942383,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.33,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.014,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.758,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.78,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.503417086891567,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2162627095855561,
      "eval_anthropic_toxic_prompts_runtime": 10.334,
      "eval_anthropic_toxic_prompts_samples_per_second": 48.384,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.097,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3575747876822104,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00633265733193282,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4486097670377029,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3221478276575159,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 155000
    },
    {
      "epoch": 29.76,
      "eval_arxiv_accuracy": 0.3510625,
      "eval_arxiv_bleu_score": 4.371270783679327,
      "eval_arxiv_bleu_score_sem": 0.12889401862848066,
      "eval_arxiv_emb_cos_sim": 0.7739661931991577,
      "eval_arxiv_emb_cos_sim_sem": 0.007297900016866901,
      "eval_arxiv_emb_top1_equal": 0.3046875,
      "eval_arxiv_emb_top1_equal_sem": 0.04084279867618665,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3622114658355713,
      "eval_arxiv_n_ngrams_match_1": 15.268,
      "eval_arxiv_n_ngrams_match_2": 2.94,
      "eval_arxiv_n_ngrams_match_3": 0.654,
      "eval_arxiv_num_pred_words": 40.586,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.85292764116578,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3637609111878547,
      "eval_arxiv_runtime": 10.2844,
      "eval_arxiv_samples_per_second": 48.617,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.355780048560854,
      "eval_arxiv_token_set_f1_sem": 0.004240442774635202,
      "eval_arxiv_token_set_precision": 0.30913912412583744,
      "eval_arxiv_token_set_recall": 0.4339336068734512,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 155000
    },
    {
      "epoch": 29.76,
      "eval_python_code_alpaca_accuracy": 0.16228125,
      "eval_python_code_alpaca_bleu_score": 4.892382853237619,
      "eval_python_code_alpaca_bleu_score_sem": 0.1515685779314612,
      "eval_python_code_alpaca_emb_cos_sim": 0.7611924409866333,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008666500384246748,
      "eval_python_code_alpaca_emb_top1_equal": 0.109375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.9016482830047607,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.016,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.03,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.094,
      "eval_python_code_alpaca_num_pred_words": 43.64,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.204126206045533,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33784637920451477,
      "eval_python_code_alpaca_runtime": 10.0011,
      "eval_python_code_alpaca_samples_per_second": 49.994,
      "eval_python_code_alpaca_steps_per_second": 0.1,
      "eval_python_code_alpaca_token_set_f1": 0.4827913308952058,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005714515060068195,
      "eval_python_code_alpaca_token_set_precision": 0.5482806994834478,
      "eval_python_code_alpaca_token_set_recall": 0.45291813216331395,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 155000
    },
    {
      "epoch": 29.76,
      "eval_wikibio_accuracy": 0.3261875,
      "eval_wikibio_bleu_score": 6.225144113051411,
      "eval_wikibio_bleu_score_sem": 0.21846633972148227,
      "eval_wikibio_emb_cos_sim": 0.7597200870513916,
      "eval_wikibio_emb_cos_sim_sem": 0.007626358968390901,
      "eval_wikibio_emb_top1_equal": 0.2265625,
      "eval_wikibio_emb_top1_equal_sem": 0.03714537682851538,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.69722318649292,
      "eval_wikibio_n_ngrams_match_1": 10.38,
      "eval_wikibio_n_ngrams_match_2": 3.54,
      "eval_wikibio_n_ngrams_match_3": 1.28,
      "eval_wikibio_num_pred_words": 36.36,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 40.335145533136675,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36752194188001464,
      "eval_wikibio_runtime": 9.9408,
      "eval_wikibio_samples_per_second": 50.298,
      "eval_wikibio_steps_per_second": 0.101,
      "eval_wikibio_token_set_f1": 0.3305379955531322,
      "eval_wikibio_token_set_f1_sem": 0.005038885511079791,
      "eval_wikibio_token_set_precision": 0.33957527807503374,
      "eval_wikibio_token_set_recall": 0.33651025077218183,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 155000
    },
    {
      "epoch": 29.76,
      "eval_nq_accuracy": 0.5313125,
      "eval_nq_bleu_score": 11.930756372274262,
      "eval_nq_bleu_score_sem": 0.4834372163814211,
      "eval_nq_emb_cos_sim": 0.8348702192306519,
      "eval_nq_emb_cos_sim_sem": 0.007248769962393358,
      "eval_nq_emb_top1_equal": 0.296875,
      "eval_nq_emb_top1_equal_sem": 0.04054163310179599,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1603705883026123,
      "eval_nq_n_ngrams_match_1": 23.322,
      "eval_nq_n_ngrams_match_2": 8.588,
      "eval_nq_n_ngrams_match_3": 3.938,
      "eval_nq_num_pred_words": 49.088,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.67435167615192,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45370456930437236,
      "eval_nq_runtime": 10.5323,
      "eval_nq_samples_per_second": 47.473,
      "eval_nq_steps_per_second": 0.095,
      "eval_nq_token_set_f1": 0.4657585442920766,
      "eval_nq_token_set_f1_sem": 0.005039962585795291,
      "eval_nq_token_set_precision": 0.4261823126065449,
      "eval_nq_token_set_recall": 0.5208232575212093,
      "eval_nq_true_num_tokens": 64.0,
      "step": 155000
    },
    {
      "epoch": 29.76,
      "learning_rate": 0.001,
      "loss": 2.5419,
      "step": 155004
    },
    {
      "epoch": 29.76,
      "learning_rate": 0.001,
      "loss": 2.538,
      "step": 155016
    },
    {
      "epoch": 29.77,
      "learning_rate": 0.001,
      "loss": 2.5475,
      "step": 155028
    },
    {
      "epoch": 29.77,
      "learning_rate": 0.001,
      "loss": 2.5364,
      "step": 155040
    },
    {
      "epoch": 29.77,
      "learning_rate": 0.001,
      "loss": 2.5418,
      "step": 155052
    },
    {
      "epoch": 29.77,
      "learning_rate": 0.001,
      "loss": 2.5418,
      "step": 155064
    },
    {
      "epoch": 29.78,
      "learning_rate": 0.001,
      "loss": 2.5337,
      "step": 155076
    },
    {
      "epoch": 29.78,
      "learning_rate": 0.001,
      "loss": 2.543,
      "step": 155088
    },
    {
      "epoch": 29.78,
      "learning_rate": 0.001,
      "loss": 2.5295,
      "step": 155100
    },
    {
      "epoch": 29.78,
      "learning_rate": 0.001,
      "loss": 2.5439,
      "step": 155112
    },
    {
      "epoch": 29.79,
      "learning_rate": 0.001,
      "loss": 2.528,
      "step": 155124
    },
    {
      "epoch": 29.79,
      "learning_rate": 0.001,
      "loss": 2.5332,
      "step": 155136
    },
    {
      "epoch": 29.79,
      "learning_rate": 0.001,
      "loss": 2.5332,
      "step": 155148
    },
    {
      "epoch": 29.79,
      "learning_rate": 0.001,
      "loss": 2.5398,
      "step": 155160
    },
    {
      "epoch": 29.79,
      "learning_rate": 0.001,
      "loss": 2.537,
      "step": 155172
    },
    {
      "epoch": 29.8,
      "learning_rate": 0.001,
      "loss": 2.5364,
      "step": 155184
    },
    {
      "epoch": 29.8,
      "learning_rate": 0.001,
      "loss": 2.539,
      "step": 155196
    },
    {
      "epoch": 29.8,
      "learning_rate": 0.001,
      "loss": 2.5342,
      "step": 155208
    },
    {
      "epoch": 29.8,
      "learning_rate": 0.001,
      "loss": 2.5372,
      "step": 155220
    },
    {
      "epoch": 29.81,
      "learning_rate": 0.001,
      "loss": 2.5408,
      "step": 155232
    },
    {
      "epoch": 29.81,
      "learning_rate": 0.001,
      "loss": 2.5488,
      "step": 155244
    },
    {
      "epoch": 29.81,
      "learning_rate": 0.001,
      "loss": 2.5413,
      "step": 155256
    },
    {
      "epoch": 29.81,
      "learning_rate": 0.001,
      "loss": 2.5417,
      "step": 155268
    },
    {
      "epoch": 29.82,
      "learning_rate": 0.001,
      "loss": 2.5418,
      "step": 155280
    },
    {
      "epoch": 29.82,
      "learning_rate": 0.001,
      "loss": 2.5465,
      "step": 155292
    },
    {
      "epoch": 29.82,
      "learning_rate": 0.001,
      "loss": 2.5508,
      "step": 155304
    },
    {
      "epoch": 29.82,
      "learning_rate": 0.001,
      "loss": 2.539,
      "step": 155316
    },
    {
      "epoch": 29.82,
      "learning_rate": 0.001,
      "loss": 2.5309,
      "step": 155328
    },
    {
      "epoch": 29.83,
      "learning_rate": 0.001,
      "loss": 2.5392,
      "step": 155340
    },
    {
      "epoch": 29.83,
      "learning_rate": 0.001,
      "loss": 2.5357,
      "step": 155352
    },
    {
      "epoch": 29.83,
      "learning_rate": 0.001,
      "loss": 2.5311,
      "step": 155364
    },
    {
      "epoch": 29.83,
      "learning_rate": 0.001,
      "loss": 2.5412,
      "step": 155376
    },
    {
      "epoch": 29.84,
      "learning_rate": 0.001,
      "loss": 2.546,
      "step": 155388
    },
    {
      "epoch": 29.84,
      "learning_rate": 0.001,
      "loss": 2.5371,
      "step": 155400
    },
    {
      "epoch": 29.84,
      "learning_rate": 0.001,
      "loss": 2.5371,
      "step": 155412
    },
    {
      "epoch": 29.84,
      "learning_rate": 0.001,
      "loss": 2.5418,
      "step": 155424
    },
    {
      "epoch": 29.85,
      "learning_rate": 0.001,
      "loss": 2.5512,
      "step": 155436
    },
    {
      "epoch": 29.85,
      "learning_rate": 0.001,
      "loss": 2.543,
      "step": 155448
    },
    {
      "epoch": 29.85,
      "learning_rate": 0.001,
      "loss": 2.5429,
      "step": 155460
    },
    {
      "epoch": 29.85,
      "learning_rate": 0.001,
      "loss": 2.5361,
      "step": 155472
    },
    {
      "epoch": 29.85,
      "learning_rate": 0.001,
      "loss": 2.5384,
      "step": 155484
    },
    {
      "epoch": 29.86,
      "learning_rate": 0.001,
      "loss": 2.5438,
      "step": 155496
    },
    {
      "epoch": 29.86,
      "learning_rate": 0.001,
      "loss": 2.5353,
      "step": 155508
    },
    {
      "epoch": 29.86,
      "learning_rate": 0.001,
      "loss": 2.5465,
      "step": 155520
    },
    {
      "epoch": 29.86,
      "learning_rate": 0.001,
      "loss": 2.5413,
      "step": 155532
    },
    {
      "epoch": 29.87,
      "learning_rate": 0.001,
      "loss": 2.5428,
      "step": 155544
    },
    {
      "epoch": 29.87,
      "learning_rate": 0.001,
      "loss": 2.5356,
      "step": 155556
    },
    {
      "epoch": 29.87,
      "learning_rate": 0.001,
      "loss": 2.54,
      "step": 155568
    },
    {
      "epoch": 29.87,
      "learning_rate": 0.001,
      "loss": 2.5604,
      "step": 155580
    },
    {
      "epoch": 29.88,
      "learning_rate": 0.001,
      "loss": 2.5471,
      "step": 155592
    },
    {
      "epoch": 29.88,
      "learning_rate": 0.001,
      "loss": 2.5349,
      "step": 155604
    },
    {
      "epoch": 29.88,
      "learning_rate": 0.001,
      "loss": 2.5354,
      "step": 155616
    },
    {
      "epoch": 29.88,
      "eval_ag_news_accuracy": 0.32484375,
      "eval_ag_news_bleu_score": 4.952086652546281,
      "eval_ag_news_bleu_score_sem": 0.15125139442082425,
      "eval_ag_news_emb_cos_sim": 0.8183904886245728,
      "eval_ag_news_emb_cos_sim_sem": 0.0065492211806210945,
      "eval_ag_news_emb_top1_equal": 0.234375,
      "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5249595642089844,
      "eval_ag_news_n_ngrams_match_1": 14.398,
      "eval_ag_news_n_ngrams_match_2": 3.19,
      "eval_ag_news_n_ngrams_match_3": 0.936,
      "eval_ag_news_num_pred_words": 46.686,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.952400696311145,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35758244962937286,
      "eval_ag_news_runtime": 10.6914,
      "eval_ag_news_samples_per_second": 46.766,
      "eval_ag_news_steps_per_second": 0.094,
      "eval_ag_news_token_set_f1": 0.3600904440123456,
      "eval_ag_news_token_set_f1_sem": 0.00424496723783764,
      "eval_ag_news_token_set_precision": 0.3453861360499154,
      "eval_ag_news_token_set_recall": 0.3900398253031491,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 155625
    },
    {
      "epoch": 29.88,
      "eval_anthropic_toxic_prompts_accuracy": 0.11490625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2417495072455913,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12554304611460063,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6748640537261963,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009528572023688574,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2387568950653076,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.256,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.986,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.764,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.98,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.502000372430125,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2148126582692897,
      "eval_anthropic_toxic_prompts_runtime": 9.9263,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.371,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.101,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36260856382633255,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006479654440130748,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44291925250433334,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.334202466712735,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 155625
    },
    {
      "epoch": 29.88,
      "eval_arxiv_accuracy": 0.35090625,
      "eval_arxiv_bleu_score": 4.505581752252082,
      "eval_arxiv_bleu_score_sem": 0.13209890477513822,
      "eval_arxiv_emb_cos_sim": 0.770883321762085,
      "eval_arxiv_emb_cos_sim_sem": 0.008618350244001084,
      "eval_arxiv_emb_top1_equal": 0.28125,
      "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.37870454788208,
      "eval_arxiv_n_ngrams_match_1": 15.318,
      "eval_arxiv_n_ngrams_match_2": 3.096,
      "eval_arxiv_n_ngrams_match_3": 0.694,
      "eval_arxiv_num_pred_words": 40.826,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.332747319965197,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36730055458220245,
      "eval_arxiv_runtime": 10.374,
      "eval_arxiv_samples_per_second": 48.198,
      "eval_arxiv_steps_per_second": 0.096,
      "eval_arxiv_token_set_f1": 0.358447328534449,
      "eval_arxiv_token_set_f1_sem": 0.004186700433961449,
      "eval_arxiv_token_set_precision": 0.31052965207162525,
      "eval_arxiv_token_set_recall": 0.4398475041172051,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 155625
    },
    {
      "epoch": 29.88,
      "eval_python_code_alpaca_accuracy": 0.1620625,
      "eval_python_code_alpaca_bleu_score": 4.764005260698104,
      "eval_python_code_alpaca_bleu_score_sem": 0.1498926327202342,
      "eval_python_code_alpaca_emb_cos_sim": 0.770979106426239,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007862454418492294,
      "eval_python_code_alpaca_emb_top1_equal": 0.1484375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.884103298187256,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.004,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.988,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.996,
      "eval_python_code_alpaca_num_pred_words": 43.494,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.887520629749478,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3397302916141258,
      "eval_python_code_alpaca_runtime": 10.5692,
      "eval_python_code_alpaca_samples_per_second": 47.307,
      "eval_python_code_alpaca_steps_per_second": 0.095,
      "eval_python_code_alpaca_token_set_f1": 0.47950619501897257,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005151002599964493,
      "eval_python_code_alpaca_token_set_precision": 0.5472949900950337,
      "eval_python_code_alpaca_token_set_recall": 0.4491924777801191,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 155625
    },
    {
      "epoch": 29.88,
      "eval_wikibio_accuracy": 0.32653125,
      "eval_wikibio_bleu_score": 6.049818715521647,
      "eval_wikibio_bleu_score_sem": 0.22226389530913723,
      "eval_wikibio_emb_cos_sim": 0.7485655546188354,
      "eval_wikibio_emb_cos_sim_sem": 0.007909253225407578,
      "eval_wikibio_emb_top1_equal": 0.203125,
      "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.696716547012329,
      "eval_wikibio_n_ngrams_match_1": 9.998,
      "eval_wikibio_n_ngrams_match_2": 3.408,
      "eval_wikibio_n_ngrams_match_3": 1.274,
      "eval_wikibio_num_pred_words": 36.304,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 40.314715331764525,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.351339624505013,
      "eval_wikibio_runtime": 10.1536,
      "eval_wikibio_samples_per_second": 49.243,
      "eval_wikibio_steps_per_second": 0.098,
      "eval_wikibio_token_set_f1": 0.31757232621088477,
      "eval_wikibio_token_set_f1_sem": 0.005278620008801585,
      "eval_wikibio_token_set_precision": 0.3247958929063056,
      "eval_wikibio_token_set_recall": 0.32918626183086436,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 155625
    },
    {
      "epoch": 29.88,
      "eval_nq_accuracy": 0.53109375,
      "eval_nq_bleu_score": 11.635680358876966,
      "eval_nq_bleu_score_sem": 0.4753850858972373,
      "eval_nq_emb_cos_sim": 0.8347564339637756,
      "eval_nq_emb_cos_sim_sem": 0.006829161865519469,
      "eval_nq_emb_top1_equal": 0.3515625,
      "eval_nq_emb_top1_equal_sem": 0.04236756101983345,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.160743236541748,
      "eval_nq_n_ngrams_match_1": 23.238,
      "eval_nq_n_ngrams_match_2": 8.568,
      "eval_nq_n_ngrams_match_3": 3.874,
      "eval_nq_num_pred_words": 49.114,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.677584760393842,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4497797331113026,
      "eval_nq_runtime": 10.5859,
      "eval_nq_samples_per_second": 47.233,
      "eval_nq_steps_per_second": 0.094,
      "eval_nq_token_set_f1": 0.4631730229111701,
      "eval_nq_token_set_f1_sem": 0.004904152060659144,
      "eval_nq_token_set_precision": 0.4224903602667082,
      "eval_nq_token_set_recall": 0.5206331316733986,
      "eval_nq_true_num_tokens": 64.0,
      "step": 155625
    },
    {
      "epoch": 29.88,
      "learning_rate": 0.001,
      "loss": 2.551,
      "step": 155628
    },
    {
      "epoch": 29.88,
      "learning_rate": 0.001,
      "loss": 2.5512,
      "step": 155640
    },
    {
      "epoch": 29.89,
      "learning_rate": 0.001,
      "loss": 2.5366,
      "step": 155652
    },
    {
      "epoch": 29.89,
      "learning_rate": 0.001,
      "loss": 2.5369,
      "step": 155664
    },
    {
      "epoch": 29.89,
      "learning_rate": 0.001,
      "loss": 2.543,
      "step": 155676
    },
    {
      "epoch": 29.89,
      "learning_rate": 0.001,
      "loss": 2.5422,
      "step": 155688
    },
    {
      "epoch": 29.9,
      "learning_rate": 0.001,
      "loss": 2.5426,
      "step": 155700
    },
    {
      "epoch": 29.9,
      "learning_rate": 0.001,
      "loss": 2.5412,
      "step": 155712
    },
    {
      "epoch": 29.9,
      "learning_rate": 0.001,
      "loss": 2.5404,
      "step": 155724
    },
    {
      "epoch": 29.9,
      "learning_rate": 0.001,
      "loss": 2.5391,
      "step": 155736
    },
    {
      "epoch": 29.91,
      "learning_rate": 0.001,
      "loss": 2.5374,
      "step": 155748
    },
    {
      "epoch": 29.91,
      "learning_rate": 0.001,
      "loss": 2.5431,
      "step": 155760
    },
    {
      "epoch": 29.91,
      "learning_rate": 0.001,
      "loss": 2.552,
      "step": 155772
    },
    {
      "epoch": 29.91,
      "learning_rate": 0.001,
      "loss": 2.5305,
      "step": 155784
    },
    {
      "epoch": 29.91,
      "learning_rate": 0.001,
      "loss": 2.5437,
      "step": 155796
    },
    {
      "epoch": 29.92,
      "learning_rate": 0.001,
      "loss": 2.5344,
      "step": 155808
    },
    {
      "epoch": 29.92,
      "learning_rate": 0.001,
      "loss": 2.5412,
      "step": 155820
    },
    {
      "epoch": 29.92,
      "learning_rate": 0.001,
      "loss": 2.5406,
      "step": 155832
    },
    {
      "epoch": 29.92,
      "learning_rate": 0.001,
      "loss": 2.5325,
      "step": 155844
    },
    {
      "epoch": 29.93,
      "learning_rate": 0.001,
      "loss": 2.5474,
      "step": 155856
    },
    {
      "epoch": 29.93,
      "learning_rate": 0.001,
      "loss": 2.5323,
      "step": 155868
    },
    {
      "epoch": 29.93,
      "learning_rate": 0.001,
      "loss": 2.5424,
      "step": 155880
    },
    {
      "epoch": 29.93,
      "learning_rate": 0.001,
      "loss": 2.5465,
      "step": 155892
    },
    {
      "epoch": 29.94,
      "learning_rate": 0.001,
      "loss": 2.5396,
      "step": 155904
    },
    {
      "epoch": 29.94,
      "learning_rate": 0.001,
      "loss": 2.5471,
      "step": 155916
    },
    {
      "epoch": 29.94,
      "learning_rate": 0.001,
      "loss": 2.5441,
      "step": 155928
    },
    {
      "epoch": 29.94,
      "learning_rate": 0.001,
      "loss": 2.5372,
      "step": 155940
    },
    {
      "epoch": 29.94,
      "learning_rate": 0.001,
      "loss": 2.5483,
      "step": 155952
    },
    {
      "epoch": 29.95,
      "learning_rate": 0.001,
      "loss": 2.5362,
      "step": 155964
    },
    {
      "epoch": 29.95,
      "learning_rate": 0.001,
      "loss": 2.5353,
      "step": 155976
    },
    {
      "epoch": 29.95,
      "learning_rate": 0.001,
      "loss": 2.5401,
      "step": 155988
    },
    {
      "epoch": 29.95,
      "learning_rate": 0.001,
      "loss": 2.5376,
      "step": 156000
    },
    {
      "epoch": 29.96,
      "learning_rate": 0.001,
      "loss": 2.5443,
      "step": 156012
    },
    {
      "epoch": 29.96,
      "learning_rate": 0.001,
      "loss": 2.5489,
      "step": 156024
    },
    {
      "epoch": 29.96,
      "learning_rate": 0.001,
      "loss": 2.5513,
      "step": 156036
    },
    {
      "epoch": 29.96,
      "learning_rate": 0.001,
      "loss": 2.5415,
      "step": 156048
    },
    {
      "epoch": 29.97,
      "learning_rate": 0.001,
      "loss": 2.5285,
      "step": 156060
    },
    {
      "epoch": 29.97,
      "learning_rate": 0.001,
      "loss": 2.5405,
      "step": 156072
    },
    {
      "epoch": 29.97,
      "learning_rate": 0.001,
      "loss": 2.5374,
      "step": 156084
    },
    {
      "epoch": 29.97,
      "learning_rate": 0.001,
      "loss": 2.5471,
      "step": 156096
    },
    {
      "epoch": 29.97,
      "learning_rate": 0.001,
      "loss": 2.5417,
      "step": 156108
    },
    {
      "epoch": 29.98,
      "learning_rate": 0.001,
      "loss": 2.5391,
      "step": 156120
    },
    {
      "epoch": 29.98,
      "learning_rate": 0.001,
      "loss": 2.5384,
      "step": 156132
    },
    {
      "epoch": 29.98,
      "learning_rate": 0.001,
      "loss": 2.5306,
      "step": 156144
    },
    {
      "epoch": 29.98,
      "learning_rate": 0.001,
      "loss": 2.5364,
      "step": 156156
    },
    {
      "epoch": 29.99,
      "learning_rate": 0.001,
      "loss": 2.5407,
      "step": 156168
    },
    {
      "epoch": 29.99,
      "learning_rate": 0.001,
      "loss": 2.5446,
      "step": 156180
    },
    {
      "epoch": 29.99,
      "learning_rate": 0.001,
      "loss": 2.5333,
      "step": 156192
    },
    {
      "epoch": 29.99,
      "learning_rate": 0.001,
      "loss": 2.5466,
      "step": 156204
    },
    {
      "epoch": 30.0,
      "learning_rate": 0.001,
      "loss": 2.5479,
      "step": 156216
    },
    {
      "epoch": 30.0,
      "learning_rate": 0.001,
      "loss": 2.5438,
      "step": 156228
    },
    {
      "epoch": 30.0,
      "learning_rate": 0.001,
      "loss": 2.5266,
      "step": 156240
    },
    {
      "epoch": 30.0,
      "eval_ag_news_accuracy": 0.3256875,
      "eval_ag_news_bleu_score": 4.952198712747313,
      "eval_ag_news_bleu_score_sem": 0.16501101027865148,
      "eval_ag_news_emb_cos_sim": 0.8201794028282166,
      "eval_ag_news_emb_cos_sim_sem": 0.006350625230437762,
      "eval_ag_news_emb_top1_equal": 0.234375,
      "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.50901460647583,
      "eval_ag_news_n_ngrams_match_1": 14.114,
      "eval_ag_news_n_ngrams_match_2": 3.146,
      "eval_ag_news_n_ngrams_match_3": 0.874,
      "eval_ag_news_num_pred_words": 46.482,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.415324311283904,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35395600557735385,
      "eval_ag_news_runtime": 10.4507,
      "eval_ag_news_samples_per_second": 47.844,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.3526154363298412,
      "eval_ag_news_token_set_f1_sem": 0.00427935192668522,
      "eval_ag_news_token_set_precision": 0.3382957857110494,
      "eval_ag_news_token_set_recall": 0.3839005172887486,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 156250
    },
    {
      "epoch": 30.0,
      "eval_anthropic_toxic_prompts_accuracy": 0.11584375,
      "eval_anthropic_toxic_prompts_bleu_score": 3.191813219317776,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12393265672174822,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6729031801223755,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008686339037549727,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1484375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.031548465007086954,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2171788215637207,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.27,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.982,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.768,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.722,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.95761089478656,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2141080923860717,
      "eval_anthropic_toxic_prompts_runtime": 9.9675,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.163,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.1,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3591335679912616,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006652981611880587,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4425810020779211,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3308347329763512,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 156250
    },
    {
      "epoch": 30.0,
      "eval_arxiv_accuracy": 0.351125,
      "eval_arxiv_bleu_score": 4.449350568299391,
      "eval_arxiv_bleu_score_sem": 0.13938156090728518,
      "eval_arxiv_emb_cos_sim": 0.7611921429634094,
      "eval_arxiv_emb_cos_sim_sem": 0.009035034403259529,
      "eval_arxiv_emb_top1_equal": 0.2578125,
      "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.371891975402832,
      "eval_arxiv_n_ngrams_match_1": 15.102,
      "eval_arxiv_n_ngrams_match_2": 3.018,
      "eval_arxiv_n_ngrams_match_3": 0.708,
      "eval_arxiv_num_pred_words": 40.05,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.133594992903735,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.362784949262172,
      "eval_arxiv_runtime": 10.2624,
      "eval_arxiv_samples_per_second": 48.722,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.3536816353242398,
      "eval_arxiv_token_set_f1_sem": 0.0044563905631548,
      "eval_arxiv_token_set_precision": 0.3053094810539887,
      "eval_arxiv_token_set_recall": 0.4369377355367627,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 156250
    },
    {
      "epoch": 30.0,
      "eval_python_code_alpaca_accuracy": 0.161875,
      "eval_python_code_alpaca_bleu_score": 4.567276534819873,
      "eval_python_code_alpaca_bleu_score_sem": 0.14176757614863209,
      "eval_python_code_alpaca_emb_cos_sim": 0.7530335187911987,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009529549962282929,
      "eval_python_code_alpaca_emb_top1_equal": 0.140625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8673653602600098,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.944,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.946,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.994,
      "eval_python_code_alpaca_num_pred_words": 44.478,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.590612169222595,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33423296055930685,
      "eval_python_code_alpaca_runtime": 10.1686,
      "eval_python_code_alpaca_samples_per_second": 49.171,
      "eval_python_code_alpaca_steps_per_second": 0.098,
      "eval_python_code_alpaca_token_set_f1": 0.4795782777959037,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0055880244121022426,
      "eval_python_code_alpaca_token_set_precision": 0.5420998372696966,
      "eval_python_code_alpaca_token_set_recall": 0.452644518300551,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 156250
    },
    {
      "epoch": 30.0,
      "eval_wikibio_accuracy": 0.325375,
      "eval_wikibio_bleu_score": 6.2310717422902435,
      "eval_wikibio_bleu_score_sem": 0.2217136427339964,
      "eval_wikibio_emb_cos_sim": 0.7579187154769897,
      "eval_wikibio_emb_cos_sim_sem": 0.008380238330616248,
      "eval_wikibio_emb_top1_equal": 0.1328125,
      "eval_wikibio_emb_top1_equal_sem": 0.030114394778901498,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.71551775932312,
      "eval_wikibio_n_ngrams_match_1": 10.214,
      "eval_wikibio_n_ngrams_match_2": 3.502,
      "eval_wikibio_n_ngrams_match_3": 1.302,
      "eval_wikibio_num_pred_words": 35.902,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 41.07985105498431,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3641476729697386,
      "eval_wikibio_runtime": 10.281,
      "eval_wikibio_samples_per_second": 48.633,
      "eval_wikibio_steps_per_second": 0.097,
      "eval_wikibio_token_set_f1": 0.32486307207343096,
      "eval_wikibio_token_set_f1_sem": 0.0054074189432803885,
      "eval_wikibio_token_set_precision": 0.33213279154953373,
      "eval_wikibio_token_set_recall": 0.33403978678338864,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 156250
    },
    {
      "epoch": 30.0,
      "eval_nq_accuracy": 0.53334375,
      "eval_nq_bleu_score": 12.008924948458075,
      "eval_nq_bleu_score_sem": 0.4770210262206687,
      "eval_nq_emb_cos_sim": 0.8401103019714355,
      "eval_nq_emb_cos_sim_sem": 0.006983090058815349,
      "eval_nq_emb_top1_equal": 0.3046875,
      "eval_nq_emb_top1_equal_sem": 0.04084279867618665,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1568565368652344,
      "eval_nq_n_ngrams_match_1": 23.262,
      "eval_nq_n_ngrams_match_2": 8.612,
      "eval_nq_n_ngrams_match_3": 4.056,
      "eval_nq_num_pred_words": 48.97,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.643923053361304,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4529741489123078,
      "eval_nq_runtime": 10.7694,
      "eval_nq_samples_per_second": 46.428,
      "eval_nq_steps_per_second": 0.093,
      "eval_nq_token_set_f1": 0.46731023699601365,
      "eval_nq_token_set_f1_sem": 0.004965429608990865,
      "eval_nq_token_set_precision": 0.4245477684377968,
      "eval_nq_token_set_recall": 0.5285678036747155,
      "eval_nq_true_num_tokens": 64.0,
      "step": 156250
    },
    {
      "epoch": 30.0,
      "learning_rate": 0.001,
      "loss": 2.5296,
      "step": 156252
    },
    {
      "epoch": 30.0,
      "learning_rate": 0.001,
      "loss": 2.5249,
      "step": 156264
    },
    {
      "epoch": 30.01,
      "learning_rate": 0.001,
      "loss": 2.515,
      "step": 156276
    },
    {
      "epoch": 30.01,
      "learning_rate": 0.001,
      "loss": 2.5287,
      "step": 156288
    },
    {
      "epoch": 30.01,
      "learning_rate": 0.001,
      "loss": 2.5166,
      "step": 156300
    },
    {
      "epoch": 30.01,
      "learning_rate": 0.001,
      "loss": 2.5197,
      "step": 156312
    },
    {
      "epoch": 30.02,
      "learning_rate": 0.001,
      "loss": 2.5303,
      "step": 156324
    },
    {
      "epoch": 30.02,
      "learning_rate": 0.001,
      "loss": 2.515,
      "step": 156336
    },
    {
      "epoch": 30.02,
      "learning_rate": 0.001,
      "loss": 2.5269,
      "step": 156348
    },
    {
      "epoch": 30.02,
      "learning_rate": 0.001,
      "loss": 2.5233,
      "step": 156360
    },
    {
      "epoch": 30.03,
      "learning_rate": 0.001,
      "loss": 2.5289,
      "step": 156372
    },
    {
      "epoch": 30.03,
      "learning_rate": 0.001,
      "loss": 2.5309,
      "step": 156384
    },
    {
      "epoch": 30.03,
      "learning_rate": 0.001,
      "loss": 2.5323,
      "step": 156396
    },
    {
      "epoch": 30.03,
      "learning_rate": 0.001,
      "loss": 2.5091,
      "step": 156408
    },
    {
      "epoch": 30.03,
      "learning_rate": 0.001,
      "loss": 2.5168,
      "step": 156420
    },
    {
      "epoch": 30.04,
      "learning_rate": 0.001,
      "loss": 2.5318,
      "step": 156432
    },
    {
      "epoch": 30.04,
      "learning_rate": 0.001,
      "loss": 2.5195,
      "step": 156444
    },
    {
      "epoch": 30.04,
      "learning_rate": 0.001,
      "loss": 2.5195,
      "step": 156456
    },
    {
      "epoch": 30.04,
      "learning_rate": 0.001,
      "loss": 2.5206,
      "step": 156468
    },
    {
      "epoch": 30.05,
      "learning_rate": 0.001,
      "loss": 2.5158,
      "step": 156480
    },
    {
      "epoch": 30.05,
      "learning_rate": 0.001,
      "loss": 2.5295,
      "step": 156492
    },
    {
      "epoch": 30.05,
      "learning_rate": 0.001,
      "loss": 2.5239,
      "step": 156504
    },
    {
      "epoch": 30.05,
      "learning_rate": 0.001,
      "loss": 2.5243,
      "step": 156516
    },
    {
      "epoch": 30.06,
      "learning_rate": 0.001,
      "loss": 2.5108,
      "step": 156528
    },
    {
      "epoch": 30.06,
      "learning_rate": 0.001,
      "loss": 2.5195,
      "step": 156540
    },
    {
      "epoch": 30.06,
      "learning_rate": 0.001,
      "loss": 2.5171,
      "step": 156552
    },
    {
      "epoch": 30.06,
      "learning_rate": 0.001,
      "loss": 2.5256,
      "step": 156564
    },
    {
      "epoch": 30.06,
      "learning_rate": 0.001,
      "loss": 2.5271,
      "step": 156576
    },
    {
      "epoch": 30.07,
      "learning_rate": 0.001,
      "loss": 2.5107,
      "step": 156588
    },
    {
      "epoch": 30.07,
      "learning_rate": 0.001,
      "loss": 2.5246,
      "step": 156600
    },
    {
      "epoch": 30.07,
      "learning_rate": 0.001,
      "loss": 2.5215,
      "step": 156612
    },
    {
      "epoch": 30.07,
      "learning_rate": 0.001,
      "loss": 2.5279,
      "step": 156624
    },
    {
      "epoch": 30.08,
      "learning_rate": 0.001,
      "loss": 2.527,
      "step": 156636
    },
    {
      "epoch": 30.08,
      "learning_rate": 0.001,
      "loss": 2.5346,
      "step": 156648
    },
    {
      "epoch": 30.08,
      "learning_rate": 0.001,
      "loss": 2.5227,
      "step": 156660
    },
    {
      "epoch": 30.08,
      "learning_rate": 0.001,
      "loss": 2.5092,
      "step": 156672
    },
    {
      "epoch": 30.09,
      "learning_rate": 0.001,
      "loss": 2.5215,
      "step": 156684
    },
    {
      "epoch": 30.09,
      "learning_rate": 0.001,
      "loss": 2.5132,
      "step": 156696
    },
    {
      "epoch": 30.09,
      "learning_rate": 0.001,
      "loss": 2.5276,
      "step": 156708
    },
    {
      "epoch": 30.09,
      "learning_rate": 0.001,
      "loss": 2.5176,
      "step": 156720
    },
    {
      "epoch": 30.09,
      "learning_rate": 0.001,
      "loss": 2.5237,
      "step": 156732
    },
    {
      "epoch": 30.1,
      "learning_rate": 0.001,
      "loss": 2.528,
      "step": 156744
    },
    {
      "epoch": 30.1,
      "learning_rate": 0.001,
      "loss": 2.5385,
      "step": 156756
    },
    {
      "epoch": 30.1,
      "learning_rate": 0.001,
      "loss": 2.5252,
      "step": 156768
    },
    {
      "epoch": 30.1,
      "learning_rate": 0.001,
      "loss": 2.5266,
      "step": 156780
    },
    {
      "epoch": 30.11,
      "learning_rate": 0.001,
      "loss": 2.5211,
      "step": 156792
    },
    {
      "epoch": 30.11,
      "learning_rate": 0.001,
      "loss": 2.5223,
      "step": 156804
    },
    {
      "epoch": 30.11,
      "learning_rate": 0.001,
      "loss": 2.5153,
      "step": 156816
    },
    {
      "epoch": 30.11,
      "learning_rate": 0.001,
      "loss": 2.5284,
      "step": 156828
    },
    {
      "epoch": 30.12,
      "learning_rate": 0.001,
      "loss": 2.5222,
      "step": 156840
    },
    {
      "epoch": 30.12,
      "learning_rate": 0.001,
      "loss": 2.5307,
      "step": 156852
    },
    {
      "epoch": 30.12,
      "learning_rate": 0.001,
      "loss": 2.5375,
      "step": 156864
    },
    {
      "epoch": 30.12,
      "eval_ag_news_accuracy": 0.32309375,
      "eval_ag_news_bleu_score": 4.8949900242191315,
      "eval_ag_news_bleu_score_sem": 0.1529257653334249,
      "eval_ag_news_emb_cos_sim": 0.814081072807312,
      "eval_ag_news_emb_cos_sim_sem": 0.007254983329826175,
      "eval_ag_news_emb_top1_equal": 0.2734375,
      "eval_ag_news_emb_top1_equal_sem": 0.03955156411760461,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.520991325378418,
      "eval_ag_news_n_ngrams_match_1": 14.25,
      "eval_ag_news_n_ngrams_match_2": 3.252,
      "eval_ag_news_n_ngrams_match_3": 0.946,
      "eval_ag_news_num_pred_words": 47.008,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.81793643108617,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35446625271231846,
      "eval_ag_news_runtime": 10.9418,
      "eval_ag_news_samples_per_second": 45.696,
      "eval_ag_news_steps_per_second": 0.091,
      "eval_ag_news_token_set_f1": 0.355520500036169,
      "eval_ag_news_token_set_f1_sem": 0.004403190894155138,
      "eval_ag_news_token_set_precision": 0.33965718467372263,
      "eval_ag_news_token_set_recall": 0.3893419219446139,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 156875
    },
    {
      "epoch": 30.12,
      "eval_anthropic_toxic_prompts_accuracy": 0.115,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1505349882027933,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12474808992202031,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6709076166152954,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009117380125626703,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.248901605606079,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.206,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.924,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.744,
      "eval_anthropic_toxic_prompts_num_pred_words": 48.024,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.76202750435498,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21068908037830206,
      "eval_anthropic_toxic_prompts_runtime": 9.9427,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.288,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.101,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35837125724281116,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006537739562108545,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.43762910921461895,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32986004706646394,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 156875
    },
    {
      "epoch": 30.12,
      "eval_arxiv_accuracy": 0.35159375,
      "eval_arxiv_bleu_score": 4.407643662944918,
      "eval_arxiv_bleu_score_sem": 0.13207412593659715,
      "eval_arxiv_emb_cos_sim": 0.7672524452209473,
      "eval_arxiv_emb_cos_sim_sem": 0.00845611121901446,
      "eval_arxiv_emb_top1_equal": 0.2421875,
      "eval_arxiv_emb_top1_equal_sem": 0.038014990119662626,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3698806762695312,
      "eval_arxiv_n_ngrams_match_1": 15.022,
      "eval_arxiv_n_ngrams_match_2": 2.942,
      "eval_arxiv_n_ngrams_match_3": 0.706,
      "eval_arxiv_num_pred_words": 40.666,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.07505750647602,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3595043837157954,
      "eval_arxiv_runtime": 10.3444,
      "eval_arxiv_samples_per_second": 48.336,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.3499389028075848,
      "eval_arxiv_token_set_f1_sem": 0.004091876456838303,
      "eval_arxiv_token_set_precision": 0.30276978660801523,
      "eval_arxiv_token_set_recall": 0.43161993113824787,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 156875
    },
    {
      "epoch": 30.12,
      "eval_python_code_alpaca_accuracy": 0.16190625,
      "eval_python_code_alpaca_bleu_score": 4.546297420719376,
      "eval_python_code_alpaca_bleu_score_sem": 0.1379698677714443,
      "eval_python_code_alpaca_emb_cos_sim": 0.7658563256263733,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007070094983576502,
      "eval_python_code_alpaca_emb_top1_equal": 0.1484375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.872417449951172,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.078,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.944,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.99,
      "eval_python_code_alpaca_num_pred_words": 44.704,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.679706386110873,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33324354563833325,
      "eval_python_code_alpaca_runtime": 10.0571,
      "eval_python_code_alpaca_samples_per_second": 49.716,
      "eval_python_code_alpaca_steps_per_second": 0.099,
      "eval_python_code_alpaca_token_set_f1": 0.48325844493624837,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0054426218348746285,
      "eval_python_code_alpaca_token_set_precision": 0.5516745554128879,
      "eval_python_code_alpaca_token_set_recall": 0.4511151282955253,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 156875
    },
    {
      "epoch": 30.12,
      "eval_wikibio_accuracy": 0.3256875,
      "eval_wikibio_bleu_score": 6.337041882672664,
      "eval_wikibio_bleu_score_sem": 0.2202144506244899,
      "eval_wikibio_emb_cos_sim": 0.754089891910553,
      "eval_wikibio_emb_cos_sim_sem": 0.00780568590627066,
      "eval_wikibio_emb_top1_equal": 0.171875,
      "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7035880088806152,
      "eval_wikibio_n_ngrams_match_1": 10.374,
      "eval_wikibio_n_ngrams_match_2": 3.604,
      "eval_wikibio_n_ngrams_match_3": 1.38,
      "eval_wikibio_num_pred_words": 36.792,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 40.592690314383326,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36584263134246897,
      "eval_wikibio_runtime": 10.0408,
      "eval_wikibio_samples_per_second": 49.797,
      "eval_wikibio_steps_per_second": 0.1,
      "eval_wikibio_token_set_f1": 0.32506640393263425,
      "eval_wikibio_token_set_f1_sem": 0.005165435102722425,
      "eval_wikibio_token_set_precision": 0.3362564179071182,
      "eval_wikibio_token_set_recall": 0.3290020871295805,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 156875
    },
    {
      "epoch": 30.12,
      "eval_nq_accuracy": 0.5323125,
      "eval_nq_bleu_score": 12.059231370279635,
      "eval_nq_bleu_score_sem": 0.47247935205452724,
      "eval_nq_emb_cos_sim": 0.837883710861206,
      "eval_nq_emb_cos_sim_sem": 0.007197772275695176,
      "eval_nq_emb_top1_equal": 0.3203125,
      "eval_nq_emb_top1_equal_sem": 0.041403754790620424,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.159616708755493,
      "eval_nq_n_ngrams_match_1": 23.338,
      "eval_nq_n_ngrams_match_2": 8.78,
      "eval_nq_n_ngrams_match_3": 4.03,
      "eval_nq_num_pred_words": 49.172,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.667814724185542,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4547128175221566,
      "eval_nq_runtime": 12.063,
      "eval_nq_samples_per_second": 41.449,
      "eval_nq_steps_per_second": 0.083,
      "eval_nq_token_set_f1": 0.46948511725358355,
      "eval_nq_token_set_f1_sem": 0.004769747528186598,
      "eval_nq_token_set_precision": 0.42531129812185897,
      "eval_nq_token_set_recall": 0.5323654080304644,
      "eval_nq_true_num_tokens": 64.0,
      "step": 156875
    },
    {
      "epoch": 30.12,
      "learning_rate": 0.001,
      "loss": 2.5319,
      "step": 156876
    },
    {
      "epoch": 30.12,
      "learning_rate": 0.001,
      "loss": 2.5258,
      "step": 156888
    },
    {
      "epoch": 30.13,
      "learning_rate": 0.001,
      "loss": 2.527,
      "step": 156900
    },
    {
      "epoch": 30.13,
      "learning_rate": 0.001,
      "loss": 2.5292,
      "step": 156912
    },
    {
      "epoch": 30.13,
      "learning_rate": 0.001,
      "loss": 2.5327,
      "step": 156924
    },
    {
      "epoch": 30.13,
      "learning_rate": 0.001,
      "loss": 2.5243,
      "step": 156936
    },
    {
      "epoch": 30.14,
      "learning_rate": 0.001,
      "loss": 2.5141,
      "step": 156948
    },
    {
      "epoch": 30.14,
      "learning_rate": 0.001,
      "loss": 2.5227,
      "step": 156960
    },
    {
      "epoch": 30.14,
      "learning_rate": 0.001,
      "loss": 2.5326,
      "step": 156972
    },
    {
      "epoch": 30.14,
      "learning_rate": 0.001,
      "loss": 2.5305,
      "step": 156984
    },
    {
      "epoch": 30.15,
      "learning_rate": 0.001,
      "loss": 2.5322,
      "step": 156996
    },
    {
      "epoch": 30.15,
      "learning_rate": 0.001,
      "loss": 2.5208,
      "step": 157008
    },
    {
      "epoch": 30.15,
      "learning_rate": 0.001,
      "loss": 2.5216,
      "step": 157020
    },
    {
      "epoch": 30.15,
      "learning_rate": 0.001,
      "loss": 2.5296,
      "step": 157032
    },
    {
      "epoch": 30.15,
      "learning_rate": 0.001,
      "loss": 2.5235,
      "step": 157044
    },
    {
      "epoch": 30.16,
      "learning_rate": 0.001,
      "loss": 2.5245,
      "step": 157056
    },
    {
      "epoch": 30.16,
      "learning_rate": 0.001,
      "loss": 2.5267,
      "step": 157068
    },
    {
      "epoch": 30.16,
      "learning_rate": 0.001,
      "loss": 2.5234,
      "step": 157080
    },
    {
      "epoch": 30.16,
      "learning_rate": 0.001,
      "loss": 2.5154,
      "step": 157092
    },
    {
      "epoch": 30.17,
      "learning_rate": 0.001,
      "loss": 2.5328,
      "step": 157104
    },
    {
      "epoch": 30.17,
      "learning_rate": 0.001,
      "loss": 2.5388,
      "step": 157116
    },
    {
      "epoch": 30.17,
      "learning_rate": 0.001,
      "loss": 2.5276,
      "step": 157128
    },
    {
      "epoch": 30.17,
      "learning_rate": 0.001,
      "loss": 2.5315,
      "step": 157140
    },
    {
      "epoch": 30.18,
      "learning_rate": 0.001,
      "loss": 2.5323,
      "step": 157152
    },
    {
      "epoch": 30.18,
      "learning_rate": 0.001,
      "loss": 2.526,
      "step": 157164
    },
    {
      "epoch": 30.18,
      "learning_rate": 0.001,
      "loss": 2.5325,
      "step": 157176
    },
    {
      "epoch": 30.18,
      "learning_rate": 0.001,
      "loss": 2.5257,
      "step": 157188
    },
    {
      "epoch": 30.18,
      "learning_rate": 0.001,
      "loss": 2.5298,
      "step": 157200
    },
    {
      "epoch": 30.19,
      "learning_rate": 0.001,
      "loss": 2.5292,
      "step": 157212
    },
    {
      "epoch": 30.19,
      "learning_rate": 0.001,
      "loss": 2.5246,
      "step": 157224
    },
    {
      "epoch": 30.19,
      "learning_rate": 0.001,
      "loss": 2.5165,
      "step": 157236
    },
    {
      "epoch": 30.19,
      "learning_rate": 0.001,
      "loss": 2.5235,
      "step": 157248
    },
    {
      "epoch": 30.2,
      "learning_rate": 0.001,
      "loss": 2.5144,
      "step": 157260
    },
    {
      "epoch": 30.2,
      "learning_rate": 0.001,
      "loss": 2.5318,
      "step": 157272
    },
    {
      "epoch": 30.2,
      "learning_rate": 0.001,
      "loss": 2.5275,
      "step": 157284
    },
    {
      "epoch": 30.2,
      "learning_rate": 0.001,
      "loss": 2.5313,
      "step": 157296
    },
    {
      "epoch": 30.21,
      "learning_rate": 0.001,
      "loss": 2.5148,
      "step": 157308
    },
    {
      "epoch": 30.21,
      "learning_rate": 0.001,
      "loss": 2.5216,
      "step": 157320
    },
    {
      "epoch": 30.21,
      "learning_rate": 0.001,
      "loss": 2.5226,
      "step": 157332
    },
    {
      "epoch": 30.21,
      "learning_rate": 0.001,
      "loss": 2.5247,
      "step": 157344
    },
    {
      "epoch": 30.21,
      "learning_rate": 0.001,
      "loss": 2.5236,
      "step": 157356
    },
    {
      "epoch": 30.22,
      "learning_rate": 0.001,
      "loss": 2.5286,
      "step": 157368
    },
    {
      "epoch": 30.22,
      "learning_rate": 0.001,
      "loss": 2.5303,
      "step": 157380
    },
    {
      "epoch": 30.22,
      "learning_rate": 0.001,
      "loss": 2.5244,
      "step": 157392
    },
    {
      "epoch": 30.22,
      "learning_rate": 0.001,
      "loss": 2.5354,
      "step": 157404
    },
    {
      "epoch": 30.23,
      "learning_rate": 0.001,
      "loss": 2.5234,
      "step": 157416
    },
    {
      "epoch": 30.23,
      "learning_rate": 0.001,
      "loss": 2.5166,
      "step": 157428
    },
    {
      "epoch": 30.23,
      "learning_rate": 0.001,
      "loss": 2.5336,
      "step": 157440
    },
    {
      "epoch": 30.23,
      "learning_rate": 0.001,
      "loss": 2.5336,
      "step": 157452
    },
    {
      "epoch": 30.24,
      "learning_rate": 0.001,
      "loss": 2.5264,
      "step": 157464
    },
    {
      "epoch": 30.24,
      "learning_rate": 0.001,
      "loss": 2.5224,
      "step": 157476
    },
    {
      "epoch": 30.24,
      "learning_rate": 0.001,
      "loss": 2.524,
      "step": 157488
    },
    {
      "epoch": 30.24,
      "learning_rate": 0.001,
      "loss": 2.5307,
      "step": 157500
    },
    {
      "epoch": 30.24,
      "eval_ag_news_accuracy": 0.32434375,
      "eval_ag_news_bleu_score": 4.758053657351796,
      "eval_ag_news_bleu_score_sem": 0.143581842791104,
      "eval_ag_news_emb_cos_sim": 0.8139164447784424,
      "eval_ag_news_emb_cos_sim_sem": 0.006493669000105704,
      "eval_ag_news_emb_top1_equal": 0.234375,
      "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5309855937957764,
      "eval_ag_news_n_ngrams_match_1": 14.186,
      "eval_ag_news_n_ngrams_match_2": 3.038,
      "eval_ag_news_n_ngrams_match_3": 0.856,
      "eval_ag_news_num_pred_words": 46.49,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 34.15761656489712,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35223402904055356,
      "eval_ag_news_runtime": 10.431,
      "eval_ag_news_samples_per_second": 47.934,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.3543089371065836,
      "eval_ag_news_token_set_f1_sem": 0.004347235781361584,
      "eval_ag_news_token_set_precision": 0.3391530198503155,
      "eval_ag_news_token_set_recall": 0.38411816975795515,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 157500
    },
    {
      "epoch": 30.24,
      "eval_anthropic_toxic_prompts_accuracy": 0.115125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1001371986036967,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11950594486831811,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6717466115951538,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009021263579053102,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1328125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030114394778901498,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2424442768096924,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.11,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.878,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.7,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.852,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.596209568939248,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21005113731368338,
      "eval_anthropic_toxic_prompts_runtime": 11.0359,
      "eval_anthropic_toxic_prompts_samples_per_second": 45.307,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.091,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35156103128418076,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00657098766161286,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.43484267135667165,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3257795283642292,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 157500
    },
    {
      "epoch": 30.24,
      "eval_arxiv_accuracy": 0.34884375,
      "eval_arxiv_bleu_score": 4.363456484179617,
      "eval_arxiv_bleu_score_sem": 0.12581095029683897,
      "eval_arxiv_emb_cos_sim": 0.7542411088943481,
      "eval_arxiv_emb_cos_sim_sem": 0.010374416467909198,
      "eval_arxiv_emb_top1_equal": 0.265625,
      "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.376727819442749,
      "eval_arxiv_n_ngrams_match_1": 15.202,
      "eval_arxiv_n_ngrams_match_2": 2.982,
      "eval_arxiv_n_ngrams_match_3": 0.66,
      "eval_arxiv_num_pred_words": 40.176,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.27482171459167,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3649618898857102,
      "eval_arxiv_runtime": 10.3282,
      "eval_arxiv_samples_per_second": 48.411,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.3557000234224597,
      "eval_arxiv_token_set_f1_sem": 0.004178592382223671,
      "eval_arxiv_token_set_precision": 0.3097388394222354,
      "eval_arxiv_token_set_recall": 0.4354133024821057,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 157500
    },
    {
      "epoch": 30.24,
      "eval_python_code_alpaca_accuracy": 0.16028125,
      "eval_python_code_alpaca_bleu_score": 4.6251060544518,
      "eval_python_code_alpaca_bleu_score_sem": 0.15235364170221244,
      "eval_python_code_alpaca_emb_cos_sim": 0.7514203786849976,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010706493462960057,
      "eval_python_code_alpaca_emb_top1_equal": 0.125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8958547115325928,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.718,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.858,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.97,
      "eval_python_code_alpaca_num_pred_words": 41.97,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.098964225657987,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33508325114583526,
      "eval_python_code_alpaca_runtime": 10.1957,
      "eval_python_code_alpaca_samples_per_second": 49.04,
      "eval_python_code_alpaca_steps_per_second": 0.098,
      "eval_python_code_alpaca_token_set_f1": 0.4731767436339594,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005633077997519158,
      "eval_python_code_alpaca_token_set_precision": 0.5328896362917185,
      "eval_python_code_alpaca_token_set_recall": 0.45465780675229217,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 157500
    },
    {
      "epoch": 30.24,
      "eval_wikibio_accuracy": 0.32725,
      "eval_wikibio_bleu_score": 6.289617508885937,
      "eval_wikibio_bleu_score_sem": 0.21580508245872906,
      "eval_wikibio_emb_cos_sim": 0.7436895370483398,
      "eval_wikibio_emb_cos_sim_sem": 0.009231857551627372,
      "eval_wikibio_emb_top1_equal": 0.1796875,
      "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6938133239746094,
      "eval_wikibio_n_ngrams_match_1": 10.272,
      "eval_wikibio_n_ngrams_match_2": 3.532,
      "eval_wikibio_n_ngrams_match_3": 1.31,
      "eval_wikibio_num_pred_words": 35.952,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 40.19784245755524,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3617194131029448,
      "eval_wikibio_runtime": 10.2425,
      "eval_wikibio_samples_per_second": 48.816,
      "eval_wikibio_steps_per_second": 0.098,
      "eval_wikibio_token_set_f1": 0.3246204881331519,
      "eval_wikibio_token_set_f1_sem": 0.005348375476585237,
      "eval_wikibio_token_set_precision": 0.331537223025125,
      "eval_wikibio_token_set_recall": 0.33399686531592665,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 157500
    },
    {
      "epoch": 30.24,
      "eval_nq_accuracy": 0.5305625,
      "eval_nq_bleu_score": 11.512087301446106,
      "eval_nq_bleu_score_sem": 0.46493590767167253,
      "eval_nq_emb_cos_sim": 0.8377481698989868,
      "eval_nq_emb_cos_sim_sem": 0.007061162485870224,
      "eval_nq_emb_top1_equal": 0.2890625,
      "eval_nq_emb_top1_equal_sem": 0.04022626667363519,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1584322452545166,
      "eval_nq_n_ngrams_match_1": 23.08,
      "eval_nq_n_ngrams_match_2": 8.334,
      "eval_nq_n_ngrams_match_3": 3.806,
      "eval_nq_num_pred_words": 48.712,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.657554091883345,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4506501281163242,
      "eval_nq_runtime": 10.6207,
      "eval_nq_samples_per_second": 47.078,
      "eval_nq_steps_per_second": 0.094,
      "eval_nq_token_set_f1": 0.46324773590136453,
      "eval_nq_token_set_f1_sem": 0.004793307939318403,
      "eval_nq_token_set_precision": 0.42221918452776047,
      "eval_nq_token_set_recall": 0.5208339723322124,
      "eval_nq_true_num_tokens": 64.0,
      "step": 157500
    },
    {
      "epoch": 30.24,
      "learning_rate": 0.001,
      "loss": 2.5299,
      "step": 157512
    },
    {
      "epoch": 30.25,
      "learning_rate": 0.001,
      "loss": 2.5235,
      "step": 157524
    },
    {
      "epoch": 30.25,
      "learning_rate": 0.001,
      "loss": 2.5259,
      "step": 157536
    },
    {
      "epoch": 30.25,
      "learning_rate": 0.001,
      "loss": 2.5268,
      "step": 157548
    },
    {
      "epoch": 30.25,
      "learning_rate": 0.001,
      "loss": 2.5268,
      "step": 157560
    },
    {
      "epoch": 30.26,
      "learning_rate": 0.001,
      "loss": 2.53,
      "step": 157572
    },
    {
      "epoch": 30.26,
      "learning_rate": 0.001,
      "loss": 2.5244,
      "step": 157584
    },
    {
      "epoch": 30.26,
      "learning_rate": 0.001,
      "loss": 2.53,
      "step": 157596
    },
    {
      "epoch": 30.26,
      "learning_rate": 0.001,
      "loss": 2.5423,
      "step": 157608
    },
    {
      "epoch": 30.26,
      "learning_rate": 0.001,
      "loss": 2.5288,
      "step": 157620
    },
    {
      "epoch": 30.27,
      "learning_rate": 0.001,
      "loss": 2.5308,
      "step": 157632
    },
    {
      "epoch": 30.27,
      "learning_rate": 0.001,
      "loss": 2.5328,
      "step": 157644
    },
    {
      "epoch": 30.27,
      "learning_rate": 0.001,
      "loss": 2.5228,
      "step": 157656
    },
    {
      "epoch": 30.27,
      "learning_rate": 0.001,
      "loss": 2.5266,
      "step": 157668
    },
    {
      "epoch": 30.28,
      "learning_rate": 0.001,
      "loss": 2.5331,
      "step": 157680
    },
    {
      "epoch": 30.28,
      "learning_rate": 0.001,
      "loss": 2.5203,
      "step": 157692
    },
    {
      "epoch": 30.28,
      "learning_rate": 0.001,
      "loss": 2.5307,
      "step": 157704
    },
    {
      "epoch": 30.28,
      "learning_rate": 0.001,
      "loss": 2.5306,
      "step": 157716
    },
    {
      "epoch": 30.29,
      "learning_rate": 0.001,
      "loss": 2.5303,
      "step": 157728
    },
    {
      "epoch": 30.29,
      "learning_rate": 0.001,
      "loss": 2.5335,
      "step": 157740
    },
    {
      "epoch": 30.29,
      "learning_rate": 0.001,
      "loss": 2.5257,
      "step": 157752
    },
    {
      "epoch": 30.29,
      "learning_rate": 0.001,
      "loss": 2.5376,
      "step": 157764
    },
    {
      "epoch": 30.29,
      "learning_rate": 0.001,
      "loss": 2.5266,
      "step": 157776
    },
    {
      "epoch": 30.3,
      "learning_rate": 0.001,
      "loss": 2.5254,
      "step": 157788
    },
    {
      "epoch": 30.3,
      "learning_rate": 0.001,
      "loss": 2.5296,
      "step": 157800
    },
    {
      "epoch": 30.3,
      "learning_rate": 0.001,
      "loss": 2.5354,
      "step": 157812
    },
    {
      "epoch": 30.3,
      "learning_rate": 0.001,
      "loss": 2.5357,
      "step": 157824
    },
    {
      "epoch": 30.31,
      "learning_rate": 0.001,
      "loss": 2.5187,
      "step": 157836
    },
    {
      "epoch": 30.31,
      "learning_rate": 0.001,
      "loss": 2.5201,
      "step": 157848
    },
    {
      "epoch": 30.31,
      "learning_rate": 0.001,
      "loss": 2.5356,
      "step": 157860
    },
    {
      "epoch": 30.31,
      "learning_rate": 0.001,
      "loss": 2.5254,
      "step": 157872
    },
    {
      "epoch": 30.32,
      "learning_rate": 0.001,
      "loss": 2.5221,
      "step": 157884
    },
    {
      "epoch": 30.32,
      "learning_rate": 0.001,
      "loss": 2.5381,
      "step": 157896
    },
    {
      "epoch": 30.32,
      "learning_rate": 0.001,
      "loss": 2.5302,
      "step": 157908
    },
    {
      "epoch": 30.32,
      "learning_rate": 0.001,
      "loss": 2.5219,
      "step": 157920
    },
    {
      "epoch": 30.32,
      "learning_rate": 0.001,
      "loss": 2.5252,
      "step": 157932
    },
    {
      "epoch": 30.33,
      "learning_rate": 0.001,
      "loss": 2.5285,
      "step": 157944
    },
    {
      "epoch": 30.33,
      "learning_rate": 0.001,
      "loss": 2.5351,
      "step": 157956
    },
    {
      "epoch": 30.33,
      "learning_rate": 0.001,
      "loss": 2.5305,
      "step": 157968
    },
    {
      "epoch": 30.33,
      "learning_rate": 0.001,
      "loss": 2.5249,
      "step": 157980
    },
    {
      "epoch": 30.34,
      "learning_rate": 0.001,
      "loss": 2.5335,
      "step": 157992
    },
    {
      "epoch": 30.34,
      "learning_rate": 0.001,
      "loss": 2.5221,
      "step": 158004
    },
    {
      "epoch": 30.34,
      "learning_rate": 0.001,
      "loss": 2.5209,
      "step": 158016
    },
    {
      "epoch": 30.34,
      "learning_rate": 0.001,
      "loss": 2.5308,
      "step": 158028
    },
    {
      "epoch": 30.35,
      "learning_rate": 0.001,
      "loss": 2.5257,
      "step": 158040
    },
    {
      "epoch": 30.35,
      "learning_rate": 0.001,
      "loss": 2.5315,
      "step": 158052
    },
    {
      "epoch": 30.35,
      "learning_rate": 0.001,
      "loss": 2.5284,
      "step": 158064
    },
    {
      "epoch": 30.35,
      "learning_rate": 0.001,
      "loss": 2.5353,
      "step": 158076
    },
    {
      "epoch": 30.35,
      "learning_rate": 0.001,
      "loss": 2.5343,
      "step": 158088
    },
    {
      "epoch": 30.36,
      "learning_rate": 0.001,
      "loss": 2.5242,
      "step": 158100
    },
    {
      "epoch": 30.36,
      "learning_rate": 0.001,
      "loss": 2.5277,
      "step": 158112
    },
    {
      "epoch": 30.36,
      "learning_rate": 0.001,
      "loss": 2.5292,
      "step": 158124
    },
    {
      "epoch": 30.36,
      "eval_ag_news_accuracy": 0.32471875,
      "eval_ag_news_bleu_score": 4.8427856844103605,
      "eval_ag_news_bleu_score_sem": 0.15012938702407316,
      "eval_ag_news_emb_cos_sim": 0.8136986494064331,
      "eval_ag_news_emb_cos_sim_sem": 0.00657294590506568,
      "eval_ag_news_emb_top1_equal": 0.265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5194272994995117,
      "eval_ag_news_n_ngrams_match_1": 14.18,
      "eval_ag_news_n_ngrams_match_2": 3.168,
      "eval_ag_news_n_ngrams_match_3": 0.9,
      "eval_ag_news_num_pred_words": 46.76,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.765085644109554,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3533370736970962,
      "eval_ag_news_runtime": 10.4041,
      "eval_ag_news_samples_per_second": 48.058,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.35501028044739813,
      "eval_ag_news_token_set_f1_sem": 0.004213580761439838,
      "eval_ag_news_token_set_precision": 0.34015320444333863,
      "eval_ag_news_token_set_recall": 0.3854947543851968,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 158125
    },
    {
      "epoch": 30.36,
      "eval_anthropic_toxic_prompts_accuracy": 0.11484375,
      "eval_anthropic_toxic_prompts_bleu_score": 3.071500101829459,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11258027794072333,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.674710750579834,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008671535615554784,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.140625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030847557647994725,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2308311462402344,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.28,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.91,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.708,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.426,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.30067679687331,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21361380952808293,
      "eval_anthropic_toxic_prompts_runtime": 9.8524,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.749,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.101,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35759965417590095,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006503850569137356,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44059801740878096,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3262328269793172,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 158125
    },
    {
      "epoch": 30.36,
      "eval_arxiv_accuracy": 0.34834375,
      "eval_arxiv_bleu_score": 4.2978621528238,
      "eval_arxiv_bleu_score_sem": 0.12290908491861556,
      "eval_arxiv_emb_cos_sim": 0.7749967575073242,
      "eval_arxiv_emb_cos_sim_sem": 0.006363304846394461,
      "eval_arxiv_emb_top1_equal": 0.3125,
      "eval_arxiv_emb_top1_equal_sem": 0.041130074229814934,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.382624626159668,
      "eval_arxiv_n_ngrams_match_1": 15.282,
      "eval_arxiv_n_ngrams_match_2": 2.946,
      "eval_arxiv_n_ngrams_match_3": 0.638,
      "eval_arxiv_num_pred_words": 40.834,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.447959658710285,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3652486584506256,
      "eval_arxiv_runtime": 10.2121,
      "eval_arxiv_samples_per_second": 48.962,
      "eval_arxiv_steps_per_second": 0.098,
      "eval_arxiv_token_set_f1": 0.3617443614581526,
      "eval_arxiv_token_set_f1_sem": 0.004176899718743156,
      "eval_arxiv_token_set_precision": 0.31270599370369495,
      "eval_arxiv_token_set_recall": 0.4465882658497927,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 158125
    },
    {
      "epoch": 30.36,
      "eval_python_code_alpaca_accuracy": 0.162,
      "eval_python_code_alpaca_bleu_score": 4.275658775053929,
      "eval_python_code_alpaca_bleu_score_sem": 0.13693856885349182,
      "eval_python_code_alpaca_emb_cos_sim": 0.7551605701446533,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008493371667276123,
      "eval_python_code_alpaca_emb_top1_equal": 0.15625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8826234340667725,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.8,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.724,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.866,
      "eval_python_code_alpaca_num_pred_words": 44.016,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.861069106922283,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.32812303963758704,
      "eval_python_code_alpaca_runtime": 9.9485,
      "eval_python_code_alpaca_samples_per_second": 50.259,
      "eval_python_code_alpaca_steps_per_second": 0.101,
      "eval_python_code_alpaca_token_set_f1": 0.4727704776241421,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005521458940805648,
      "eval_python_code_alpaca_token_set_precision": 0.538321190845166,
      "eval_python_code_alpaca_token_set_recall": 0.44409053704885754,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 158125
    },
    {
      "epoch": 30.36,
      "eval_wikibio_accuracy": 0.32465625,
      "eval_wikibio_bleu_score": 5.942625832732892,
      "eval_wikibio_bleu_score_sem": 0.22032237041499633,
      "eval_wikibio_emb_cos_sim": 0.731481671333313,
      "eval_wikibio_emb_cos_sim_sem": 0.009106736070744179,
      "eval_wikibio_emb_top1_equal": 0.1796875,
      "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.69866681098938,
      "eval_wikibio_n_ngrams_match_1": 9.924,
      "eval_wikibio_n_ngrams_match_2": 3.316,
      "eval_wikibio_n_ngrams_match_3": 1.222,
      "eval_wikibio_num_pred_words": 35.518,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 40.39341638779324,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3507729288554753,
      "eval_wikibio_runtime": 10.0362,
      "eval_wikibio_samples_per_second": 49.82,
      "eval_wikibio_steps_per_second": 0.1,
      "eval_wikibio_token_set_f1": 0.3181687918634861,
      "eval_wikibio_token_set_f1_sem": 0.005317006306493984,
      "eval_wikibio_token_set_precision": 0.32243025625466,
      "eval_wikibio_token_set_recall": 0.333261812433212,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 158125
    },
    {
      "epoch": 30.36,
      "eval_nq_accuracy": 0.53153125,
      "eval_nq_bleu_score": 11.486748725474277,
      "eval_nq_bleu_score_sem": 0.46453246893931965,
      "eval_nq_emb_cos_sim": 0.8362016677856445,
      "eval_nq_emb_cos_sim_sem": 0.007298759417449818,
      "eval_nq_emb_top1_equal": 0.3125,
      "eval_nq_emb_top1_equal_sem": 0.041130074229814934,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1573336124420166,
      "eval_nq_n_ngrams_match_1": 23.212,
      "eval_nq_n_ngrams_match_2": 8.456,
      "eval_nq_n_ngrams_match_3": 3.814,
      "eval_nq_num_pred_words": 49.484,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.64804784177732,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4490733508704824,
      "eval_nq_runtime": 11.553,
      "eval_nq_samples_per_second": 43.279,
      "eval_nq_steps_per_second": 0.087,
      "eval_nq_token_set_f1": 0.4616884557122342,
      "eval_nq_token_set_f1_sem": 0.00483469966089439,
      "eval_nq_token_set_precision": 0.42239282339596934,
      "eval_nq_token_set_recall": 0.517920002163779,
      "eval_nq_true_num_tokens": 64.0,
      "step": 158125
    },
    {
      "epoch": 30.36,
      "learning_rate": 0.001,
      "loss": 2.5309,
      "step": 158136
    },
    {
      "epoch": 30.37,
      "learning_rate": 0.001,
      "loss": 2.5305,
      "step": 158148
    },
    {
      "epoch": 30.37,
      "learning_rate": 0.001,
      "loss": 2.5319,
      "step": 158160
    },
    {
      "epoch": 30.37,
      "learning_rate": 0.001,
      "loss": 2.5394,
      "step": 158172
    },
    {
      "epoch": 30.37,
      "learning_rate": 0.001,
      "loss": 2.5254,
      "step": 158184
    },
    {
      "epoch": 30.38,
      "learning_rate": 0.001,
      "loss": 2.5252,
      "step": 158196
    },
    {
      "epoch": 30.38,
      "learning_rate": 0.001,
      "loss": 2.5313,
      "step": 158208
    },
    {
      "epoch": 30.38,
      "learning_rate": 0.001,
      "loss": 2.5309,
      "step": 158220
    },
    {
      "epoch": 30.38,
      "learning_rate": 0.001,
      "loss": 2.5234,
      "step": 158232
    },
    {
      "epoch": 30.38,
      "learning_rate": 0.001,
      "loss": 2.5271,
      "step": 158244
    },
    {
      "epoch": 30.39,
      "learning_rate": 0.001,
      "loss": 2.5292,
      "step": 158256
    },
    {
      "epoch": 30.39,
      "learning_rate": 0.001,
      "loss": 2.5394,
      "step": 158268
    },
    {
      "epoch": 30.39,
      "learning_rate": 0.001,
      "loss": 2.5318,
      "step": 158280
    },
    {
      "epoch": 30.39,
      "learning_rate": 0.001,
      "loss": 2.5355,
      "step": 158292
    },
    {
      "epoch": 30.4,
      "learning_rate": 0.001,
      "loss": 2.5195,
      "step": 158304
    },
    {
      "epoch": 30.4,
      "learning_rate": 0.001,
      "loss": 2.5261,
      "step": 158316
    },
    {
      "epoch": 30.4,
      "learning_rate": 0.001,
      "loss": 2.5247,
      "step": 158328
    },
    {
      "epoch": 30.4,
      "learning_rate": 0.001,
      "loss": 2.5317,
      "step": 158340
    },
    {
      "epoch": 30.41,
      "learning_rate": 0.001,
      "loss": 2.5347,
      "step": 158352
    },
    {
      "epoch": 30.41,
      "learning_rate": 0.001,
      "loss": 2.5242,
      "step": 158364
    },
    {
      "epoch": 30.41,
      "learning_rate": 0.001,
      "loss": 2.5267,
      "step": 158376
    },
    {
      "epoch": 30.41,
      "learning_rate": 0.001,
      "loss": 2.5356,
      "step": 158388
    },
    {
      "epoch": 30.41,
      "learning_rate": 0.001,
      "loss": 2.53,
      "step": 158400
    },
    {
      "epoch": 30.42,
      "learning_rate": 0.001,
      "loss": 2.5351,
      "step": 158412
    },
    {
      "epoch": 30.42,
      "learning_rate": 0.001,
      "loss": 2.5308,
      "step": 158424
    },
    {
      "epoch": 30.42,
      "learning_rate": 0.001,
      "loss": 2.5322,
      "step": 158436
    },
    {
      "epoch": 30.42,
      "learning_rate": 0.001,
      "loss": 2.529,
      "step": 158448
    },
    {
      "epoch": 30.43,
      "learning_rate": 0.001,
      "loss": 2.5313,
      "step": 158460
    },
    {
      "epoch": 30.43,
      "learning_rate": 0.001,
      "loss": 2.534,
      "step": 158472
    },
    {
      "epoch": 30.43,
      "learning_rate": 0.001,
      "loss": 2.5247,
      "step": 158484
    },
    {
      "epoch": 30.43,
      "learning_rate": 0.001,
      "loss": 2.5232,
      "step": 158496
    },
    {
      "epoch": 30.44,
      "learning_rate": 0.001,
      "loss": 2.5299,
      "step": 158508
    },
    {
      "epoch": 30.44,
      "learning_rate": 0.001,
      "loss": 2.5286,
      "step": 158520
    },
    {
      "epoch": 30.44,
      "learning_rate": 0.001,
      "loss": 2.538,
      "step": 158532
    },
    {
      "epoch": 30.44,
      "learning_rate": 0.001,
      "loss": 2.5387,
      "step": 158544
    },
    {
      "epoch": 30.44,
      "learning_rate": 0.001,
      "loss": 2.5377,
      "step": 158556
    },
    {
      "epoch": 30.45,
      "learning_rate": 0.001,
      "loss": 2.5267,
      "step": 158568
    },
    {
      "epoch": 30.45,
      "learning_rate": 0.001,
      "loss": 2.5317,
      "step": 158580
    },
    {
      "epoch": 30.45,
      "learning_rate": 0.001,
      "loss": 2.5207,
      "step": 158592
    },
    {
      "epoch": 30.45,
      "learning_rate": 0.001,
      "loss": 2.5404,
      "step": 158604
    },
    {
      "epoch": 30.46,
      "learning_rate": 0.001,
      "loss": 2.5313,
      "step": 158616
    },
    {
      "epoch": 30.46,
      "learning_rate": 0.001,
      "loss": 2.5285,
      "step": 158628
    },
    {
      "epoch": 30.46,
      "learning_rate": 0.001,
      "loss": 2.5242,
      "step": 158640
    },
    {
      "epoch": 30.46,
      "learning_rate": 0.001,
      "loss": 2.5294,
      "step": 158652
    },
    {
      "epoch": 30.47,
      "learning_rate": 0.001,
      "loss": 2.5421,
      "step": 158664
    },
    {
      "epoch": 30.47,
      "learning_rate": 0.001,
      "loss": 2.5348,
      "step": 158676
    },
    {
      "epoch": 30.47,
      "learning_rate": 0.001,
      "loss": 2.5253,
      "step": 158688
    },
    {
      "epoch": 30.47,
      "learning_rate": 0.001,
      "loss": 2.5235,
      "step": 158700
    },
    {
      "epoch": 30.47,
      "learning_rate": 0.001,
      "loss": 2.5277,
      "step": 158712
    },
    {
      "epoch": 30.48,
      "learning_rate": 0.001,
      "loss": 2.5279,
      "step": 158724
    },
    {
      "epoch": 30.48,
      "learning_rate": 0.001,
      "loss": 2.5286,
      "step": 158736
    },
    {
      "epoch": 30.48,
      "learning_rate": 0.001,
      "loss": 2.5371,
      "step": 158748
    },
    {
      "epoch": 30.48,
      "eval_ag_news_accuracy": 0.325375,
      "eval_ag_news_bleu_score": 4.878165485857089,
      "eval_ag_news_bleu_score_sem": 0.15439134540973087,
      "eval_ag_news_emb_cos_sim": 0.8145061135292053,
      "eval_ag_news_emb_cos_sim_sem": 0.007508954970596758,
      "eval_ag_news_emb_top1_equal": 0.2734375,
      "eval_ag_news_emb_top1_equal_sem": 0.03955156411760461,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.520663022994995,
      "eval_ag_news_n_ngrams_match_1": 14.314,
      "eval_ag_news_n_ngrams_match_2": 3.214,
      "eval_ag_news_n_ngrams_match_3": 0.92,
      "eval_ag_news_num_pred_words": 46.69,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.80683574424408,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3561943229560461,
      "eval_ag_news_runtime": 10.3976,
      "eval_ag_news_samples_per_second": 48.088,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.3558517160863795,
      "eval_ag_news_token_set_f1_sem": 0.004551006538093029,
      "eval_ag_news_token_set_precision": 0.34212551662708557,
      "eval_ag_news_token_set_recall": 0.3844274878298257,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 158750
    },
    {
      "epoch": 30.48,
      "eval_anthropic_toxic_prompts_accuracy": 0.115625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.0588368189701205,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11557554497963778,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6634416580200195,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009347339258002713,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.219215154647827,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.198,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.884,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.706,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.324,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.008484683962216,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21079149734891484,
      "eval_anthropic_toxic_prompts_runtime": 10.2632,
      "eval_anthropic_toxic_prompts_samples_per_second": 48.718,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.097,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3625880362589257,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006706776391805066,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4366398585090635,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.33531326236850506,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 158750
    },
    {
      "epoch": 30.48,
      "eval_arxiv_accuracy": 0.35040625,
      "eval_arxiv_bleu_score": 4.368822880723829,
      "eval_arxiv_bleu_score_sem": 0.1277155185574493,
      "eval_arxiv_emb_cos_sim": 0.7667855024337769,
      "eval_arxiv_emb_cos_sim_sem": 0.008965874059031183,
      "eval_arxiv_emb_top1_equal": 0.2734375,
      "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3752377033233643,
      "eval_arxiv_n_ngrams_match_1": 15.296,
      "eval_arxiv_n_ngrams_match_2": 2.95,
      "eval_arxiv_n_ngrams_match_3": 0.674,
      "eval_arxiv_num_pred_words": 40.438,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.231231316306054,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36635425161089796,
      "eval_arxiv_runtime": 10.4737,
      "eval_arxiv_samples_per_second": 47.739,
      "eval_arxiv_steps_per_second": 0.095,
      "eval_arxiv_token_set_f1": 0.36084453968004065,
      "eval_arxiv_token_set_f1_sem": 0.004463439757308821,
      "eval_arxiv_token_set_precision": 0.313203924279303,
      "eval_arxiv_token_set_recall": 0.4453061731231374,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 158750
    },
    {
      "epoch": 30.48,
      "eval_python_code_alpaca_accuracy": 0.1638125,
      "eval_python_code_alpaca_bleu_score": 4.478360967894014,
      "eval_python_code_alpaca_bleu_score_sem": 0.14594466721567825,
      "eval_python_code_alpaca_emb_cos_sim": 0.7596727609634399,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008804581362502532,
      "eval_python_code_alpaca_emb_top1_equal": 0.1484375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8606534004211426,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.77,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.856,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.956,
      "eval_python_code_alpaca_num_pred_words": 44.588,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.472940033889387,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.32659435648274376,
      "eval_python_code_alpaca_runtime": 9.8786,
      "eval_python_code_alpaca_samples_per_second": 50.615,
      "eval_python_code_alpaca_steps_per_second": 0.101,
      "eval_python_code_alpaca_token_set_f1": 0.4756708808523636,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005626300794357611,
      "eval_python_code_alpaca_token_set_precision": 0.5339058879782336,
      "eval_python_code_alpaca_token_set_recall": 0.4491858371029622,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 158750
    },
    {
      "epoch": 30.48,
      "eval_wikibio_accuracy": 0.32759375,
      "eval_wikibio_bleu_score": 5.879216905551975,
      "eval_wikibio_bleu_score_sem": 0.20414104430683466,
      "eval_wikibio_emb_cos_sim": 0.7370492219924927,
      "eval_wikibio_emb_cos_sim_sem": 0.00960887757517002,
      "eval_wikibio_emb_top1_equal": 0.1640625,
      "eval_wikibio_emb_top1_equal_sem": 0.03286167651298939,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6781163215637207,
      "eval_wikibio_n_ngrams_match_1": 10.012,
      "eval_wikibio_n_ngrams_match_2": 3.352,
      "eval_wikibio_n_ngrams_match_3": 1.194,
      "eval_wikibio_num_pred_words": 35.404,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 39.5717833083871,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.35632239314531655,
      "eval_wikibio_runtime": 10.1985,
      "eval_wikibio_samples_per_second": 49.027,
      "eval_wikibio_steps_per_second": 0.098,
      "eval_wikibio_token_set_f1": 0.32056417579054197,
      "eval_wikibio_token_set_f1_sem": 0.005457207044850231,
      "eval_wikibio_token_set_precision": 0.32565516678818285,
      "eval_wikibio_token_set_recall": 0.3328260382360125,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 158750
    },
    {
      "epoch": 30.48,
      "eval_nq_accuracy": 0.530875,
      "eval_nq_bleu_score": 11.861891386695568,
      "eval_nq_bleu_score_sem": 0.4922490714167595,
      "eval_nq_emb_cos_sim": 0.8299490809440613,
      "eval_nq_emb_cos_sim_sem": 0.007035008390735691,
      "eval_nq_emb_top1_equal": 0.28125,
      "eval_nq_emb_top1_equal_sem": 0.039896367485272234,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1594924926757812,
      "eval_nq_n_ngrams_match_1": 23.234,
      "eval_nq_n_ngrams_match_2": 8.47,
      "eval_nq_n_ngrams_match_3": 3.978,
      "eval_nq_num_pred_words": 48.904,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.666738109088671,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45310891180677093,
      "eval_nq_runtime": 10.4601,
      "eval_nq_samples_per_second": 47.801,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.4648715447948946,
      "eval_nq_token_set_f1_sem": 0.004991581463411779,
      "eval_nq_token_set_precision": 0.42293491387170323,
      "eval_nq_token_set_recall": 0.5248400957691678,
      "eval_nq_true_num_tokens": 64.0,
      "step": 158750
    },
    {
      "epoch": 30.48,
      "learning_rate": 0.001,
      "loss": 2.5323,
      "step": 158760
    },
    {
      "epoch": 30.49,
      "learning_rate": 0.001,
      "loss": 2.5323,
      "step": 158772
    },
    {
      "epoch": 30.49,
      "learning_rate": 0.001,
      "loss": 2.5245,
      "step": 158784
    },
    {
      "epoch": 30.49,
      "learning_rate": 0.001,
      "loss": 2.536,
      "step": 158796
    },
    {
      "epoch": 30.49,
      "learning_rate": 0.001,
      "loss": 2.5393,
      "step": 158808
    },
    {
      "epoch": 30.5,
      "learning_rate": 0.001,
      "loss": 2.5365,
      "step": 158820
    },
    {
      "epoch": 30.5,
      "learning_rate": 0.001,
      "loss": 2.5413,
      "step": 158832
    },
    {
      "epoch": 30.5,
      "learning_rate": 0.001,
      "loss": 2.5366,
      "step": 158844
    },
    {
      "epoch": 30.5,
      "learning_rate": 0.001,
      "loss": 2.5336,
      "step": 158856
    },
    {
      "epoch": 30.5,
      "learning_rate": 0.001,
      "loss": 2.5409,
      "step": 158868
    },
    {
      "epoch": 30.51,
      "learning_rate": 0.001,
      "loss": 2.5269,
      "step": 158880
    },
    {
      "epoch": 30.51,
      "learning_rate": 0.001,
      "loss": 2.5301,
      "step": 158892
    },
    {
      "epoch": 30.51,
      "learning_rate": 0.001,
      "loss": 2.5398,
      "step": 158904
    },
    {
      "epoch": 30.51,
      "learning_rate": 0.001,
      "loss": 2.5368,
      "step": 158916
    },
    {
      "epoch": 30.52,
      "learning_rate": 0.001,
      "loss": 2.5298,
      "step": 158928
    },
    {
      "epoch": 30.52,
      "learning_rate": 0.001,
      "loss": 2.5283,
      "step": 158940
    },
    {
      "epoch": 30.52,
      "learning_rate": 0.001,
      "loss": 2.5209,
      "step": 158952
    },
    {
      "epoch": 30.52,
      "learning_rate": 0.001,
      "loss": 2.5249,
      "step": 158964
    },
    {
      "epoch": 30.53,
      "learning_rate": 0.001,
      "loss": 2.5335,
      "step": 158976
    },
    {
      "epoch": 30.53,
      "learning_rate": 0.001,
      "loss": 2.523,
      "step": 158988
    },
    {
      "epoch": 30.53,
      "learning_rate": 0.001,
      "loss": 2.5297,
      "step": 159000
    },
    {
      "epoch": 30.53,
      "learning_rate": 0.001,
      "loss": 2.5377,
      "step": 159012
    },
    {
      "epoch": 30.53,
      "learning_rate": 0.001,
      "loss": 2.5326,
      "step": 159024
    },
    {
      "epoch": 30.54,
      "learning_rate": 0.001,
      "loss": 2.5265,
      "step": 159036
    },
    {
      "epoch": 30.54,
      "learning_rate": 0.001,
      "loss": 2.5296,
      "step": 159048
    },
    {
      "epoch": 30.54,
      "learning_rate": 0.001,
      "loss": 2.52,
      "step": 159060
    },
    {
      "epoch": 30.54,
      "learning_rate": 0.001,
      "loss": 2.5308,
      "step": 159072
    },
    {
      "epoch": 30.55,
      "learning_rate": 0.001,
      "loss": 2.5336,
      "step": 159084
    },
    {
      "epoch": 30.55,
      "learning_rate": 0.001,
      "loss": 2.5314,
      "step": 159096
    },
    {
      "epoch": 30.55,
      "learning_rate": 0.001,
      "loss": 2.5285,
      "step": 159108
    },
    {
      "epoch": 30.55,
      "learning_rate": 0.001,
      "loss": 2.5423,
      "step": 159120
    },
    {
      "epoch": 30.56,
      "learning_rate": 0.001,
      "loss": 2.5265,
      "step": 159132
    },
    {
      "epoch": 30.56,
      "learning_rate": 0.001,
      "loss": 2.5285,
      "step": 159144
    },
    {
      "epoch": 30.56,
      "learning_rate": 0.001,
      "loss": 2.5312,
      "step": 159156
    },
    {
      "epoch": 30.56,
      "learning_rate": 0.001,
      "loss": 2.5271,
      "step": 159168
    },
    {
      "epoch": 30.56,
      "learning_rate": 0.001,
      "loss": 2.5367,
      "step": 159180
    },
    {
      "epoch": 30.57,
      "learning_rate": 0.001,
      "loss": 2.5304,
      "step": 159192
    },
    {
      "epoch": 30.57,
      "learning_rate": 0.001,
      "loss": 2.5344,
      "step": 159204
    },
    {
      "epoch": 30.57,
      "learning_rate": 0.001,
      "loss": 2.541,
      "step": 159216
    },
    {
      "epoch": 30.57,
      "learning_rate": 0.001,
      "loss": 2.5415,
      "step": 159228
    },
    {
      "epoch": 30.58,
      "learning_rate": 0.001,
      "loss": 2.5343,
      "step": 159240
    },
    {
      "epoch": 30.58,
      "learning_rate": 0.001,
      "loss": 2.5261,
      "step": 159252
    },
    {
      "epoch": 30.58,
      "learning_rate": 0.001,
      "loss": 2.5321,
      "step": 159264
    },
    {
      "epoch": 30.58,
      "learning_rate": 0.001,
      "loss": 2.5238,
      "step": 159276
    },
    {
      "epoch": 30.59,
      "learning_rate": 0.001,
      "loss": 2.5293,
      "step": 159288
    },
    {
      "epoch": 30.59,
      "learning_rate": 0.001,
      "loss": 2.5273,
      "step": 159300
    },
    {
      "epoch": 30.59,
      "learning_rate": 0.001,
      "loss": 2.5338,
      "step": 159312
    },
    {
      "epoch": 30.59,
      "learning_rate": 0.001,
      "loss": 2.536,
      "step": 159324
    },
    {
      "epoch": 30.59,
      "learning_rate": 0.001,
      "loss": 2.5294,
      "step": 159336
    },
    {
      "epoch": 30.6,
      "learning_rate": 0.001,
      "loss": 2.5177,
      "step": 159348
    },
    {
      "epoch": 30.6,
      "learning_rate": 0.001,
      "loss": 2.5302,
      "step": 159360
    },
    {
      "epoch": 30.6,
      "learning_rate": 0.001,
      "loss": 2.5305,
      "step": 159372
    },
    {
      "epoch": 30.6,
      "eval_ag_news_accuracy": 0.32678125,
      "eval_ag_news_bleu_score": 4.923118249282192,
      "eval_ag_news_bleu_score_sem": 0.14889419402993623,
      "eval_ag_news_emb_cos_sim": 0.8170584440231323,
      "eval_ag_news_emb_cos_sim_sem": 0.007157052493133081,
      "eval_ag_news_emb_top1_equal": 0.2890625,
      "eval_ag_news_emb_top1_equal_sem": 0.04022626667363519,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5106892585754395,
      "eval_ag_news_n_ngrams_match_1": 14.342,
      "eval_ag_news_n_ngrams_match_2": 3.282,
      "eval_ag_news_n_ngrams_match_3": 0.896,
      "eval_ag_news_num_pred_words": 46.724,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.47133023643213,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3555692901811652,
      "eval_ag_news_runtime": 10.4647,
      "eval_ag_news_samples_per_second": 47.78,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.3569700947398814,
      "eval_ag_news_token_set_f1_sem": 0.00432541952931231,
      "eval_ag_news_token_set_precision": 0.3426346521508827,
      "eval_ag_news_token_set_recall": 0.3863496905375196,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 159375
    },
    {
      "epoch": 30.6,
      "eval_anthropic_toxic_prompts_accuracy": 0.11646875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2010010375991067,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12109595737703649,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.674258828163147,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009089438082152996,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2164714336395264,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.306,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.964,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.736,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.17,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.939962425115947,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21506010858386138,
      "eval_anthropic_toxic_prompts_runtime": 10.2099,
      "eval_anthropic_toxic_prompts_samples_per_second": 48.972,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.098,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3638356274635589,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006679943671621058,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44706926628230775,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3326622467851707,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 159375
    },
    {
      "epoch": 30.6,
      "eval_arxiv_accuracy": 0.35059375,
      "eval_arxiv_bleu_score": 4.341593214572007,
      "eval_arxiv_bleu_score_sem": 0.12876889998663138,
      "eval_arxiv_emb_cos_sim": 0.7648574709892273,
      "eval_arxiv_emb_cos_sim_sem": 0.010043040822759341,
      "eval_arxiv_emb_top1_equal": 0.265625,
      "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3589510917663574,
      "eval_arxiv_n_ngrams_match_1": 15.128,
      "eval_arxiv_n_ngrams_match_2": 2.924,
      "eval_arxiv_n_ngrams_match_3": 0.664,
      "eval_arxiv_num_pred_words": 40.054,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.759009491409785,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36219769119335693,
      "eval_arxiv_runtime": 10.8673,
      "eval_arxiv_samples_per_second": 46.01,
      "eval_arxiv_steps_per_second": 0.092,
      "eval_arxiv_token_set_f1": 0.3561839101546386,
      "eval_arxiv_token_set_f1_sem": 0.004314308628426794,
      "eval_arxiv_token_set_precision": 0.3081562061394426,
      "eval_arxiv_token_set_recall": 0.44107164677241584,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 159375
    },
    {
      "epoch": 30.6,
      "eval_python_code_alpaca_accuracy": 0.164375,
      "eval_python_code_alpaca_bleu_score": 4.716017573864346,
      "eval_python_code_alpaca_bleu_score_sem": 0.14934025458985162,
      "eval_python_code_alpaca_emb_cos_sim": 0.7462612390518188,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.011473278321908338,
      "eval_python_code_alpaca_emb_top1_equal": 0.15625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8493075370788574,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.708,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.948,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.05,
      "eval_python_code_alpaca_num_pred_words": 43.084,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.275814836485733,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3352271370090748,
      "eval_python_code_alpaca_runtime": 9.9108,
      "eval_python_code_alpaca_samples_per_second": 50.45,
      "eval_python_code_alpaca_steps_per_second": 0.101,
      "eval_python_code_alpaca_token_set_f1": 0.47509973640326264,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0059746369381191615,
      "eval_python_code_alpaca_token_set_precision": 0.5295235861561871,
      "eval_python_code_alpaca_token_set_recall": 0.4579034460887238,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 159375
    },
    {
      "epoch": 30.6,
      "eval_wikibio_accuracy": 0.32615625,
      "eval_wikibio_bleu_score": 5.964014459015691,
      "eval_wikibio_bleu_score_sem": 0.20725724989888034,
      "eval_wikibio_emb_cos_sim": 0.7523312568664551,
      "eval_wikibio_emb_cos_sim_sem": 0.009288788724342478,
      "eval_wikibio_emb_top1_equal": 0.1953125,
      "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6819639205932617,
      "eval_wikibio_n_ngrams_match_1": 10.14,
      "eval_wikibio_n_ngrams_match_2": 3.39,
      "eval_wikibio_n_ngrams_match_3": 1.232,
      "eval_wikibio_num_pred_words": 36.278,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 39.72433295017291,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.35975966952098376,
      "eval_wikibio_runtime": 10.0333,
      "eval_wikibio_samples_per_second": 49.834,
      "eval_wikibio_steps_per_second": 0.1,
      "eval_wikibio_token_set_f1": 0.31819860957428275,
      "eval_wikibio_token_set_f1_sem": 0.005583364782927081,
      "eval_wikibio_token_set_precision": 0.32758241040150776,
      "eval_wikibio_token_set_recall": 0.32408614188775137,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 159375
    },
    {
      "epoch": 30.6,
      "eval_nq_accuracy": 0.53215625,
      "eval_nq_bleu_score": 11.981191883954724,
      "eval_nq_bleu_score_sem": 0.47526563796349297,
      "eval_nq_emb_cos_sim": 0.8336740732192993,
      "eval_nq_emb_cos_sim_sem": 0.00720989212882009,
      "eval_nq_emb_top1_equal": 0.2578125,
      "eval_nq_emb_top1_equal_sem": 0.038815656435002115,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1526739597320557,
      "eval_nq_n_ngrams_match_1": 23.234,
      "eval_nq_n_ngrams_match_2": 8.634,
      "eval_nq_n_ngrams_match_3": 4.028,
      "eval_nq_num_pred_words": 49.158,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.607844681340358,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45084998088904293,
      "eval_nq_runtime": 10.3957,
      "eval_nq_samples_per_second": 48.097,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.4626375591175635,
      "eval_nq_token_set_f1_sem": 0.005043510554332337,
      "eval_nq_token_set_precision": 0.4216860162092115,
      "eval_nq_token_set_recall": 0.5206586046509615,
      "eval_nq_true_num_tokens": 64.0,
      "step": 159375
    },
    {
      "epoch": 30.6,
      "learning_rate": 0.001,
      "loss": 2.5335,
      "step": 159384
    },
    {
      "epoch": 30.61,
      "learning_rate": 0.001,
      "loss": 2.5269,
      "step": 159396
    },
    {
      "epoch": 30.61,
      "learning_rate": 0.001,
      "loss": 2.5206,
      "step": 159408
    },
    {
      "epoch": 30.61,
      "learning_rate": 0.001,
      "loss": 2.5357,
      "step": 159420
    },
    {
      "epoch": 30.61,
      "learning_rate": 0.001,
      "loss": 2.533,
      "step": 159432
    },
    {
      "epoch": 30.62,
      "learning_rate": 0.001,
      "loss": 2.5267,
      "step": 159444
    },
    {
      "epoch": 30.62,
      "learning_rate": 0.001,
      "loss": 2.5365,
      "step": 159456
    },
    {
      "epoch": 30.62,
      "learning_rate": 0.001,
      "loss": 2.5304,
      "step": 159468
    },
    {
      "epoch": 30.62,
      "learning_rate": 0.001,
      "loss": 2.5228,
      "step": 159480
    },
    {
      "epoch": 30.62,
      "learning_rate": 0.001,
      "loss": 2.5286,
      "step": 159492
    },
    {
      "epoch": 30.63,
      "learning_rate": 0.001,
      "loss": 2.5335,
      "step": 159504
    },
    {
      "epoch": 30.63,
      "learning_rate": 0.001,
      "loss": 2.5352,
      "step": 159516
    },
    {
      "epoch": 30.63,
      "learning_rate": 0.001,
      "loss": 2.5326,
      "step": 159528
    },
    {
      "epoch": 30.63,
      "learning_rate": 0.001,
      "loss": 2.5399,
      "step": 159540
    },
    {
      "epoch": 30.64,
      "learning_rate": 0.001,
      "loss": 2.5396,
      "step": 159552
    },
    {
      "epoch": 30.64,
      "learning_rate": 0.001,
      "loss": 2.5249,
      "step": 159564
    },
    {
      "epoch": 30.64,
      "learning_rate": 0.001,
      "loss": 2.5182,
      "step": 159576
    },
    {
      "epoch": 30.64,
      "learning_rate": 0.001,
      "loss": 2.5272,
      "step": 159588
    },
    {
      "epoch": 30.65,
      "learning_rate": 0.001,
      "loss": 2.5356,
      "step": 159600
    },
    {
      "epoch": 30.65,
      "learning_rate": 0.001,
      "loss": 2.5327,
      "step": 159612
    },
    {
      "epoch": 30.65,
      "learning_rate": 0.001,
      "loss": 2.53,
      "step": 159624
    },
    {
      "epoch": 30.65,
      "learning_rate": 0.001,
      "loss": 2.5218,
      "step": 159636
    },
    {
      "epoch": 30.65,
      "learning_rate": 0.001,
      "loss": 2.539,
      "step": 159648
    },
    {
      "epoch": 30.66,
      "learning_rate": 0.001,
      "loss": 2.5286,
      "step": 159660
    },
    {
      "epoch": 30.66,
      "learning_rate": 0.001,
      "loss": 2.5425,
      "step": 159672
    },
    {
      "epoch": 30.66,
      "learning_rate": 0.001,
      "loss": 2.5297,
      "step": 159684
    },
    {
      "epoch": 30.66,
      "learning_rate": 0.001,
      "loss": 2.528,
      "step": 159696
    },
    {
      "epoch": 30.67,
      "learning_rate": 0.001,
      "loss": 2.5348,
      "step": 159708
    },
    {
      "epoch": 30.67,
      "learning_rate": 0.001,
      "loss": 2.532,
      "step": 159720
    },
    {
      "epoch": 30.67,
      "learning_rate": 0.001,
      "loss": 2.5311,
      "step": 159732
    },
    {
      "epoch": 30.67,
      "learning_rate": 0.001,
      "loss": 2.5311,
      "step": 159744
    },
    {
      "epoch": 30.68,
      "learning_rate": 0.001,
      "loss": 2.5276,
      "step": 159756
    },
    {
      "epoch": 30.68,
      "learning_rate": 0.001,
      "loss": 2.5261,
      "step": 159768
    },
    {
      "epoch": 30.68,
      "learning_rate": 0.001,
      "loss": 2.5381,
      "step": 159780
    },
    {
      "epoch": 30.68,
      "learning_rate": 0.001,
      "loss": 2.5288,
      "step": 159792
    },
    {
      "epoch": 30.68,
      "learning_rate": 0.001,
      "loss": 2.5303,
      "step": 159804
    },
    {
      "epoch": 30.69,
      "learning_rate": 0.001,
      "loss": 2.5292,
      "step": 159816
    },
    {
      "epoch": 30.69,
      "learning_rate": 0.001,
      "loss": 2.5309,
      "step": 159828
    },
    {
      "epoch": 30.69,
      "learning_rate": 0.001,
      "loss": 2.5296,
      "step": 159840
    },
    {
      "epoch": 30.69,
      "learning_rate": 0.001,
      "loss": 2.54,
      "step": 159852
    },
    {
      "epoch": 30.7,
      "learning_rate": 0.001,
      "loss": 2.5389,
      "step": 159864
    },
    {
      "epoch": 30.7,
      "learning_rate": 0.001,
      "loss": 2.5377,
      "step": 159876
    },
    {
      "epoch": 30.7,
      "learning_rate": 0.001,
      "loss": 2.5356,
      "step": 159888
    },
    {
      "epoch": 30.7,
      "learning_rate": 0.001,
      "loss": 2.5354,
      "step": 159900
    },
    {
      "epoch": 30.71,
      "learning_rate": 0.001,
      "loss": 2.5205,
      "step": 159912
    },
    {
      "epoch": 30.71,
      "learning_rate": 0.001,
      "loss": 2.5292,
      "step": 159924
    },
    {
      "epoch": 30.71,
      "learning_rate": 0.001,
      "loss": 2.5365,
      "step": 159936
    },
    {
      "epoch": 30.71,
      "learning_rate": 0.001,
      "loss": 2.5261,
      "step": 159948
    },
    {
      "epoch": 30.71,
      "learning_rate": 0.001,
      "loss": 2.5368,
      "step": 159960
    },
    {
      "epoch": 30.72,
      "learning_rate": 0.001,
      "loss": 2.5317,
      "step": 159972
    },
    {
      "epoch": 30.72,
      "learning_rate": 0.001,
      "loss": 2.5381,
      "step": 159984
    },
    {
      "epoch": 30.72,
      "learning_rate": 0.001,
      "loss": 2.5198,
      "step": 159996
    },
    {
      "epoch": 30.72,
      "eval_ag_news_accuracy": 0.32665625,
      "eval_ag_news_bleu_score": 4.8211022640886485,
      "eval_ag_news_bleu_score_sem": 0.14933529579962598,
      "eval_ag_news_emb_cos_sim": 0.8135841488838196,
      "eval_ag_news_emb_cos_sim_sem": 0.007529377752878318,
      "eval_ag_news_emb_top1_equal": 0.2421875,
      "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5108516216278076,
      "eval_ag_news_n_ngrams_match_1": 14.182,
      "eval_ag_news_n_ngrams_match_2": 3.176,
      "eval_ag_news_n_ngrams_match_3": 0.888,
      "eval_ag_news_num_pred_words": 46.734,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.476765184981616,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3520814450767692,
      "eval_ag_news_runtime": 13.8999,
      "eval_ag_news_samples_per_second": 35.971,
      "eval_ag_news_steps_per_second": 0.072,
      "eval_ag_news_token_set_f1": 0.3531369150389347,
      "eval_ag_news_token_set_f1_sem": 0.004415020256536574,
      "eval_ag_news_token_set_precision": 0.3387969933087183,
      "eval_ag_news_token_set_recall": 0.3838311845614786,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 160000
    },
    {
      "epoch": 30.72,
      "eval_anthropic_toxic_prompts_accuracy": 0.11559375,
      "eval_anthropic_toxic_prompts_bleu_score": 2.9588931101647624,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10841244418804719,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6672136187553406,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009386944783257302,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2257540225982666,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.2,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.86,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.648,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.534,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.172547671489866,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21143710105673152,
      "eval_anthropic_toxic_prompts_runtime": 11.9029,
      "eval_anthropic_toxic_prompts_samples_per_second": 42.007,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.084,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3541665891425984,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006686513148902574,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4389848120495779,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32438534561698307,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 160000
    },
    {
      "epoch": 30.72,
      "eval_arxiv_accuracy": 0.34984375,
      "eval_arxiv_bleu_score": 4.4218953829023135,
      "eval_arxiv_bleu_score_sem": 0.12849593479384475,
      "eval_arxiv_emb_cos_sim": 0.767446756362915,
      "eval_arxiv_emb_cos_sim_sem": 0.009005053505989107,
      "eval_arxiv_emb_top1_equal": 0.2890625,
      "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3747940063476562,
      "eval_arxiv_n_ngrams_match_1": 15.382,
      "eval_arxiv_n_ngrams_match_2": 2.984,
      "eval_arxiv_n_ngrams_match_3": 0.69,
      "eval_arxiv_num_pred_words": 41.178,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.21826438428178,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3657894847798854,
      "eval_arxiv_runtime": 12.8344,
      "eval_arxiv_samples_per_second": 38.958,
      "eval_arxiv_steps_per_second": 0.078,
      "eval_arxiv_token_set_f1": 0.35791723278536786,
      "eval_arxiv_token_set_f1_sem": 0.004216046687543057,
      "eval_arxiv_token_set_precision": 0.31001517612582224,
      "eval_arxiv_token_set_recall": 0.44020087523703066,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 160000
    },
    {
      "epoch": 30.72,
      "eval_python_code_alpaca_accuracy": 0.1621875,
      "eval_python_code_alpaca_bleu_score": 4.7142978533552,
      "eval_python_code_alpaca_bleu_score_sem": 0.14919315946492306,
      "eval_python_code_alpaca_emb_cos_sim": 0.7564646005630493,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009458119615135567,
      "eval_python_code_alpaca_emb_top1_equal": 0.15625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8869330883026123,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.908,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.974,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.028,
      "eval_python_code_alpaca_num_pred_words": 44.166,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.938210245476895,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3318526748730509,
      "eval_python_code_alpaca_runtime": 12.037,
      "eval_python_code_alpaca_samples_per_second": 41.539,
      "eval_python_code_alpaca_steps_per_second": 0.083,
      "eval_python_code_alpaca_token_set_f1": 0.4827637467161597,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005482446242744389,
      "eval_python_code_alpaca_token_set_precision": 0.5429172537645555,
      "eval_python_code_alpaca_token_set_recall": 0.4560993072903012,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 160000
    },
    {
      "epoch": 30.72,
      "eval_wikibio_accuracy": 0.32628125,
      "eval_wikibio_bleu_score": 6.102570582861055,
      "eval_wikibio_bleu_score_sem": 0.23431773435141062,
      "eval_wikibio_emb_cos_sim": 0.7317752838134766,
      "eval_wikibio_emb_cos_sim_sem": 0.010413418107849966,
      "eval_wikibio_emb_top1_equal": 0.203125,
      "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6575965881347656,
      "eval_wikibio_n_ngrams_match_1": 10.05,
      "eval_wikibio_n_ngrams_match_2": 3.438,
      "eval_wikibio_n_ngrams_match_3": 1.268,
      "eval_wikibio_num_pred_words": 35.938,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 38.768055208020485,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.35477244583029943,
      "eval_wikibio_runtime": 12.6278,
      "eval_wikibio_samples_per_second": 39.595,
      "eval_wikibio_steps_per_second": 0.079,
      "eval_wikibio_token_set_f1": 0.31950528501300085,
      "eval_wikibio_token_set_f1_sem": 0.005725821948357305,
      "eval_wikibio_token_set_precision": 0.3274798394959956,
      "eval_wikibio_token_set_recall": 0.3281640255184498,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 160000
    },
    {
      "epoch": 30.72,
      "eval_nq_accuracy": 0.53140625,
      "eval_nq_bleu_score": 11.833525440067035,
      "eval_nq_bleu_score_sem": 0.47609663274117364,
      "eval_nq_emb_cos_sim": 0.8337178230285645,
      "eval_nq_emb_cos_sim_sem": 0.0071316583583607585,
      "eval_nq_emb_top1_equal": 0.2890625,
      "eval_nq_emb_top1_equal_sem": 0.04022626667363519,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1544744968414307,
      "eval_nq_n_ngrams_match_1": 23.196,
      "eval_nq_n_ngrams_match_2": 8.506,
      "eval_nq_n_ngrams_match_3": 3.932,
      "eval_nq_num_pred_words": 49.366,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.623357386530598,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.44948892290774267,
      "eval_nq_runtime": 12.2507,
      "eval_nq_samples_per_second": 40.814,
      "eval_nq_steps_per_second": 0.082,
      "eval_nq_token_set_f1": 0.46120027141724096,
      "eval_nq_token_set_f1_sem": 0.0051430027443023895,
      "eval_nq_token_set_precision": 0.4208765705526024,
      "eval_nq_token_set_recall": 0.5170084313661555,
      "eval_nq_true_num_tokens": 64.0,
      "step": 160000
    },
    {
      "epoch": 30.72,
      "learning_rate": 0.001,
      "loss": 2.531,
      "step": 160008
    },
    {
      "epoch": 30.73,
      "learning_rate": 0.001,
      "loss": 2.5314,
      "step": 160020
    },
    {
      "epoch": 30.73,
      "learning_rate": 0.001,
      "loss": 2.5277,
      "step": 160032
    },
    {
      "epoch": 30.73,
      "learning_rate": 0.001,
      "loss": 2.5403,
      "step": 160044
    },
    {
      "epoch": 30.73,
      "learning_rate": 0.001,
      "loss": 2.5316,
      "step": 160056
    },
    {
      "epoch": 30.74,
      "learning_rate": 0.001,
      "loss": 2.5274,
      "step": 160068
    },
    {
      "epoch": 30.74,
      "learning_rate": 0.001,
      "loss": 2.5375,
      "step": 160080
    },
    {
      "epoch": 30.74,
      "learning_rate": 0.001,
      "loss": 2.5334,
      "step": 160092
    },
    {
      "epoch": 30.74,
      "learning_rate": 0.001,
      "loss": 2.5233,
      "step": 160104
    },
    {
      "epoch": 30.74,
      "learning_rate": 0.001,
      "loss": 2.5368,
      "step": 160116
    },
    {
      "epoch": 30.75,
      "learning_rate": 0.001,
      "loss": 2.5277,
      "step": 160128
    },
    {
      "epoch": 30.75,
      "learning_rate": 0.001,
      "loss": 2.5352,
      "step": 160140
    },
    {
      "epoch": 30.75,
      "learning_rate": 0.001,
      "loss": 2.5349,
      "step": 160152
    },
    {
      "epoch": 30.75,
      "learning_rate": 0.001,
      "loss": 2.5371,
      "step": 160164
    },
    {
      "epoch": 30.76,
      "learning_rate": 0.001,
      "loss": 2.5271,
      "step": 160176
    },
    {
      "epoch": 30.76,
      "learning_rate": 0.001,
      "loss": 2.5215,
      "step": 160188
    },
    {
      "epoch": 30.76,
      "learning_rate": 0.001,
      "loss": 2.5297,
      "step": 160200
    },
    {
      "epoch": 30.76,
      "learning_rate": 0.001,
      "loss": 2.5283,
      "step": 160212
    },
    {
      "epoch": 30.76,
      "learning_rate": 0.001,
      "loss": 2.5286,
      "step": 160224
    },
    {
      "epoch": 30.77,
      "learning_rate": 0.001,
      "loss": 2.5286,
      "step": 160236
    },
    {
      "epoch": 30.77,
      "learning_rate": 0.001,
      "loss": 2.5245,
      "step": 160248
    },
    {
      "epoch": 30.77,
      "learning_rate": 0.001,
      "loss": 2.5334,
      "step": 160260
    },
    {
      "epoch": 30.77,
      "learning_rate": 0.001,
      "loss": 2.5223,
      "step": 160272
    },
    {
      "epoch": 30.78,
      "learning_rate": 0.001,
      "loss": 2.5372,
      "step": 160284
    },
    {
      "epoch": 30.78,
      "learning_rate": 0.001,
      "loss": 2.5355,
      "step": 160296
    },
    {
      "epoch": 30.78,
      "learning_rate": 0.001,
      "loss": 2.5441,
      "step": 160308
    },
    {
      "epoch": 30.78,
      "learning_rate": 0.001,
      "loss": 2.5356,
      "step": 160320
    },
    {
      "epoch": 30.79,
      "learning_rate": 0.001,
      "loss": 2.533,
      "step": 160332
    },
    {
      "epoch": 30.79,
      "learning_rate": 0.001,
      "loss": 2.5192,
      "step": 160344
    },
    {
      "epoch": 30.79,
      "learning_rate": 0.001,
      "loss": 2.5252,
      "step": 160356
    },
    {
      "epoch": 30.79,
      "learning_rate": 0.001,
      "loss": 2.537,
      "step": 160368
    },
    {
      "epoch": 30.79,
      "learning_rate": 0.001,
      "loss": 2.5323,
      "step": 160380
    },
    {
      "epoch": 30.8,
      "learning_rate": 0.001,
      "loss": 2.5358,
      "step": 160392
    },
    {
      "epoch": 30.8,
      "learning_rate": 0.001,
      "loss": 2.5237,
      "step": 160404
    },
    {
      "epoch": 30.8,
      "learning_rate": 0.001,
      "loss": 2.5288,
      "step": 160416
    },
    {
      "epoch": 30.8,
      "learning_rate": 0.001,
      "loss": 2.5425,
      "step": 160428
    },
    {
      "epoch": 30.81,
      "learning_rate": 0.001,
      "loss": 2.5222,
      "step": 160440
    },
    {
      "epoch": 30.81,
      "learning_rate": 0.001,
      "loss": 2.5352,
      "step": 160452
    },
    {
      "epoch": 30.81,
      "learning_rate": 0.001,
      "loss": 2.5297,
      "step": 160464
    },
    {
      "epoch": 30.81,
      "learning_rate": 0.001,
      "loss": 2.5283,
      "step": 160476
    },
    {
      "epoch": 30.82,
      "learning_rate": 0.001,
      "loss": 2.5258,
      "step": 160488
    },
    {
      "epoch": 30.82,
      "learning_rate": 0.001,
      "loss": 2.5374,
      "step": 160500
    },
    {
      "epoch": 30.82,
      "learning_rate": 0.001,
      "loss": 2.5343,
      "step": 160512
    },
    {
      "epoch": 30.82,
      "learning_rate": 0.001,
      "loss": 2.5285,
      "step": 160524
    },
    {
      "epoch": 30.82,
      "learning_rate": 0.001,
      "loss": 2.5319,
      "step": 160536
    },
    {
      "epoch": 30.83,
      "learning_rate": 0.001,
      "loss": 2.5324,
      "step": 160548
    },
    {
      "epoch": 30.83,
      "learning_rate": 0.001,
      "loss": 2.5275,
      "step": 160560
    },
    {
      "epoch": 30.83,
      "learning_rate": 0.001,
      "loss": 2.5333,
      "step": 160572
    },
    {
      "epoch": 30.83,
      "learning_rate": 0.001,
      "loss": 2.5293,
      "step": 160584
    },
    {
      "epoch": 30.84,
      "learning_rate": 0.001,
      "loss": 2.5435,
      "step": 160596
    },
    {
      "epoch": 30.84,
      "learning_rate": 0.001,
      "loss": 2.5348,
      "step": 160608
    },
    {
      "epoch": 30.84,
      "learning_rate": 0.001,
      "loss": 2.5326,
      "step": 160620
    },
    {
      "epoch": 30.84,
      "eval_ag_news_accuracy": 0.326625,
      "eval_ag_news_bleu_score": 4.994859670613498,
      "eval_ag_news_bleu_score_sem": 0.1503446370989245,
      "eval_ag_news_emb_cos_sim": 0.821833074092865,
      "eval_ag_news_emb_cos_sim_sem": 0.007148403169565235,
      "eval_ag_news_emb_top1_equal": 0.296875,
      "eval_ag_news_emb_top1_equal_sem": 0.04054163310179599,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5184292793273926,
      "eval_ag_news_n_ngrams_match_1": 14.304,
      "eval_ag_news_n_ngrams_match_2": 3.256,
      "eval_ag_news_n_ngrams_match_3": 0.906,
      "eval_ag_news_num_pred_words": 46.838,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.731404217690574,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35421617091994784,
      "eval_ag_news_runtime": 11.064,
      "eval_ag_news_samples_per_second": 45.192,
      "eval_ag_news_steps_per_second": 0.09,
      "eval_ag_news_token_set_f1": 0.35537135166790546,
      "eval_ag_news_token_set_f1_sem": 0.004342803236543565,
      "eval_ag_news_token_set_precision": 0.3419064089044689,
      "eval_ag_news_token_set_recall": 0.3851753583651109,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 160625
    },
    {
      "epoch": 30.84,
      "eval_anthropic_toxic_prompts_accuracy": 0.1166875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1784472985690444,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11829486181476633,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6784992218017578,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008208755974915579,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.21441388130188,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.462,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.982,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.722,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.406,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.888699902873654,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.22150702482181478,
      "eval_anthropic_toxic_prompts_runtime": 9.8862,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.576,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.101,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36046428621724147,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006224414292495103,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4519661442243867,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32524242103810214,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 160625
    },
    {
      "epoch": 30.84,
      "eval_arxiv_accuracy": 0.34890625,
      "eval_arxiv_bleu_score": 4.371614132708319,
      "eval_arxiv_bleu_score_sem": 0.1284399091651756,
      "eval_arxiv_emb_cos_sim": 0.7687532901763916,
      "eval_arxiv_emb_cos_sim_sem": 0.008708890894225595,
      "eval_arxiv_emb_top1_equal": 0.296875,
      "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.37642765045166,
      "eval_arxiv_n_ngrams_match_1": 15.296,
      "eval_arxiv_n_ngrams_match_2": 2.924,
      "eval_arxiv_n_ngrams_match_3": 0.648,
      "eval_arxiv_num_pred_words": 40.784,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.266035639612895,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3664985707100959,
      "eval_arxiv_runtime": 10.2785,
      "eval_arxiv_samples_per_second": 48.645,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.3565195042228485,
      "eval_arxiv_token_set_f1_sem": 0.004203263291642058,
      "eval_arxiv_token_set_precision": 0.31159660576734755,
      "eval_arxiv_token_set_recall": 0.4302166566082645,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 160625
    },
    {
      "epoch": 30.84,
      "eval_python_code_alpaca_accuracy": 0.1629375,
      "eval_python_code_alpaca_bleu_score": 4.695726901908382,
      "eval_python_code_alpaca_bleu_score_sem": 0.14703906815409884,
      "eval_python_code_alpaca_emb_cos_sim": 0.7778176069259644,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007298174629926729,
      "eval_python_code_alpaca_emb_top1_equal": 0.1484375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8541946411132812,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.148,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.022,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.012,
      "eval_python_code_alpaca_num_pred_words": 44.308,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.360450183292524,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3417267456158005,
      "eval_python_code_alpaca_runtime": 9.7931,
      "eval_python_code_alpaca_samples_per_second": 51.056,
      "eval_python_code_alpaca_steps_per_second": 0.102,
      "eval_python_code_alpaca_token_set_f1": 0.4799079705609145,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005291364236949242,
      "eval_python_code_alpaca_token_set_precision": 0.5576091271515401,
      "eval_python_code_alpaca_token_set_recall": 0.44144399430587894,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 160625
    },
    {
      "epoch": 30.84,
      "eval_wikibio_accuracy": 0.32359375,
      "eval_wikibio_bleu_score": 5.923409481802042,
      "eval_wikibio_bleu_score_sem": 0.2141103659814,
      "eval_wikibio_emb_cos_sim": 0.7442214488983154,
      "eval_wikibio_emb_cos_sim_sem": 0.007870452441541744,
      "eval_wikibio_emb_top1_equal": 0.1796875,
      "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.732048273086548,
      "eval_wikibio_n_ngrams_match_1": 10.018,
      "eval_wikibio_n_ngrams_match_2": 3.322,
      "eval_wikibio_n_ngrams_match_3": 1.214,
      "eval_wikibio_num_pred_words": 36.23,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 41.76456585001452,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3555785154706334,
      "eval_wikibio_runtime": 10.3761,
      "eval_wikibio_samples_per_second": 48.188,
      "eval_wikibio_steps_per_second": 0.096,
      "eval_wikibio_token_set_f1": 0.3193002262862752,
      "eval_wikibio_token_set_f1_sem": 0.005199064140476549,
      "eval_wikibio_token_set_precision": 0.3258305436947781,
      "eval_wikibio_token_set_recall": 0.3291215165996358,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 160625
    },
    {
      "epoch": 30.84,
      "eval_nq_accuracy": 0.5326875,
      "eval_nq_bleu_score": 11.979396930833406,
      "eval_nq_bleu_score_sem": 0.4834926063441438,
      "eval_nq_emb_cos_sim": 0.8307627439498901,
      "eval_nq_emb_cos_sim_sem": 0.007245120308730118,
      "eval_nq_emb_top1_equal": 0.328125,
      "eval_nq_emb_top1_equal_sem": 0.041664103776406315,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1502037048339844,
      "eval_nq_n_ngrams_match_1": 23.28,
      "eval_nq_n_ngrams_match_2": 8.63,
      "eval_nq_n_ngrams_match_3": 4.002,
      "eval_nq_num_pred_words": 49.262,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.586607352461746,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45030207887744267,
      "eval_nq_runtime": 10.3617,
      "eval_nq_samples_per_second": 48.255,
      "eval_nq_steps_per_second": 0.097,
      "eval_nq_token_set_f1": 0.4640430004506094,
      "eval_nq_token_set_f1_sem": 0.005046485678447061,
      "eval_nq_token_set_precision": 0.42385014986078706,
      "eval_nq_token_set_recall": 0.5204246436028306,
      "eval_nq_true_num_tokens": 64.0,
      "step": 160625
    },
    {
      "epoch": 30.84,
      "learning_rate": 0.001,
      "loss": 2.5294,
      "step": 160632
    },
    {
      "epoch": 30.85,
      "learning_rate": 0.001,
      "loss": 2.5236,
      "step": 160644
    },
    {
      "epoch": 30.85,
      "learning_rate": 0.001,
      "loss": 2.5346,
      "step": 160656
    },
    {
      "epoch": 30.85,
      "learning_rate": 0.001,
      "loss": 2.5288,
      "step": 160668
    },
    {
      "epoch": 30.85,
      "learning_rate": 0.001,
      "loss": 2.5428,
      "step": 160680
    },
    {
      "epoch": 30.85,
      "learning_rate": 0.001,
      "loss": 2.5286,
      "step": 160692
    },
    {
      "epoch": 30.86,
      "learning_rate": 0.001,
      "loss": 2.5278,
      "step": 160704
    },
    {
      "epoch": 30.86,
      "learning_rate": 0.001,
      "loss": 2.529,
      "step": 160716
    },
    {
      "epoch": 30.86,
      "learning_rate": 0.001,
      "loss": 2.5335,
      "step": 160728
    },
    {
      "epoch": 30.86,
      "learning_rate": 0.001,
      "loss": 2.5294,
      "step": 160740
    },
    {
      "epoch": 30.87,
      "learning_rate": 0.001,
      "loss": 2.5352,
      "step": 160752
    },
    {
      "epoch": 30.87,
      "learning_rate": 0.001,
      "loss": 2.5236,
      "step": 160764
    },
    {
      "epoch": 30.87,
      "learning_rate": 0.001,
      "loss": 2.5288,
      "step": 160776
    },
    {
      "epoch": 30.87,
      "learning_rate": 0.001,
      "loss": 2.5282,
      "step": 160788
    },
    {
      "epoch": 30.88,
      "learning_rate": 0.001,
      "loss": 2.533,
      "step": 160800
    },
    {
      "epoch": 30.88,
      "learning_rate": 0.001,
      "loss": 2.53,
      "step": 160812
    },
    {
      "epoch": 30.88,
      "learning_rate": 0.001,
      "loss": 2.5358,
      "step": 160824
    },
    {
      "epoch": 30.88,
      "learning_rate": 0.001,
      "loss": 2.5222,
      "step": 160836
    },
    {
      "epoch": 30.88,
      "learning_rate": 0.001,
      "loss": 2.5381,
      "step": 160848
    },
    {
      "epoch": 30.89,
      "learning_rate": 0.001,
      "loss": 2.5428,
      "step": 160860
    },
    {
      "epoch": 30.89,
      "learning_rate": 0.001,
      "loss": 2.5353,
      "step": 160872
    },
    {
      "epoch": 30.89,
      "learning_rate": 0.001,
      "loss": 2.5201,
      "step": 160884
    },
    {
      "epoch": 30.89,
      "learning_rate": 0.001,
      "loss": 2.53,
      "step": 160896
    },
    {
      "epoch": 30.9,
      "learning_rate": 0.001,
      "loss": 2.5352,
      "step": 160908
    },
    {
      "epoch": 30.9,
      "learning_rate": 0.001,
      "loss": 2.5337,
      "step": 160920
    },
    {
      "epoch": 30.9,
      "learning_rate": 0.001,
      "loss": 2.535,
      "step": 160932
    },
    {
      "epoch": 30.9,
      "learning_rate": 0.001,
      "loss": 2.5404,
      "step": 160944
    },
    {
      "epoch": 30.91,
      "learning_rate": 0.001,
      "loss": 2.5426,
      "step": 160956
    },
    {
      "epoch": 30.91,
      "learning_rate": 0.001,
      "loss": 2.5409,
      "step": 160968
    },
    {
      "epoch": 30.91,
      "learning_rate": 0.001,
      "loss": 2.5303,
      "step": 160980
    },
    {
      "epoch": 30.91,
      "learning_rate": 0.001,
      "loss": 2.541,
      "step": 160992
    },
    {
      "epoch": 30.91,
      "learning_rate": 0.001,
      "loss": 2.5327,
      "step": 161004
    },
    {
      "epoch": 30.92,
      "learning_rate": 0.001,
      "loss": 2.5425,
      "step": 161016
    },
    {
      "epoch": 30.92,
      "learning_rate": 0.001,
      "loss": 2.5426,
      "step": 161028
    },
    {
      "epoch": 30.92,
      "learning_rate": 0.001,
      "loss": 2.532,
      "step": 161040
    },
    {
      "epoch": 30.92,
      "learning_rate": 0.001,
      "loss": 2.535,
      "step": 161052
    },
    {
      "epoch": 30.93,
      "learning_rate": 0.001,
      "loss": 2.5264,
      "step": 161064
    },
    {
      "epoch": 30.93,
      "learning_rate": 0.001,
      "loss": 2.5269,
      "step": 161076
    },
    {
      "epoch": 30.93,
      "learning_rate": 0.001,
      "loss": 2.5338,
      "step": 161088
    },
    {
      "epoch": 30.93,
      "learning_rate": 0.001,
      "loss": 2.5262,
      "step": 161100
    },
    {
      "epoch": 30.94,
      "learning_rate": 0.001,
      "loss": 2.5347,
      "step": 161112
    },
    {
      "epoch": 30.94,
      "learning_rate": 0.001,
      "loss": 2.5313,
      "step": 161124
    },
    {
      "epoch": 30.94,
      "learning_rate": 0.001,
      "loss": 2.5347,
      "step": 161136
    },
    {
      "epoch": 30.94,
      "learning_rate": 0.001,
      "loss": 2.5332,
      "step": 161148
    },
    {
      "epoch": 30.94,
      "learning_rate": 0.001,
      "loss": 2.5421,
      "step": 161160
    },
    {
      "epoch": 30.95,
      "learning_rate": 0.001,
      "loss": 2.5289,
      "step": 161172
    },
    {
      "epoch": 30.95,
      "learning_rate": 0.001,
      "loss": 2.5313,
      "step": 161184
    },
    {
      "epoch": 30.95,
      "learning_rate": 0.001,
      "loss": 2.5315,
      "step": 161196
    },
    {
      "epoch": 30.95,
      "learning_rate": 0.001,
      "loss": 2.5334,
      "step": 161208
    },
    {
      "epoch": 30.96,
      "learning_rate": 0.001,
      "loss": 2.5357,
      "step": 161220
    },
    {
      "epoch": 30.96,
      "learning_rate": 0.001,
      "loss": 2.5189,
      "step": 161232
    },
    {
      "epoch": 30.96,
      "learning_rate": 0.001,
      "loss": 2.5337,
      "step": 161244
    },
    {
      "epoch": 30.96,
      "eval_ag_news_accuracy": 0.32625,
      "eval_ag_news_bleu_score": 4.763700920805595,
      "eval_ag_news_bleu_score_sem": 0.1444648189998102,
      "eval_ag_news_emb_cos_sim": 0.8166588544845581,
      "eval_ag_news_emb_cos_sim_sem": 0.006733893524272149,
      "eval_ag_news_emb_top1_equal": 0.25,
      "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.517921209335327,
      "eval_ag_news_n_ngrams_match_1": 14.052,
      "eval_ag_news_n_ngrams_match_2": 3.112,
      "eval_ag_news_n_ngrams_match_3": 0.864,
      "eval_ag_news_num_pred_words": 46.58,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.7142706563101,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3512767912932626,
      "eval_ag_news_runtime": 13.1821,
      "eval_ag_news_samples_per_second": 37.93,
      "eval_ag_news_steps_per_second": 0.076,
      "eval_ag_news_token_set_f1": 0.3497340067166666,
      "eval_ag_news_token_set_f1_sem": 0.004483809887190126,
      "eval_ag_news_token_set_precision": 0.33446862515263764,
      "eval_ag_news_token_set_recall": 0.3812839266087502,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 161250
    },
    {
      "epoch": 30.96,
      "eval_anthropic_toxic_prompts_accuracy": 0.115625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.205134210821329,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1178727653003391,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6712790727615356,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008887372919501611,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2249948978424072,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.236,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.954,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.73,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.706,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.153445818646397,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21398959768031206,
      "eval_anthropic_toxic_prompts_runtime": 9.8125,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.955,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.102,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3587687765280112,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00637237883673165,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4394723904158812,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32832215636207457,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 161250
    },
    {
      "epoch": 30.96,
      "eval_arxiv_accuracy": 0.3493125,
      "eval_arxiv_bleu_score": 4.424346617931371,
      "eval_arxiv_bleu_score_sem": 0.12605220824833557,
      "eval_arxiv_emb_cos_sim": 0.7720806002616882,
      "eval_arxiv_emb_cos_sim_sem": 0.006980470368762594,
      "eval_arxiv_emb_top1_equal": 0.2734375,
      "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3651304244995117,
      "eval_arxiv_n_ngrams_match_1": 15.412,
      "eval_arxiv_n_ngrams_match_2": 3.036,
      "eval_arxiv_n_ngrams_match_3": 0.682,
      "eval_arxiv_num_pred_words": 40.152,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.937271182052562,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3676821466955644,
      "eval_arxiv_runtime": 10.8396,
      "eval_arxiv_samples_per_second": 46.127,
      "eval_arxiv_steps_per_second": 0.092,
      "eval_arxiv_token_set_f1": 0.36101756796020285,
      "eval_arxiv_token_set_f1_sem": 0.004296467845172957,
      "eval_arxiv_token_set_precision": 0.3131804612985979,
      "eval_arxiv_token_set_recall": 0.44547692937519234,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 161250
    },
    {
      "epoch": 30.96,
      "eval_python_code_alpaca_accuracy": 0.159,
      "eval_python_code_alpaca_bleu_score": 4.682202597730714,
      "eval_python_code_alpaca_bleu_score_sem": 0.14078400840801508,
      "eval_python_code_alpaca_emb_cos_sim": 0.7602322101593018,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008470578124765824,
      "eval_python_code_alpaca_emb_top1_equal": 0.15625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8762505054473877,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.816,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.928,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.99,
      "eval_python_code_alpaca_num_pred_words": 43.264,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.747603725850535,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33638938309376853,
      "eval_python_code_alpaca_runtime": 10.0076,
      "eval_python_code_alpaca_samples_per_second": 49.962,
      "eval_python_code_alpaca_steps_per_second": 0.1,
      "eval_python_code_alpaca_token_set_f1": 0.4754179225257856,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005564868277901597,
      "eval_python_code_alpaca_token_set_precision": 0.5349891190389903,
      "eval_python_code_alpaca_token_set_recall": 0.44713364818560003,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 161250
    },
    {
      "epoch": 30.96,
      "eval_wikibio_accuracy": 0.32415625,
      "eval_wikibio_bleu_score": 6.177236924693363,
      "eval_wikibio_bleu_score_sem": 0.22484216461256287,
      "eval_wikibio_emb_cos_sim": 0.7451162338256836,
      "eval_wikibio_emb_cos_sim_sem": 0.00865770157107559,
      "eval_wikibio_emb_top1_equal": 0.1796875,
      "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7214102745056152,
      "eval_wikibio_n_ngrams_match_1": 10.086,
      "eval_wikibio_n_ngrams_match_2": 3.442,
      "eval_wikibio_n_ngrams_match_3": 1.252,
      "eval_wikibio_num_pred_words": 35.816,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 41.32262928575278,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3626412895880609,
      "eval_wikibio_runtime": 10.3095,
      "eval_wikibio_samples_per_second": 48.499,
      "eval_wikibio_steps_per_second": 0.097,
      "eval_wikibio_token_set_f1": 0.3218342306058661,
      "eval_wikibio_token_set_f1_sem": 0.005254815071430934,
      "eval_wikibio_token_set_precision": 0.32921647572891294,
      "eval_wikibio_token_set_recall": 0.3344275048608989,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 161250
    },
    {
      "epoch": 30.96,
      "eval_nq_accuracy": 0.53203125,
      "eval_nq_bleu_score": 11.93263695435168,
      "eval_nq_bleu_score_sem": 0.47192356019846055,
      "eval_nq_emb_cos_sim": 0.8366599082946777,
      "eval_nq_emb_cos_sim_sem": 0.00711704262155059,
      "eval_nq_emb_top1_equal": 0.25,
      "eval_nq_emb_top1_equal_sem": 0.03842366440207048,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.149911642074585,
      "eval_nq_n_ngrams_match_1": 23.294,
      "eval_nq_n_ngrams_match_2": 8.532,
      "eval_nq_n_ngrams_match_3": 3.992,
      "eval_nq_num_pred_words": 48.908,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.584099890410473,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45400187651642276,
      "eval_nq_runtime": 10.5624,
      "eval_nq_samples_per_second": 47.338,
      "eval_nq_steps_per_second": 0.095,
      "eval_nq_token_set_f1": 0.46716666079282915,
      "eval_nq_token_set_f1_sem": 0.004895317024823251,
      "eval_nq_token_set_precision": 0.42524513982289924,
      "eval_nq_token_set_recall": 0.5265835422703615,
      "eval_nq_true_num_tokens": 64.0,
      "step": 161250
    },
    {
      "epoch": 30.96,
      "learning_rate": 0.001,
      "loss": 2.5301,
      "step": 161256
    },
    {
      "epoch": 30.97,
      "learning_rate": 0.001,
      "loss": 2.5344,
      "step": 161268
    },
    {
      "epoch": 30.97,
      "learning_rate": 0.001,
      "loss": 2.5362,
      "step": 161280
    },
    {
      "epoch": 30.97,
      "learning_rate": 0.001,
      "loss": 2.5231,
      "step": 161292
    },
    {
      "epoch": 30.97,
      "learning_rate": 0.001,
      "loss": 2.5241,
      "step": 161304
    },
    {
      "epoch": 30.97,
      "learning_rate": 0.001,
      "loss": 2.5356,
      "step": 161316
    },
    {
      "epoch": 30.98,
      "learning_rate": 0.001,
      "loss": 2.537,
      "step": 161328
    },
    {
      "epoch": 30.98,
      "learning_rate": 0.001,
      "loss": 2.5359,
      "step": 161340
    },
    {
      "epoch": 30.98,
      "learning_rate": 0.001,
      "loss": 2.5328,
      "step": 161352
    },
    {
      "epoch": 30.98,
      "learning_rate": 0.001,
      "loss": 2.5397,
      "step": 161364
    },
    {
      "epoch": 30.99,
      "learning_rate": 0.001,
      "loss": 2.5255,
      "step": 161376
    },
    {
      "epoch": 30.99,
      "learning_rate": 0.001,
      "loss": 2.534,
      "step": 161388
    },
    {
      "epoch": 30.99,
      "learning_rate": 0.001,
      "loss": 2.5314,
      "step": 161400
    },
    {
      "epoch": 30.99,
      "learning_rate": 0.001,
      "loss": 2.5233,
      "step": 161412
    },
    {
      "epoch": 31.0,
      "learning_rate": 0.001,
      "loss": 2.5324,
      "step": 161424
    },
    {
      "epoch": 31.0,
      "learning_rate": 0.001,
      "loss": 2.5382,
      "step": 161436
    },
    {
      "epoch": 31.0,
      "learning_rate": 0.001,
      "loss": 2.5309,
      "step": 161448
    },
    {
      "epoch": 31.0,
      "learning_rate": 0.001,
      "loss": 2.5166,
      "step": 161460
    },
    {
      "epoch": 31.0,
      "learning_rate": 0.001,
      "loss": 2.5185,
      "step": 161472
    },
    {
      "epoch": 31.01,
      "learning_rate": 0.001,
      "loss": 2.5255,
      "step": 161484
    },
    {
      "epoch": 31.01,
      "learning_rate": 0.001,
      "loss": 2.5173,
      "step": 161496
    },
    {
      "epoch": 31.01,
      "learning_rate": 0.001,
      "loss": 2.519,
      "step": 161508
    },
    {
      "epoch": 31.01,
      "learning_rate": 0.001,
      "loss": 2.521,
      "step": 161520
    },
    {
      "epoch": 31.02,
      "learning_rate": 0.001,
      "loss": 2.5212,
      "step": 161532
    },
    {
      "epoch": 31.02,
      "learning_rate": 0.001,
      "loss": 2.5211,
      "step": 161544
    },
    {
      "epoch": 31.02,
      "learning_rate": 0.001,
      "loss": 2.5136,
      "step": 161556
    },
    {
      "epoch": 31.02,
      "learning_rate": 0.001,
      "loss": 2.5129,
      "step": 161568
    },
    {
      "epoch": 31.03,
      "learning_rate": 0.001,
      "loss": 2.522,
      "step": 161580
    },
    {
      "epoch": 31.03,
      "learning_rate": 0.001,
      "loss": 2.5238,
      "step": 161592
    },
    {
      "epoch": 31.03,
      "learning_rate": 0.001,
      "loss": 2.5154,
      "step": 161604
    },
    {
      "epoch": 31.03,
      "learning_rate": 0.001,
      "loss": 2.5213,
      "step": 161616
    },
    {
      "epoch": 31.03,
      "learning_rate": 0.001,
      "loss": 2.5257,
      "step": 161628
    },
    {
      "epoch": 31.04,
      "learning_rate": 0.001,
      "loss": 2.5234,
      "step": 161640
    },
    {
      "epoch": 31.04,
      "learning_rate": 0.001,
      "loss": 2.5198,
      "step": 161652
    },
    {
      "epoch": 31.04,
      "learning_rate": 0.001,
      "loss": 2.5217,
      "step": 161664
    },
    {
      "epoch": 31.04,
      "learning_rate": 0.001,
      "loss": 2.5149,
      "step": 161676
    },
    {
      "epoch": 31.05,
      "learning_rate": 0.001,
      "loss": 2.5226,
      "step": 161688
    },
    {
      "epoch": 31.05,
      "learning_rate": 0.001,
      "loss": 2.5211,
      "step": 161700
    },
    {
      "epoch": 31.05,
      "learning_rate": 0.001,
      "loss": 2.5273,
      "step": 161712
    },
    {
      "epoch": 31.05,
      "learning_rate": 0.001,
      "loss": 2.5147,
      "step": 161724
    },
    {
      "epoch": 31.06,
      "learning_rate": 0.001,
      "loss": 2.5171,
      "step": 161736
    },
    {
      "epoch": 31.06,
      "learning_rate": 0.001,
      "loss": 2.5252,
      "step": 161748
    },
    {
      "epoch": 31.06,
      "learning_rate": 0.001,
      "loss": 2.5171,
      "step": 161760
    },
    {
      "epoch": 31.06,
      "learning_rate": 0.001,
      "loss": 2.5284,
      "step": 161772
    },
    {
      "epoch": 31.06,
      "learning_rate": 0.001,
      "loss": 2.5188,
      "step": 161784
    },
    {
      "epoch": 31.07,
      "learning_rate": 0.001,
      "loss": 2.5221,
      "step": 161796
    },
    {
      "epoch": 31.07,
      "learning_rate": 0.001,
      "loss": 2.5229,
      "step": 161808
    },
    {
      "epoch": 31.07,
      "learning_rate": 0.001,
      "loss": 2.5161,
      "step": 161820
    },
    {
      "epoch": 31.07,
      "learning_rate": 0.001,
      "loss": 2.529,
      "step": 161832
    },
    {
      "epoch": 31.08,
      "learning_rate": 0.001,
      "loss": 2.5257,
      "step": 161844
    },
    {
      "epoch": 31.08,
      "learning_rate": 0.001,
      "loss": 2.5222,
      "step": 161856
    },
    {
      "epoch": 31.08,
      "learning_rate": 0.001,
      "loss": 2.5133,
      "step": 161868
    },
    {
      "epoch": 31.08,
      "eval_ag_news_accuracy": 0.32559375,
      "eval_ag_news_bleu_score": 4.901664204289623,
      "eval_ag_news_bleu_score_sem": 0.15507902193259124,
      "eval_ag_news_emb_cos_sim": 0.8236854672431946,
      "eval_ag_news_emb_cos_sim_sem": 0.006079630872680333,
      "eval_ag_news_emb_top1_equal": 0.234375,
      "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.518623113632202,
      "eval_ag_news_n_ngrams_match_1": 14.224,
      "eval_ag_news_n_ngrams_match_2": 3.182,
      "eval_ag_news_n_ngrams_match_3": 0.926,
      "eval_ag_news_num_pred_words": 46.668,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.73794315469205,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3547077283256893,
      "eval_ag_news_runtime": 12.2697,
      "eval_ag_news_samples_per_second": 40.751,
      "eval_ag_news_steps_per_second": 0.082,
      "eval_ag_news_token_set_f1": 0.3571035433728409,
      "eval_ag_news_token_set_f1_sem": 0.004461182716103829,
      "eval_ag_news_token_set_precision": 0.3402723154953633,
      "eval_ag_news_token_set_recall": 0.3905027789000208,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 161875
    },
    {
      "epoch": 31.08,
      "eval_anthropic_toxic_prompts_accuracy": 0.1168125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.06864962821325,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10933382191572798,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6762846112251282,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009072190142944421,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1640625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.03286167651298939,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2147488594055176,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.248,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.936,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.7,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.848,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.89703846890967,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21388412251431768,
      "eval_anthropic_toxic_prompts_runtime": 11.1696,
      "eval_anthropic_toxic_prompts_samples_per_second": 44.764,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.09,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3599860664755848,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006671608185089837,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4396704560039951,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3342703776125466,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 161875
    },
    {
      "epoch": 31.08,
      "eval_arxiv_accuracy": 0.34921875,
      "eval_arxiv_bleu_score": 4.5403501089476785,
      "eval_arxiv_bleu_score_sem": 0.13835839669324407,
      "eval_arxiv_emb_cos_sim": 0.7701067924499512,
      "eval_arxiv_emb_cos_sim_sem": 0.006764259011535232,
      "eval_arxiv_emb_top1_equal": 0.2890625,
      "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3710904121398926,
      "eval_arxiv_n_ngrams_match_1": 15.314,
      "eval_arxiv_n_ngrams_match_2": 3.05,
      "eval_arxiv_n_ngrams_match_3": 0.716,
      "eval_arxiv_num_pred_words": 40.288,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.11025193016067,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36707043037104325,
      "eval_arxiv_runtime": 11.3767,
      "eval_arxiv_samples_per_second": 43.949,
      "eval_arxiv_steps_per_second": 0.088,
      "eval_arxiv_token_set_f1": 0.3592909166139714,
      "eval_arxiv_token_set_f1_sem": 0.004200398816105726,
      "eval_arxiv_token_set_precision": 0.31136650773273344,
      "eval_arxiv_token_set_recall": 0.438719750780837,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 161875
    },
    {
      "epoch": 31.08,
      "eval_python_code_alpaca_accuracy": 0.163,
      "eval_python_code_alpaca_bleu_score": 4.510016234122968,
      "eval_python_code_alpaca_bleu_score_sem": 0.1419761170517517,
      "eval_python_code_alpaca_emb_cos_sim": 0.7613218426704407,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.00808675072664416,
      "eval_python_code_alpaca_emb_top1_equal": 0.1484375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.881549596786499,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.94,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.926,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.948,
      "eval_python_code_alpaca_num_pred_words": 44.51,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.841899519400673,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3336546760579685,
      "eval_python_code_alpaca_runtime": 10.9589,
      "eval_python_code_alpaca_samples_per_second": 45.625,
      "eval_python_code_alpaca_steps_per_second": 0.091,
      "eval_python_code_alpaca_token_set_f1": 0.47567054116838636,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005632694293134458,
      "eval_python_code_alpaca_token_set_precision": 0.5430999931934424,
      "eval_python_code_alpaca_token_set_recall": 0.4436771494240172,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 161875
    },
    {
      "epoch": 31.08,
      "eval_wikibio_accuracy": 0.323875,
      "eval_wikibio_bleu_score": 6.00179036755062,
      "eval_wikibio_bleu_score_sem": 0.22314716690448563,
      "eval_wikibio_emb_cos_sim": 0.7465529441833496,
      "eval_wikibio_emb_cos_sim_sem": 0.00961898755045657,
      "eval_wikibio_emb_top1_equal": 0.1875,
      "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.700416326522827,
      "eval_wikibio_n_ngrams_match_1": 9.996,
      "eval_wikibio_n_ngrams_match_2": 3.324,
      "eval_wikibio_n_ngrams_match_3": 1.256,
      "eval_wikibio_num_pred_words": 35.502,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 40.46414715145646,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.35757832825184976,
      "eval_wikibio_runtime": 10.8455,
      "eval_wikibio_samples_per_second": 46.102,
      "eval_wikibio_steps_per_second": 0.092,
      "eval_wikibio_token_set_f1": 0.3167721591700093,
      "eval_wikibio_token_set_f1_sem": 0.0056380098609037635,
      "eval_wikibio_token_set_precision": 0.32651134222640493,
      "eval_wikibio_token_set_recall": 0.3227582660970667,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 161875
    },
    {
      "epoch": 31.08,
      "eval_nq_accuracy": 0.5325,
      "eval_nq_bleu_score": 11.992439285153472,
      "eval_nq_bleu_score_sem": 0.4845579856759089,
      "eval_nq_emb_cos_sim": 0.8342034816741943,
      "eval_nq_emb_cos_sim_sem": 0.008027959824238512,
      "eval_nq_emb_top1_equal": 0.2578125,
      "eval_nq_emb_top1_equal_sem": 0.038815656435002115,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.151998281478882,
      "eval_nq_n_ngrams_match_1": 23.178,
      "eval_nq_n_ngrams_match_2": 8.638,
      "eval_nq_n_ngrams_match_3": 3.988,
      "eval_nq_num_pred_words": 49.072,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.60203051235745,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4506395063249068,
      "eval_nq_runtime": 11.6149,
      "eval_nq_samples_per_second": 43.048,
      "eval_nq_steps_per_second": 0.086,
      "eval_nq_token_set_f1": 0.46553303170104965,
      "eval_nq_token_set_f1_sem": 0.005063049778589747,
      "eval_nq_token_set_precision": 0.4228063164120291,
      "eval_nq_token_set_recall": 0.5267799291803646,
      "eval_nq_true_num_tokens": 64.0,
      "step": 161875
    },
    {
      "epoch": 31.08,
      "learning_rate": 0.001,
      "loss": 2.5242,
      "step": 161880
    },
    {
      "epoch": 31.09,
      "learning_rate": 0.001,
      "loss": 2.5212,
      "step": 161892
    },
    {
      "epoch": 31.09,
      "learning_rate": 0.001,
      "loss": 2.5254,
      "step": 161904
    },
    {
      "epoch": 31.09,
      "learning_rate": 0.001,
      "loss": 2.5191,
      "step": 161916
    },
    {
      "epoch": 31.09,
      "learning_rate": 0.001,
      "loss": 2.5203,
      "step": 161928
    },
    {
      "epoch": 31.09,
      "learning_rate": 0.001,
      "loss": 2.5217,
      "step": 161940
    },
    {
      "epoch": 31.1,
      "learning_rate": 0.001,
      "loss": 2.5309,
      "step": 161952
    },
    {
      "epoch": 31.1,
      "learning_rate": 0.001,
      "loss": 2.5232,
      "step": 161964
    },
    {
      "epoch": 31.1,
      "learning_rate": 0.001,
      "loss": 2.5266,
      "step": 161976
    },
    {
      "epoch": 31.1,
      "learning_rate": 0.001,
      "loss": 2.5268,
      "step": 161988
    },
    {
      "epoch": 31.11,
      "learning_rate": 0.001,
      "loss": 2.5241,
      "step": 162000
    },
    {
      "epoch": 31.11,
      "learning_rate": 0.001,
      "loss": 2.5299,
      "step": 162012
    },
    {
      "epoch": 31.11,
      "learning_rate": 0.001,
      "loss": 2.5193,
      "step": 162024
    },
    {
      "epoch": 31.11,
      "learning_rate": 0.001,
      "loss": 2.5174,
      "step": 162036
    },
    {
      "epoch": 31.12,
      "learning_rate": 0.001,
      "loss": 2.532,
      "step": 162048
    },
    {
      "epoch": 31.12,
      "learning_rate": 0.001,
      "loss": 2.5276,
      "step": 162060
    },
    {
      "epoch": 31.12,
      "learning_rate": 0.001,
      "loss": 2.5216,
      "step": 162072
    },
    {
      "epoch": 31.12,
      "learning_rate": 0.001,
      "loss": 2.523,
      "step": 162084
    },
    {
      "epoch": 31.12,
      "learning_rate": 0.001,
      "loss": 2.5286,
      "step": 162096
    },
    {
      "epoch": 31.13,
      "learning_rate": 0.001,
      "loss": 2.524,
      "step": 162108
    },
    {
      "epoch": 31.13,
      "learning_rate": 0.001,
      "loss": 2.5296,
      "step": 162120
    },
    {
      "epoch": 31.13,
      "learning_rate": 0.001,
      "loss": 2.5239,
      "step": 162132
    },
    {
      "epoch": 31.13,
      "learning_rate": 0.001,
      "loss": 2.5285,
      "step": 162144
    },
    {
      "epoch": 31.14,
      "learning_rate": 0.001,
      "loss": 2.5209,
      "step": 162156
    },
    {
      "epoch": 31.14,
      "learning_rate": 0.001,
      "loss": 2.531,
      "step": 162168
    },
    {
      "epoch": 31.14,
      "learning_rate": 0.001,
      "loss": 2.5255,
      "step": 162180
    },
    {
      "epoch": 31.14,
      "learning_rate": 0.001,
      "loss": 2.521,
      "step": 162192
    },
    {
      "epoch": 31.15,
      "learning_rate": 0.001,
      "loss": 2.5254,
      "step": 162204
    },
    {
      "epoch": 31.15,
      "learning_rate": 0.001,
      "loss": 2.5099,
      "step": 162216
    },
    {
      "epoch": 31.15,
      "learning_rate": 0.001,
      "loss": 2.5311,
      "step": 162228
    },
    {
      "epoch": 31.15,
      "learning_rate": 0.001,
      "loss": 2.5184,
      "step": 162240
    },
    {
      "epoch": 31.15,
      "learning_rate": 0.001,
      "loss": 2.5277,
      "step": 162252
    },
    {
      "epoch": 31.16,
      "learning_rate": 0.001,
      "loss": 2.5207,
      "step": 162264
    },
    {
      "epoch": 31.16,
      "learning_rate": 0.001,
      "loss": 2.5192,
      "step": 162276
    },
    {
      "epoch": 31.16,
      "learning_rate": 0.001,
      "loss": 2.5313,
      "step": 162288
    },
    {
      "epoch": 31.16,
      "learning_rate": 0.001,
      "loss": 2.5253,
      "step": 162300
    },
    {
      "epoch": 31.17,
      "learning_rate": 0.001,
      "loss": 2.5274,
      "step": 162312
    },
    {
      "epoch": 31.17,
      "learning_rate": 0.001,
      "loss": 2.5331,
      "step": 162324
    },
    {
      "epoch": 31.17,
      "learning_rate": 0.001,
      "loss": 2.5301,
      "step": 162336
    },
    {
      "epoch": 31.17,
      "learning_rate": 0.001,
      "loss": 2.53,
      "step": 162348
    },
    {
      "epoch": 31.18,
      "learning_rate": 0.001,
      "loss": 2.5283,
      "step": 162360
    },
    {
      "epoch": 31.18,
      "learning_rate": 0.001,
      "loss": 2.5254,
      "step": 162372
    },
    {
      "epoch": 31.18,
      "learning_rate": 0.001,
      "loss": 2.5275,
      "step": 162384
    },
    {
      "epoch": 31.18,
      "learning_rate": 0.001,
      "loss": 2.5173,
      "step": 162396
    },
    {
      "epoch": 31.18,
      "learning_rate": 0.001,
      "loss": 2.5254,
      "step": 162408
    },
    {
      "epoch": 31.19,
      "learning_rate": 0.001,
      "loss": 2.5358,
      "step": 162420
    },
    {
      "epoch": 31.19,
      "learning_rate": 0.001,
      "loss": 2.5277,
      "step": 162432
    },
    {
      "epoch": 31.19,
      "learning_rate": 0.001,
      "loss": 2.5295,
      "step": 162444
    },
    {
      "epoch": 31.19,
      "learning_rate": 0.001,
      "loss": 2.5266,
      "step": 162456
    },
    {
      "epoch": 31.2,
      "learning_rate": 0.001,
      "loss": 2.5284,
      "step": 162468
    },
    {
      "epoch": 31.2,
      "learning_rate": 0.001,
      "loss": 2.5327,
      "step": 162480
    },
    {
      "epoch": 31.2,
      "learning_rate": 0.001,
      "loss": 2.5365,
      "step": 162492
    },
    {
      "epoch": 31.2,
      "eval_ag_news_accuracy": 0.3255,
      "eval_ag_news_bleu_score": 4.8157969104955765,
      "eval_ag_news_bleu_score_sem": 0.14343317213654067,
      "eval_ag_news_emb_cos_sim": 0.814470112323761,
      "eval_ag_news_emb_cos_sim_sem": 0.00717236628712171,
      "eval_ag_news_emb_top1_equal": 0.265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5124094486236572,
      "eval_ag_news_n_ngrams_match_1": 14.252,
      "eval_ag_news_n_ngrams_match_2": 3.166,
      "eval_ag_news_n_ngrams_match_3": 0.88,
      "eval_ag_news_num_pred_words": 46.918,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.52895683574676,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3526979657070748,
      "eval_ag_news_runtime": 10.7704,
      "eval_ag_news_samples_per_second": 46.423,
      "eval_ag_news_steps_per_second": 0.093,
      "eval_ag_news_token_set_f1": 0.35472414562009524,
      "eval_ag_news_token_set_f1_sem": 0.0043366112160831746,
      "eval_ag_news_token_set_precision": 0.3392239084581563,
      "eval_ag_news_token_set_recall": 0.3867220334368949,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 162500
    },
    {
      "epoch": 31.2,
      "eval_anthropic_toxic_prompts_accuracy": 0.115375,
      "eval_anthropic_toxic_prompts_bleu_score": 3.0765887867127684,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11501881902507922,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6759611368179321,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008862206641129052,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.221386194229126,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.222,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.908,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.728,
      "eval_anthropic_toxic_prompts_num_pred_words": 48.252,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.06283807441126,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21150325841157802,
      "eval_anthropic_toxic_prompts_runtime": 10.3298,
      "eval_anthropic_toxic_prompts_samples_per_second": 48.404,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.097,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36029945475367525,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006256222211117072,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44336828737449474,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.33048520789764047,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 162500
    },
    {
      "epoch": 31.2,
      "eval_arxiv_accuracy": 0.350375,
      "eval_arxiv_bleu_score": 4.516365933146245,
      "eval_arxiv_bleu_score_sem": 0.13362652451020654,
      "eval_arxiv_emb_cos_sim": 0.7727753520011902,
      "eval_arxiv_emb_cos_sim_sem": 0.008605923509135449,
      "eval_arxiv_emb_top1_equal": 0.2890625,
      "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3669466972351074,
      "eval_arxiv_n_ngrams_match_1": 15.422,
      "eval_arxiv_n_ngrams_match_2": 3.054,
      "eval_arxiv_n_ngrams_match_3": 0.714,
      "eval_arxiv_num_pred_words": 40.418,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.989876917463025,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3680837233936842,
      "eval_arxiv_runtime": 10.0863,
      "eval_arxiv_samples_per_second": 49.572,
      "eval_arxiv_steps_per_second": 0.099,
      "eval_arxiv_token_set_f1": 0.3598643137804065,
      "eval_arxiv_token_set_f1_sem": 0.0043905950071664685,
      "eval_arxiv_token_set_precision": 0.3122073605404567,
      "eval_arxiv_token_set_recall": 0.44161512642063805,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 162500
    },
    {
      "epoch": 31.2,
      "eval_python_code_alpaca_accuracy": 0.16128125,
      "eval_python_code_alpaca_bleu_score": 4.661977437590216,
      "eval_python_code_alpaca_bleu_score_sem": 0.15195722144163165,
      "eval_python_code_alpaca_emb_cos_sim": 0.7648204565048218,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008633995944421735,
      "eval_python_code_alpaca_emb_top1_equal": 0.140625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8921637535095215,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.008,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.968,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.028,
      "eval_python_code_alpaca_num_pred_words": 44.28,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.032284839546293,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33735298511441925,
      "eval_python_code_alpaca_runtime": 9.6837,
      "eval_python_code_alpaca_samples_per_second": 51.633,
      "eval_python_code_alpaca_steps_per_second": 0.103,
      "eval_python_code_alpaca_token_set_f1": 0.4841179463208355,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005605954368543609,
      "eval_python_code_alpaca_token_set_precision": 0.545098084412822,
      "eval_python_code_alpaca_token_set_recall": 0.4569658162848549,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 162500
    },
    {
      "epoch": 31.2,
      "eval_wikibio_accuracy": 0.32315625,
      "eval_wikibio_bleu_score": 5.998422941832177,
      "eval_wikibio_bleu_score_sem": 0.21960553848771863,
      "eval_wikibio_emb_cos_sim": 0.7335643172264099,
      "eval_wikibio_emb_cos_sim_sem": 0.01070594687101843,
      "eval_wikibio_emb_top1_equal": 0.1484375,
      "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.67614483833313,
      "eval_wikibio_n_ngrams_match_1": 10.05,
      "eval_wikibio_n_ngrams_match_2": 3.354,
      "eval_wikibio_n_ngrams_match_3": 1.222,
      "eval_wikibio_num_pred_words": 36.178,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 39.493845053415235,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3554553576993378,
      "eval_wikibio_runtime": 10.2708,
      "eval_wikibio_samples_per_second": 48.682,
      "eval_wikibio_steps_per_second": 0.097,
      "eval_wikibio_token_set_f1": 0.3211354887587338,
      "eval_wikibio_token_set_f1_sem": 0.005245332210316318,
      "eval_wikibio_token_set_precision": 0.3287808604204915,
      "eval_wikibio_token_set_recall": 0.33056852341874576,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 162500
    },
    {
      "epoch": 31.2,
      "eval_nq_accuracy": 0.53240625,
      "eval_nq_bleu_score": 11.85249815758199,
      "eval_nq_bleu_score_sem": 0.4966317828949102,
      "eval_nq_emb_cos_sim": 0.835763692855835,
      "eval_nq_emb_cos_sim_sem": 0.007175556935262887,
      "eval_nq_emb_top1_equal": 0.3203125,
      "eval_nq_emb_top1_equal_sem": 0.041403754790620424,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1523280143737793,
      "eval_nq_n_ngrams_match_1": 23.25,
      "eval_nq_n_ngrams_match_2": 8.566,
      "eval_nq_n_ngrams_match_3": 3.952,
      "eval_nq_num_pred_words": 49.024,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.604867352454333,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45042104075010403,
      "eval_nq_runtime": 10.2861,
      "eval_nq_samples_per_second": 48.609,
      "eval_nq_steps_per_second": 0.097,
      "eval_nq_token_set_f1": 0.46330395481002234,
      "eval_nq_token_set_f1_sem": 0.005044755859379872,
      "eval_nq_token_set_precision": 0.4216141183572738,
      "eval_nq_token_set_recall": 0.5219597882821291,
      "eval_nq_true_num_tokens": 64.0,
      "step": 162500
    },
    {
      "epoch": 31.2,
      "learning_rate": 0.001,
      "loss": 2.5205,
      "step": 162504
    },
    {
      "epoch": 31.21,
      "learning_rate": 0.001,
      "loss": 2.5243,
      "step": 162516
    },
    {
      "epoch": 31.21,
      "learning_rate": 0.001,
      "loss": 2.5197,
      "step": 162528
    },
    {
      "epoch": 31.21,
      "learning_rate": 0.001,
      "loss": 2.5271,
      "step": 162540
    },
    {
      "epoch": 31.21,
      "learning_rate": 0.001,
      "loss": 2.5285,
      "step": 162552
    },
    {
      "epoch": 31.21,
      "learning_rate": 0.001,
      "loss": 2.5327,
      "step": 162564
    },
    {
      "epoch": 31.22,
      "learning_rate": 0.001,
      "loss": 2.5314,
      "step": 162576
    },
    {
      "epoch": 31.22,
      "learning_rate": 0.001,
      "loss": 2.5285,
      "step": 162588
    },
    {
      "epoch": 31.22,
      "learning_rate": 0.001,
      "loss": 2.5292,
      "step": 162600
    },
    {
      "epoch": 31.22,
      "learning_rate": 0.001,
      "loss": 2.5275,
      "step": 162612
    },
    {
      "epoch": 31.23,
      "learning_rate": 0.001,
      "loss": 2.5185,
      "step": 162624
    },
    {
      "epoch": 31.23,
      "learning_rate": 0.001,
      "loss": 2.5236,
      "step": 162636
    },
    {
      "epoch": 31.23,
      "learning_rate": 0.001,
      "loss": 2.5236,
      "step": 162648
    },
    {
      "epoch": 31.23,
      "learning_rate": 0.001,
      "loss": 2.5212,
      "step": 162660
    },
    {
      "epoch": 31.24,
      "learning_rate": 0.001,
      "loss": 2.5347,
      "step": 162672
    },
    {
      "epoch": 31.24,
      "learning_rate": 0.001,
      "loss": 2.5393,
      "step": 162684
    },
    {
      "epoch": 31.24,
      "learning_rate": 0.001,
      "loss": 2.5223,
      "step": 162696
    },
    {
      "epoch": 31.24,
      "learning_rate": 0.001,
      "loss": 2.5237,
      "step": 162708
    },
    {
      "epoch": 31.24,
      "learning_rate": 0.001,
      "loss": 2.5358,
      "step": 162720
    },
    {
      "epoch": 31.25,
      "learning_rate": 0.001,
      "loss": 2.5265,
      "step": 162732
    },
    {
      "epoch": 31.25,
      "learning_rate": 0.001,
      "loss": 2.5309,
      "step": 162744
    },
    {
      "epoch": 31.25,
      "learning_rate": 0.001,
      "loss": 2.5265,
      "step": 162756
    },
    {
      "epoch": 31.25,
      "learning_rate": 0.001,
      "loss": 2.533,
      "step": 162768
    },
    {
      "epoch": 31.26,
      "learning_rate": 0.001,
      "loss": 2.5208,
      "step": 162780
    },
    {
      "epoch": 31.26,
      "learning_rate": 0.001,
      "loss": 2.5336,
      "step": 162792
    },
    {
      "epoch": 31.26,
      "learning_rate": 0.001,
      "loss": 2.5222,
      "step": 162804
    },
    {
      "epoch": 31.26,
      "learning_rate": 0.001,
      "loss": 2.5216,
      "step": 162816
    },
    {
      "epoch": 31.26,
      "learning_rate": 0.001,
      "loss": 2.5204,
      "step": 162828
    },
    {
      "epoch": 31.27,
      "learning_rate": 0.001,
      "loss": 2.5335,
      "step": 162840
    },
    {
      "epoch": 31.27,
      "learning_rate": 0.001,
      "loss": 2.5156,
      "step": 162852
    },
    {
      "epoch": 31.27,
      "learning_rate": 0.001,
      "loss": 2.5261,
      "step": 162864
    },
    {
      "epoch": 31.27,
      "learning_rate": 0.001,
      "loss": 2.5286,
      "step": 162876
    },
    {
      "epoch": 31.28,
      "learning_rate": 0.001,
      "loss": 2.5282,
      "step": 162888
    },
    {
      "epoch": 31.28,
      "learning_rate": 0.001,
      "loss": 2.5226,
      "step": 162900
    },
    {
      "epoch": 31.28,
      "learning_rate": 0.001,
      "loss": 2.5294,
      "step": 162912
    },
    {
      "epoch": 31.28,
      "learning_rate": 0.001,
      "loss": 2.5379,
      "step": 162924
    },
    {
      "epoch": 31.29,
      "learning_rate": 0.001,
      "loss": 2.5151,
      "step": 162936
    },
    {
      "epoch": 31.29,
      "learning_rate": 0.001,
      "loss": 2.5227,
      "step": 162948
    },
    {
      "epoch": 31.29,
      "learning_rate": 0.001,
      "loss": 2.5231,
      "step": 162960
    },
    {
      "epoch": 31.29,
      "learning_rate": 0.001,
      "loss": 2.5288,
      "step": 162972
    },
    {
      "epoch": 31.29,
      "learning_rate": 0.001,
      "loss": 2.5289,
      "step": 162984
    },
    {
      "epoch": 31.3,
      "learning_rate": 0.001,
      "loss": 2.526,
      "step": 162996
    },
    {
      "epoch": 31.3,
      "learning_rate": 0.001,
      "loss": 2.5173,
      "step": 163008
    },
    {
      "epoch": 31.3,
      "learning_rate": 0.001,
      "loss": 2.5185,
      "step": 163020
    },
    {
      "epoch": 31.3,
      "learning_rate": 0.001,
      "loss": 2.5226,
      "step": 163032
    },
    {
      "epoch": 31.31,
      "learning_rate": 0.001,
      "loss": 2.5294,
      "step": 163044
    },
    {
      "epoch": 31.31,
      "learning_rate": 0.001,
      "loss": 2.5414,
      "step": 163056
    },
    {
      "epoch": 31.31,
      "learning_rate": 0.001,
      "loss": 2.5303,
      "step": 163068
    },
    {
      "epoch": 31.31,
      "learning_rate": 0.001,
      "loss": 2.526,
      "step": 163080
    },
    {
      "epoch": 31.32,
      "learning_rate": 0.001,
      "loss": 2.538,
      "step": 163092
    },
    {
      "epoch": 31.32,
      "learning_rate": 0.001,
      "loss": 2.5233,
      "step": 163104
    },
    {
      "epoch": 31.32,
      "learning_rate": 0.001,
      "loss": 2.5269,
      "step": 163116
    },
    {
      "epoch": 31.32,
      "eval_ag_news_accuracy": 0.3258125,
      "eval_ag_news_bleu_score": 5.05141179206533,
      "eval_ag_news_bleu_score_sem": 0.15785918778929836,
      "eval_ag_news_emb_cos_sim": 0.816232442855835,
      "eval_ag_news_emb_cos_sim_sem": 0.007050745628842954,
      "eval_ag_news_emb_top1_equal": 0.21875,
      "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.523041009902954,
      "eval_ag_news_n_ngrams_match_1": 14.392,
      "eval_ag_news_n_ngrams_match_2": 3.31,
      "eval_ag_news_n_ngrams_match_3": 0.918,
      "eval_ag_news_num_pred_words": 46.554,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.88732361867052,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35730433387532157,
      "eval_ag_news_runtime": 10.4459,
      "eval_ag_news_samples_per_second": 47.866,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.35979926231773707,
      "eval_ag_news_token_set_f1_sem": 0.004275888215531512,
      "eval_ag_news_token_set_precision": 0.3446038034398017,
      "eval_ag_news_token_set_recall": 0.39051428680686573,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 163125
    },
    {
      "epoch": 31.32,
      "eval_anthropic_toxic_prompts_accuracy": 0.11490625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1953841379288743,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12380286848932094,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6841152906417847,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00884034493909755,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.046875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01875615101164758,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.207458257675171,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.222,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.99,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.744,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.0,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.716184146661625,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 62.9765625,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21617079038016188,
      "eval_anthropic_toxic_prompts_runtime": 9.8386,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.82,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.102,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35916719908479533,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006865310890588754,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4417478563921143,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3264666692226753,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 163125
    },
    {
      "epoch": 31.32,
      "eval_arxiv_accuracy": 0.34775,
      "eval_arxiv_bleu_score": 4.274956260823223,
      "eval_arxiv_bleu_score_sem": 0.12371440771032687,
      "eval_arxiv_emb_cos_sim": 0.7722713947296143,
      "eval_arxiv_emb_cos_sim_sem": 0.00696955828626532,
      "eval_arxiv_emb_top1_equal": 0.28125,
      "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3779945373535156,
      "eval_arxiv_n_ngrams_match_1": 15.03,
      "eval_arxiv_n_ngrams_match_2": 2.858,
      "eval_arxiv_n_ngrams_match_3": 0.62,
      "eval_arxiv_num_pred_words": 39.608,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.311928152325013,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36297525739503217,
      "eval_arxiv_runtime": 10.7286,
      "eval_arxiv_samples_per_second": 46.604,
      "eval_arxiv_steps_per_second": 0.093,
      "eval_arxiv_token_set_f1": 0.35319981847151216,
      "eval_arxiv_token_set_f1_sem": 0.004238243965915341,
      "eval_arxiv_token_set_precision": 0.3036387711509786,
      "eval_arxiv_token_set_recall": 0.4421727786980744,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 163125
    },
    {
      "epoch": 31.32,
      "eval_python_code_alpaca_accuracy": 0.16121875,
      "eval_python_code_alpaca_bleu_score": 4.704841465728496,
      "eval_python_code_alpaca_bleu_score_sem": 0.14970498746283054,
      "eval_python_code_alpaca_emb_cos_sim": 0.7609281539916992,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.00770459669011728,
      "eval_python_code_alpaca_emb_top1_equal": 0.125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8745200634002686,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.922,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.982,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.002,
      "eval_python_code_alpaca_num_pred_words": 43.984,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.716919082782915,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33434303139208166,
      "eval_python_code_alpaca_runtime": 9.8394,
      "eval_python_code_alpaca_samples_per_second": 50.816,
      "eval_python_code_alpaca_steps_per_second": 0.102,
      "eval_python_code_alpaca_token_set_f1": 0.4839838619599384,
      "eval_python_code_alpaca_token_set_f1_sem": 0.00538776787646499,
      "eval_python_code_alpaca_token_set_precision": 0.5419609860173293,
      "eval_python_code_alpaca_token_set_recall": 0.4579719061606382,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 163125
    },
    {
      "epoch": 31.32,
      "eval_wikibio_accuracy": 0.3233125,
      "eval_wikibio_bleu_score": 6.032168041877137,
      "eval_wikibio_bleu_score_sem": 0.20542151548156423,
      "eval_wikibio_emb_cos_sim": 0.7514613270759583,
      "eval_wikibio_emb_cos_sim_sem": 0.008478430609479011,
      "eval_wikibio_emb_top1_equal": 0.15625,
      "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.713515043258667,
      "eval_wikibio_n_ngrams_match_1": 10.368,
      "eval_wikibio_n_ngrams_match_2": 3.456,
      "eval_wikibio_n_ngrams_match_3": 1.268,
      "eval_wikibio_num_pred_words": 36.624,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 40.997662105386766,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.35995825411647586,
      "eval_wikibio_runtime": 10.3184,
      "eval_wikibio_samples_per_second": 48.457,
      "eval_wikibio_steps_per_second": 0.097,
      "eval_wikibio_token_set_f1": 0.3271373011203448,
      "eval_wikibio_token_set_f1_sem": 0.005245375736310838,
      "eval_wikibio_token_set_precision": 0.3362228613100062,
      "eval_wikibio_token_set_recall": 0.334700881726363,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 163125
    },
    {
      "epoch": 31.32,
      "eval_nq_accuracy": 0.531625,
      "eval_nq_bleu_score": 11.931165235614845,
      "eval_nq_bleu_score_sem": 0.4792714629073597,
      "eval_nq_emb_cos_sim": 0.8411628007888794,
      "eval_nq_emb_cos_sim_sem": 0.0072527772057244325,
      "eval_nq_emb_top1_equal": 0.328125,
      "eval_nq_emb_top1_equal_sem": 0.041664103776406315,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1540606021881104,
      "eval_nq_n_ngrams_match_1": 23.33,
      "eval_nq_n_ngrams_match_2": 8.606,
      "eval_nq_n_ngrams_match_3": 3.948,
      "eval_nq_num_pred_words": 49.058,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.619788963541184,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4534051113926958,
      "eval_nq_runtime": 10.404,
      "eval_nq_samples_per_second": 48.058,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.46844718309509725,
      "eval_nq_token_set_f1_sem": 0.004880959919905421,
      "eval_nq_token_set_precision": 0.4256123992319499,
      "eval_nq_token_set_recall": 0.5281752103927022,
      "eval_nq_true_num_tokens": 64.0,
      "step": 163125
    },
    {
      "epoch": 31.32,
      "learning_rate": 0.001,
      "loss": 2.528,
      "step": 163128
    },
    {
      "epoch": 31.32,
      "learning_rate": 0.001,
      "loss": 2.527,
      "step": 163140
    },
    {
      "epoch": 31.33,
      "learning_rate": 0.001,
      "loss": 2.5294,
      "step": 163152
    },
    {
      "epoch": 31.33,
      "learning_rate": 0.001,
      "loss": 2.5267,
      "step": 163164
    },
    {
      "epoch": 31.33,
      "learning_rate": 0.001,
      "loss": 2.5278,
      "step": 163176
    },
    {
      "epoch": 31.33,
      "learning_rate": 0.001,
      "loss": 2.5244,
      "step": 163188
    },
    {
      "epoch": 31.34,
      "learning_rate": 0.001,
      "loss": 2.5261,
      "step": 163200
    },
    {
      "epoch": 31.34,
      "learning_rate": 0.001,
      "loss": 2.5257,
      "step": 163212
    },
    {
      "epoch": 31.34,
      "learning_rate": 0.001,
      "loss": 2.5194,
      "step": 163224
    },
    {
      "epoch": 31.34,
      "learning_rate": 0.001,
      "loss": 2.5295,
      "step": 163236
    },
    {
      "epoch": 31.35,
      "learning_rate": 0.001,
      "loss": 2.5185,
      "step": 163248
    },
    {
      "epoch": 31.35,
      "learning_rate": 0.001,
      "loss": 2.5253,
      "step": 163260
    },
    {
      "epoch": 31.35,
      "learning_rate": 0.001,
      "loss": 2.5279,
      "step": 163272
    },
    {
      "epoch": 31.35,
      "learning_rate": 0.001,
      "loss": 2.5325,
      "step": 163284
    },
    {
      "epoch": 31.35,
      "learning_rate": 0.001,
      "loss": 2.5275,
      "step": 163296
    },
    {
      "epoch": 31.36,
      "learning_rate": 0.001,
      "loss": 2.5324,
      "step": 163308
    },
    {
      "epoch": 31.36,
      "learning_rate": 0.001,
      "loss": 2.5221,
      "step": 163320
    },
    {
      "epoch": 31.36,
      "learning_rate": 0.001,
      "loss": 2.5308,
      "step": 163332
    },
    {
      "epoch": 31.36,
      "learning_rate": 0.001,
      "loss": 2.5248,
      "step": 163344
    },
    {
      "epoch": 31.37,
      "learning_rate": 0.001,
      "loss": 2.516,
      "step": 163356
    },
    {
      "epoch": 31.37,
      "learning_rate": 0.001,
      "loss": 2.5224,
      "step": 163368
    },
    {
      "epoch": 31.37,
      "learning_rate": 0.001,
      "loss": 2.5288,
      "step": 163380
    },
    {
      "epoch": 31.37,
      "learning_rate": 0.001,
      "loss": 2.534,
      "step": 163392
    },
    {
      "epoch": 31.38,
      "learning_rate": 0.001,
      "loss": 2.5281,
      "step": 163404
    },
    {
      "epoch": 31.38,
      "learning_rate": 0.001,
      "loss": 2.5428,
      "step": 163416
    },
    {
      "epoch": 31.38,
      "learning_rate": 0.001,
      "loss": 2.5212,
      "step": 163428
    },
    {
      "epoch": 31.38,
      "learning_rate": 0.001,
      "loss": 2.5246,
      "step": 163440
    },
    {
      "epoch": 31.38,
      "learning_rate": 0.001,
      "loss": 2.5291,
      "step": 163452
    },
    {
      "epoch": 31.39,
      "learning_rate": 0.001,
      "loss": 2.534,
      "step": 163464
    },
    {
      "epoch": 31.39,
      "learning_rate": 0.001,
      "loss": 2.5315,
      "step": 163476
    },
    {
      "epoch": 31.39,
      "learning_rate": 0.001,
      "loss": 2.5305,
      "step": 163488
    },
    {
      "epoch": 31.39,
      "learning_rate": 0.001,
      "loss": 2.5449,
      "step": 163500
    },
    {
      "epoch": 31.4,
      "learning_rate": 0.001,
      "loss": 2.5256,
      "step": 163512
    },
    {
      "epoch": 31.4,
      "learning_rate": 0.001,
      "loss": 2.5313,
      "step": 163524
    },
    {
      "epoch": 31.4,
      "learning_rate": 0.001,
      "loss": 2.5265,
      "step": 163536
    },
    {
      "epoch": 31.4,
      "learning_rate": 0.001,
      "loss": 2.5311,
      "step": 163548
    },
    {
      "epoch": 31.41,
      "learning_rate": 0.001,
      "loss": 2.5263,
      "step": 163560
    },
    {
      "epoch": 31.41,
      "learning_rate": 0.001,
      "loss": 2.5267,
      "step": 163572
    },
    {
      "epoch": 31.41,
      "learning_rate": 0.001,
      "loss": 2.5295,
      "step": 163584
    },
    {
      "epoch": 31.41,
      "learning_rate": 0.001,
      "loss": 2.5198,
      "step": 163596
    },
    {
      "epoch": 31.41,
      "learning_rate": 0.001,
      "loss": 2.54,
      "step": 163608
    },
    {
      "epoch": 31.42,
      "learning_rate": 0.001,
      "loss": 2.5344,
      "step": 163620
    },
    {
      "epoch": 31.42,
      "learning_rate": 0.001,
      "loss": 2.5408,
      "step": 163632
    },
    {
      "epoch": 31.42,
      "learning_rate": 0.001,
      "loss": 2.5327,
      "step": 163644
    },
    {
      "epoch": 31.42,
      "learning_rate": 0.001,
      "loss": 2.5192,
      "step": 163656
    },
    {
      "epoch": 31.43,
      "learning_rate": 0.001,
      "loss": 2.5284,
      "step": 163668
    },
    {
      "epoch": 31.43,
      "learning_rate": 0.001,
      "loss": 2.5333,
      "step": 163680
    },
    {
      "epoch": 31.43,
      "learning_rate": 0.001,
      "loss": 2.5204,
      "step": 163692
    },
    {
      "epoch": 31.43,
      "learning_rate": 0.001,
      "loss": 2.5377,
      "step": 163704
    },
    {
      "epoch": 31.44,
      "learning_rate": 0.001,
      "loss": 2.5228,
      "step": 163716
    },
    {
      "epoch": 31.44,
      "learning_rate": 0.001,
      "loss": 2.5299,
      "step": 163728
    },
    {
      "epoch": 31.44,
      "learning_rate": 0.001,
      "loss": 2.5217,
      "step": 163740
    },
    {
      "epoch": 31.44,
      "eval_ag_news_accuracy": 0.32846875,
      "eval_ag_news_bleu_score": 5.039003197574389,
      "eval_ag_news_bleu_score_sem": 0.1541715898406364,
      "eval_ag_news_emb_cos_sim": 0.8208545446395874,
      "eval_ag_news_emb_cos_sim_sem": 0.007011624792346241,
      "eval_ag_news_emb_top1_equal": 0.328125,
      "eval_ag_news_emb_top1_equal_sem": 0.041664103776406315,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.505810022354126,
      "eval_ag_news_n_ngrams_match_1": 14.422,
      "eval_ag_news_n_ngrams_match_2": 3.276,
      "eval_ag_news_n_ngrams_match_3": 0.944,
      "eval_ag_news_num_pred_words": 46.488,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.30841348743077,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.361005150251066,
      "eval_ag_news_runtime": 11.0556,
      "eval_ag_news_samples_per_second": 45.226,
      "eval_ag_news_steps_per_second": 0.09,
      "eval_ag_news_token_set_f1": 0.36118238111025575,
      "eval_ag_news_token_set_f1_sem": 0.0042183406955035715,
      "eval_ag_news_token_set_precision": 0.34607792607459703,
      "eval_ag_news_token_set_recall": 0.39142795024551025,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 163750
    },
    {
      "epoch": 31.44,
      "eval_anthropic_toxic_prompts_accuracy": 0.11471875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.210588473979575,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12834171134688066,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6734957098960876,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00938085061238043,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2179293632507324,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.24,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.938,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.722,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.984,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.976349653401417,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2148668053002212,
      "eval_anthropic_toxic_prompts_runtime": 9.9728,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.136,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.1,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3547417572838096,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006582008520789323,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4365952650506784,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3270299461151323,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 163750
    },
    {
      "epoch": 31.44,
      "eval_arxiv_accuracy": 0.3505625,
      "eval_arxiv_bleu_score": 4.530408259249223,
      "eval_arxiv_bleu_score_sem": 0.12983283075612403,
      "eval_arxiv_emb_cos_sim": 0.7802456617355347,
      "eval_arxiv_emb_cos_sim_sem": 0.006992751565291964,
      "eval_arxiv_emb_top1_equal": 0.328125,
      "eval_arxiv_emb_top1_equal_sem": 0.041664103776406315,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.371882438659668,
      "eval_arxiv_n_ngrams_match_1": 15.624,
      "eval_arxiv_n_ngrams_match_2": 3.06,
      "eval_arxiv_n_ngrams_match_3": 0.69,
      "eval_arxiv_num_pred_words": 40.68,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.13331715461568,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.37320598796674354,
      "eval_arxiv_runtime": 10.458,
      "eval_arxiv_samples_per_second": 47.81,
      "eval_arxiv_steps_per_second": 0.096,
      "eval_arxiv_token_set_f1": 0.36633409059665983,
      "eval_arxiv_token_set_f1_sem": 0.004224340322652853,
      "eval_arxiv_token_set_precision": 0.31885869175441045,
      "eval_arxiv_token_set_recall": 0.4474813895945389,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 163750
    },
    {
      "epoch": 31.44,
      "eval_python_code_alpaca_accuracy": 0.1603125,
      "eval_python_code_alpaca_bleu_score": 4.784778660045248,
      "eval_python_code_alpaca_bleu_score_sem": 0.1469057093692457,
      "eval_python_code_alpaca_emb_cos_sim": 0.7650465965270996,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008007274941242775,
      "eval_python_code_alpaca_emb_top1_equal": 0.1484375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.880115509033203,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.232,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.122,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.086,
      "eval_python_code_alpaca_num_pred_words": 44.836,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.816331007930984,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33974047333292134,
      "eval_python_code_alpaca_runtime": 9.9608,
      "eval_python_code_alpaca_samples_per_second": 50.197,
      "eval_python_code_alpaca_steps_per_second": 0.1,
      "eval_python_code_alpaca_token_set_f1": 0.4899935861346017,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005416135402955436,
      "eval_python_code_alpaca_token_set_precision": 0.5599297540270534,
      "eval_python_code_alpaca_token_set_recall": 0.4565526748742225,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 163750
    },
    {
      "epoch": 31.44,
      "eval_wikibio_accuracy": 0.32615625,
      "eval_wikibio_bleu_score": 6.147025233425069,
      "eval_wikibio_bleu_score_sem": 0.22781627144441916,
      "eval_wikibio_emb_cos_sim": 0.7602639198303223,
      "eval_wikibio_emb_cos_sim_sem": 0.007502393891663727,
      "eval_wikibio_emb_top1_equal": 0.1796875,
      "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6860883235931396,
      "eval_wikibio_n_ngrams_match_1": 10.382,
      "eval_wikibio_n_ngrams_match_2": 3.492,
      "eval_wikibio_n_ngrams_match_3": 1.286,
      "eval_wikibio_num_pred_words": 36.454,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 39.888510442500575,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36492933202131284,
      "eval_wikibio_runtime": 10.3641,
      "eval_wikibio_samples_per_second": 48.243,
      "eval_wikibio_steps_per_second": 0.096,
      "eval_wikibio_token_set_f1": 0.32842318131728926,
      "eval_wikibio_token_set_f1_sem": 0.005267693123155587,
      "eval_wikibio_token_set_precision": 0.3375679619713562,
      "eval_wikibio_token_set_recall": 0.33484089095201286,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 163750
    },
    {
      "epoch": 31.44,
      "eval_nq_accuracy": 0.531625,
      "eval_nq_bleu_score": 11.709163434302502,
      "eval_nq_bleu_score_sem": 0.47968053842186154,
      "eval_nq_emb_cos_sim": 0.8332261443138123,
      "eval_nq_emb_cos_sim_sem": 0.007499645126887407,
      "eval_nq_emb_top1_equal": 0.2734375,
      "eval_nq_emb_top1_equal_sem": 0.03955156411760461,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.153640031814575,
      "eval_nq_n_ngrams_match_1": 23.192,
      "eval_nq_n_ngrams_match_2": 8.5,
      "eval_nq_n_ngrams_match_3": 3.932,
      "eval_nq_num_pred_words": 49.31,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.61616449790185,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4485341197756589,
      "eval_nq_runtime": 10.386,
      "eval_nq_samples_per_second": 48.142,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.4646035692329125,
      "eval_nq_token_set_f1_sem": 0.004942877606261212,
      "eval_nq_token_set_precision": 0.4225956721210808,
      "eval_nq_token_set_recall": 0.5241405612354216,
      "eval_nq_true_num_tokens": 64.0,
      "step": 163750
    },
    {
      "epoch": 31.44,
      "learning_rate": 0.001,
      "loss": 2.53,
      "step": 163752
    },
    {
      "epoch": 31.44,
      "learning_rate": 0.001,
      "loss": 2.5325,
      "step": 163764
    },
    {
      "epoch": 31.45,
      "learning_rate": 0.001,
      "loss": 2.5326,
      "step": 163776
    },
    {
      "epoch": 31.45,
      "learning_rate": 0.001,
      "loss": 2.5315,
      "step": 163788
    },
    {
      "epoch": 31.45,
      "learning_rate": 0.001,
      "loss": 2.5288,
      "step": 163800
    },
    {
      "epoch": 31.45,
      "learning_rate": 0.001,
      "loss": 2.5273,
      "step": 163812
    },
    {
      "epoch": 31.46,
      "learning_rate": 0.001,
      "loss": 2.54,
      "step": 163824
    },
    {
      "epoch": 31.46,
      "learning_rate": 0.001,
      "loss": 2.5217,
      "step": 163836
    },
    {
      "epoch": 31.46,
      "learning_rate": 0.001,
      "loss": 2.5331,
      "step": 163848
    },
    {
      "epoch": 31.46,
      "learning_rate": 0.001,
      "loss": 2.5299,
      "step": 163860
    },
    {
      "epoch": 31.47,
      "learning_rate": 0.001,
      "loss": 2.5236,
      "step": 163872
    },
    {
      "epoch": 31.47,
      "learning_rate": 0.001,
      "loss": 2.5293,
      "step": 163884
    },
    {
      "epoch": 31.47,
      "learning_rate": 0.001,
      "loss": 2.5288,
      "step": 163896
    },
    {
      "epoch": 31.47,
      "learning_rate": 0.001,
      "loss": 2.5207,
      "step": 163908
    },
    {
      "epoch": 31.47,
      "learning_rate": 0.001,
      "loss": 2.5317,
      "step": 163920
    },
    {
      "epoch": 31.48,
      "learning_rate": 0.001,
      "loss": 2.5289,
      "step": 163932
    },
    {
      "epoch": 31.48,
      "learning_rate": 0.001,
      "loss": 2.5186,
      "step": 163944
    },
    {
      "epoch": 31.48,
      "learning_rate": 0.001,
      "loss": 2.5292,
      "step": 163956
    },
    {
      "epoch": 31.48,
      "learning_rate": 0.001,
      "loss": 2.5336,
      "step": 163968
    },
    {
      "epoch": 31.49,
      "learning_rate": 0.001,
      "loss": 2.5269,
      "step": 163980
    },
    {
      "epoch": 31.49,
      "learning_rate": 0.001,
      "loss": 2.5207,
      "step": 163992
    },
    {
      "epoch": 31.49,
      "learning_rate": 0.001,
      "loss": 2.5254,
      "step": 164004
    },
    {
      "epoch": 31.49,
      "learning_rate": 0.001,
      "loss": 2.5272,
      "step": 164016
    },
    {
      "epoch": 31.5,
      "learning_rate": 0.001,
      "loss": 2.5356,
      "step": 164028
    },
    {
      "epoch": 31.5,
      "learning_rate": 0.001,
      "loss": 2.519,
      "step": 164040
    },
    {
      "epoch": 31.5,
      "learning_rate": 0.001,
      "loss": 2.5175,
      "step": 164052
    },
    {
      "epoch": 31.5,
      "learning_rate": 0.001,
      "loss": 2.5293,
      "step": 164064
    },
    {
      "epoch": 31.5,
      "learning_rate": 0.001,
      "loss": 2.5371,
      "step": 164076
    },
    {
      "epoch": 31.51,
      "learning_rate": 0.001,
      "loss": 2.5306,
      "step": 164088
    },
    {
      "epoch": 31.51,
      "learning_rate": 0.001,
      "loss": 2.5396,
      "step": 164100
    },
    {
      "epoch": 31.51,
      "learning_rate": 0.001,
      "loss": 2.5272,
      "step": 164112
    },
    {
      "epoch": 31.51,
      "learning_rate": 0.001,
      "loss": 2.5226,
      "step": 164124
    },
    {
      "epoch": 31.52,
      "learning_rate": 0.001,
      "loss": 2.5238,
      "step": 164136
    },
    {
      "epoch": 31.52,
      "learning_rate": 0.001,
      "loss": 2.5428,
      "step": 164148
    },
    {
      "epoch": 31.52,
      "learning_rate": 0.001,
      "loss": 2.5368,
      "step": 164160
    },
    {
      "epoch": 31.52,
      "learning_rate": 0.001,
      "loss": 2.5262,
      "step": 164172
    },
    {
      "epoch": 31.53,
      "learning_rate": 0.001,
      "loss": 2.53,
      "step": 164184
    },
    {
      "epoch": 31.53,
      "learning_rate": 0.001,
      "loss": 2.5291,
      "step": 164196
    },
    {
      "epoch": 31.53,
      "learning_rate": 0.001,
      "loss": 2.5316,
      "step": 164208
    },
    {
      "epoch": 31.53,
      "learning_rate": 0.001,
      "loss": 2.5271,
      "step": 164220
    },
    {
      "epoch": 31.53,
      "learning_rate": 0.001,
      "loss": 2.5319,
      "step": 164232
    },
    {
      "epoch": 31.54,
      "learning_rate": 0.001,
      "loss": 2.5275,
      "step": 164244
    },
    {
      "epoch": 31.54,
      "learning_rate": 0.001,
      "loss": 2.5249,
      "step": 164256
    },
    {
      "epoch": 31.54,
      "learning_rate": 0.001,
      "loss": 2.5195,
      "step": 164268
    },
    {
      "epoch": 31.54,
      "learning_rate": 0.001,
      "loss": 2.5304,
      "step": 164280
    },
    {
      "epoch": 31.55,
      "learning_rate": 0.001,
      "loss": 2.5355,
      "step": 164292
    },
    {
      "epoch": 31.55,
      "learning_rate": 0.001,
      "loss": 2.5333,
      "step": 164304
    },
    {
      "epoch": 31.55,
      "learning_rate": 0.001,
      "loss": 2.5224,
      "step": 164316
    },
    {
      "epoch": 31.55,
      "learning_rate": 0.001,
      "loss": 2.5286,
      "step": 164328
    },
    {
      "epoch": 31.56,
      "learning_rate": 0.001,
      "loss": 2.5302,
      "step": 164340
    },
    {
      "epoch": 31.56,
      "learning_rate": 0.001,
      "loss": 2.5333,
      "step": 164352
    },
    {
      "epoch": 31.56,
      "learning_rate": 0.001,
      "loss": 2.5166,
      "step": 164364
    },
    {
      "epoch": 31.56,
      "eval_ag_news_accuracy": 0.326375,
      "eval_ag_news_bleu_score": 5.003465702907255,
      "eval_ag_news_bleu_score_sem": 0.1654964090580242,
      "eval_ag_news_emb_cos_sim": 0.814096212387085,
      "eval_ag_news_emb_cos_sim_sem": 0.007507160436812505,
      "eval_ag_news_emb_top1_equal": 0.265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.517871618270874,
      "eval_ag_news_n_ngrams_match_1": 14.266,
      "eval_ag_news_n_ngrams_match_2": 3.21,
      "eval_ag_news_n_ngrams_match_3": 0.97,
      "eval_ag_news_num_pred_words": 46.702,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.71259877119662,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3546250416272494,
      "eval_ag_news_runtime": 10.2094,
      "eval_ag_news_samples_per_second": 48.974,
      "eval_ag_news_steps_per_second": 0.098,
      "eval_ag_news_token_set_f1": 0.353953756214178,
      "eval_ag_news_token_set_f1_sem": 0.004544093782482766,
      "eval_ag_news_token_set_precision": 0.3390273724286383,
      "eval_ag_news_token_set_recall": 0.3876031235622757,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 164375
    },
    {
      "epoch": 31.56,
      "eval_anthropic_toxic_prompts_accuracy": 0.115125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.175341015605718,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11980818916127134,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6803178787231445,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008559745709691192,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.22593092918396,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.288,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.948,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.72,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.402,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.177001254874135,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21776062843113347,
      "eval_anthropic_toxic_prompts_runtime": 9.7449,
      "eval_anthropic_toxic_prompts_samples_per_second": 51.309,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.103,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3570546015576493,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006526763686160576,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4420488178664897,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3233557179888408,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 164375
    },
    {
      "epoch": 31.56,
      "eval_arxiv_accuracy": 0.34890625,
      "eval_arxiv_bleu_score": 4.300640942706155,
      "eval_arxiv_bleu_score_sem": 0.12462781001762695,
      "eval_arxiv_emb_cos_sim": 0.7701647281646729,
      "eval_arxiv_emb_cos_sim_sem": 0.009626926304500301,
      "eval_arxiv_emb_top1_equal": 0.265625,
      "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.373840093612671,
      "eval_arxiv_n_ngrams_match_1": 15.194,
      "eval_arxiv_n_ngrams_match_2": 2.918,
      "eval_arxiv_n_ngrams_match_3": 0.62,
      "eval_arxiv_num_pred_words": 40.104,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.190405999138104,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3629685994650904,
      "eval_arxiv_runtime": 10.1877,
      "eval_arxiv_samples_per_second": 49.079,
      "eval_arxiv_steps_per_second": 0.098,
      "eval_arxiv_token_set_f1": 0.35722902882629975,
      "eval_arxiv_token_set_f1_sem": 0.0042983235346720486,
      "eval_arxiv_token_set_precision": 0.310239895502916,
      "eval_arxiv_token_set_recall": 0.4374415791640846,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 164375
    },
    {
      "epoch": 31.56,
      "eval_python_code_alpaca_accuracy": 0.1600625,
      "eval_python_code_alpaca_bleu_score": 4.671859038326225,
      "eval_python_code_alpaca_bleu_score_sem": 0.14711054893136344,
      "eval_python_code_alpaca_emb_cos_sim": 0.7626504302024841,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008872178322068927,
      "eval_python_code_alpaca_emb_top1_equal": 0.140625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.893451690673828,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.994,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.952,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.982,
      "eval_python_code_alpaca_num_pred_words": 43.608,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.055524251587304,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33943716617776704,
      "eval_python_code_alpaca_runtime": 9.9029,
      "eval_python_code_alpaca_samples_per_second": 50.49,
      "eval_python_code_alpaca_steps_per_second": 0.101,
      "eval_python_code_alpaca_token_set_f1": 0.4806877998489439,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005594440906963912,
      "eval_python_code_alpaca_token_set_precision": 0.54350236287606,
      "eval_python_code_alpaca_token_set_recall": 0.45065989950433116,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 164375
    },
    {
      "epoch": 31.56,
      "eval_wikibio_accuracy": 0.3254375,
      "eval_wikibio_bleu_score": 6.085191009966251,
      "eval_wikibio_bleu_score_sem": 0.20736608793035471,
      "eval_wikibio_emb_cos_sim": 0.7358441352844238,
      "eval_wikibio_emb_cos_sim_sem": 0.00892497120109587,
      "eval_wikibio_emb_top1_equal": 0.203125,
      "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6791563034057617,
      "eval_wikibio_n_ngrams_match_1": 10.086,
      "eval_wikibio_n_ngrams_match_2": 3.414,
      "eval_wikibio_n_ngrams_match_3": 1.248,
      "eval_wikibio_num_pred_words": 36.012,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 39.61295865157849,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36009769363907795,
      "eval_wikibio_runtime": 9.7555,
      "eval_wikibio_samples_per_second": 51.253,
      "eval_wikibio_steps_per_second": 0.103,
      "eval_wikibio_token_set_f1": 0.32237325424182417,
      "eval_wikibio_token_set_f1_sem": 0.005284262574665414,
      "eval_wikibio_token_set_precision": 0.3268104017877293,
      "eval_wikibio_token_set_recall": 0.33361421743377817,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 164375
    },
    {
      "epoch": 31.56,
      "eval_nq_accuracy": 0.53259375,
      "eval_nq_bleu_score": 11.931243476346538,
      "eval_nq_bleu_score_sem": 0.47923175101304166,
      "eval_nq_emb_cos_sim": 0.8351569771766663,
      "eval_nq_emb_cos_sim_sem": 0.006897443711374165,
      "eval_nq_emb_top1_equal": 0.3046875,
      "eval_nq_emb_top1_equal_sem": 0.04084279867618665,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.153125524520874,
      "eval_nq_n_ngrams_match_1": 23.21,
      "eval_nq_n_ngrams_match_2": 8.654,
      "eval_nq_n_ngrams_match_3": 3.974,
      "eval_nq_num_pred_words": 48.782,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.611732558654253,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45251041126762526,
      "eval_nq_runtime": 11.1426,
      "eval_nq_samples_per_second": 44.873,
      "eval_nq_steps_per_second": 0.09,
      "eval_nq_token_set_f1": 0.4645486155757907,
      "eval_nq_token_set_f1_sem": 0.004969776111751923,
      "eval_nq_token_set_precision": 0.42241175680633714,
      "eval_nq_token_set_recall": 0.5251101939435706,
      "eval_nq_true_num_tokens": 64.0,
      "step": 164375
    },
    {
      "epoch": 31.56,
      "learning_rate": 0.001,
      "loss": 2.5303,
      "step": 164376
    },
    {
      "epoch": 31.56,
      "learning_rate": 0.001,
      "loss": 2.5281,
      "step": 164388
    },
    {
      "epoch": 31.57,
      "learning_rate": 0.001,
      "loss": 2.526,
      "step": 164400
    },
    {
      "epoch": 31.57,
      "learning_rate": 0.001,
      "loss": 2.5295,
      "step": 164412
    },
    {
      "epoch": 31.57,
      "learning_rate": 0.001,
      "loss": 2.5278,
      "step": 164424
    },
    {
      "epoch": 31.57,
      "learning_rate": 0.001,
      "loss": 2.5295,
      "step": 164436
    },
    {
      "epoch": 31.58,
      "learning_rate": 0.001,
      "loss": 2.5262,
      "step": 164448
    },
    {
      "epoch": 31.58,
      "learning_rate": 0.001,
      "loss": 2.5292,
      "step": 164460
    },
    {
      "epoch": 31.58,
      "learning_rate": 0.001,
      "loss": 2.5244,
      "step": 164472
    },
    {
      "epoch": 31.58,
      "learning_rate": 0.001,
      "loss": 2.5299,
      "step": 164484
    },
    {
      "epoch": 31.59,
      "learning_rate": 0.001,
      "loss": 2.5291,
      "step": 164496
    },
    {
      "epoch": 31.59,
      "learning_rate": 0.001,
      "loss": 2.5276,
      "step": 164508
    },
    {
      "epoch": 31.59,
      "learning_rate": 0.001,
      "loss": 2.5301,
      "step": 164520
    },
    {
      "epoch": 31.59,
      "learning_rate": 0.001,
      "loss": 2.5314,
      "step": 164532
    },
    {
      "epoch": 31.59,
      "learning_rate": 0.001,
      "loss": 2.5396,
      "step": 164544
    },
    {
      "epoch": 31.6,
      "learning_rate": 0.001,
      "loss": 2.5274,
      "step": 164556
    },
    {
      "epoch": 31.6,
      "learning_rate": 0.001,
      "loss": 2.5185,
      "step": 164568
    },
    {
      "epoch": 31.6,
      "learning_rate": 0.001,
      "loss": 2.5255,
      "step": 164580
    },
    {
      "epoch": 31.6,
      "learning_rate": 0.001,
      "loss": 2.5358,
      "step": 164592
    },
    {
      "epoch": 31.61,
      "learning_rate": 0.001,
      "loss": 2.5233,
      "step": 164604
    },
    {
      "epoch": 31.61,
      "learning_rate": 0.001,
      "loss": 2.5202,
      "step": 164616
    },
    {
      "epoch": 31.61,
      "learning_rate": 0.001,
      "loss": 2.5245,
      "step": 164628
    },
    {
      "epoch": 31.61,
      "learning_rate": 0.001,
      "loss": 2.5285,
      "step": 164640
    },
    {
      "epoch": 31.62,
      "learning_rate": 0.001,
      "loss": 2.5229,
      "step": 164652
    },
    {
      "epoch": 31.62,
      "learning_rate": 0.001,
      "loss": 2.5298,
      "step": 164664
    },
    {
      "epoch": 31.62,
      "learning_rate": 0.001,
      "loss": 2.5255,
      "step": 164676
    },
    {
      "epoch": 31.62,
      "learning_rate": 0.001,
      "loss": 2.5187,
      "step": 164688
    },
    {
      "epoch": 31.62,
      "learning_rate": 0.001,
      "loss": 2.532,
      "step": 164700
    },
    {
      "epoch": 31.63,
      "learning_rate": 0.001,
      "loss": 2.5282,
      "step": 164712
    },
    {
      "epoch": 31.63,
      "learning_rate": 0.001,
      "loss": 2.5307,
      "step": 164724
    },
    {
      "epoch": 31.63,
      "learning_rate": 0.001,
      "loss": 2.5295,
      "step": 164736
    },
    {
      "epoch": 31.63,
      "learning_rate": 0.001,
      "loss": 2.5291,
      "step": 164748
    },
    {
      "epoch": 31.64,
      "learning_rate": 0.001,
      "loss": 2.528,
      "step": 164760
    },
    {
      "epoch": 31.64,
      "learning_rate": 0.001,
      "loss": 2.5221,
      "step": 164772
    },
    {
      "epoch": 31.64,
      "learning_rate": 0.001,
      "loss": 2.5207,
      "step": 164784
    },
    {
      "epoch": 31.64,
      "learning_rate": 0.001,
      "loss": 2.5345,
      "step": 164796
    },
    {
      "epoch": 31.65,
      "learning_rate": 0.001,
      "loss": 2.5211,
      "step": 164808
    },
    {
      "epoch": 31.65,
      "learning_rate": 0.001,
      "loss": 2.5275,
      "step": 164820
    },
    {
      "epoch": 31.65,
      "learning_rate": 0.001,
      "loss": 2.5336,
      "step": 164832
    },
    {
      "epoch": 31.65,
      "learning_rate": 0.001,
      "loss": 2.5355,
      "step": 164844
    },
    {
      "epoch": 31.65,
      "learning_rate": 0.001,
      "loss": 2.5255,
      "step": 164856
    },
    {
      "epoch": 31.66,
      "learning_rate": 0.001,
      "loss": 2.536,
      "step": 164868
    },
    {
      "epoch": 31.66,
      "learning_rate": 0.001,
      "loss": 2.5328,
      "step": 164880
    },
    {
      "epoch": 31.66,
      "learning_rate": 0.001,
      "loss": 2.5364,
      "step": 164892
    },
    {
      "epoch": 31.66,
      "learning_rate": 0.001,
      "loss": 2.5346,
      "step": 164904
    },
    {
      "epoch": 31.67,
      "learning_rate": 0.001,
      "loss": 2.5284,
      "step": 164916
    },
    {
      "epoch": 31.67,
      "learning_rate": 0.001,
      "loss": 2.529,
      "step": 164928
    },
    {
      "epoch": 31.67,
      "learning_rate": 0.001,
      "loss": 2.5346,
      "step": 164940
    },
    {
      "epoch": 31.67,
      "learning_rate": 0.001,
      "loss": 2.5314,
      "step": 164952
    },
    {
      "epoch": 31.68,
      "learning_rate": 0.001,
      "loss": 2.5366,
      "step": 164964
    },
    {
      "epoch": 31.68,
      "learning_rate": 0.001,
      "loss": 2.5302,
      "step": 164976
    },
    {
      "epoch": 31.68,
      "learning_rate": 0.001,
      "loss": 2.5277,
      "step": 164988
    },
    {
      "epoch": 31.68,
      "learning_rate": 0.001,
      "loss": 2.5155,
      "step": 165000
    },
    {
      "epoch": 31.68,
      "eval_ag_news_accuracy": 0.325625,
      "eval_ag_news_bleu_score": 4.949571385475815,
      "eval_ag_news_bleu_score_sem": 0.1514546212422152,
      "eval_ag_news_emb_cos_sim": 0.820349395275116,
      "eval_ag_news_emb_cos_sim_sem": 0.006364998622868993,
      "eval_ag_news_emb_top1_equal": 0.2265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03714537682851538,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5219197273254395,
      "eval_ag_news_n_ngrams_match_1": 14.3,
      "eval_ag_news_n_ngrams_match_2": 3.28,
      "eval_ag_news_n_ngrams_match_3": 0.92,
      "eval_ag_news_num_pred_words": 46.618,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.849347647974305,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3556960223375405,
      "eval_ag_news_runtime": 10.5222,
      "eval_ag_news_samples_per_second": 47.519,
      "eval_ag_news_steps_per_second": 0.095,
      "eval_ag_news_token_set_f1": 0.3556806451883441,
      "eval_ag_news_token_set_f1_sem": 0.004507897490328704,
      "eval_ag_news_token_set_precision": 0.3419088772426846,
      "eval_ag_news_token_set_recall": 0.38412966770720486,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 165000
    },
    {
      "epoch": 31.68,
      "eval_anthropic_toxic_prompts_accuracy": 0.1146875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.165954720330608,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1191423804704478,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6786759495735168,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00955979164317851,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2220876216888428,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.198,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.94,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.73,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.818,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.08042400416108,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21307968750888823,
      "eval_anthropic_toxic_prompts_runtime": 9.8868,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.572,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.101,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3608222408362462,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006772623360191061,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4384232433800395,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3327021523574849,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 165000
    },
    {
      "epoch": 31.68,
      "eval_arxiv_accuracy": 0.34846875,
      "eval_arxiv_bleu_score": 4.390771225550452,
      "eval_arxiv_bleu_score_sem": 0.12442418811137847,
      "eval_arxiv_emb_cos_sim": 0.7720425128936768,
      "eval_arxiv_emb_cos_sim_sem": 0.008063797816362931,
      "eval_arxiv_emb_top1_equal": 0.3203125,
      "eval_arxiv_emb_top1_equal_sem": 0.041403754790620424,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3646864891052246,
      "eval_arxiv_n_ngrams_match_1": 15.256,
      "eval_arxiv_n_ngrams_match_2": 2.968,
      "eval_arxiv_n_ngrams_match_3": 0.662,
      "eval_arxiv_num_pred_words": 40.028,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.9244277541978,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3680764397478407,
      "eval_arxiv_runtime": 10.2606,
      "eval_arxiv_samples_per_second": 48.73,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.35985968147687586,
      "eval_arxiv_token_set_f1_sem": 0.004275902651255107,
      "eval_arxiv_token_set_precision": 0.31164442399355274,
      "eval_arxiv_token_set_recall": 0.4433459528223471,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 165000
    },
    {
      "epoch": 31.68,
      "eval_python_code_alpaca_accuracy": 0.16290625,
      "eval_python_code_alpaca_bleu_score": 4.832747243268115,
      "eval_python_code_alpaca_bleu_score_sem": 0.16041248560223093,
      "eval_python_code_alpaca_emb_cos_sim": 0.746342122554779,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009925256844784092,
      "eval_python_code_alpaca_emb_top1_equal": 0.109375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8708529472351074,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.95,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.998,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.036,
      "eval_python_code_alpaca_num_pred_words": 42.788,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.652068063203664,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3406843589151201,
      "eval_python_code_alpaca_runtime": 9.8107,
      "eval_python_code_alpaca_samples_per_second": 50.965,
      "eval_python_code_alpaca_steps_per_second": 0.102,
      "eval_python_code_alpaca_token_set_f1": 0.4863497168353393,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005579415155745632,
      "eval_python_code_alpaca_token_set_precision": 0.5435204942939534,
      "eval_python_code_alpaca_token_set_recall": 0.46376368221896563,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 165000
    },
    {
      "epoch": 31.68,
      "eval_wikibio_accuracy": 0.32428125,
      "eval_wikibio_bleu_score": 6.2112413855973605,
      "eval_wikibio_bleu_score_sem": 0.22597355532603242,
      "eval_wikibio_emb_cos_sim": 0.7518303394317627,
      "eval_wikibio_emb_cos_sim_sem": 0.007407385675497059,
      "eval_wikibio_emb_top1_equal": 0.203125,
      "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.723221778869629,
      "eval_wikibio_n_ngrams_match_1": 10.15,
      "eval_wikibio_n_ngrams_match_2": 3.398,
      "eval_wikibio_n_ngrams_match_3": 1.294,
      "eval_wikibio_num_pred_words": 36.474,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 41.39755325109265,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3604627382537412,
      "eval_wikibio_runtime": 10.3022,
      "eval_wikibio_samples_per_second": 48.533,
      "eval_wikibio_steps_per_second": 0.097,
      "eval_wikibio_token_set_f1": 0.32199169460023463,
      "eval_wikibio_token_set_f1_sem": 0.00509250618171414,
      "eval_wikibio_token_set_precision": 0.3314764950137064,
      "eval_wikibio_token_set_recall": 0.32681247147373677,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 165000
    },
    {
      "epoch": 31.68,
      "eval_nq_accuracy": 0.5315625,
      "eval_nq_bleu_score": 11.77311915757099,
      "eval_nq_bleu_score_sem": 0.4818562872841172,
      "eval_nq_emb_cos_sim": 0.8427734375,
      "eval_nq_emb_cos_sim_sem": 0.0066100390830223146,
      "eval_nq_emb_top1_equal": 0.2578125,
      "eval_nq_emb_top1_equal_sem": 0.038815656435002115,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.154539108276367,
      "eval_nq_n_ngrams_match_1": 23.3,
      "eval_nq_n_ngrams_match_2": 8.52,
      "eval_nq_n_ngrams_match_3": 3.85,
      "eval_nq_num_pred_words": 49.382,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.623914572025397,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4518855102801904,
      "eval_nq_runtime": 14.3007,
      "eval_nq_samples_per_second": 34.963,
      "eval_nq_steps_per_second": 0.07,
      "eval_nq_token_set_f1": 0.46532767579981826,
      "eval_nq_token_set_f1_sem": 0.004879542835258077,
      "eval_nq_token_set_precision": 0.4228913868077098,
      "eval_nq_token_set_recall": 0.5248575728469105,
      "eval_nq_true_num_tokens": 64.0,
      "step": 165000
    },
    {
      "epoch": 31.68,
      "learning_rate": 0.001,
      "loss": 2.5235,
      "step": 165012
    },
    {
      "epoch": 31.69,
      "learning_rate": 0.001,
      "loss": 2.5295,
      "step": 165024
    },
    {
      "epoch": 31.69,
      "learning_rate": 0.001,
      "loss": 2.5263,
      "step": 165036
    },
    {
      "epoch": 31.69,
      "learning_rate": 0.001,
      "loss": 2.5298,
      "step": 165048
    },
    {
      "epoch": 31.69,
      "learning_rate": 0.001,
      "loss": 2.5418,
      "step": 165060
    },
    {
      "epoch": 31.7,
      "learning_rate": 0.001,
      "loss": 2.5312,
      "step": 165072
    },
    {
      "epoch": 31.7,
      "learning_rate": 0.001,
      "loss": 2.5213,
      "step": 165084
    },
    {
      "epoch": 31.7,
      "learning_rate": 0.001,
      "loss": 2.5429,
      "step": 165096
    },
    {
      "epoch": 31.7,
      "learning_rate": 0.001,
      "loss": 2.5396,
      "step": 165108
    },
    {
      "epoch": 31.71,
      "learning_rate": 0.001,
      "loss": 2.531,
      "step": 165120
    },
    {
      "epoch": 31.71,
      "learning_rate": 0.001,
      "loss": 2.5369,
      "step": 165132
    },
    {
      "epoch": 31.71,
      "learning_rate": 0.001,
      "loss": 2.5369,
      "step": 165144
    },
    {
      "epoch": 31.71,
      "learning_rate": 0.001,
      "loss": 2.5381,
      "step": 165156
    },
    {
      "epoch": 31.71,
      "learning_rate": 0.001,
      "loss": 2.5296,
      "step": 165168
    },
    {
      "epoch": 31.72,
      "learning_rate": 0.001,
      "loss": 2.5404,
      "step": 165180
    },
    {
      "epoch": 31.72,
      "learning_rate": 0.001,
      "loss": 2.5355,
      "step": 165192
    },
    {
      "epoch": 31.72,
      "learning_rate": 0.001,
      "loss": 2.5375,
      "step": 165204
    },
    {
      "epoch": 31.72,
      "learning_rate": 0.001,
      "loss": 2.5318,
      "step": 165216
    },
    {
      "epoch": 31.73,
      "learning_rate": 0.001,
      "loss": 2.5398,
      "step": 165228
    },
    {
      "epoch": 31.73,
      "learning_rate": 0.001,
      "loss": 2.5292,
      "step": 165240
    },
    {
      "epoch": 31.73,
      "learning_rate": 0.001,
      "loss": 2.5294,
      "step": 165252
    },
    {
      "epoch": 31.73,
      "learning_rate": 0.001,
      "loss": 2.5239,
      "step": 165264
    },
    {
      "epoch": 31.74,
      "learning_rate": 0.001,
      "loss": 2.5304,
      "step": 165276
    },
    {
      "epoch": 31.74,
      "learning_rate": 0.001,
      "loss": 2.5252,
      "step": 165288
    },
    {
      "epoch": 31.74,
      "learning_rate": 0.001,
      "loss": 2.5407,
      "step": 165300
    },
    {
      "epoch": 31.74,
      "learning_rate": 0.001,
      "loss": 2.5258,
      "step": 165312
    },
    {
      "epoch": 31.74,
      "learning_rate": 0.001,
      "loss": 2.5316,
      "step": 165324
    },
    {
      "epoch": 31.75,
      "learning_rate": 0.001,
      "loss": 2.5345,
      "step": 165336
    },
    {
      "epoch": 31.75,
      "learning_rate": 0.001,
      "loss": 2.5392,
      "step": 165348
    },
    {
      "epoch": 31.75,
      "learning_rate": 0.001,
      "loss": 2.5219,
      "step": 165360
    },
    {
      "epoch": 31.75,
      "learning_rate": 0.001,
      "loss": 2.5228,
      "step": 165372
    },
    {
      "epoch": 31.76,
      "learning_rate": 0.001,
      "loss": 2.5176,
      "step": 165384
    },
    {
      "epoch": 31.76,
      "learning_rate": 0.001,
      "loss": 2.5164,
      "step": 165396
    },
    {
      "epoch": 31.76,
      "learning_rate": 0.001,
      "loss": 2.5266,
      "step": 165408
    },
    {
      "epoch": 31.76,
      "learning_rate": 0.001,
      "loss": 2.5276,
      "step": 165420
    },
    {
      "epoch": 31.76,
      "learning_rate": 0.001,
      "loss": 2.5369,
      "step": 165432
    },
    {
      "epoch": 31.77,
      "learning_rate": 0.001,
      "loss": 2.5293,
      "step": 165444
    },
    {
      "epoch": 31.77,
      "learning_rate": 0.001,
      "loss": 2.5255,
      "step": 165456
    },
    {
      "epoch": 31.77,
      "learning_rate": 0.001,
      "loss": 2.5402,
      "step": 165468
    },
    {
      "epoch": 31.77,
      "learning_rate": 0.001,
      "loss": 2.5337,
      "step": 165480
    },
    {
      "epoch": 31.78,
      "learning_rate": 0.001,
      "loss": 2.5338,
      "step": 165492
    },
    {
      "epoch": 31.78,
      "learning_rate": 0.001,
      "loss": 2.5303,
      "step": 165504
    },
    {
      "epoch": 31.78,
      "learning_rate": 0.001,
      "loss": 2.5263,
      "step": 165516
    },
    {
      "epoch": 31.78,
      "learning_rate": 0.001,
      "loss": 2.5291,
      "step": 165528
    },
    {
      "epoch": 31.79,
      "learning_rate": 0.001,
      "loss": 2.5378,
      "step": 165540
    },
    {
      "epoch": 31.79,
      "learning_rate": 0.001,
      "loss": 2.5254,
      "step": 165552
    },
    {
      "epoch": 31.79,
      "learning_rate": 0.001,
      "loss": 2.5322,
      "step": 165564
    },
    {
      "epoch": 31.79,
      "learning_rate": 0.001,
      "loss": 2.5253,
      "step": 165576
    },
    {
      "epoch": 31.79,
      "learning_rate": 0.001,
      "loss": 2.5293,
      "step": 165588
    },
    {
      "epoch": 31.8,
      "learning_rate": 0.001,
      "loss": 2.5322,
      "step": 165600
    },
    {
      "epoch": 31.8,
      "learning_rate": 0.001,
      "loss": 2.5285,
      "step": 165612
    },
    {
      "epoch": 31.8,
      "learning_rate": 0.001,
      "loss": 2.5393,
      "step": 165624
    },
    {
      "epoch": 31.8,
      "eval_ag_news_accuracy": 0.3273125,
      "eval_ag_news_bleu_score": 4.855065404281462,
      "eval_ag_news_bleu_score_sem": 0.15106250472974067,
      "eval_ag_news_emb_cos_sim": 0.8158227205276489,
      "eval_ag_news_emb_cos_sim_sem": 0.007410889132279528,
      "eval_ag_news_emb_top1_equal": 0.2734375,
      "eval_ag_news_emb_top1_equal_sem": 0.03955156411760461,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5137622356414795,
      "eval_ag_news_n_ngrams_match_1": 14.24,
      "eval_ag_news_n_ngrams_match_2": 3.204,
      "eval_ag_news_n_ngrams_match_3": 0.934,
      "eval_ag_news_num_pred_words": 46.844,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.57434506665822,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3553260578487887,
      "eval_ag_news_runtime": 12.0382,
      "eval_ag_news_samples_per_second": 41.534,
      "eval_ag_news_steps_per_second": 0.083,
      "eval_ag_news_token_set_f1": 0.35394369376665724,
      "eval_ag_news_token_set_f1_sem": 0.004517835193987404,
      "eval_ag_news_token_set_precision": 0.3399524740158876,
      "eval_ag_news_token_set_recall": 0.3824921760346473,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 165625
    },
    {
      "epoch": 31.8,
      "eval_anthropic_toxic_prompts_accuracy": 0.114,
      "eval_anthropic_toxic_prompts_bleu_score": 3.0203975735697655,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11458695695530144,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6762200593948364,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009375118640982949,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2238645553588867,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.128,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.838,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.674,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.64,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.12502987313139,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21170482733460078,
      "eval_anthropic_toxic_prompts_runtime": 11.6261,
      "eval_anthropic_toxic_prompts_samples_per_second": 43.006,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.086,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35560367711589413,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006625378686358801,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4345931557504943,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3297450542969775,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 165625
    },
    {
      "epoch": 31.8,
      "eval_arxiv_accuracy": 0.350375,
      "eval_arxiv_bleu_score": 4.424113702290277,
      "eval_arxiv_bleu_score_sem": 0.12201878097626956,
      "eval_arxiv_emb_cos_sim": 0.777930736541748,
      "eval_arxiv_emb_cos_sim_sem": 0.0067551256577540195,
      "eval_arxiv_emb_top1_equal": 0.328125,
      "eval_arxiv_emb_top1_equal_sem": 0.041664103776406315,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3710415363311768,
      "eval_arxiv_n_ngrams_match_1": 15.54,
      "eval_arxiv_n_ngrams_match_2": 3.024,
      "eval_arxiv_n_ngrams_match_3": 0.662,
      "eval_arxiv_num_pred_words": 40.888,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.10882917782503,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.370172741487134,
      "eval_arxiv_runtime": 11.6252,
      "eval_arxiv_samples_per_second": 43.01,
      "eval_arxiv_steps_per_second": 0.086,
      "eval_arxiv_token_set_f1": 0.3644618244226642,
      "eval_arxiv_token_set_f1_sem": 0.00395826682458891,
      "eval_arxiv_token_set_precision": 0.31548204693126,
      "eval_arxiv_token_set_recall": 0.4482192322084978,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 165625
    },
    {
      "epoch": 31.8,
      "eval_python_code_alpaca_accuracy": 0.16290625,
      "eval_python_code_alpaca_bleu_score": 4.654106103477634,
      "eval_python_code_alpaca_bleu_score_sem": 0.15000589666999495,
      "eval_python_code_alpaca_emb_cos_sim": 0.7658255696296692,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008851702856226728,
      "eval_python_code_alpaca_emb_top1_equal": 0.125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.837296962738037,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.856,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.976,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.034,
      "eval_python_code_alpaca_num_pred_words": 44.628,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.069563456236036,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.32679406629130714,
      "eval_python_code_alpaca_runtime": 12.2432,
      "eval_python_code_alpaca_samples_per_second": 40.839,
      "eval_python_code_alpaca_steps_per_second": 0.082,
      "eval_python_code_alpaca_token_set_f1": 0.4878536234581019,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0055525206255058164,
      "eval_python_code_alpaca_token_set_precision": 0.5359377455962341,
      "eval_python_code_alpaca_token_set_recall": 0.4689726526761777,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 165625
    },
    {
      "epoch": 31.8,
      "eval_wikibio_accuracy": 0.32546875,
      "eval_wikibio_bleu_score": 5.94630150273813,
      "eval_wikibio_bleu_score_sem": 0.20746401233977205,
      "eval_wikibio_emb_cos_sim": 0.7495725154876709,
      "eval_wikibio_emb_cos_sim_sem": 0.008162560394769607,
      "eval_wikibio_emb_top1_equal": 0.2265625,
      "eval_wikibio_emb_top1_equal_sem": 0.03714537682851538,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6943857669830322,
      "eval_wikibio_n_ngrams_match_1": 10.038,
      "eval_wikibio_n_ngrams_match_2": 3.39,
      "eval_wikibio_n_ngrams_match_3": 1.216,
      "eval_wikibio_num_pred_words": 35.714,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 40.22086001891623,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3580946561096108,
      "eval_wikibio_runtime": 12.0447,
      "eval_wikibio_samples_per_second": 41.512,
      "eval_wikibio_steps_per_second": 0.083,
      "eval_wikibio_token_set_f1": 0.32064573582917255,
      "eval_wikibio_token_set_f1_sem": 0.005352860187990176,
      "eval_wikibio_token_set_precision": 0.32857444213278747,
      "eval_wikibio_token_set_recall": 0.33130887166340145,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 165625
    },
    {
      "epoch": 31.8,
      "eval_nq_accuracy": 0.531375,
      "eval_nq_bleu_score": 11.929233576691335,
      "eval_nq_bleu_score_sem": 0.49077956369423203,
      "eval_nq_emb_cos_sim": 0.8397247791290283,
      "eval_nq_emb_cos_sim_sem": 0.007308006699430911,
      "eval_nq_emb_top1_equal": 0.28125,
      "eval_nq_emb_top1_equal_sem": 0.039896367485272234,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1550819873809814,
      "eval_nq_n_ngrams_match_1": 23.258,
      "eval_nq_n_ngrams_match_2": 8.548,
      "eval_nq_n_ngrams_match_3": 3.98,
      "eval_nq_num_pred_words": 49.092,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.628597586086753,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4528662764298266,
      "eval_nq_runtime": 28.7433,
      "eval_nq_samples_per_second": 17.395,
      "eval_nq_steps_per_second": 0.035,
      "eval_nq_token_set_f1": 0.46646194474816904,
      "eval_nq_token_set_f1_sem": 0.004986225753192338,
      "eval_nq_token_set_precision": 0.4247259510045065,
      "eval_nq_token_set_recall": 0.5254762336280152,
      "eval_nq_true_num_tokens": 64.0,
      "step": 165625
    },
    {
      "epoch": 31.8,
      "learning_rate": 0.001,
      "loss": 2.5363,
      "step": 165636
    },
    {
      "epoch": 31.81,
      "learning_rate": 0.001,
      "loss": 2.5237,
      "step": 165648
    },
    {
      "epoch": 31.81,
      "learning_rate": 0.001,
      "loss": 2.523,
      "step": 165660
    },
    {
      "epoch": 31.81,
      "learning_rate": 0.001,
      "loss": 2.5218,
      "step": 165672
    },
    {
      "epoch": 31.81,
      "learning_rate": 0.001,
      "loss": 2.5285,
      "step": 165684
    },
    {
      "epoch": 31.82,
      "learning_rate": 0.001,
      "loss": 2.5306,
      "step": 165696
    },
    {
      "epoch": 31.82,
      "learning_rate": 0.001,
      "loss": 2.5225,
      "step": 165708
    },
    {
      "epoch": 31.82,
      "learning_rate": 0.001,
      "loss": 2.5288,
      "step": 165720
    },
    {
      "epoch": 31.82,
      "learning_rate": 0.001,
      "loss": 2.5295,
      "step": 165732
    },
    {
      "epoch": 31.82,
      "learning_rate": 0.001,
      "loss": 2.5204,
      "step": 165744
    },
    {
      "epoch": 31.83,
      "learning_rate": 0.001,
      "loss": 2.5279,
      "step": 165756
    },
    {
      "epoch": 31.83,
      "learning_rate": 0.001,
      "loss": 2.5243,
      "step": 165768
    },
    {
      "epoch": 31.83,
      "learning_rate": 0.001,
      "loss": 2.5298,
      "step": 165780
    },
    {
      "epoch": 31.83,
      "learning_rate": 0.001,
      "loss": 2.5247,
      "step": 165792
    },
    {
      "epoch": 31.84,
      "learning_rate": 0.001,
      "loss": 2.5305,
      "step": 165804
    },
    {
      "epoch": 31.84,
      "learning_rate": 0.001,
      "loss": 2.5353,
      "step": 165816
    },
    {
      "epoch": 31.84,
      "learning_rate": 0.001,
      "loss": 2.5345,
      "step": 165828
    },
    {
      "epoch": 31.84,
      "learning_rate": 0.001,
      "loss": 2.5353,
      "step": 165840
    },
    {
      "epoch": 31.85,
      "learning_rate": 0.001,
      "loss": 2.531,
      "step": 165852
    },
    {
      "epoch": 31.85,
      "learning_rate": 0.001,
      "loss": 2.5262,
      "step": 165864
    },
    {
      "epoch": 31.85,
      "learning_rate": 0.001,
      "loss": 2.5312,
      "step": 165876
    },
    {
      "epoch": 31.85,
      "learning_rate": 0.001,
      "loss": 2.5247,
      "step": 165888
    },
    {
      "epoch": 31.85,
      "learning_rate": 0.001,
      "loss": 2.5369,
      "step": 165900
    },
    {
      "epoch": 31.86,
      "learning_rate": 0.001,
      "loss": 2.528,
      "step": 165912
    },
    {
      "epoch": 31.86,
      "learning_rate": 0.001,
      "loss": 2.5309,
      "step": 165924
    },
    {
      "epoch": 31.86,
      "learning_rate": 0.001,
      "loss": 2.5344,
      "step": 165936
    },
    {
      "epoch": 31.86,
      "learning_rate": 0.001,
      "loss": 2.5311,
      "step": 165948
    },
    {
      "epoch": 31.87,
      "learning_rate": 0.001,
      "loss": 2.53,
      "step": 165960
    },
    {
      "epoch": 31.87,
      "learning_rate": 0.001,
      "loss": 2.5309,
      "step": 165972
    },
    {
      "epoch": 31.87,
      "learning_rate": 0.001,
      "loss": 2.5289,
      "step": 165984
    },
    {
      "epoch": 31.87,
      "learning_rate": 0.001,
      "loss": 2.5431,
      "step": 165996
    },
    {
      "epoch": 31.88,
      "learning_rate": 0.001,
      "loss": 2.5352,
      "step": 166008
    },
    {
      "epoch": 31.88,
      "learning_rate": 0.001,
      "loss": 2.547,
      "step": 166020
    },
    {
      "epoch": 31.88,
      "learning_rate": 0.001,
      "loss": 2.5345,
      "step": 166032
    },
    {
      "epoch": 31.88,
      "learning_rate": 0.001,
      "loss": 2.5402,
      "step": 166044
    },
    {
      "epoch": 31.88,
      "learning_rate": 0.001,
      "loss": 2.5359,
      "step": 166056
    },
    {
      "epoch": 31.89,
      "learning_rate": 0.001,
      "loss": 2.54,
      "step": 166068
    },
    {
      "epoch": 31.89,
      "learning_rate": 0.001,
      "loss": 2.5332,
      "step": 166080
    },
    {
      "epoch": 31.89,
      "learning_rate": 0.001,
      "loss": 2.5321,
      "step": 166092
    },
    {
      "epoch": 31.89,
      "learning_rate": 0.001,
      "loss": 2.5277,
      "step": 166104
    },
    {
      "epoch": 31.9,
      "learning_rate": 0.001,
      "loss": 2.5292,
      "step": 166116
    },
    {
      "epoch": 31.9,
      "learning_rate": 0.001,
      "loss": 2.5216,
      "step": 166128
    },
    {
      "epoch": 31.9,
      "learning_rate": 0.001,
      "loss": 2.5349,
      "step": 166140
    },
    {
      "epoch": 31.9,
      "learning_rate": 0.001,
      "loss": 2.5331,
      "step": 166152
    },
    {
      "epoch": 31.91,
      "learning_rate": 0.001,
      "loss": 2.5259,
      "step": 166164
    },
    {
      "epoch": 31.91,
      "learning_rate": 0.001,
      "loss": 2.5309,
      "step": 166176
    },
    {
      "epoch": 31.91,
      "learning_rate": 0.001,
      "loss": 2.5331,
      "step": 166188
    },
    {
      "epoch": 31.91,
      "learning_rate": 0.001,
      "loss": 2.5321,
      "step": 166200
    },
    {
      "epoch": 31.91,
      "learning_rate": 0.001,
      "loss": 2.5422,
      "step": 166212
    },
    {
      "epoch": 31.92,
      "learning_rate": 0.001,
      "loss": 2.5342,
      "step": 166224
    },
    {
      "epoch": 31.92,
      "learning_rate": 0.001,
      "loss": 2.5329,
      "step": 166236
    },
    {
      "epoch": 31.92,
      "learning_rate": 0.001,
      "loss": 2.5376,
      "step": 166248
    },
    {
      "epoch": 31.92,
      "eval_ag_news_accuracy": 0.3256875,
      "eval_ag_news_bleu_score": 4.85846646317945,
      "eval_ag_news_bleu_score_sem": 0.15553541581902489,
      "eval_ag_news_emb_cos_sim": 0.8113851547241211,
      "eval_ag_news_emb_cos_sim_sem": 0.00794740534293305,
      "eval_ag_news_emb_top1_equal": 0.3046875,
      "eval_ag_news_emb_top1_equal_sem": 0.04084279867618665,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.525256872177124,
      "eval_ag_news_n_ngrams_match_1": 14.284,
      "eval_ag_news_n_ngrams_match_2": 3.108,
      "eval_ag_news_n_ngrams_match_3": 0.93,
      "eval_ag_news_num_pred_words": 46.848,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.96249651628512,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35519615332434323,
      "eval_ag_news_runtime": 12.6734,
      "eval_ag_news_samples_per_second": 39.453,
      "eval_ag_news_steps_per_second": 0.079,
      "eval_ag_news_token_set_f1": 0.3536932412810227,
      "eval_ag_news_token_set_f1_sem": 0.004645666303538005,
      "eval_ag_news_token_set_precision": 0.3404722191832777,
      "eval_ag_news_token_set_recall": 0.38095851057201485,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 166250
    },
    {
      "epoch": 31.92,
      "eval_anthropic_toxic_prompts_accuracy": 0.115625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.144514809554743,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11103306250104761,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6765865087509155,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009198360026684322,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2287395000457764,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.254,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.968,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.712,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.928,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.24781203899228,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21664607644543715,
      "eval_anthropic_toxic_prompts_runtime": 11.9946,
      "eval_anthropic_toxic_prompts_samples_per_second": 41.686,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.083,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3545282092160286,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0066832747366794955,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4400092299813725,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3213217359727643,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 166250
    },
    {
      "epoch": 31.92,
      "eval_arxiv_accuracy": 0.3515625,
      "eval_arxiv_bleu_score": 4.306540054361915,
      "eval_arxiv_bleu_score_sem": 0.11640016404470645,
      "eval_arxiv_emb_cos_sim": 0.7607088088989258,
      "eval_arxiv_emb_cos_sim_sem": 0.008919630405136129,
      "eval_arxiv_emb_top1_equal": 0.28125,
      "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3544704914093018,
      "eval_arxiv_n_ngrams_match_1": 15.138,
      "eval_arxiv_n_ngrams_match_2": 2.948,
      "eval_arxiv_n_ngrams_match_3": 0.618,
      "eval_arxiv_num_pred_words": 39.854,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.630440112311092,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36514489218200186,
      "eval_arxiv_runtime": 13.6292,
      "eval_arxiv_samples_per_second": 36.686,
      "eval_arxiv_steps_per_second": 0.073,
      "eval_arxiv_token_set_f1": 0.35790601478294864,
      "eval_arxiv_token_set_f1_sem": 0.004124732052069127,
      "eval_arxiv_token_set_precision": 0.3078270279633821,
      "eval_arxiv_token_set_recall": 0.44536002406991937,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 166250
    },
    {
      "epoch": 31.92,
      "eval_python_code_alpaca_accuracy": 0.16278125,
      "eval_python_code_alpaca_bleu_score": 4.766742375650273,
      "eval_python_code_alpaca_bleu_score_sem": 0.15655899201392967,
      "eval_python_code_alpaca_emb_cos_sim": 0.7535933256149292,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.011082196397486737,
      "eval_python_code_alpaca_emb_top1_equal": 0.1484375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.870575189590454,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.866,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.9,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.0,
      "eval_python_code_alpaca_num_pred_words": 42.66,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.647165747214554,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.338864600954744,
      "eval_python_code_alpaca_runtime": 11.8655,
      "eval_python_code_alpaca_samples_per_second": 42.139,
      "eval_python_code_alpaca_steps_per_second": 0.084,
      "eval_python_code_alpaca_token_set_f1": 0.480203564277494,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005615297179799518,
      "eval_python_code_alpaca_token_set_precision": 0.538813981123486,
      "eval_python_code_alpaca_token_set_recall": 0.45205983122696125,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 166250
    },
    {
      "epoch": 31.92,
      "eval_wikibio_accuracy": 0.32484375,
      "eval_wikibio_bleu_score": 6.0773489546388095,
      "eval_wikibio_bleu_score_sem": 0.2138823830121203,
      "eval_wikibio_emb_cos_sim": 0.7354307174682617,
      "eval_wikibio_emb_cos_sim_sem": 0.01016705068051306,
      "eval_wikibio_emb_top1_equal": 0.2109375,
      "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7122159004211426,
      "eval_wikibio_n_ngrams_match_1": 10.266,
      "eval_wikibio_n_ngrams_match_2": 3.498,
      "eval_wikibio_n_ngrams_match_3": 1.278,
      "eval_wikibio_num_pred_words": 36.418,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 40.94443486868525,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36146900323814657,
      "eval_wikibio_runtime": 28.6893,
      "eval_wikibio_samples_per_second": 17.428,
      "eval_wikibio_steps_per_second": 0.035,
      "eval_wikibio_token_set_f1": 0.322545845530306,
      "eval_wikibio_token_set_f1_sem": 0.005367924037990663,
      "eval_wikibio_token_set_precision": 0.333561766673221,
      "eval_wikibio_token_set_recall": 0.32955301613349075,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 166250
    },
    {
      "epoch": 31.92,
      "eval_nq_accuracy": 0.53090625,
      "eval_nq_bleu_score": 12.106554087395727,
      "eval_nq_bleu_score_sem": 0.4882764762626856,
      "eval_nq_emb_cos_sim": 0.8306553363800049,
      "eval_nq_emb_cos_sim_sem": 0.00783127628730636,
      "eval_nq_emb_top1_equal": 0.28125,
      "eval_nq_emb_top1_equal_sem": 0.039896367485272234,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1556053161621094,
      "eval_nq_n_ngrams_match_1": 23.284,
      "eval_nq_n_ngrams_match_2": 8.658,
      "eval_nq_n_ngrams_match_3": 4.052,
      "eval_nq_num_pred_words": 48.89,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.633114361320478,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45088475509557785,
      "eval_nq_runtime": 27.9785,
      "eval_nq_samples_per_second": 17.871,
      "eval_nq_steps_per_second": 0.036,
      "eval_nq_token_set_f1": 0.4677196245079925,
      "eval_nq_token_set_f1_sem": 0.005095961423859508,
      "eval_nq_token_set_precision": 0.4271423837654825,
      "eval_nq_token_set_recall": 0.525988069901891,
      "eval_nq_true_num_tokens": 64.0,
      "step": 166250
    },
    {
      "epoch": 31.92,
      "learning_rate": 0.001,
      "loss": 2.5329,
      "step": 166260
    },
    {
      "epoch": 31.93,
      "learning_rate": 0.001,
      "loss": 2.53,
      "step": 166272
    },
    {
      "epoch": 31.93,
      "learning_rate": 0.001,
      "loss": 2.5256,
      "step": 166284
    },
    {
      "epoch": 31.93,
      "learning_rate": 0.001,
      "loss": 2.524,
      "step": 166296
    },
    {
      "epoch": 31.93,
      "learning_rate": 0.001,
      "loss": 2.5199,
      "step": 166308
    },
    {
      "epoch": 31.94,
      "learning_rate": 0.001,
      "loss": 2.5285,
      "step": 166320
    },
    {
      "epoch": 31.94,
      "learning_rate": 0.001,
      "loss": 2.5284,
      "step": 166332
    },
    {
      "epoch": 31.94,
      "learning_rate": 0.001,
      "loss": 2.523,
      "step": 166344
    },
    {
      "epoch": 31.94,
      "learning_rate": 0.001,
      "loss": 2.5277,
      "step": 166356
    },
    {
      "epoch": 31.94,
      "learning_rate": 0.001,
      "loss": 2.5319,
      "step": 166368
    },
    {
      "epoch": 31.95,
      "learning_rate": 0.001,
      "loss": 2.5381,
      "step": 166380
    },
    {
      "epoch": 31.95,
      "learning_rate": 0.001,
      "loss": 2.5333,
      "step": 166392
    },
    {
      "epoch": 31.95,
      "learning_rate": 0.001,
      "loss": 2.532,
      "step": 166404
    },
    {
      "epoch": 31.95,
      "learning_rate": 0.001,
      "loss": 2.5363,
      "step": 166416
    },
    {
      "epoch": 31.96,
      "learning_rate": 0.001,
      "loss": 2.5377,
      "step": 166428
    },
    {
      "epoch": 31.96,
      "learning_rate": 0.001,
      "loss": 2.5264,
      "step": 166440
    },
    {
      "epoch": 31.96,
      "learning_rate": 0.001,
      "loss": 2.5236,
      "step": 166452
    },
    {
      "epoch": 31.96,
      "learning_rate": 0.001,
      "loss": 2.5308,
      "step": 166464
    },
    {
      "epoch": 31.97,
      "learning_rate": 0.001,
      "loss": 2.5351,
      "step": 166476
    },
    {
      "epoch": 31.97,
      "learning_rate": 0.001,
      "loss": 2.5301,
      "step": 166488
    },
    {
      "epoch": 31.97,
      "learning_rate": 0.001,
      "loss": 2.5237,
      "step": 166500
    },
    {
      "epoch": 31.97,
      "learning_rate": 0.001,
      "loss": 2.5386,
      "step": 166512
    },
    {
      "epoch": 31.97,
      "learning_rate": 0.001,
      "loss": 2.5247,
      "step": 166524
    },
    {
      "epoch": 31.98,
      "learning_rate": 0.001,
      "loss": 2.5293,
      "step": 166536
    },
    {
      "epoch": 31.98,
      "learning_rate": 0.001,
      "loss": 2.5418,
      "step": 166548
    },
    {
      "epoch": 31.98,
      "learning_rate": 0.001,
      "loss": 2.5317,
      "step": 166560
    },
    {
      "epoch": 31.98,
      "learning_rate": 0.001,
      "loss": 2.5324,
      "step": 166572
    },
    {
      "epoch": 31.99,
      "learning_rate": 0.001,
      "loss": 2.5303,
      "step": 166584
    },
    {
      "epoch": 31.99,
      "learning_rate": 0.001,
      "loss": 2.5261,
      "step": 166596
    },
    {
      "epoch": 31.99,
      "learning_rate": 0.001,
      "loss": 2.5225,
      "step": 166608
    },
    {
      "epoch": 31.99,
      "learning_rate": 0.001,
      "loss": 2.5344,
      "step": 166620
    },
    {
      "epoch": 32.0,
      "learning_rate": 0.001,
      "loss": 2.5286,
      "step": 166632
    },
    {
      "epoch": 32.0,
      "learning_rate": 0.001,
      "loss": 2.5334,
      "step": 166644
    },
    {
      "epoch": 32.0,
      "learning_rate": 0.001,
      "loss": 2.5178,
      "step": 166656
    },
    {
      "epoch": 32.0,
      "learning_rate": 0.001,
      "loss": 2.5137,
      "step": 166668
    },
    {
      "epoch": 32.0,
      "learning_rate": 0.001,
      "loss": 2.5254,
      "step": 166680
    },
    {
      "epoch": 32.01,
      "learning_rate": 0.001,
      "loss": 2.5223,
      "step": 166692
    },
    {
      "epoch": 32.01,
      "learning_rate": 0.001,
      "loss": 2.5156,
      "step": 166704
    },
    {
      "epoch": 32.01,
      "learning_rate": 0.001,
      "loss": 2.5152,
      "step": 166716
    },
    {
      "epoch": 32.01,
      "learning_rate": 0.001,
      "loss": 2.5122,
      "step": 166728
    },
    {
      "epoch": 32.02,
      "learning_rate": 0.001,
      "loss": 2.5138,
      "step": 166740
    },
    {
      "epoch": 32.02,
      "learning_rate": 0.001,
      "loss": 2.5182,
      "step": 166752
    },
    {
      "epoch": 32.02,
      "learning_rate": 0.001,
      "loss": 2.5109,
      "step": 166764
    },
    {
      "epoch": 32.02,
      "learning_rate": 0.001,
      "loss": 2.5256,
      "step": 166776
    },
    {
      "epoch": 32.03,
      "learning_rate": 0.001,
      "loss": 2.5046,
      "step": 166788
    },
    {
      "epoch": 32.03,
      "learning_rate": 0.001,
      "loss": 2.516,
      "step": 166800
    },
    {
      "epoch": 32.03,
      "learning_rate": 0.001,
      "loss": 2.5106,
      "step": 166812
    },
    {
      "epoch": 32.03,
      "learning_rate": 0.001,
      "loss": 2.5163,
      "step": 166824
    },
    {
      "epoch": 32.03,
      "learning_rate": 0.001,
      "loss": 2.519,
      "step": 166836
    },
    {
      "epoch": 32.04,
      "learning_rate": 0.001,
      "loss": 2.5129,
      "step": 166848
    },
    {
      "epoch": 32.04,
      "learning_rate": 0.001,
      "loss": 2.5179,
      "step": 166860
    },
    {
      "epoch": 32.04,
      "learning_rate": 0.001,
      "loss": 2.5165,
      "step": 166872
    },
    {
      "epoch": 32.04,
      "eval_ag_news_accuracy": 0.32675,
      "eval_ag_news_bleu_score": 4.815627293562557,
      "eval_ag_news_bleu_score_sem": 0.16402280387455798,
      "eval_ag_news_emb_cos_sim": 0.8088845014572144,
      "eval_ag_news_emb_cos_sim_sem": 0.0069388740634603735,
      "eval_ag_news_emb_top1_equal": 0.2109375,
      "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.50780987739563,
      "eval_ag_news_n_ngrams_match_1": 13.994,
      "eval_ag_news_n_ngrams_match_2": 3.084,
      "eval_ag_news_n_ngrams_match_3": 0.88,
      "eval_ag_news_num_pred_words": 46.092,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.37509213766253,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3473461043947416,
      "eval_ag_news_runtime": 12.512,
      "eval_ag_news_samples_per_second": 39.962,
      "eval_ag_news_steps_per_second": 0.08,
      "eval_ag_news_token_set_f1": 0.34820317672812245,
      "eval_ag_news_token_set_f1_sem": 0.004580834913622929,
      "eval_ag_news_token_set_precision": 0.33548923470673075,
      "eval_ag_news_token_set_recall": 0.3783151195881536,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 166875
    },
    {
      "epoch": 32.04,
      "eval_anthropic_toxic_prompts_accuracy": 0.1156875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.155059918492553,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12618107355206754,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6594028472900391,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009416124231861509,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2299771308898926,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.148,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.922,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.712,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.76,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.27907885431468,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21434134525971632,
      "eval_anthropic_toxic_prompts_runtime": 11.5179,
      "eval_anthropic_toxic_prompts_samples_per_second": 43.411,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.087,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35164535975769423,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0066668385835222015,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4329390426527947,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3215337314701285,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 166875
    },
    {
      "epoch": 32.04,
      "eval_arxiv_accuracy": 0.35075,
      "eval_arxiv_bleu_score": 4.3624678262222405,
      "eval_arxiv_bleu_score_sem": 0.12537337937640036,
      "eval_arxiv_emb_cos_sim": 0.7738143801689148,
      "eval_arxiv_emb_cos_sim_sem": 0.008658309407656458,
      "eval_arxiv_emb_top1_equal": 0.3359375,
      "eval_arxiv_emb_top1_equal_sem": 0.04191137143408563,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.365551233291626,
      "eval_arxiv_n_ngrams_match_1": 15.322,
      "eval_arxiv_n_ngrams_match_2": 2.996,
      "eval_arxiv_n_ngrams_match_3": 0.69,
      "eval_arxiv_num_pred_words": 40.72,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.949450802651754,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36366786647511495,
      "eval_arxiv_runtime": 28.815,
      "eval_arxiv_samples_per_second": 17.352,
      "eval_arxiv_steps_per_second": 0.035,
      "eval_arxiv_token_set_f1": 0.3565628150129613,
      "eval_arxiv_token_set_f1_sem": 0.004362686020897914,
      "eval_arxiv_token_set_precision": 0.30976948128980236,
      "eval_arxiv_token_set_recall": 0.43864209713035035,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 166875
    },
    {
      "epoch": 32.04,
      "eval_python_code_alpaca_accuracy": 0.1636875,
      "eval_python_code_alpaca_bleu_score": 4.95157793735963,
      "eval_python_code_alpaca_bleu_score_sem": 0.15942569667023843,
      "eval_python_code_alpaca_emb_cos_sim": 0.7661731243133545,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010553217887271586,
      "eval_python_code_alpaca_emb_top1_equal": 0.140625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.812887668609619,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.052,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.18,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.166,
      "eval_python_code_alpaca_num_pred_words": 44.59,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 16.657951476923675,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.336732598944943,
      "eval_python_code_alpaca_runtime": 26.5719,
      "eval_python_code_alpaca_samples_per_second": 18.817,
      "eval_python_code_alpaca_steps_per_second": 0.038,
      "eval_python_code_alpaca_token_set_f1": 0.4829207570270869,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005589941155793038,
      "eval_python_code_alpaca_token_set_precision": 0.5525553475475216,
      "eval_python_code_alpaca_token_set_recall": 0.4492561548794006,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 166875
    },
    {
      "epoch": 32.04,
      "eval_wikibio_accuracy": 0.3248125,
      "eval_wikibio_bleu_score": 5.910099657655869,
      "eval_wikibio_bleu_score_sem": 0.208800568954724,
      "eval_wikibio_emb_cos_sim": 0.7427431344985962,
      "eval_wikibio_emb_cos_sim_sem": 0.009208849323607472,
      "eval_wikibio_emb_top1_equal": 0.1640625,
      "eval_wikibio_emb_top1_equal_sem": 0.03286167651298939,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7076754570007324,
      "eval_wikibio_n_ngrams_match_1": 10.084,
      "eval_wikibio_n_ngrams_match_2": 3.352,
      "eval_wikibio_n_ngrams_match_3": 1.194,
      "eval_wikibio_num_pred_words": 36.234,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 40.75895038833397,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3573101165393312,
      "eval_wikibio_runtime": 12.3768,
      "eval_wikibio_samples_per_second": 40.398,
      "eval_wikibio_steps_per_second": 0.081,
      "eval_wikibio_token_set_f1": 0.3206657889556288,
      "eval_wikibio_token_set_f1_sem": 0.0052431844915149205,
      "eval_wikibio_token_set_precision": 0.32849448153304733,
      "eval_wikibio_token_set_recall": 0.3282602416077145,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 166875
    },
    {
      "epoch": 32.04,
      "eval_nq_accuracy": 0.53203125,
      "eval_nq_bleu_score": 11.92770511794608,
      "eval_nq_bleu_score_sem": 0.49513546551515863,
      "eval_nq_emb_cos_sim": 0.8254275321960449,
      "eval_nq_emb_cos_sim_sem": 0.007631238124650545,
      "eval_nq_emb_top1_equal": 0.34375,
      "eval_nq_emb_top1_equal_sem": 0.04214578430296913,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.150421142578125,
      "eval_nq_n_ngrams_match_1": 23.146,
      "eval_nq_n_ngrams_match_2": 8.612,
      "eval_nq_n_ngrams_match_3": 3.982,
      "eval_nq_num_pred_words": 49.224,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.588474607992845,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.44980493575256,
      "eval_nq_runtime": 12.3261,
      "eval_nq_samples_per_second": 40.564,
      "eval_nq_steps_per_second": 0.081,
      "eval_nq_token_set_f1": 0.46537305773421,
      "eval_nq_token_set_f1_sem": 0.005133129392622436,
      "eval_nq_token_set_precision": 0.4241983766233702,
      "eval_nq_token_set_recall": 0.5235785693292351,
      "eval_nq_true_num_tokens": 64.0,
      "step": 166875
    },
    {
      "epoch": 32.04,
      "learning_rate": 0.001,
      "loss": 2.5168,
      "step": 166884
    },
    {
      "epoch": 32.05,
      "learning_rate": 0.001,
      "loss": 2.5203,
      "step": 166896
    },
    {
      "epoch": 32.05,
      "learning_rate": 0.001,
      "loss": 2.5286,
      "step": 166908
    },
    {
      "epoch": 32.05,
      "learning_rate": 0.001,
      "loss": 2.5206,
      "step": 166920
    },
    {
      "epoch": 32.05,
      "learning_rate": 0.001,
      "loss": 2.5143,
      "step": 166932
    },
    {
      "epoch": 32.06,
      "learning_rate": 0.001,
      "loss": 2.5213,
      "step": 166944
    },
    {
      "epoch": 32.06,
      "learning_rate": 0.001,
      "loss": 2.5112,
      "step": 166956
    },
    {
      "epoch": 32.06,
      "learning_rate": 0.001,
      "loss": 2.534,
      "step": 166968
    },
    {
      "epoch": 32.06,
      "learning_rate": 0.001,
      "loss": 2.5189,
      "step": 166980
    },
    {
      "epoch": 32.06,
      "learning_rate": 0.001,
      "loss": 2.5177,
      "step": 166992
    },
    {
      "epoch": 32.07,
      "learning_rate": 0.001,
      "loss": 2.5243,
      "step": 167004
    },
    {
      "epoch": 32.07,
      "learning_rate": 0.001,
      "loss": 2.518,
      "step": 167016
    },
    {
      "epoch": 32.07,
      "learning_rate": 0.001,
      "loss": 2.5188,
      "step": 167028
    },
    {
      "epoch": 32.07,
      "learning_rate": 0.001,
      "loss": 2.5123,
      "step": 167040
    },
    {
      "epoch": 32.08,
      "learning_rate": 0.001,
      "loss": 2.5197,
      "step": 167052
    },
    {
      "epoch": 32.08,
      "learning_rate": 0.001,
      "loss": 2.5099,
      "step": 167064
    },
    {
      "epoch": 32.08,
      "learning_rate": 0.001,
      "loss": 2.5141,
      "step": 167076
    },
    {
      "epoch": 32.08,
      "learning_rate": 0.001,
      "loss": 2.5148,
      "step": 167088
    },
    {
      "epoch": 32.09,
      "learning_rate": 0.001,
      "loss": 2.5122,
      "step": 167100
    },
    {
      "epoch": 32.09,
      "learning_rate": 0.001,
      "loss": 2.517,
      "step": 167112
    },
    {
      "epoch": 32.09,
      "learning_rate": 0.001,
      "loss": 2.5168,
      "step": 167124
    },
    {
      "epoch": 32.09,
      "learning_rate": 0.001,
      "loss": 2.5107,
      "step": 167136
    },
    {
      "epoch": 32.09,
      "learning_rate": 0.001,
      "loss": 2.511,
      "step": 167148
    },
    {
      "epoch": 32.1,
      "learning_rate": 0.001,
      "loss": 2.5152,
      "step": 167160
    },
    {
      "epoch": 32.1,
      "learning_rate": 0.001,
      "loss": 2.5199,
      "step": 167172
    },
    {
      "epoch": 32.1,
      "learning_rate": 0.001,
      "loss": 2.5108,
      "step": 167184
    },
    {
      "epoch": 32.1,
      "learning_rate": 0.001,
      "loss": 2.5042,
      "step": 167196
    },
    {
      "epoch": 32.11,
      "learning_rate": 0.001,
      "loss": 2.514,
      "step": 167208
    },
    {
      "epoch": 32.11,
      "learning_rate": 0.001,
      "loss": 2.5075,
      "step": 167220
    },
    {
      "epoch": 32.11,
      "learning_rate": 0.001,
      "loss": 2.5249,
      "step": 167232
    },
    {
      "epoch": 32.11,
      "learning_rate": 0.001,
      "loss": 2.5129,
      "step": 167244
    },
    {
      "epoch": 32.12,
      "learning_rate": 0.001,
      "loss": 2.5235,
      "step": 167256
    },
    {
      "epoch": 32.12,
      "learning_rate": 0.001,
      "loss": 2.5207,
      "step": 167268
    },
    {
      "epoch": 32.12,
      "learning_rate": 0.001,
      "loss": 2.5197,
      "step": 167280
    },
    {
      "epoch": 32.12,
      "learning_rate": 0.001,
      "loss": 2.5183,
      "step": 167292
    },
    {
      "epoch": 32.12,
      "learning_rate": 0.001,
      "loss": 2.5174,
      "step": 167304
    },
    {
      "epoch": 32.13,
      "learning_rate": 0.001,
      "loss": 2.5191,
      "step": 167316
    },
    {
      "epoch": 32.13,
      "learning_rate": 0.001,
      "loss": 2.5203,
      "step": 167328
    },
    {
      "epoch": 32.13,
      "learning_rate": 0.001,
      "loss": 2.5141,
      "step": 167340
    },
    {
      "epoch": 32.13,
      "learning_rate": 0.001,
      "loss": 2.5194,
      "step": 167352
    },
    {
      "epoch": 32.14,
      "learning_rate": 0.001,
      "loss": 2.5129,
      "step": 167364
    },
    {
      "epoch": 32.14,
      "learning_rate": 0.001,
      "loss": 2.5103,
      "step": 167376
    },
    {
      "epoch": 32.14,
      "learning_rate": 0.001,
      "loss": 2.525,
      "step": 167388
    },
    {
      "epoch": 32.14,
      "learning_rate": 0.001,
      "loss": 2.5187,
      "step": 167400
    },
    {
      "epoch": 32.15,
      "learning_rate": 0.001,
      "loss": 2.5197,
      "step": 167412
    },
    {
      "epoch": 32.15,
      "learning_rate": 0.001,
      "loss": 2.5216,
      "step": 167424
    },
    {
      "epoch": 32.15,
      "learning_rate": 0.001,
      "loss": 2.5222,
      "step": 167436
    },
    {
      "epoch": 32.15,
      "learning_rate": 0.001,
      "loss": 2.5174,
      "step": 167448
    },
    {
      "epoch": 32.15,
      "learning_rate": 0.001,
      "loss": 2.5191,
      "step": 167460
    },
    {
      "epoch": 32.16,
      "learning_rate": 0.001,
      "loss": 2.5175,
      "step": 167472
    },
    {
      "epoch": 32.16,
      "learning_rate": 0.001,
      "loss": 2.5105,
      "step": 167484
    },
    {
      "epoch": 32.16,
      "learning_rate": 0.001,
      "loss": 2.5155,
      "step": 167496
    },
    {
      "epoch": 32.16,
      "eval_ag_news_accuracy": 0.326625,
      "eval_ag_news_bleu_score": 4.92835322917289,
      "eval_ag_news_bleu_score_sem": 0.1621984539919837,
      "eval_ag_news_emb_cos_sim": 0.80971759557724,
      "eval_ag_news_emb_cos_sim_sem": 0.006937972516028945,
      "eval_ag_news_emb_top1_equal": 0.21875,
      "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5029375553131104,
      "eval_ag_news_n_ngrams_match_1": 14.248,
      "eval_ag_news_n_ngrams_match_2": 3.236,
      "eval_ag_news_n_ngrams_match_3": 0.942,
      "eval_ag_news_num_pred_words": 46.3,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.21287345099474,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35588709158423726,
      "eval_ag_news_runtime": 10.418,
      "eval_ag_news_samples_per_second": 47.994,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.3550767604880276,
      "eval_ag_news_token_set_f1_sem": 0.004443144657969766,
      "eval_ag_news_token_set_precision": 0.3416093170153735,
      "eval_ag_news_token_set_recall": 0.3849158723004794,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 167500
    },
    {
      "epoch": 32.16,
      "eval_anthropic_toxic_prompts_accuracy": 0.11590625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1738114586671893,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12247148216827992,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6785756349563599,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008992992263325403,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.220651388168335,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.14,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.904,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.702,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.118,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.04442851365031,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21202512794835326,
      "eval_anthropic_toxic_prompts_runtime": 20.5751,
      "eval_anthropic_toxic_prompts_samples_per_second": 24.301,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.049,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3519881583116291,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006428653429464225,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.43315367549246775,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3211777126298439,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 167500
    },
    {
      "epoch": 32.16,
      "eval_arxiv_accuracy": 0.35021875,
      "eval_arxiv_bleu_score": 4.461599688737477,
      "eval_arxiv_bleu_score_sem": 0.11946554382757948,
      "eval_arxiv_emb_cos_sim": 0.7828381061553955,
      "eval_arxiv_emb_cos_sim_sem": 0.005931621677422201,
      "eval_arxiv_emb_top1_equal": 0.28125,
      "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3561220169067383,
      "eval_arxiv_n_ngrams_match_1": 15.302,
      "eval_arxiv_n_ngrams_match_2": 3.006,
      "eval_arxiv_n_ngrams_match_3": 0.668,
      "eval_arxiv_num_pred_words": 40.338,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.677763080947802,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3712645859172659,
      "eval_arxiv_runtime": 16.8477,
      "eval_arxiv_samples_per_second": 29.678,
      "eval_arxiv_steps_per_second": 0.059,
      "eval_arxiv_token_set_f1": 0.3614134705511333,
      "eval_arxiv_token_set_f1_sem": 0.0038228164116966726,
      "eval_arxiv_token_set_precision": 0.31328299181723923,
      "eval_arxiv_token_set_recall": 0.4412616122048446,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 167500
    },
    {
      "epoch": 32.16,
      "eval_python_code_alpaca_accuracy": 0.16278125,
      "eval_python_code_alpaca_bleu_score": 4.8720891485655375,
      "eval_python_code_alpaca_bleu_score_sem": 0.15028522067517527,
      "eval_python_code_alpaca_emb_cos_sim": 0.7576694488525391,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007696727741791395,
      "eval_python_code_alpaca_emb_top1_equal": 0.09375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.025864720141013958,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8684589862823486,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.914,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.99,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.99,
      "eval_python_code_alpaca_num_pred_words": 41.864,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.60986024361883,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.34480892010778996,
      "eval_python_code_alpaca_runtime": 15.1031,
      "eval_python_code_alpaca_samples_per_second": 33.106,
      "eval_python_code_alpaca_steps_per_second": 0.066,
      "eval_python_code_alpaca_token_set_f1": 0.4873847453916951,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005521386656550456,
      "eval_python_code_alpaca_token_set_precision": 0.5426401735832815,
      "eval_python_code_alpaca_token_set_recall": 0.4629082375473959,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 167500
    },
    {
      "epoch": 32.16,
      "eval_wikibio_accuracy": 0.32675,
      "eval_wikibio_bleu_score": 6.256609904913149,
      "eval_wikibio_bleu_score_sem": 0.21839463405041792,
      "eval_wikibio_emb_cos_sim": 0.7512813806533813,
      "eval_wikibio_emb_cos_sim_sem": 0.008049940722825959,
      "eval_wikibio_emb_top1_equal": 0.1796875,
      "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.665400505065918,
      "eval_wikibio_n_ngrams_match_1": 10.47,
      "eval_wikibio_n_ngrams_match_2": 3.518,
      "eval_wikibio_n_ngrams_match_3": 1.284,
      "eval_wikibio_num_pred_words": 36.594,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 39.07178147629232,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3721713376157594,
      "eval_wikibio_runtime": 12.7941,
      "eval_wikibio_samples_per_second": 39.08,
      "eval_wikibio_steps_per_second": 0.078,
      "eval_wikibio_token_set_f1": 0.33015638171528733,
      "eval_wikibio_token_set_f1_sem": 0.005078328166357356,
      "eval_wikibio_token_set_precision": 0.3405814091357953,
      "eval_wikibio_token_set_recall": 0.33360621096323245,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 167500
    },
    {
      "epoch": 32.16,
      "eval_nq_accuracy": 0.53203125,
      "eval_nq_bleu_score": 12.12346825477318,
      "eval_nq_bleu_score_sem": 0.4960565632802446,
      "eval_nq_emb_cos_sim": 0.8391648530960083,
      "eval_nq_emb_cos_sim_sem": 0.006984325488312325,
      "eval_nq_emb_top1_equal": 0.265625,
      "eval_nq_emb_top1_equal_sem": 0.03919146934646163,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.147446393966675,
      "eval_nq_n_ngrams_match_1": 23.406,
      "eval_nq_n_ngrams_match_2": 8.64,
      "eval_nq_n_ngrams_match_3": 4.046,
      "eval_nq_num_pred_words": 48.85,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.562964017686994,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45469851661095806,
      "eval_nq_runtime": 10.427,
      "eval_nq_samples_per_second": 47.952,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.4684891119014707,
      "eval_nq_token_set_f1_sem": 0.004811217973134483,
      "eval_nq_token_set_precision": 0.425919329426332,
      "eval_nq_token_set_recall": 0.5278613786993426,
      "eval_nq_true_num_tokens": 64.0,
      "step": 167500
    },
    {
      "epoch": 32.16,
      "learning_rate": 0.001,
      "loss": 2.5214,
      "step": 167508
    },
    {
      "epoch": 32.17,
      "learning_rate": 0.001,
      "loss": 2.5172,
      "step": 167520
    },
    {
      "epoch": 32.17,
      "learning_rate": 0.001,
      "loss": 2.5193,
      "step": 167532
    },
    {
      "epoch": 32.17,
      "learning_rate": 0.001,
      "loss": 2.5117,
      "step": 167544
    },
    {
      "epoch": 32.17,
      "learning_rate": 0.001,
      "loss": 2.5151,
      "step": 167556
    },
    {
      "epoch": 32.18,
      "learning_rate": 0.001,
      "loss": 2.5224,
      "step": 167568
    },
    {
      "epoch": 32.18,
      "learning_rate": 0.001,
      "loss": 2.5098,
      "step": 167580
    },
    {
      "epoch": 32.18,
      "learning_rate": 0.001,
      "loss": 2.5305,
      "step": 167592
    },
    {
      "epoch": 32.18,
      "learning_rate": 0.001,
      "loss": 2.5197,
      "step": 167604
    },
    {
      "epoch": 32.18,
      "learning_rate": 0.001,
      "loss": 2.5131,
      "step": 167616
    },
    {
      "epoch": 32.19,
      "learning_rate": 0.001,
      "loss": 2.5236,
      "step": 167628
    },
    {
      "epoch": 32.19,
      "learning_rate": 0.001,
      "loss": 2.5216,
      "step": 167640
    },
    {
      "epoch": 32.19,
      "learning_rate": 0.001,
      "loss": 2.5279,
      "step": 167652
    },
    {
      "epoch": 32.19,
      "learning_rate": 0.001,
      "loss": 2.5244,
      "step": 167664
    },
    {
      "epoch": 32.2,
      "learning_rate": 0.001,
      "loss": 2.5135,
      "step": 167676
    },
    {
      "epoch": 32.2,
      "learning_rate": 0.001,
      "loss": 2.5197,
      "step": 167688
    },
    {
      "epoch": 32.2,
      "learning_rate": 0.001,
      "loss": 2.5202,
      "step": 167700
    },
    {
      "epoch": 32.2,
      "learning_rate": 0.001,
      "loss": 2.5236,
      "step": 167712
    },
    {
      "epoch": 32.21,
      "learning_rate": 0.001,
      "loss": 2.523,
      "step": 167724
    },
    {
      "epoch": 32.21,
      "learning_rate": 0.001,
      "loss": 2.5274,
      "step": 167736
    },
    {
      "epoch": 32.21,
      "learning_rate": 0.001,
      "loss": 2.5098,
      "step": 167748
    },
    {
      "epoch": 32.21,
      "learning_rate": 0.001,
      "loss": 2.5222,
      "step": 167760
    },
    {
      "epoch": 32.21,
      "learning_rate": 0.001,
      "loss": 2.5107,
      "step": 167772
    },
    {
      "epoch": 32.22,
      "learning_rate": 0.001,
      "loss": 2.5234,
      "step": 167784
    },
    {
      "epoch": 32.22,
      "learning_rate": 0.001,
      "loss": 2.5282,
      "step": 167796
    },
    {
      "epoch": 32.22,
      "learning_rate": 0.001,
      "loss": 2.5236,
      "step": 167808
    },
    {
      "epoch": 32.22,
      "learning_rate": 0.001,
      "loss": 2.5202,
      "step": 167820
    },
    {
      "epoch": 32.23,
      "learning_rate": 0.001,
      "loss": 2.5191,
      "step": 167832
    },
    {
      "epoch": 32.23,
      "learning_rate": 0.001,
      "loss": 2.5197,
      "step": 167844
    },
    {
      "epoch": 32.23,
      "learning_rate": 0.001,
      "loss": 2.5201,
      "step": 167856
    },
    {
      "epoch": 32.23,
      "learning_rate": 0.001,
      "loss": 2.5294,
      "step": 167868
    },
    {
      "epoch": 32.24,
      "learning_rate": 0.001,
      "loss": 2.5246,
      "step": 167880
    },
    {
      "epoch": 32.24,
      "learning_rate": 0.001,
      "loss": 2.5235,
      "step": 167892
    },
    {
      "epoch": 32.24,
      "learning_rate": 0.001,
      "loss": 2.517,
      "step": 167904
    },
    {
      "epoch": 32.24,
      "learning_rate": 0.001,
      "loss": 2.515,
      "step": 167916
    },
    {
      "epoch": 32.24,
      "learning_rate": 0.001,
      "loss": 2.5298,
      "step": 167928
    },
    {
      "epoch": 32.25,
      "learning_rate": 0.001,
      "loss": 2.523,
      "step": 167940
    },
    {
      "epoch": 32.25,
      "learning_rate": 0.001,
      "loss": 2.5173,
      "step": 167952
    },
    {
      "epoch": 32.25,
      "learning_rate": 0.001,
      "loss": 2.5134,
      "step": 167964
    },
    {
      "epoch": 32.25,
      "learning_rate": 0.001,
      "loss": 2.5227,
      "step": 167976
    },
    {
      "epoch": 32.26,
      "learning_rate": 0.001,
      "loss": 2.5215,
      "step": 167988
    },
    {
      "epoch": 32.26,
      "learning_rate": 0.001,
      "loss": 2.5165,
      "step": 168000
    },
    {
      "epoch": 32.26,
      "learning_rate": 0.001,
      "loss": 2.5153,
      "step": 168012
    },
    {
      "epoch": 32.26,
      "learning_rate": 0.001,
      "loss": 2.5238,
      "step": 168024
    },
    {
      "epoch": 32.26,
      "learning_rate": 0.001,
      "loss": 2.5244,
      "step": 168036
    },
    {
      "epoch": 32.27,
      "learning_rate": 0.001,
      "loss": 2.5209,
      "step": 168048
    },
    {
      "epoch": 32.27,
      "learning_rate": 0.001,
      "loss": 2.524,
      "step": 168060
    },
    {
      "epoch": 32.27,
      "learning_rate": 0.001,
      "loss": 2.5261,
      "step": 168072
    },
    {
      "epoch": 32.27,
      "learning_rate": 0.001,
      "loss": 2.5205,
      "step": 168084
    },
    {
      "epoch": 32.28,
      "learning_rate": 0.001,
      "loss": 2.5183,
      "step": 168096
    },
    {
      "epoch": 32.28,
      "learning_rate": 0.001,
      "loss": 2.5234,
      "step": 168108
    },
    {
      "epoch": 32.28,
      "learning_rate": 0.001,
      "loss": 2.5194,
      "step": 168120
    },
    {
      "epoch": 32.28,
      "eval_ag_news_accuracy": 0.32609375,
      "eval_ag_news_bleu_score": 5.0874406981036895,
      "eval_ag_news_bleu_score_sem": 0.15433562718436986,
      "eval_ag_news_emb_cos_sim": 0.8175662755966187,
      "eval_ag_news_emb_cos_sim_sem": 0.007330140380344211,
      "eval_ag_news_emb_top1_equal": 0.2578125,
      "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5043346881866455,
      "eval_ag_news_n_ngrams_match_1": 14.428,
      "eval_ag_news_n_ngrams_match_2": 3.296,
      "eval_ag_news_n_ngrams_match_3": 0.978,
      "eval_ag_news_num_pred_words": 46.612,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.25930867885597,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3561177109035263,
      "eval_ag_news_runtime": 11.7183,
      "eval_ag_news_samples_per_second": 42.668,
      "eval_ag_news_steps_per_second": 0.085,
      "eval_ag_news_token_set_f1": 0.3555390501855418,
      "eval_ag_news_token_set_f1_sem": 0.004503538327020847,
      "eval_ag_news_token_set_precision": 0.3405395321643624,
      "eval_ag_news_token_set_recall": 0.385143822277539,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 168125
    },
    {
      "epoch": 32.28,
      "eval_anthropic_toxic_prompts_accuracy": 0.1149375,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2468605569546534,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11993232135348983,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6756578683853149,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008404350279231526,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2471721172332764,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.342,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.012,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.736,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.66,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.71751088391135,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21966116414130563,
      "eval_anthropic_toxic_prompts_runtime": 12.076,
      "eval_anthropic_toxic_prompts_samples_per_second": 41.404,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.083,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3659020303469726,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006501680400489734,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44587466210498783,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.333862705076242,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 168125
    },
    {
      "epoch": 32.28,
      "eval_arxiv_accuracy": 0.35103125,
      "eval_arxiv_bleu_score": 4.466254601991632,
      "eval_arxiv_bleu_score_sem": 0.11707830527157961,
      "eval_arxiv_emb_cos_sim": 0.7804619073867798,
      "eval_arxiv_emb_cos_sim_sem": 0.006360185321060147,
      "eval_arxiv_emb_top1_equal": 0.2421875,
      "eval_arxiv_emb_top1_equal_sem": 0.038014990119662626,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3633198738098145,
      "eval_arxiv_n_ngrams_match_1": 15.728,
      "eval_arxiv_n_ngrams_match_2": 3.062,
      "eval_arxiv_n_ngrams_match_3": 0.682,
      "eval_arxiv_num_pred_words": 41.0,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.884926186688986,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3758620737668217,
      "eval_arxiv_runtime": 14.2801,
      "eval_arxiv_samples_per_second": 35.014,
      "eval_arxiv_steps_per_second": 0.07,
      "eval_arxiv_token_set_f1": 0.36698154951932493,
      "eval_arxiv_token_set_f1_sem": 0.0039975974750848025,
      "eval_arxiv_token_set_precision": 0.31998378835337576,
      "eval_arxiv_token_set_recall": 0.4445848631182827,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 168125
    },
    {
      "epoch": 32.28,
      "eval_python_code_alpaca_accuracy": 0.16246875,
      "eval_python_code_alpaca_bleu_score": 4.890429466066363,
      "eval_python_code_alpaca_bleu_score_sem": 0.15518511776298086,
      "eval_python_code_alpaca_emb_cos_sim": 0.7621194124221802,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009740857797103852,
      "eval_python_code_alpaca_emb_top1_equal": 0.1015625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.026804565886848545,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.865048885345459,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.096,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.076,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.072,
      "eval_python_code_alpaca_num_pred_words": 43.386,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.54991111708448,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.34246083271542194,
      "eval_python_code_alpaca_runtime": 12.7255,
      "eval_python_code_alpaca_samples_per_second": 39.291,
      "eval_python_code_alpaca_steps_per_second": 0.079,
      "eval_python_code_alpaca_token_set_f1": 0.49057156671525015,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005661150965871911,
      "eval_python_code_alpaca_token_set_precision": 0.5514028807233765,
      "eval_python_code_alpaca_token_set_recall": 0.4654982012385866,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 168125
    },
    {
      "epoch": 32.28,
      "eval_wikibio_accuracy": 0.3263125,
      "eval_wikibio_bleu_score": 6.191199690493633,
      "eval_wikibio_bleu_score_sem": 0.2117647776192703,
      "eval_wikibio_emb_cos_sim": 0.7437809109687805,
      "eval_wikibio_emb_cos_sim_sem": 0.009498421221935094,
      "eval_wikibio_emb_top1_equal": 0.15625,
      "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.688481330871582,
      "eval_wikibio_n_ngrams_match_1": 10.192,
      "eval_wikibio_n_ngrams_match_2": 3.484,
      "eval_wikibio_n_ngrams_match_3": 1.33,
      "eval_wikibio_num_pred_words": 36.148,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 39.984078239927506,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3578335979464245,
      "eval_wikibio_runtime": 14.4519,
      "eval_wikibio_samples_per_second": 34.597,
      "eval_wikibio_steps_per_second": 0.069,
      "eval_wikibio_token_set_f1": 0.3218182986244908,
      "eval_wikibio_token_set_f1_sem": 0.005390369220073895,
      "eval_wikibio_token_set_precision": 0.33068107982948086,
      "eval_wikibio_token_set_recall": 0.32825675510883745,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 168125
    },
    {
      "epoch": 32.28,
      "eval_nq_accuracy": 0.53190625,
      "eval_nq_bleu_score": 12.000788674688687,
      "eval_nq_bleu_score_sem": 0.4910340221504707,
      "eval_nq_emb_cos_sim": 0.8362958431243896,
      "eval_nq_emb_cos_sim_sem": 0.007697480458164019,
      "eval_nq_emb_top1_equal": 0.3203125,
      "eval_nq_emb_top1_equal_sem": 0.041403754790620424,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.150165319442749,
      "eval_nq_n_ngrams_match_1": 23.484,
      "eval_nq_n_ngrams_match_2": 8.692,
      "eval_nq_n_ngrams_match_3": 3.99,
      "eval_nq_num_pred_words": 49.068,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.586277758504973,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45508165097849884,
      "eval_nq_runtime": 15.1578,
      "eval_nq_samples_per_second": 32.986,
      "eval_nq_steps_per_second": 0.066,
      "eval_nq_token_set_f1": 0.4681998475056242,
      "eval_nq_token_set_f1_sem": 0.004910346806228855,
      "eval_nq_token_set_precision": 0.426228832282288,
      "eval_nq_token_set_recall": 0.527553492732729,
      "eval_nq_true_num_tokens": 64.0,
      "step": 168125
    },
    {
      "epoch": 32.28,
      "learning_rate": 0.001,
      "loss": 2.5346,
      "step": 168132
    },
    {
      "epoch": 32.29,
      "learning_rate": 0.001,
      "loss": 2.5193,
      "step": 168144
    },
    {
      "epoch": 32.29,
      "learning_rate": 0.001,
      "loss": 2.5216,
      "step": 168156
    },
    {
      "epoch": 32.29,
      "learning_rate": 0.001,
      "loss": 2.5185,
      "step": 168168
    },
    {
      "epoch": 32.29,
      "learning_rate": 0.001,
      "loss": 2.5293,
      "step": 168180
    },
    {
      "epoch": 32.29,
      "learning_rate": 0.001,
      "loss": 2.5265,
      "step": 168192
    },
    {
      "epoch": 32.3,
      "learning_rate": 0.001,
      "loss": 2.5152,
      "step": 168204
    },
    {
      "epoch": 32.3,
      "learning_rate": 0.001,
      "loss": 2.517,
      "step": 168216
    },
    {
      "epoch": 32.3,
      "learning_rate": 0.001,
      "loss": 2.5182,
      "step": 168228
    },
    {
      "epoch": 32.3,
      "learning_rate": 0.001,
      "loss": 2.515,
      "step": 168240
    },
    {
      "epoch": 32.31,
      "learning_rate": 0.001,
      "loss": 2.5267,
      "step": 168252
    },
    {
      "epoch": 32.31,
      "learning_rate": 0.001,
      "loss": 2.5207,
      "step": 168264
    },
    {
      "epoch": 32.31,
      "learning_rate": 0.001,
      "loss": 2.5227,
      "step": 168276
    },
    {
      "epoch": 32.31,
      "learning_rate": 0.001,
      "loss": 2.5194,
      "step": 168288
    },
    {
      "epoch": 32.32,
      "learning_rate": 0.001,
      "loss": 2.5204,
      "step": 168300
    },
    {
      "epoch": 32.32,
      "learning_rate": 0.001,
      "loss": 2.5195,
      "step": 168312
    },
    {
      "epoch": 32.32,
      "learning_rate": 0.001,
      "loss": 2.5239,
      "step": 168324
    },
    {
      "epoch": 32.32,
      "learning_rate": 0.001,
      "loss": 2.5248,
      "step": 168336
    },
    {
      "epoch": 32.32,
      "learning_rate": 0.001,
      "loss": 2.5152,
      "step": 168348
    },
    {
      "epoch": 32.33,
      "learning_rate": 0.001,
      "loss": 2.5148,
      "step": 168360
    },
    {
      "epoch": 32.33,
      "learning_rate": 0.001,
      "loss": 2.5131,
      "step": 168372
    },
    {
      "epoch": 32.33,
      "learning_rate": 0.001,
      "loss": 2.5271,
      "step": 168384
    },
    {
      "epoch": 32.33,
      "learning_rate": 0.001,
      "loss": 2.5222,
      "step": 168396
    },
    {
      "epoch": 32.34,
      "learning_rate": 0.001,
      "loss": 2.5216,
      "step": 168408
    },
    {
      "epoch": 32.34,
      "learning_rate": 0.001,
      "loss": 2.5137,
      "step": 168420
    },
    {
      "epoch": 32.34,
      "learning_rate": 0.001,
      "loss": 2.5175,
      "step": 168432
    },
    {
      "epoch": 32.34,
      "learning_rate": 0.001,
      "loss": 2.5189,
      "step": 168444
    },
    {
      "epoch": 32.35,
      "learning_rate": 0.001,
      "loss": 2.5249,
      "step": 168456
    },
    {
      "epoch": 32.35,
      "learning_rate": 0.001,
      "loss": 2.5233,
      "step": 168468
    },
    {
      "epoch": 32.35,
      "learning_rate": 0.001,
      "loss": 2.5191,
      "step": 168480
    },
    {
      "epoch": 32.35,
      "learning_rate": 0.001,
      "loss": 2.5176,
      "step": 168492
    },
    {
      "epoch": 32.35,
      "learning_rate": 0.001,
      "loss": 2.5233,
      "step": 168504
    },
    {
      "epoch": 32.36,
      "learning_rate": 0.001,
      "loss": 2.5231,
      "step": 168516
    },
    {
      "epoch": 32.36,
      "learning_rate": 0.001,
      "loss": 2.5231,
      "step": 168528
    },
    {
      "epoch": 32.36,
      "learning_rate": 0.001,
      "loss": 2.5129,
      "step": 168540
    },
    {
      "epoch": 32.36,
      "learning_rate": 0.001,
      "loss": 2.5095,
      "step": 168552
    },
    {
      "epoch": 32.37,
      "learning_rate": 0.001,
      "loss": 2.5191,
      "step": 168564
    },
    {
      "epoch": 32.37,
      "learning_rate": 0.001,
      "loss": 2.5234,
      "step": 168576
    },
    {
      "epoch": 32.37,
      "learning_rate": 0.001,
      "loss": 2.528,
      "step": 168588
    },
    {
      "epoch": 32.37,
      "learning_rate": 0.001,
      "loss": 2.5212,
      "step": 168600
    },
    {
      "epoch": 32.38,
      "learning_rate": 0.001,
      "loss": 2.5272,
      "step": 168612
    },
    {
      "epoch": 32.38,
      "learning_rate": 0.001,
      "loss": 2.5173,
      "step": 168624
    },
    {
      "epoch": 32.38,
      "learning_rate": 0.001,
      "loss": 2.5171,
      "step": 168636
    },
    {
      "epoch": 32.38,
      "learning_rate": 0.001,
      "loss": 2.525,
      "step": 168648
    },
    {
      "epoch": 32.38,
      "learning_rate": 0.001,
      "loss": 2.5211,
      "step": 168660
    },
    {
      "epoch": 32.39,
      "learning_rate": 0.001,
      "loss": 2.5224,
      "step": 168672
    },
    {
      "epoch": 32.39,
      "learning_rate": 0.001,
      "loss": 2.5113,
      "step": 168684
    },
    {
      "epoch": 32.39,
      "learning_rate": 0.001,
      "loss": 2.5169,
      "step": 168696
    },
    {
      "epoch": 32.39,
      "learning_rate": 0.001,
      "loss": 2.5177,
      "step": 168708
    },
    {
      "epoch": 32.4,
      "learning_rate": 0.001,
      "loss": 2.521,
      "step": 168720
    },
    {
      "epoch": 32.4,
      "learning_rate": 0.001,
      "loss": 2.5229,
      "step": 168732
    },
    {
      "epoch": 32.4,
      "learning_rate": 0.001,
      "loss": 2.5198,
      "step": 168744
    },
    {
      "epoch": 32.4,
      "eval_ag_news_accuracy": 0.32546875,
      "eval_ag_news_bleu_score": 4.869080691834786,
      "eval_ag_news_bleu_score_sem": 0.15179364946824286,
      "eval_ag_news_emb_cos_sim": 0.8161188364028931,
      "eval_ag_news_emb_cos_sim_sem": 0.007580130461019531,
      "eval_ag_news_emb_top1_equal": 0.203125,
      "eval_ag_news_emb_top1_equal_sem": 0.03570055125142555,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5068631172180176,
      "eval_ag_news_n_ngrams_match_1": 14.282,
      "eval_ag_news_n_ngrams_match_2": 3.158,
      "eval_ag_news_n_ngrams_match_3": 0.868,
      "eval_ag_news_num_pred_words": 46.71,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.34350888274561,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35231991460636314,
      "eval_ag_news_runtime": 12.5948,
      "eval_ag_news_samples_per_second": 39.699,
      "eval_ag_news_steps_per_second": 0.079,
      "eval_ag_news_token_set_f1": 0.3534651767529745,
      "eval_ag_news_token_set_f1_sem": 0.004549650043853658,
      "eval_ag_news_token_set_precision": 0.3410032798945537,
      "eval_ag_news_token_set_recall": 0.3826163708016629,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 168750
    },
    {
      "epoch": 32.4,
      "eval_anthropic_toxic_prompts_accuracy": 0.1155625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1825666688922984,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1179177905676714,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6738834381103516,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00948207351306893,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1328125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030114394778901498,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2254481315612793,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.284,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.978,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.762,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.804,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.164848792348117,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21607119492143562,
      "eval_anthropic_toxic_prompts_runtime": 13.604,
      "eval_anthropic_toxic_prompts_samples_per_second": 36.754,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.074,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.356748217188059,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006625345638714375,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4422180595472049,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3252196084496033,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 168750
    },
    {
      "epoch": 32.4,
      "eval_arxiv_accuracy": 0.349625,
      "eval_arxiv_bleu_score": 4.421865682590429,
      "eval_arxiv_bleu_score_sem": 0.12826819702096595,
      "eval_arxiv_emb_cos_sim": 0.7698716521263123,
      "eval_arxiv_emb_cos_sim_sem": 0.007192445309170104,
      "eval_arxiv_emb_top1_equal": 0.3046875,
      "eval_arxiv_emb_top1_equal_sem": 0.04084279867618665,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.365234613418579,
      "eval_arxiv_n_ngrams_match_1": 15.262,
      "eval_arxiv_n_ngrams_match_2": 2.952,
      "eval_arxiv_n_ngrams_match_3": 0.678,
      "eval_arxiv_num_pred_words": 40.434,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.94028628212506,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3666530889013224,
      "eval_arxiv_runtime": 12.2859,
      "eval_arxiv_samples_per_second": 40.697,
      "eval_arxiv_steps_per_second": 0.081,
      "eval_arxiv_token_set_f1": 0.358644789730894,
      "eval_arxiv_token_set_f1_sem": 0.004076908121318583,
      "eval_arxiv_token_set_precision": 0.3105454819226174,
      "eval_arxiv_token_set_recall": 0.4424866302835179,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 168750
    },
    {
      "epoch": 32.4,
      "eval_python_code_alpaca_accuracy": 0.16390625,
      "eval_python_code_alpaca_bleu_score": 4.9199026786020195,
      "eval_python_code_alpaca_bleu_score_sem": 0.15989313604744668,
      "eval_python_code_alpaca_emb_cos_sim": 0.7597689628601074,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007768276626950193,
      "eval_python_code_alpaca_emb_top1_equal": 0.1171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.850010395050049,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.972,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.01,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.096,
      "eval_python_code_alpaca_num_pred_words": 43.196,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.28796154885914,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3386660346644988,
      "eval_python_code_alpaca_runtime": 12.7231,
      "eval_python_code_alpaca_samples_per_second": 39.298,
      "eval_python_code_alpaca_steps_per_second": 0.079,
      "eval_python_code_alpaca_token_set_f1": 0.4828110185435105,
      "eval_python_code_alpaca_token_set_f1_sem": 0.00540864122200843,
      "eval_python_code_alpaca_token_set_precision": 0.5437709506675122,
      "eval_python_code_alpaca_token_set_recall": 0.4564491308932614,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 168750
    },
    {
      "epoch": 32.4,
      "eval_wikibio_accuracy": 0.3256875,
      "eval_wikibio_bleu_score": 6.000436389695106,
      "eval_wikibio_bleu_score_sem": 0.2145718380730183,
      "eval_wikibio_emb_cos_sim": 0.7437461614608765,
      "eval_wikibio_emb_cos_sim_sem": 0.008876078881189707,
      "eval_wikibio_emb_top1_equal": 0.1484375,
      "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6984331607818604,
      "eval_wikibio_n_ngrams_match_1": 10.09,
      "eval_wikibio_n_ngrams_match_2": 3.39,
      "eval_wikibio_n_ngrams_match_3": 1.228,
      "eval_wikibio_num_pred_words": 36.44,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 40.3839795601731,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3592824132988366,
      "eval_wikibio_runtime": 12.7274,
      "eval_wikibio_samples_per_second": 39.285,
      "eval_wikibio_steps_per_second": 0.079,
      "eval_wikibio_token_set_f1": 0.3206266138727259,
      "eval_wikibio_token_set_f1_sem": 0.005494052811641807,
      "eval_wikibio_token_set_precision": 0.328254799149372,
      "eval_wikibio_token_set_recall": 0.3300923242071554,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 168750
    },
    {
      "epoch": 32.4,
      "eval_nq_accuracy": 0.532125,
      "eval_nq_bleu_score": 11.94649739394824,
      "eval_nq_bleu_score_sem": 0.499829132336281,
      "eval_nq_emb_cos_sim": 0.833651065826416,
      "eval_nq_emb_cos_sim_sem": 0.007120498004583976,
      "eval_nq_emb_top1_equal": 0.3125,
      "eval_nq_emb_top1_equal_sem": 0.041130074229814934,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.151710271835327,
      "eval_nq_n_ngrams_match_1": 23.33,
      "eval_nq_n_ngrams_match_2": 8.544,
      "eval_nq_n_ngrams_match_3": 3.994,
      "eval_nq_num_pred_words": 49.332,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.599553401348793,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4496376778436102,
      "eval_nq_runtime": 12.6782,
      "eval_nq_samples_per_second": 39.438,
      "eval_nq_steps_per_second": 0.079,
      "eval_nq_token_set_f1": 0.4658140135404253,
      "eval_nq_token_set_f1_sem": 0.004974203972118167,
      "eval_nq_token_set_precision": 0.4252537495430019,
      "eval_nq_token_set_recall": 0.5216959949925327,
      "eval_nq_true_num_tokens": 64.0,
      "step": 168750
    },
    {
      "epoch": 32.4,
      "learning_rate": 0.001,
      "loss": 2.5195,
      "step": 168756
    },
    {
      "epoch": 32.41,
      "learning_rate": 0.001,
      "loss": 2.5243,
      "step": 168768
    },
    {
      "epoch": 32.41,
      "learning_rate": 0.001,
      "loss": 2.5158,
      "step": 168780
    },
    {
      "epoch": 32.41,
      "learning_rate": 0.001,
      "loss": 2.5129,
      "step": 168792
    },
    {
      "epoch": 32.41,
      "learning_rate": 0.001,
      "loss": 2.5319,
      "step": 168804
    },
    {
      "epoch": 32.41,
      "learning_rate": 0.001,
      "loss": 2.5276,
      "step": 168816
    },
    {
      "epoch": 32.42,
      "learning_rate": 0.001,
      "loss": 2.5146,
      "step": 168828
    },
    {
      "epoch": 32.42,
      "learning_rate": 0.001,
      "loss": 2.5214,
      "step": 168840
    },
    {
      "epoch": 32.42,
      "learning_rate": 0.001,
      "loss": 2.5075,
      "step": 168852
    },
    {
      "epoch": 32.42,
      "learning_rate": 0.001,
      "loss": 2.518,
      "step": 168864
    },
    {
      "epoch": 32.43,
      "learning_rate": 0.001,
      "loss": 2.5154,
      "step": 168876
    },
    {
      "epoch": 32.43,
      "learning_rate": 0.001,
      "loss": 2.5196,
      "step": 168888
    },
    {
      "epoch": 32.43,
      "learning_rate": 0.001,
      "loss": 2.5263,
      "step": 168900
    },
    {
      "epoch": 32.43,
      "learning_rate": 0.001,
      "loss": 2.5215,
      "step": 168912
    },
    {
      "epoch": 32.44,
      "learning_rate": 0.001,
      "loss": 2.5166,
      "step": 168924
    },
    {
      "epoch": 32.44,
      "learning_rate": 0.001,
      "loss": 2.5183,
      "step": 168936
    },
    {
      "epoch": 32.44,
      "learning_rate": 0.001,
      "loss": 2.517,
      "step": 168948
    },
    {
      "epoch": 32.44,
      "learning_rate": 0.001,
      "loss": 2.5277,
      "step": 168960
    },
    {
      "epoch": 32.44,
      "learning_rate": 0.001,
      "loss": 2.5178,
      "step": 168972
    },
    {
      "epoch": 32.45,
      "learning_rate": 0.001,
      "loss": 2.5279,
      "step": 168984
    },
    {
      "epoch": 32.45,
      "learning_rate": 0.001,
      "loss": 2.5131,
      "step": 168996
    },
    {
      "epoch": 32.45,
      "learning_rate": 0.001,
      "loss": 2.5145,
      "step": 169008
    },
    {
      "epoch": 32.45,
      "learning_rate": 0.001,
      "loss": 2.5224,
      "step": 169020
    },
    {
      "epoch": 32.46,
      "learning_rate": 0.001,
      "loss": 2.5276,
      "step": 169032
    },
    {
      "epoch": 32.46,
      "learning_rate": 0.001,
      "loss": 2.5289,
      "step": 169044
    },
    {
      "epoch": 32.46,
      "learning_rate": 0.001,
      "loss": 2.5244,
      "step": 169056
    },
    {
      "epoch": 32.46,
      "learning_rate": 0.001,
      "loss": 2.5202,
      "step": 169068
    },
    {
      "epoch": 32.47,
      "learning_rate": 0.001,
      "loss": 2.528,
      "step": 169080
    },
    {
      "epoch": 32.47,
      "learning_rate": 0.001,
      "loss": 2.5134,
      "step": 169092
    },
    {
      "epoch": 32.47,
      "learning_rate": 0.001,
      "loss": 2.5165,
      "step": 169104
    },
    {
      "epoch": 32.47,
      "learning_rate": 0.001,
      "loss": 2.5238,
      "step": 169116
    },
    {
      "epoch": 32.47,
      "learning_rate": 0.001,
      "loss": 2.5236,
      "step": 169128
    },
    {
      "epoch": 32.48,
      "learning_rate": 0.001,
      "loss": 2.5188,
      "step": 169140
    },
    {
      "epoch": 32.48,
      "learning_rate": 0.001,
      "loss": 2.5267,
      "step": 169152
    },
    {
      "epoch": 32.48,
      "learning_rate": 0.001,
      "loss": 2.5092,
      "step": 169164
    },
    {
      "epoch": 32.48,
      "learning_rate": 0.001,
      "loss": 2.5147,
      "step": 169176
    },
    {
      "epoch": 32.49,
      "learning_rate": 0.001,
      "loss": 2.5181,
      "step": 169188
    },
    {
      "epoch": 32.49,
      "learning_rate": 0.001,
      "loss": 2.5118,
      "step": 169200
    },
    {
      "epoch": 32.49,
      "learning_rate": 0.001,
      "loss": 2.5128,
      "step": 169212
    },
    {
      "epoch": 32.49,
      "learning_rate": 0.001,
      "loss": 2.5249,
      "step": 169224
    },
    {
      "epoch": 32.5,
      "learning_rate": 0.001,
      "loss": 2.5217,
      "step": 169236
    },
    {
      "epoch": 32.5,
      "learning_rate": 0.001,
      "loss": 2.5092,
      "step": 169248
    },
    {
      "epoch": 32.5,
      "learning_rate": 0.001,
      "loss": 2.5297,
      "step": 169260
    },
    {
      "epoch": 32.5,
      "learning_rate": 0.001,
      "loss": 2.5271,
      "step": 169272
    },
    {
      "epoch": 32.5,
      "learning_rate": 0.001,
      "loss": 2.5152,
      "step": 169284
    },
    {
      "epoch": 32.51,
      "learning_rate": 0.001,
      "loss": 2.5191,
      "step": 169296
    },
    {
      "epoch": 32.51,
      "learning_rate": 0.001,
      "loss": 2.526,
      "step": 169308
    },
    {
      "epoch": 32.51,
      "learning_rate": 0.001,
      "loss": 2.5169,
      "step": 169320
    },
    {
      "epoch": 32.51,
      "learning_rate": 0.001,
      "loss": 2.5268,
      "step": 169332
    },
    {
      "epoch": 32.52,
      "learning_rate": 0.001,
      "loss": 2.5345,
      "step": 169344
    },
    {
      "epoch": 32.52,
      "learning_rate": 0.001,
      "loss": 2.5245,
      "step": 169356
    },
    {
      "epoch": 32.52,
      "learning_rate": 0.001,
      "loss": 2.528,
      "step": 169368
    },
    {
      "epoch": 32.52,
      "eval_ag_news_accuracy": 0.3273125,
      "eval_ag_news_bleu_score": 4.726959521017791,
      "eval_ag_news_bleu_score_sem": 0.1453776583978361,
      "eval_ag_news_emb_cos_sim": 0.8186322450637817,
      "eval_ag_news_emb_cos_sim_sem": 0.007100898402937394,
      "eval_ag_news_emb_top1_equal": 0.25,
      "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.503650426864624,
      "eval_ag_news_n_ngrams_match_1": 14.148,
      "eval_ag_news_n_ngrams_match_2": 3.118,
      "eval_ag_news_n_ngrams_match_3": 0.864,
      "eval_ag_news_num_pred_words": 46.46,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.23655840478383,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3536052680982871,
      "eval_ag_news_runtime": 12.6821,
      "eval_ag_news_samples_per_second": 39.426,
      "eval_ag_news_steps_per_second": 0.079,
      "eval_ag_news_token_set_f1": 0.3508410630728675,
      "eval_ag_news_token_set_f1_sem": 0.004530332420736515,
      "eval_ag_news_token_set_precision": 0.3384757243196865,
      "eval_ag_news_token_set_recall": 0.37662257935250915,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 169375
    },
    {
      "epoch": 32.52,
      "eval_anthropic_toxic_prompts_accuracy": 0.11609375,
      "eval_anthropic_toxic_prompts_bleu_score": 3.227361249340591,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11751250371524187,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6760736703872681,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009089850331014995,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.140625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030847557647994725,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2216336727142334,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.362,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.012,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.752,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.988,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.06904135516801,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.22063724410074512,
      "eval_anthropic_toxic_prompts_runtime": 11.9857,
      "eval_anthropic_toxic_prompts_samples_per_second": 41.716,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.083,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3605956493953031,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006794638074546885,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44545998227792455,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3250501627613176,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 169375
    },
    {
      "epoch": 32.52,
      "eval_arxiv_accuracy": 0.3489375,
      "eval_arxiv_bleu_score": 4.4262191725177,
      "eval_arxiv_bleu_score_sem": 0.11920417950576172,
      "eval_arxiv_emb_cos_sim": 0.7817623615264893,
      "eval_arxiv_emb_cos_sim_sem": 0.007898044139337205,
      "eval_arxiv_emb_top1_equal": 0.3203125,
      "eval_arxiv_emb_top1_equal_sem": 0.041403754790620424,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.36584734916687,
      "eval_arxiv_n_ngrams_match_1": 15.45,
      "eval_arxiv_n_ngrams_match_2": 2.974,
      "eval_arxiv_n_ngrams_match_3": 0.68,
      "eval_arxiv_num_pred_words": 40.758,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.95802446394998,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3712272869795471,
      "eval_arxiv_runtime": 13.8651,
      "eval_arxiv_samples_per_second": 36.062,
      "eval_arxiv_steps_per_second": 0.072,
      "eval_arxiv_token_set_f1": 0.3618575120562036,
      "eval_arxiv_token_set_f1_sem": 0.004199419017338341,
      "eval_arxiv_token_set_precision": 0.31694719177648856,
      "eval_arxiv_token_set_recall": 0.4362523814849827,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 169375
    },
    {
      "epoch": 32.52,
      "eval_python_code_alpaca_accuracy": 0.16265625,
      "eval_python_code_alpaca_bleu_score": 4.795288258352828,
      "eval_python_code_alpaca_bleu_score_sem": 0.14700565146763137,
      "eval_python_code_alpaca_emb_cos_sim": 0.773827075958252,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007188469017430904,
      "eval_python_code_alpaca_emb_top1_equal": 0.1171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8409552574157715,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.008,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.072,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.018,
      "eval_python_code_alpaca_num_pred_words": 43.362,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.132123310800242,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.34068054770610645,
      "eval_python_code_alpaca_runtime": 11.7446,
      "eval_python_code_alpaca_samples_per_second": 42.573,
      "eval_python_code_alpaca_steps_per_second": 0.085,
      "eval_python_code_alpaca_token_set_f1": 0.4858328405216677,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0055085417523748365,
      "eval_python_code_alpaca_token_set_precision": 0.5473751159039163,
      "eval_python_code_alpaca_token_set_recall": 0.4575941964242227,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 169375
    },
    {
      "epoch": 32.52,
      "eval_wikibio_accuracy": 0.325,
      "eval_wikibio_bleu_score": 6.378139245886941,
      "eval_wikibio_bleu_score_sem": 0.2235415734227433,
      "eval_wikibio_emb_cos_sim": 0.7461855411529541,
      "eval_wikibio_emb_cos_sim_sem": 0.009127345879666007,
      "eval_wikibio_emb_top1_equal": 0.171875,
      "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6928439140319824,
      "eval_wikibio_n_ngrams_match_1": 10.584,
      "eval_wikibio_n_ngrams_match_2": 3.638,
      "eval_wikibio_n_ngrams_match_3": 1.388,
      "eval_wikibio_num_pred_words": 37.274,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 40.1588931513773,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3694077329563741,
      "eval_wikibio_runtime": 14.0317,
      "eval_wikibio_samples_per_second": 35.634,
      "eval_wikibio_steps_per_second": 0.071,
      "eval_wikibio_token_set_f1": 0.33088383938587673,
      "eval_wikibio_token_set_f1_sem": 0.0052162764037644105,
      "eval_wikibio_token_set_precision": 0.3439744254658963,
      "eval_wikibio_token_set_recall": 0.33292363039390155,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 169375
    },
    {
      "epoch": 32.52,
      "eval_nq_accuracy": 0.5314375,
      "eval_nq_bleu_score": 12.025232572179775,
      "eval_nq_bleu_score_sem": 0.4855058227386262,
      "eval_nq_emb_cos_sim": 0.8393033742904663,
      "eval_nq_emb_cos_sim_sem": 0.006955296846400447,
      "eval_nq_emb_top1_equal": 0.3046875,
      "eval_nq_emb_top1_equal_sem": 0.04084279867618665,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.149290084838867,
      "eval_nq_n_ngrams_match_1": 23.456,
      "eval_nq_n_ngrams_match_2": 8.678,
      "eval_nq_n_ngrams_match_3": 4.002,
      "eval_nq_num_pred_words": 49.062,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.578766038830206,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4544787891648732,
      "eval_nq_runtime": 12.8276,
      "eval_nq_samples_per_second": 38.978,
      "eval_nq_steps_per_second": 0.078,
      "eval_nq_token_set_f1": 0.4673685126084771,
      "eval_nq_token_set_f1_sem": 0.004954060259206487,
      "eval_nq_token_set_precision": 0.4276777373946588,
      "eval_nq_token_set_recall": 0.5223366567606176,
      "eval_nq_true_num_tokens": 64.0,
      "step": 169375
    },
    {
      "epoch": 32.52,
      "learning_rate": 0.001,
      "loss": 2.5197,
      "step": 169380
    },
    {
      "epoch": 32.53,
      "learning_rate": 0.001,
      "loss": 2.5255,
      "step": 169392
    },
    {
      "epoch": 32.53,
      "learning_rate": 0.001,
      "loss": 2.5258,
      "step": 169404
    },
    {
      "epoch": 32.53,
      "learning_rate": 0.001,
      "loss": 2.5236,
      "step": 169416
    },
    {
      "epoch": 32.53,
      "learning_rate": 0.001,
      "loss": 2.5235,
      "step": 169428
    },
    {
      "epoch": 32.53,
      "learning_rate": 0.001,
      "loss": 2.5148,
      "step": 169440
    },
    {
      "epoch": 32.54,
      "learning_rate": 0.001,
      "loss": 2.5237,
      "step": 169452
    },
    {
      "epoch": 32.54,
      "learning_rate": 0.001,
      "loss": 2.5234,
      "step": 169464
    },
    {
      "epoch": 32.54,
      "learning_rate": 0.001,
      "loss": 2.526,
      "step": 169476
    },
    {
      "epoch": 32.54,
      "learning_rate": 0.001,
      "loss": 2.5185,
      "step": 169488
    },
    {
      "epoch": 32.55,
      "learning_rate": 0.001,
      "loss": 2.5219,
      "step": 169500
    },
    {
      "epoch": 32.55,
      "learning_rate": 0.001,
      "loss": 2.5209,
      "step": 169512
    },
    {
      "epoch": 32.55,
      "learning_rate": 0.001,
      "loss": 2.5262,
      "step": 169524
    },
    {
      "epoch": 32.55,
      "learning_rate": 0.001,
      "loss": 2.5301,
      "step": 169536
    },
    {
      "epoch": 32.56,
      "learning_rate": 0.001,
      "loss": 2.5184,
      "step": 169548
    },
    {
      "epoch": 32.56,
      "learning_rate": 0.001,
      "loss": 2.5332,
      "step": 169560
    },
    {
      "epoch": 32.56,
      "learning_rate": 0.001,
      "loss": 2.522,
      "step": 169572
    },
    {
      "epoch": 32.56,
      "learning_rate": 0.001,
      "loss": 2.5368,
      "step": 169584
    },
    {
      "epoch": 32.56,
      "learning_rate": 0.001,
      "loss": 2.5242,
      "step": 169596
    },
    {
      "epoch": 32.57,
      "learning_rate": 0.001,
      "loss": 2.5252,
      "step": 169608
    },
    {
      "epoch": 32.57,
      "learning_rate": 0.001,
      "loss": 2.5172,
      "step": 169620
    },
    {
      "epoch": 32.57,
      "learning_rate": 0.001,
      "loss": 2.5183,
      "step": 169632
    },
    {
      "epoch": 32.57,
      "learning_rate": 0.001,
      "loss": 2.5359,
      "step": 169644
    },
    {
      "epoch": 32.58,
      "learning_rate": 0.001,
      "loss": 2.5206,
      "step": 169656
    },
    {
      "epoch": 32.58,
      "learning_rate": 0.001,
      "loss": 2.5243,
      "step": 169668
    },
    {
      "epoch": 32.58,
      "learning_rate": 0.001,
      "loss": 2.5221,
      "step": 169680
    },
    {
      "epoch": 32.58,
      "learning_rate": 0.001,
      "loss": 2.5157,
      "step": 169692
    },
    {
      "epoch": 32.59,
      "learning_rate": 0.001,
      "loss": 2.5145,
      "step": 169704
    },
    {
      "epoch": 32.59,
      "learning_rate": 0.001,
      "loss": 2.5219,
      "step": 169716
    },
    {
      "epoch": 32.59,
      "learning_rate": 0.001,
      "loss": 2.5237,
      "step": 169728
    },
    {
      "epoch": 32.59,
      "learning_rate": 0.001,
      "loss": 2.5337,
      "step": 169740
    },
    {
      "epoch": 32.59,
      "learning_rate": 0.001,
      "loss": 2.5212,
      "step": 169752
    },
    {
      "epoch": 32.6,
      "learning_rate": 0.001,
      "loss": 2.5105,
      "step": 169764
    },
    {
      "epoch": 32.6,
      "learning_rate": 0.001,
      "loss": 2.5203,
      "step": 169776
    },
    {
      "epoch": 32.6,
      "learning_rate": 0.001,
      "loss": 2.5218,
      "step": 169788
    },
    {
      "epoch": 32.6,
      "learning_rate": 0.001,
      "loss": 2.5217,
      "step": 169800
    },
    {
      "epoch": 32.61,
      "learning_rate": 0.001,
      "loss": 2.53,
      "step": 169812
    },
    {
      "epoch": 32.61,
      "learning_rate": 0.001,
      "loss": 2.5266,
      "step": 169824
    },
    {
      "epoch": 32.61,
      "learning_rate": 0.001,
      "loss": 2.5256,
      "step": 169836
    },
    {
      "epoch": 32.61,
      "learning_rate": 0.001,
      "loss": 2.5178,
      "step": 169848
    },
    {
      "epoch": 32.62,
      "learning_rate": 0.001,
      "loss": 2.5241,
      "step": 169860
    },
    {
      "epoch": 32.62,
      "learning_rate": 0.001,
      "loss": 2.5371,
      "step": 169872
    },
    {
      "epoch": 32.62,
      "learning_rate": 0.001,
      "loss": 2.5314,
      "step": 169884
    },
    {
      "epoch": 32.62,
      "learning_rate": 0.001,
      "loss": 2.5203,
      "step": 169896
    },
    {
      "epoch": 32.62,
      "learning_rate": 0.001,
      "loss": 2.5162,
      "step": 169908
    },
    {
      "epoch": 32.63,
      "learning_rate": 0.001,
      "loss": 2.5199,
      "step": 169920
    },
    {
      "epoch": 32.63,
      "learning_rate": 0.001,
      "loss": 2.5122,
      "step": 169932
    },
    {
      "epoch": 32.63,
      "learning_rate": 0.001,
      "loss": 2.5223,
      "step": 169944
    },
    {
      "epoch": 32.63,
      "learning_rate": 0.001,
      "loss": 2.5236,
      "step": 169956
    },
    {
      "epoch": 32.64,
      "learning_rate": 0.001,
      "loss": 2.5259,
      "step": 169968
    },
    {
      "epoch": 32.64,
      "learning_rate": 0.001,
      "loss": 2.5272,
      "step": 169980
    },
    {
      "epoch": 32.64,
      "learning_rate": 0.001,
      "loss": 2.5252,
      "step": 169992
    },
    {
      "epoch": 32.64,
      "eval_ag_news_accuracy": 0.32609375,
      "eval_ag_news_bleu_score": 5.103588714402894,
      "eval_ag_news_bleu_score_sem": 0.15790990091704368,
      "eval_ag_news_emb_cos_sim": 0.8158586025238037,
      "eval_ag_news_emb_cos_sim_sem": 0.006535474064016955,
      "eval_ag_news_emb_top1_equal": 0.265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4988529682159424,
      "eval_ag_news_n_ngrams_match_1": 14.592,
      "eval_ag_news_n_ngrams_match_2": 3.32,
      "eval_ag_news_n_ngrams_match_3": 0.984,
      "eval_ag_news_num_pred_words": 46.78,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.077489259126004,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35906025776254014,
      "eval_ag_news_runtime": 11.8885,
      "eval_ag_news_samples_per_second": 42.057,
      "eval_ag_news_steps_per_second": 0.084,
      "eval_ag_news_token_set_f1": 0.36058742545039246,
      "eval_ag_news_token_set_f1_sem": 0.004244958120679543,
      "eval_ag_news_token_set_precision": 0.3475712214395737,
      "eval_ag_news_token_set_recall": 0.38892050994854466,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 170000
    },
    {
      "epoch": 32.64,
      "eval_anthropic_toxic_prompts_accuracy": 0.116125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.182037965618906,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1169221128708404,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6759343147277832,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008286285102751369,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1484375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.031548465007086954,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2131259441375732,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.31,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.972,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.702,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.13,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.856665454903688,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2181414961458868,
      "eval_anthropic_toxic_prompts_runtime": 11.6025,
      "eval_anthropic_toxic_prompts_samples_per_second": 43.094,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.086,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36194357982938186,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0065557976352004,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.443049473231829,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3315463960453458,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 170000
    },
    {
      "epoch": 32.64,
      "eval_arxiv_accuracy": 0.34803125,
      "eval_arxiv_bleu_score": 4.417925651773008,
      "eval_arxiv_bleu_score_sem": 0.1187334071204115,
      "eval_arxiv_emb_cos_sim": 0.7819305658340454,
      "eval_arxiv_emb_cos_sim_sem": 0.006849457548710988,
      "eval_arxiv_emb_top1_equal": 0.25,
      "eval_arxiv_emb_top1_equal_sem": 0.03842366440207048,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.366028308868408,
      "eval_arxiv_n_ngrams_match_1": 15.478,
      "eval_arxiv_n_ngrams_match_2": 3.022,
      "eval_arxiv_n_ngrams_match_3": 0.69,
      "eval_arxiv_num_pred_words": 40.71,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.96326517357843,
      "eval_arxiv_pred_num_tokens": 62.96875,
      "eval_arxiv_rouge_score": 0.3704510976994211,
      "eval_arxiv_runtime": 11.7231,
      "eval_arxiv_samples_per_second": 42.651,
      "eval_arxiv_steps_per_second": 0.085,
      "eval_arxiv_token_set_f1": 0.3632259435023624,
      "eval_arxiv_token_set_f1_sem": 0.0040308592469305285,
      "eval_arxiv_token_set_precision": 0.3144717841751199,
      "eval_arxiv_token_set_recall": 0.448385466760016,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 170000
    },
    {
      "epoch": 32.64,
      "eval_python_code_alpaca_accuracy": 0.16253125,
      "eval_python_code_alpaca_bleu_score": 4.970254213898489,
      "eval_python_code_alpaca_bleu_score_sem": 0.1582547434828246,
      "eval_python_code_alpaca_emb_cos_sim": 0.7549129724502563,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.010142988781280386,
      "eval_python_code_alpaca_emb_top1_equal": 0.1796875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.034068008879424266,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8428986072540283,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.914,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.06,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.13,
      "eval_python_code_alpaca_num_pred_words": 43.058,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.16544939149375,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.337584587472073,
      "eval_python_code_alpaca_runtime": 11.4225,
      "eval_python_code_alpaca_samples_per_second": 43.773,
      "eval_python_code_alpaca_steps_per_second": 0.088,
      "eval_python_code_alpaca_token_set_f1": 0.4818881681938009,
      "eval_python_code_alpaca_token_set_f1_sem": 0.00590458283299077,
      "eval_python_code_alpaca_token_set_precision": 0.543161543634205,
      "eval_python_code_alpaca_token_set_recall": 0.4554755920180267,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 170000
    },
    {
      "epoch": 32.64,
      "eval_wikibio_accuracy": 0.32665625,
      "eval_wikibio_bleu_score": 5.952713524474662,
      "eval_wikibio_bleu_score_sem": 0.21708734155055998,
      "eval_wikibio_emb_cos_sim": 0.7362420558929443,
      "eval_wikibio_emb_cos_sim_sem": 0.009733167314339628,
      "eval_wikibio_emb_top1_equal": 0.203125,
      "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6685829162597656,
      "eval_wikibio_n_ngrams_match_1": 9.872,
      "eval_wikibio_n_ngrams_match_2": 3.352,
      "eval_wikibio_n_ngrams_match_3": 1.208,
      "eval_wikibio_num_pred_words": 35.764,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 39.1963220155196,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3513284428263801,
      "eval_wikibio_runtime": 11.4986,
      "eval_wikibio_samples_per_second": 43.483,
      "eval_wikibio_steps_per_second": 0.087,
      "eval_wikibio_token_set_f1": 0.31845039340589476,
      "eval_wikibio_token_set_f1_sem": 0.0054494845963791476,
      "eval_wikibio_token_set_precision": 0.32214312835742664,
      "eval_wikibio_token_set_recall": 0.3346979182046409,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 170000
    },
    {
      "epoch": 32.64,
      "eval_nq_accuracy": 0.5320625,
      "eval_nq_bleu_score": 11.87375527848572,
      "eval_nq_bleu_score_sem": 0.4889766435354102,
      "eval_nq_emb_cos_sim": 0.8338566422462463,
      "eval_nq_emb_cos_sim_sem": 0.007484752801383885,
      "eval_nq_emb_top1_equal": 0.2890625,
      "eval_nq_emb_top1_equal_sem": 0.04022626667363519,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.145758867263794,
      "eval_nq_n_ngrams_match_1": 23.3,
      "eval_nq_n_ngrams_match_2": 8.64,
      "eval_nq_n_ngrams_match_3": 4.014,
      "eval_nq_num_pred_words": 49.014,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.54852597297066,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4514813362329917,
      "eval_nq_runtime": 11.8327,
      "eval_nq_samples_per_second": 42.256,
      "eval_nq_steps_per_second": 0.085,
      "eval_nq_token_set_f1": 0.4651492914938751,
      "eval_nq_token_set_f1_sem": 0.004983244804647352,
      "eval_nq_token_set_precision": 0.42289744589278583,
      "eval_nq_token_set_recall": 0.5253804325590283,
      "eval_nq_true_num_tokens": 64.0,
      "step": 170000
    },
    {
      "epoch": 32.64,
      "learning_rate": 0.001,
      "loss": 2.509,
      "step": 170004
    },
    {
      "epoch": 32.65,
      "learning_rate": 0.001,
      "loss": 2.5204,
      "step": 170016
    },
    {
      "epoch": 32.65,
      "learning_rate": 0.001,
      "loss": 2.5225,
      "step": 170028
    },
    {
      "epoch": 32.65,
      "learning_rate": 0.001,
      "loss": 2.5158,
      "step": 170040
    },
    {
      "epoch": 32.65,
      "learning_rate": 0.001,
      "loss": 2.5183,
      "step": 170052
    },
    {
      "epoch": 32.65,
      "learning_rate": 0.001,
      "loss": 2.5245,
      "step": 170064
    },
    {
      "epoch": 32.66,
      "learning_rate": 0.001,
      "loss": 2.5234,
      "step": 170076
    },
    {
      "epoch": 32.66,
      "learning_rate": 0.001,
      "loss": 2.5111,
      "step": 170088
    },
    {
      "epoch": 32.66,
      "learning_rate": 0.001,
      "loss": 2.5087,
      "step": 170100
    },
    {
      "epoch": 32.66,
      "learning_rate": 0.001,
      "loss": 2.5344,
      "step": 170112
    },
    {
      "epoch": 32.67,
      "learning_rate": 0.001,
      "loss": 2.5176,
      "step": 170124
    },
    {
      "epoch": 32.67,
      "learning_rate": 0.001,
      "loss": 2.5188,
      "step": 170136
    },
    {
      "epoch": 32.67,
      "learning_rate": 0.001,
      "loss": 2.5118,
      "step": 170148
    },
    {
      "epoch": 32.67,
      "learning_rate": 0.001,
      "loss": 2.5233,
      "step": 170160
    },
    {
      "epoch": 32.68,
      "learning_rate": 0.001,
      "loss": 2.5315,
      "step": 170172
    },
    {
      "epoch": 32.68,
      "learning_rate": 0.001,
      "loss": 2.5251,
      "step": 170184
    },
    {
      "epoch": 32.68,
      "learning_rate": 0.001,
      "loss": 2.5267,
      "step": 170196
    },
    {
      "epoch": 32.68,
      "learning_rate": 0.001,
      "loss": 2.5187,
      "step": 170208
    },
    {
      "epoch": 32.68,
      "learning_rate": 0.001,
      "loss": 2.5342,
      "step": 170220
    },
    {
      "epoch": 32.69,
      "learning_rate": 0.001,
      "loss": 2.5195,
      "step": 170232
    },
    {
      "epoch": 32.69,
      "learning_rate": 0.001,
      "loss": 2.5193,
      "step": 170244
    },
    {
      "epoch": 32.69,
      "learning_rate": 0.001,
      "loss": 2.5339,
      "step": 170256
    },
    {
      "epoch": 32.69,
      "learning_rate": 0.001,
      "loss": 2.525,
      "step": 170268
    },
    {
      "epoch": 32.7,
      "learning_rate": 0.001,
      "loss": 2.5176,
      "step": 170280
    },
    {
      "epoch": 32.7,
      "learning_rate": 0.001,
      "loss": 2.5145,
      "step": 170292
    },
    {
      "epoch": 32.7,
      "learning_rate": 0.001,
      "loss": 2.5207,
      "step": 170304
    },
    {
      "epoch": 32.7,
      "learning_rate": 0.001,
      "loss": 2.5225,
      "step": 170316
    },
    {
      "epoch": 32.71,
      "learning_rate": 0.001,
      "loss": 2.5218,
      "step": 170328
    },
    {
      "epoch": 32.71,
      "learning_rate": 0.001,
      "loss": 2.5326,
      "step": 170340
    },
    {
      "epoch": 32.71,
      "learning_rate": 0.001,
      "loss": 2.5212,
      "step": 170352
    },
    {
      "epoch": 32.71,
      "learning_rate": 0.001,
      "loss": 2.5237,
      "step": 170364
    },
    {
      "epoch": 32.71,
      "learning_rate": 0.001,
      "loss": 2.5219,
      "step": 170376
    },
    {
      "epoch": 32.72,
      "learning_rate": 0.001,
      "loss": 2.5308,
      "step": 170388
    },
    {
      "epoch": 32.72,
      "learning_rate": 0.001,
      "loss": 2.524,
      "step": 170400
    },
    {
      "epoch": 32.72,
      "learning_rate": 0.001,
      "loss": 2.5278,
      "step": 170412
    },
    {
      "epoch": 32.72,
      "learning_rate": 0.001,
      "loss": 2.5279,
      "step": 170424
    },
    {
      "epoch": 32.73,
      "learning_rate": 0.001,
      "loss": 2.5283,
      "step": 170436
    },
    {
      "epoch": 32.73,
      "learning_rate": 0.001,
      "loss": 2.5173,
      "step": 170448
    },
    {
      "epoch": 32.73,
      "learning_rate": 0.001,
      "loss": 2.5168,
      "step": 170460
    },
    {
      "epoch": 32.73,
      "learning_rate": 0.001,
      "loss": 2.5235,
      "step": 170472
    },
    {
      "epoch": 32.74,
      "learning_rate": 0.001,
      "loss": 2.5215,
      "step": 170484
    },
    {
      "epoch": 32.74,
      "learning_rate": 0.001,
      "loss": 2.5299,
      "step": 170496
    },
    {
      "epoch": 32.74,
      "learning_rate": 0.001,
      "loss": 2.5226,
      "step": 170508
    },
    {
      "epoch": 32.74,
      "learning_rate": 0.001,
      "loss": 2.5212,
      "step": 170520
    },
    {
      "epoch": 32.74,
      "learning_rate": 0.001,
      "loss": 2.5235,
      "step": 170532
    },
    {
      "epoch": 32.75,
      "learning_rate": 0.001,
      "loss": 2.5239,
      "step": 170544
    },
    {
      "epoch": 32.75,
      "learning_rate": 0.001,
      "loss": 2.5122,
      "step": 170556
    },
    {
      "epoch": 32.75,
      "learning_rate": 0.001,
      "loss": 2.5211,
      "step": 170568
    },
    {
      "epoch": 32.75,
      "learning_rate": 0.001,
      "loss": 2.5188,
      "step": 170580
    },
    {
      "epoch": 32.76,
      "learning_rate": 0.001,
      "loss": 2.5243,
      "step": 170592
    },
    {
      "epoch": 32.76,
      "learning_rate": 0.001,
      "loss": 2.5286,
      "step": 170604
    },
    {
      "epoch": 32.76,
      "learning_rate": 0.001,
      "loss": 2.5276,
      "step": 170616
    },
    {
      "epoch": 32.76,
      "eval_ag_news_accuracy": 0.326,
      "eval_ag_news_bleu_score": 4.856974187090668,
      "eval_ag_news_bleu_score_sem": 0.15180008377899748,
      "eval_ag_news_emb_cos_sim": 0.8178597688674927,
      "eval_ag_news_emb_cos_sim_sem": 0.006283727645138845,
      "eval_ag_news_emb_top1_equal": 0.296875,
      "eval_ag_news_emb_top1_equal_sem": 0.04054163310179599,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5094616413116455,
      "eval_ag_news_n_ngrams_match_1": 14.18,
      "eval_ag_news_n_ngrams_match_2": 3.172,
      "eval_ag_news_n_ngrams_match_3": 0.916,
      "eval_ag_news_num_pred_words": 46.816,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.430265464660316,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3505884215736518,
      "eval_ag_news_runtime": 12.1688,
      "eval_ag_news_samples_per_second": 41.089,
      "eval_ag_news_steps_per_second": 0.082,
      "eval_ag_news_token_set_f1": 0.3522260030815371,
      "eval_ag_news_token_set_f1_sem": 0.004373883248593534,
      "eval_ag_news_token_set_precision": 0.3382308289845004,
      "eval_ag_news_token_set_recall": 0.3801282350636806,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 170625
    },
    {
      "epoch": 32.76,
      "eval_anthropic_toxic_prompts_accuracy": 0.1154375,
      "eval_anthropic_toxic_prompts_bleu_score": 3.102271431804127,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11575546645798614,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6726579666137695,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008999542805546308,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2119908332824707,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.31,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.91,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.744,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.914,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.828466391683374,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21454391369210693,
      "eval_anthropic_toxic_prompts_runtime": 11.6733,
      "eval_anthropic_toxic_prompts_samples_per_second": 42.833,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.086,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36399885158677264,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006690731797494028,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4445680156283615,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.33535265358218763,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 170625
    },
    {
      "epoch": 32.76,
      "eval_arxiv_accuracy": 0.349375,
      "eval_arxiv_bleu_score": 4.463035169627046,
      "eval_arxiv_bleu_score_sem": 0.12904825556114927,
      "eval_arxiv_emb_cos_sim": 0.7773054242134094,
      "eval_arxiv_emb_cos_sim_sem": 0.006682008119577206,
      "eval_arxiv_emb_top1_equal": 0.2890625,
      "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.364659309387207,
      "eval_arxiv_n_ngrams_match_1": 15.38,
      "eval_arxiv_n_ngrams_match_2": 3.114,
      "eval_arxiv_n_ngrams_match_3": 0.692,
      "eval_arxiv_num_pred_words": 40.826,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.923641607091298,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3679257477380071,
      "eval_arxiv_runtime": 16.3425,
      "eval_arxiv_samples_per_second": 30.595,
      "eval_arxiv_steps_per_second": 0.061,
      "eval_arxiv_token_set_f1": 0.3583587119904286,
      "eval_arxiv_token_set_f1_sem": 0.004044247773056292,
      "eval_arxiv_token_set_precision": 0.31065965819744223,
      "eval_arxiv_token_set_recall": 0.4390801533458176,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 170625
    },
    {
      "epoch": 32.76,
      "eval_python_code_alpaca_accuracy": 0.1629375,
      "eval_python_code_alpaca_bleu_score": 4.83823739466449,
      "eval_python_code_alpaca_bleu_score_sem": 0.15965890443932776,
      "eval_python_code_alpaca_emb_cos_sim": 0.7576169967651367,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.011544135076789247,
      "eval_python_code_alpaca_emb_top1_equal": 0.125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8417391777038574,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.948,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.96,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.076,
      "eval_python_code_alpaca_num_pred_words": 42.862,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.145558795327975,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3353055955473792,
      "eval_python_code_alpaca_runtime": 11.906,
      "eval_python_code_alpaca_samples_per_second": 41.996,
      "eval_python_code_alpaca_steps_per_second": 0.084,
      "eval_python_code_alpaca_token_set_f1": 0.47783723083060203,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0058067061539587425,
      "eval_python_code_alpaca_token_set_precision": 0.5402525688840687,
      "eval_python_code_alpaca_token_set_recall": 0.45255878676672123,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 170625
    },
    {
      "epoch": 32.76,
      "eval_wikibio_accuracy": 0.3281875,
      "eval_wikibio_bleu_score": 5.973474103548643,
      "eval_wikibio_bleu_score_sem": 0.21791608459900272,
      "eval_wikibio_emb_cos_sim": 0.7314437031745911,
      "eval_wikibio_emb_cos_sim_sem": 0.009586113667162579,
      "eval_wikibio_emb_top1_equal": 0.2109375,
      "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6576809883117676,
      "eval_wikibio_n_ngrams_match_1": 9.644,
      "eval_wikibio_n_ngrams_match_2": 3.216,
      "eval_wikibio_n_ngrams_match_3": 1.21,
      "eval_wikibio_num_pred_words": 34.786,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 38.77132737682593,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.34238598380280894,
      "eval_wikibio_runtime": 11.8981,
      "eval_wikibio_samples_per_second": 42.023,
      "eval_wikibio_steps_per_second": 0.084,
      "eval_wikibio_token_set_f1": 0.31324390654108736,
      "eval_wikibio_token_set_f1_sem": 0.005682618559756961,
      "eval_wikibio_token_set_precision": 0.313996683749092,
      "eval_wikibio_token_set_recall": 0.3310759916162236,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 170625
    },
    {
      "epoch": 32.76,
      "eval_nq_accuracy": 0.53359375,
      "eval_nq_bleu_score": 12.06423858892232,
      "eval_nq_bleu_score_sem": 0.47737088317606996,
      "eval_nq_emb_cos_sim": 0.8339177966117859,
      "eval_nq_emb_cos_sim_sem": 0.007387219725573613,
      "eval_nq_emb_top1_equal": 0.28125,
      "eval_nq_emb_top1_equal_sem": 0.039896367485272234,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1459178924560547,
      "eval_nq_n_ngrams_match_1": 23.386,
      "eval_nq_n_ngrams_match_2": 8.788,
      "eval_nq_n_ngrams_match_3": 4.052,
      "eval_nq_num_pred_words": 49.14,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.549885512054676,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45452192133628033,
      "eval_nq_runtime": 12.1468,
      "eval_nq_samples_per_second": 41.163,
      "eval_nq_steps_per_second": 0.082,
      "eval_nq_token_set_f1": 0.46843519800215117,
      "eval_nq_token_set_f1_sem": 0.004888333826128115,
      "eval_nq_token_set_precision": 0.42489588778155346,
      "eval_nq_token_set_recall": 0.5311409146401321,
      "eval_nq_true_num_tokens": 64.0,
      "step": 170625
    },
    {
      "epoch": 32.76,
      "learning_rate": 0.001,
      "loss": 2.5232,
      "step": 170628
    },
    {
      "epoch": 32.76,
      "learning_rate": 0.001,
      "loss": 2.5188,
      "step": 170640
    },
    {
      "epoch": 32.77,
      "learning_rate": 0.001,
      "loss": 2.5163,
      "step": 170652
    },
    {
      "epoch": 32.77,
      "learning_rate": 0.001,
      "loss": 2.5183,
      "step": 170664
    },
    {
      "epoch": 32.77,
      "learning_rate": 0.001,
      "loss": 2.5128,
      "step": 170676
    },
    {
      "epoch": 32.77,
      "learning_rate": 0.001,
      "loss": 2.5248,
      "step": 170688
    },
    {
      "epoch": 32.78,
      "learning_rate": 0.001,
      "loss": 2.5187,
      "step": 170700
    },
    {
      "epoch": 32.78,
      "learning_rate": 0.001,
      "loss": 2.5298,
      "step": 170712
    },
    {
      "epoch": 32.78,
      "learning_rate": 0.001,
      "loss": 2.5141,
      "step": 170724
    },
    {
      "epoch": 32.78,
      "learning_rate": 0.001,
      "loss": 2.5333,
      "step": 170736
    },
    {
      "epoch": 32.79,
      "learning_rate": 0.001,
      "loss": 2.5237,
      "step": 170748
    },
    {
      "epoch": 32.79,
      "learning_rate": 0.001,
      "loss": 2.5243,
      "step": 170760
    },
    {
      "epoch": 32.79,
      "learning_rate": 0.001,
      "loss": 2.5148,
      "step": 170772
    },
    {
      "epoch": 32.79,
      "learning_rate": 0.001,
      "loss": 2.5206,
      "step": 170784
    },
    {
      "epoch": 32.79,
      "learning_rate": 0.001,
      "loss": 2.5154,
      "step": 170796
    },
    {
      "epoch": 32.8,
      "learning_rate": 0.001,
      "loss": 2.5171,
      "step": 170808
    },
    {
      "epoch": 32.8,
      "learning_rate": 0.001,
      "loss": 2.5238,
      "step": 170820
    },
    {
      "epoch": 32.8,
      "learning_rate": 0.001,
      "loss": 2.5154,
      "step": 170832
    },
    {
      "epoch": 32.8,
      "learning_rate": 0.001,
      "loss": 2.5215,
      "step": 170844
    },
    {
      "epoch": 32.81,
      "learning_rate": 0.001,
      "loss": 2.5193,
      "step": 170856
    },
    {
      "epoch": 32.81,
      "learning_rate": 0.001,
      "loss": 2.526,
      "step": 170868
    },
    {
      "epoch": 32.81,
      "learning_rate": 0.001,
      "loss": 2.5226,
      "step": 170880
    },
    {
      "epoch": 32.81,
      "learning_rate": 0.001,
      "loss": 2.5313,
      "step": 170892
    },
    {
      "epoch": 32.82,
      "learning_rate": 0.001,
      "loss": 2.5307,
      "step": 170904
    },
    {
      "epoch": 32.82,
      "learning_rate": 0.001,
      "loss": 2.5209,
      "step": 170916
    },
    {
      "epoch": 32.82,
      "learning_rate": 0.001,
      "loss": 2.5327,
      "step": 170928
    },
    {
      "epoch": 32.82,
      "learning_rate": 0.001,
      "loss": 2.5299,
      "step": 170940
    },
    {
      "epoch": 32.82,
      "learning_rate": 0.001,
      "loss": 2.5159,
      "step": 170952
    },
    {
      "epoch": 32.83,
      "learning_rate": 0.001,
      "loss": 2.5255,
      "step": 170964
    },
    {
      "epoch": 32.83,
      "learning_rate": 0.001,
      "loss": 2.5187,
      "step": 170976
    },
    {
      "epoch": 32.83,
      "learning_rate": 0.001,
      "loss": 2.5287,
      "step": 170988
    },
    {
      "epoch": 32.83,
      "learning_rate": 0.001,
      "loss": 2.5191,
      "step": 171000
    },
    {
      "epoch": 32.84,
      "learning_rate": 0.001,
      "loss": 2.5409,
      "step": 171012
    },
    {
      "epoch": 32.84,
      "learning_rate": 0.001,
      "loss": 2.5265,
      "step": 171024
    },
    {
      "epoch": 32.84,
      "learning_rate": 0.001,
      "loss": 2.5276,
      "step": 171036
    },
    {
      "epoch": 32.84,
      "learning_rate": 0.001,
      "loss": 2.52,
      "step": 171048
    },
    {
      "epoch": 32.85,
      "learning_rate": 0.001,
      "loss": 2.5268,
      "step": 171060
    },
    {
      "epoch": 32.85,
      "learning_rate": 0.001,
      "loss": 2.5235,
      "step": 171072
    },
    {
      "epoch": 32.85,
      "learning_rate": 0.001,
      "loss": 2.5263,
      "step": 171084
    },
    {
      "epoch": 32.85,
      "learning_rate": 0.001,
      "loss": 2.5266,
      "step": 171096
    },
    {
      "epoch": 32.85,
      "learning_rate": 0.001,
      "loss": 2.5342,
      "step": 171108
    },
    {
      "epoch": 32.86,
      "learning_rate": 0.001,
      "loss": 2.5181,
      "step": 171120
    },
    {
      "epoch": 32.86,
      "learning_rate": 0.001,
      "loss": 2.5299,
      "step": 171132
    },
    {
      "epoch": 32.86,
      "learning_rate": 0.001,
      "loss": 2.5236,
      "step": 171144
    },
    {
      "epoch": 32.86,
      "learning_rate": 0.001,
      "loss": 2.5156,
      "step": 171156
    },
    {
      "epoch": 32.87,
      "learning_rate": 0.001,
      "loss": 2.5259,
      "step": 171168
    },
    {
      "epoch": 32.87,
      "learning_rate": 0.001,
      "loss": 2.5281,
      "step": 171180
    },
    {
      "epoch": 32.87,
      "learning_rate": 0.001,
      "loss": 2.5264,
      "step": 171192
    },
    {
      "epoch": 32.87,
      "learning_rate": 0.001,
      "loss": 2.528,
      "step": 171204
    },
    {
      "epoch": 32.88,
      "learning_rate": 0.001,
      "loss": 2.5247,
      "step": 171216
    },
    {
      "epoch": 32.88,
      "learning_rate": 0.001,
      "loss": 2.5284,
      "step": 171228
    },
    {
      "epoch": 32.88,
      "learning_rate": 0.001,
      "loss": 2.5258,
      "step": 171240
    },
    {
      "epoch": 32.88,
      "eval_ag_news_accuracy": 0.32809375,
      "eval_ag_news_bleu_score": 5.020681871710474,
      "eval_ag_news_bleu_score_sem": 0.15906025845681282,
      "eval_ag_news_emb_cos_sim": 0.816615104675293,
      "eval_ag_news_emb_cos_sim_sem": 0.007979907807124535,
      "eval_ag_news_emb_top1_equal": 0.265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4913876056671143,
      "eval_ag_news_n_ngrams_match_1": 14.416,
      "eval_ag_news_n_ngrams_match_2": 3.374,
      "eval_ag_news_n_ngrams_match_3": 0.984,
      "eval_ag_news_num_pred_words": 47.11,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.831473251518325,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3536024718794171,
      "eval_ag_news_runtime": 10.8362,
      "eval_ag_news_samples_per_second": 46.142,
      "eval_ag_news_steps_per_second": 0.092,
      "eval_ag_news_token_set_f1": 0.3571776617999564,
      "eval_ag_news_token_set_f1_sem": 0.0043720717154677935,
      "eval_ag_news_token_set_precision": 0.34509287064370947,
      "eval_ag_news_token_set_recall": 0.38349111843030853,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 171250
    },
    {
      "epoch": 32.88,
      "eval_anthropic_toxic_prompts_accuracy": 0.11575,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2108571731562097,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12656372884784825,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6791132688522339,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010473322609018615,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.140625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030847557647994725,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2023022174835205,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.338,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.928,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.706,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.506,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.589074480688563,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21903306242132764,
      "eval_anthropic_toxic_prompts_runtime": 10.2545,
      "eval_anthropic_toxic_prompts_samples_per_second": 48.759,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.098,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36427545760431707,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006703274535485368,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.444038474671246,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3347992232530233,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 171250
    },
    {
      "epoch": 32.88,
      "eval_arxiv_accuracy": 0.35146875,
      "eval_arxiv_bleu_score": 4.529828456965386,
      "eval_arxiv_bleu_score_sem": 0.13127557612397123,
      "eval_arxiv_emb_cos_sim": 0.7781928181648254,
      "eval_arxiv_emb_cos_sim_sem": 0.006602027230538197,
      "eval_arxiv_emb_top1_equal": 0.2734375,
      "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3569257259368896,
      "eval_arxiv_n_ngrams_match_1": 15.466,
      "eval_arxiv_n_ngrams_match_2": 3.05,
      "eval_arxiv_n_ngrams_match_3": 0.716,
      "eval_arxiv_num_pred_words": 40.812,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.70082092275716,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36981225705842213,
      "eval_arxiv_runtime": 10.3627,
      "eval_arxiv_samples_per_second": 48.25,
      "eval_arxiv_steps_per_second": 0.096,
      "eval_arxiv_token_set_f1": 0.36204740011173986,
      "eval_arxiv_token_set_f1_sem": 0.004320431712320082,
      "eval_arxiv_token_set_precision": 0.3152460824655595,
      "eval_arxiv_token_set_recall": 0.43975130406007196,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 171250
    },
    {
      "epoch": 32.88,
      "eval_python_code_alpaca_accuracy": 0.1644375,
      "eval_python_code_alpaca_bleu_score": 4.743622967198449,
      "eval_python_code_alpaca_bleu_score_sem": 0.1552756917165026,
      "eval_python_code_alpaca_emb_cos_sim": 0.7592819929122925,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008080794191569457,
      "eval_python_code_alpaca_emb_top1_equal": 0.15625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8483197689056396,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.836,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.992,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.058,
      "eval_python_code_alpaca_num_pred_words": 43.094,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.25875876153463,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.332390215647945,
      "eval_python_code_alpaca_runtime": 10.4448,
      "eval_python_code_alpaca_samples_per_second": 47.871,
      "eval_python_code_alpaca_steps_per_second": 0.096,
      "eval_python_code_alpaca_token_set_f1": 0.4826877469573501,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0058548081650853295,
      "eval_python_code_alpaca_token_set_precision": 0.5389869087933613,
      "eval_python_code_alpaca_token_set_recall": 0.46343555470418046,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 171250
    },
    {
      "epoch": 32.88,
      "eval_wikibio_accuracy": 0.3256875,
      "eval_wikibio_bleu_score": 6.215087865876436,
      "eval_wikibio_bleu_score_sem": 0.22410510272224404,
      "eval_wikibio_emb_cos_sim": 0.7564117908477783,
      "eval_wikibio_emb_cos_sim_sem": 0.008450591957493057,
      "eval_wikibio_emb_top1_equal": 0.203125,
      "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6940345764160156,
      "eval_wikibio_n_ngrams_match_1": 10.506,
      "eval_wikibio_n_ngrams_match_2": 3.588,
      "eval_wikibio_n_ngrams_match_3": 1.37,
      "eval_wikibio_num_pred_words": 37.344,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 40.20673731230611,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36541968914994144,
      "eval_wikibio_runtime": 25.6666,
      "eval_wikibio_samples_per_second": 19.481,
      "eval_wikibio_steps_per_second": 0.039,
      "eval_wikibio_token_set_f1": 0.32750053489029707,
      "eval_wikibio_token_set_f1_sem": 0.005215283465512357,
      "eval_wikibio_token_set_precision": 0.33889135443982826,
      "eval_wikibio_token_set_recall": 0.33166598755298143,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 171250
    },
    {
      "epoch": 32.88,
      "eval_nq_accuracy": 0.53184375,
      "eval_nq_bleu_score": 12.131813804947837,
      "eval_nq_bleu_score_sem": 0.48372117846921714,
      "eval_nq_emb_cos_sim": 0.8388272523880005,
      "eval_nq_emb_cos_sim_sem": 0.006887052537582705,
      "eval_nq_emb_top1_equal": 0.2578125,
      "eval_nq_emb_top1_equal_sem": 0.038815656435002115,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.14902925491333,
      "eval_nq_n_ngrams_match_1": 23.478,
      "eval_nq_n_ngrams_match_2": 8.732,
      "eval_nq_n_ngrams_match_3": 4.068,
      "eval_nq_num_pred_words": 49.476,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.576528731714106,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45394393092860086,
      "eval_nq_runtime": 21.5612,
      "eval_nq_samples_per_second": 23.19,
      "eval_nq_steps_per_second": 0.046,
      "eval_nq_token_set_f1": 0.4685044679630783,
      "eval_nq_token_set_f1_sem": 0.004989990913179821,
      "eval_nq_token_set_precision": 0.42823506640878917,
      "eval_nq_token_set_recall": 0.5255791990724895,
      "eval_nq_true_num_tokens": 64.0,
      "step": 171250
    },
    {
      "epoch": 32.88,
      "learning_rate": 0.001,
      "loss": 2.5255,
      "step": 171252
    },
    {
      "epoch": 32.88,
      "learning_rate": 0.001,
      "loss": 2.5224,
      "step": 171264
    },
    {
      "epoch": 32.89,
      "learning_rate": 0.001,
      "loss": 2.5308,
      "step": 171276
    },
    {
      "epoch": 32.89,
      "learning_rate": 0.001,
      "loss": 2.5166,
      "step": 171288
    },
    {
      "epoch": 32.89,
      "learning_rate": 0.001,
      "loss": 2.5283,
      "step": 171300
    },
    {
      "epoch": 32.89,
      "learning_rate": 0.001,
      "loss": 2.5339,
      "step": 171312
    },
    {
      "epoch": 32.9,
      "learning_rate": 0.001,
      "loss": 2.5314,
      "step": 171324
    },
    {
      "epoch": 32.9,
      "learning_rate": 0.001,
      "loss": 2.5313,
      "step": 171336
    },
    {
      "epoch": 32.9,
      "learning_rate": 0.001,
      "loss": 2.5184,
      "step": 171348
    },
    {
      "epoch": 32.9,
      "learning_rate": 0.001,
      "loss": 2.5271,
      "step": 171360
    },
    {
      "epoch": 32.91,
      "learning_rate": 0.001,
      "loss": 2.524,
      "step": 171372
    },
    {
      "epoch": 32.91,
      "learning_rate": 0.001,
      "loss": 2.5192,
      "step": 171384
    },
    {
      "epoch": 32.91,
      "learning_rate": 0.001,
      "loss": 2.5269,
      "step": 171396
    },
    {
      "epoch": 32.91,
      "learning_rate": 0.001,
      "loss": 2.5327,
      "step": 171408
    },
    {
      "epoch": 32.91,
      "learning_rate": 0.001,
      "loss": 2.525,
      "step": 171420
    },
    {
      "epoch": 32.92,
      "learning_rate": 0.001,
      "loss": 2.5333,
      "step": 171432
    },
    {
      "epoch": 32.92,
      "learning_rate": 0.001,
      "loss": 2.5284,
      "step": 171444
    },
    {
      "epoch": 32.92,
      "learning_rate": 0.001,
      "loss": 2.5261,
      "step": 171456
    },
    {
      "epoch": 32.92,
      "learning_rate": 0.001,
      "loss": 2.5347,
      "step": 171468
    },
    {
      "epoch": 32.93,
      "learning_rate": 0.001,
      "loss": 2.5296,
      "step": 171480
    },
    {
      "epoch": 32.93,
      "learning_rate": 0.001,
      "loss": 2.5278,
      "step": 171492
    },
    {
      "epoch": 32.93,
      "learning_rate": 0.001,
      "loss": 2.5194,
      "step": 171504
    },
    {
      "epoch": 32.93,
      "learning_rate": 0.001,
      "loss": 2.5217,
      "step": 171516
    },
    {
      "epoch": 32.94,
      "learning_rate": 0.001,
      "loss": 2.5209,
      "step": 171528
    },
    {
      "epoch": 32.94,
      "learning_rate": 0.001,
      "loss": 2.5344,
      "step": 171540
    },
    {
      "epoch": 32.94,
      "learning_rate": 0.001,
      "loss": 2.5178,
      "step": 171552
    },
    {
      "epoch": 32.94,
      "learning_rate": 0.001,
      "loss": 2.5361,
      "step": 171564
    },
    {
      "epoch": 32.94,
      "learning_rate": 0.001,
      "loss": 2.5317,
      "step": 171576
    },
    {
      "epoch": 32.95,
      "learning_rate": 0.001,
      "loss": 2.5233,
      "step": 171588
    },
    {
      "epoch": 32.95,
      "learning_rate": 0.001,
      "loss": 2.5313,
      "step": 171600
    },
    {
      "epoch": 32.95,
      "learning_rate": 0.001,
      "loss": 2.5286,
      "step": 171612
    },
    {
      "epoch": 32.95,
      "learning_rate": 0.001,
      "loss": 2.5274,
      "step": 171624
    },
    {
      "epoch": 32.96,
      "learning_rate": 0.001,
      "loss": 2.5298,
      "step": 171636
    },
    {
      "epoch": 32.96,
      "learning_rate": 0.001,
      "loss": 2.5351,
      "step": 171648
    },
    {
      "epoch": 32.96,
      "learning_rate": 0.001,
      "loss": 2.5316,
      "step": 171660
    },
    {
      "epoch": 32.96,
      "learning_rate": 0.001,
      "loss": 2.5289,
      "step": 171672
    },
    {
      "epoch": 32.97,
      "learning_rate": 0.001,
      "loss": 2.5376,
      "step": 171684
    },
    {
      "epoch": 32.97,
      "learning_rate": 0.001,
      "loss": 2.5312,
      "step": 171696
    },
    {
      "epoch": 32.97,
      "learning_rate": 0.001,
      "loss": 2.5319,
      "step": 171708
    },
    {
      "epoch": 32.97,
      "learning_rate": 0.001,
      "loss": 2.526,
      "step": 171720
    },
    {
      "epoch": 32.97,
      "learning_rate": 0.001,
      "loss": 2.5241,
      "step": 171732
    },
    {
      "epoch": 32.98,
      "learning_rate": 0.001,
      "loss": 2.5342,
      "step": 171744
    },
    {
      "epoch": 32.98,
      "learning_rate": 0.001,
      "loss": 2.5354,
      "step": 171756
    },
    {
      "epoch": 32.98,
      "learning_rate": 0.001,
      "loss": 2.5343,
      "step": 171768
    },
    {
      "epoch": 32.98,
      "learning_rate": 0.001,
      "loss": 2.5286,
      "step": 171780
    },
    {
      "epoch": 32.99,
      "learning_rate": 0.001,
      "loss": 2.5237,
      "step": 171792
    },
    {
      "epoch": 32.99,
      "learning_rate": 0.001,
      "loss": 2.5252,
      "step": 171804
    },
    {
      "epoch": 32.99,
      "learning_rate": 0.001,
      "loss": 2.5351,
      "step": 171816
    },
    {
      "epoch": 32.99,
      "learning_rate": 0.001,
      "loss": 2.52,
      "step": 171828
    },
    {
      "epoch": 33.0,
      "learning_rate": 0.001,
      "loss": 2.5233,
      "step": 171840
    },
    {
      "epoch": 33.0,
      "learning_rate": 0.001,
      "loss": 2.5219,
      "step": 171852
    },
    {
      "epoch": 33.0,
      "learning_rate": 0.001,
      "loss": 2.5295,
      "step": 171864
    },
    {
      "epoch": 33.0,
      "eval_ag_news_accuracy": 0.32559375,
      "eval_ag_news_bleu_score": 4.9235026984387655,
      "eval_ag_news_bleu_score_sem": 0.16109972817183485,
      "eval_ag_news_emb_cos_sim": 0.8212471604347229,
      "eval_ag_news_emb_cos_sim_sem": 0.006564521145175057,
      "eval_ag_news_emb_top1_equal": 0.1953125,
      "eval_ag_news_emb_top1_equal_sem": 0.035178457165496856,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.508206605911255,
      "eval_ag_news_n_ngrams_match_1": 14.346,
      "eval_ag_news_n_ngrams_match_2": 3.2,
      "eval_ag_news_n_ngrams_match_3": 0.93,
      "eval_ag_news_num_pred_words": 46.906,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.388335615284056,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35506901925484513,
      "eval_ag_news_runtime": 10.3829,
      "eval_ag_news_samples_per_second": 48.156,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.35708858837041757,
      "eval_ag_news_token_set_f1_sem": 0.004292817297193934,
      "eval_ag_news_token_set_precision": 0.3429343159509728,
      "eval_ag_news_token_set_recall": 0.38618130192334754,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 171875
    },
    {
      "epoch": 33.0,
      "eval_anthropic_toxic_prompts_accuracy": 0.1151875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.157861456491082,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12697962927166806,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6729145050048828,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009848224259507321,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2260899543762207,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.236,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.9,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.704,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.644,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.181005350706855,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21613554276009106,
      "eval_anthropic_toxic_prompts_runtime": 10.1711,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.159,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.098,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3608151069441283,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00668865426346986,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.43906316172736687,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3348320089793444,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 171875
    },
    {
      "epoch": 33.0,
      "eval_arxiv_accuracy": 0.34865625,
      "eval_arxiv_bleu_score": 4.489403465644735,
      "eval_arxiv_bleu_score_sem": 0.12713303055628591,
      "eval_arxiv_emb_cos_sim": 0.7804597616195679,
      "eval_arxiv_emb_cos_sim_sem": 0.006157709885180288,
      "eval_arxiv_emb_top1_equal": 0.328125,
      "eval_arxiv_emb_top1_equal_sem": 0.041664103776406315,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.37156081199646,
      "eval_arxiv_n_ngrams_match_1": 15.386,
      "eval_arxiv_n_ngrams_match_2": 3.04,
      "eval_arxiv_n_ngrams_match_3": 0.71,
      "eval_arxiv_num_pred_words": 41.134,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.12394860969874,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36819498306324844,
      "eval_arxiv_runtime": 10.3302,
      "eval_arxiv_samples_per_second": 48.402,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.3609062302540924,
      "eval_arxiv_token_set_f1_sem": 0.004053174006897004,
      "eval_arxiv_token_set_precision": 0.3138377129826513,
      "eval_arxiv_token_set_recall": 0.4429384331972991,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 171875
    },
    {
      "epoch": 33.0,
      "eval_python_code_alpaca_accuracy": 0.161875,
      "eval_python_code_alpaca_bleu_score": 4.62960173006862,
      "eval_python_code_alpaca_bleu_score_sem": 0.14550535826787744,
      "eval_python_code_alpaca_emb_cos_sim": 0.7594995498657227,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.00745378738007403,
      "eval_python_code_alpaca_emb_top1_equal": 0.125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8776822090148926,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.834,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.83,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.964,
      "eval_python_code_alpaca_num_pred_words": 42.994,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.773031231401315,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33606171295275333,
      "eval_python_code_alpaca_runtime": 25.4205,
      "eval_python_code_alpaca_samples_per_second": 19.669,
      "eval_python_code_alpaca_steps_per_second": 0.039,
      "eval_python_code_alpaca_token_set_f1": 0.4748090821307063,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005542073959793082,
      "eval_python_code_alpaca_token_set_precision": 0.5367738513795776,
      "eval_python_code_alpaca_token_set_recall": 0.4488405713810996,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 171875
    },
    {
      "epoch": 33.0,
      "eval_wikibio_accuracy": 0.32353125,
      "eval_wikibio_bleu_score": 6.051798141522513,
      "eval_wikibio_bleu_score_sem": 0.2129569468297189,
      "eval_wikibio_emb_cos_sim": 0.7415202856063843,
      "eval_wikibio_emb_cos_sim_sem": 0.007917238736111377,
      "eval_wikibio_emb_top1_equal": 0.1796875,
      "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.70955753326416,
      "eval_wikibio_n_ngrams_match_1": 10.354,
      "eval_wikibio_n_ngrams_match_2": 3.46,
      "eval_wikibio_n_ngrams_match_3": 1.258,
      "eval_wikibio_num_pred_words": 36.76,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 40.83573407509388,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36204102161015883,
      "eval_wikibio_runtime": 25.1312,
      "eval_wikibio_samples_per_second": 19.896,
      "eval_wikibio_steps_per_second": 0.04,
      "eval_wikibio_token_set_f1": 0.3242490389331767,
      "eval_wikibio_token_set_f1_sem": 0.0053147265560793844,
      "eval_wikibio_token_set_precision": 0.33364966270567153,
      "eval_wikibio_token_set_recall": 0.3312595464034135,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 171875
    },
    {
      "epoch": 33.0,
      "eval_nq_accuracy": 0.53425,
      "eval_nq_bleu_score": 12.233642418000283,
      "eval_nq_bleu_score_sem": 0.4961011223190279,
      "eval_nq_emb_cos_sim": 0.8402355909347534,
      "eval_nq_emb_cos_sim_sem": 0.006836898446399249,
      "eval_nq_emb_top1_equal": 0.3984375,
      "eval_nq_emb_top1_equal_sem": 0.04344287990767221,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.146106243133545,
      "eval_nq_n_ngrams_match_1": 23.392,
      "eval_nq_n_ngrams_match_2": 8.764,
      "eval_nq_n_ngrams_match_3": 4.098,
      "eval_nq_num_pred_words": 49.17,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.551496040450631,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4530372491260065,
      "eval_nq_runtime": 10.76,
      "eval_nq_samples_per_second": 46.468,
      "eval_nq_steps_per_second": 0.093,
      "eval_nq_token_set_f1": 0.46608364421916054,
      "eval_nq_token_set_f1_sem": 0.004913049429546021,
      "eval_nq_token_set_precision": 0.42432459845450177,
      "eval_nq_token_set_recall": 0.5255722036592672,
      "eval_nq_true_num_tokens": 64.0,
      "step": 171875
    },
    {
      "epoch": 33.0,
      "learning_rate": 0.001,
      "loss": 2.5144,
      "step": 171876
    },
    {
      "epoch": 33.0,
      "learning_rate": 0.001,
      "loss": 2.5142,
      "step": 171888
    },
    {
      "epoch": 33.01,
      "learning_rate": 0.001,
      "loss": 2.5057,
      "step": 171900
    },
    {
      "epoch": 33.01,
      "learning_rate": 0.001,
      "loss": 2.5064,
      "step": 171912
    },
    {
      "epoch": 33.01,
      "learning_rate": 0.001,
      "loss": 2.5135,
      "step": 171924
    },
    {
      "epoch": 33.01,
      "learning_rate": 0.001,
      "loss": 2.5101,
      "step": 171936
    },
    {
      "epoch": 33.02,
      "learning_rate": 0.001,
      "loss": 2.5145,
      "step": 171948
    },
    {
      "epoch": 33.02,
      "learning_rate": 0.001,
      "loss": 2.5084,
      "step": 171960
    },
    {
      "epoch": 33.02,
      "learning_rate": 0.001,
      "loss": 2.5045,
      "step": 171972
    },
    {
      "epoch": 33.02,
      "learning_rate": 0.001,
      "loss": 2.4986,
      "step": 171984
    },
    {
      "epoch": 33.03,
      "learning_rate": 0.001,
      "loss": 2.5126,
      "step": 171996
    },
    {
      "epoch": 33.03,
      "learning_rate": 0.001,
      "loss": 2.5159,
      "step": 172008
    },
    {
      "epoch": 33.03,
      "learning_rate": 0.001,
      "loss": 2.514,
      "step": 172020
    },
    {
      "epoch": 33.03,
      "learning_rate": 0.001,
      "loss": 2.5153,
      "step": 172032
    },
    {
      "epoch": 33.03,
      "learning_rate": 0.001,
      "loss": 2.518,
      "step": 172044
    },
    {
      "epoch": 33.04,
      "learning_rate": 0.001,
      "loss": 2.5139,
      "step": 172056
    },
    {
      "epoch": 33.04,
      "learning_rate": 0.001,
      "loss": 2.5009,
      "step": 172068
    },
    {
      "epoch": 33.04,
      "learning_rate": 0.001,
      "loss": 2.5137,
      "step": 172080
    },
    {
      "epoch": 33.04,
      "learning_rate": 0.001,
      "loss": 2.5115,
      "step": 172092
    },
    {
      "epoch": 33.05,
      "learning_rate": 0.001,
      "loss": 2.5168,
      "step": 172104
    },
    {
      "epoch": 33.05,
      "learning_rate": 0.001,
      "loss": 2.5144,
      "step": 172116
    },
    {
      "epoch": 33.05,
      "learning_rate": 0.001,
      "loss": 2.5074,
      "step": 172128
    },
    {
      "epoch": 33.05,
      "learning_rate": 0.001,
      "loss": 2.5071,
      "step": 172140
    },
    {
      "epoch": 33.06,
      "learning_rate": 0.001,
      "loss": 2.5126,
      "step": 172152
    },
    {
      "epoch": 33.06,
      "learning_rate": 0.001,
      "loss": 2.5059,
      "step": 172164
    },
    {
      "epoch": 33.06,
      "learning_rate": 0.001,
      "loss": 2.5108,
      "step": 172176
    },
    {
      "epoch": 33.06,
      "learning_rate": 0.001,
      "loss": 2.5093,
      "step": 172188
    },
    {
      "epoch": 33.06,
      "learning_rate": 0.001,
      "loss": 2.5123,
      "step": 172200
    },
    {
      "epoch": 33.07,
      "learning_rate": 0.001,
      "loss": 2.5147,
      "step": 172212
    },
    {
      "epoch": 33.07,
      "learning_rate": 0.001,
      "loss": 2.5155,
      "step": 172224
    },
    {
      "epoch": 33.07,
      "learning_rate": 0.001,
      "loss": 2.5131,
      "step": 172236
    },
    {
      "epoch": 33.07,
      "learning_rate": 0.001,
      "loss": 2.5156,
      "step": 172248
    },
    {
      "epoch": 33.08,
      "learning_rate": 0.001,
      "loss": 2.5135,
      "step": 172260
    },
    {
      "epoch": 33.08,
      "learning_rate": 0.001,
      "loss": 2.5142,
      "step": 172272
    },
    {
      "epoch": 33.08,
      "learning_rate": 0.001,
      "loss": 2.515,
      "step": 172284
    },
    {
      "epoch": 33.08,
      "learning_rate": 0.001,
      "loss": 2.512,
      "step": 172296
    },
    {
      "epoch": 33.09,
      "learning_rate": 0.001,
      "loss": 2.5076,
      "step": 172308
    },
    {
      "epoch": 33.09,
      "learning_rate": 0.001,
      "loss": 2.5157,
      "step": 172320
    },
    {
      "epoch": 33.09,
      "learning_rate": 0.001,
      "loss": 2.511,
      "step": 172332
    },
    {
      "epoch": 33.09,
      "learning_rate": 0.001,
      "loss": 2.512,
      "step": 172344
    },
    {
      "epoch": 33.09,
      "learning_rate": 0.001,
      "loss": 2.5159,
      "step": 172356
    },
    {
      "epoch": 33.1,
      "learning_rate": 0.001,
      "loss": 2.5094,
      "step": 172368
    },
    {
      "epoch": 33.1,
      "learning_rate": 0.001,
      "loss": 2.5175,
      "step": 172380
    },
    {
      "epoch": 33.1,
      "learning_rate": 0.001,
      "loss": 2.5173,
      "step": 172392
    },
    {
      "epoch": 33.1,
      "learning_rate": 0.001,
      "loss": 2.5102,
      "step": 172404
    },
    {
      "epoch": 33.11,
      "learning_rate": 0.001,
      "loss": 2.5161,
      "step": 172416
    },
    {
      "epoch": 33.11,
      "learning_rate": 0.001,
      "loss": 2.5125,
      "step": 172428
    },
    {
      "epoch": 33.11,
      "learning_rate": 0.001,
      "loss": 2.515,
      "step": 172440
    },
    {
      "epoch": 33.11,
      "learning_rate": 0.001,
      "loss": 2.512,
      "step": 172452
    },
    {
      "epoch": 33.12,
      "learning_rate": 0.001,
      "loss": 2.5046,
      "step": 172464
    },
    {
      "epoch": 33.12,
      "learning_rate": 0.001,
      "loss": 2.5215,
      "step": 172476
    },
    {
      "epoch": 33.12,
      "learning_rate": 0.001,
      "loss": 2.506,
      "step": 172488
    },
    {
      "epoch": 33.12,
      "learning_rate": 0.001,
      "loss": 2.5147,
      "step": 172500
    },
    {
      "epoch": 33.12,
      "eval_ag_news_accuracy": 0.327,
      "eval_ag_news_bleu_score": 4.840854296628451,
      "eval_ag_news_bleu_score_sem": 0.14392723937593332,
      "eval_ag_news_emb_cos_sim": 0.8134829998016357,
      "eval_ag_news_emb_cos_sim_sem": 0.007196347185379901,
      "eval_ag_news_emb_top1_equal": 0.203125,
      "eval_ag_news_emb_top1_equal_sem": 0.03570055125142555,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.519099235534668,
      "eval_ag_news_n_ngrams_match_1": 14.202,
      "eval_ag_news_n_ngrams_match_2": 3.218,
      "eval_ag_news_n_ngrams_match_3": 0.9,
      "eval_ag_news_num_pred_words": 46.586,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.75401035304114,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3528690318938258,
      "eval_ag_news_runtime": 10.7349,
      "eval_ag_news_samples_per_second": 46.577,
      "eval_ag_news_steps_per_second": 0.093,
      "eval_ag_news_token_set_f1": 0.35344379295386363,
      "eval_ag_news_token_set_f1_sem": 0.004393905665406953,
      "eval_ag_news_token_set_precision": 0.33958679540526815,
      "eval_ag_news_token_set_recall": 0.38186312938018047,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 172500
    },
    {
      "epoch": 33.12,
      "eval_anthropic_toxic_prompts_accuracy": 0.1149375,
      "eval_anthropic_toxic_prompts_bleu_score": 3.191643144304772,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1184516723759307,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.673818826675415,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008044502462279036,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1328125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030114394778901498,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2498018741607666,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.218,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.978,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.766,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.804,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.785230690604468,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2134038238842198,
      "eval_anthropic_toxic_prompts_runtime": 10.2652,
      "eval_anthropic_toxic_prompts_samples_per_second": 48.708,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.097,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3589227578981022,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006480609859653643,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.442542947179868,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3279915647536028,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 172500
    },
    {
      "epoch": 33.12,
      "eval_arxiv_accuracy": 0.3498125,
      "eval_arxiv_bleu_score": 4.455771918575809,
      "eval_arxiv_bleu_score_sem": 0.13182251487229993,
      "eval_arxiv_emb_cos_sim": 0.7759593725204468,
      "eval_arxiv_emb_cos_sim_sem": 0.006880422970020844,
      "eval_arxiv_emb_top1_equal": 0.3046875,
      "eval_arxiv_emb_top1_equal_sem": 0.04084279867618665,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3673794269561768,
      "eval_arxiv_n_ngrams_match_1": 15.38,
      "eval_arxiv_n_ngrams_match_2": 3.056,
      "eval_arxiv_n_ngrams_match_3": 0.718,
      "eval_arxiv_num_pred_words": 40.688,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.002424413456776,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3675317301340385,
      "eval_arxiv_runtime": 10.7617,
      "eval_arxiv_samples_per_second": 46.461,
      "eval_arxiv_steps_per_second": 0.093,
      "eval_arxiv_token_set_f1": 0.36071689594917433,
      "eval_arxiv_token_set_f1_sem": 0.004312948057243856,
      "eval_arxiv_token_set_precision": 0.3123242844667363,
      "eval_arxiv_token_set_recall": 0.44326443833838713,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 172500
    },
    {
      "epoch": 33.12,
      "eval_python_code_alpaca_accuracy": 0.1641875,
      "eval_python_code_alpaca_bleu_score": 4.6513737917458675,
      "eval_python_code_alpaca_bleu_score_sem": 0.14811492889988476,
      "eval_python_code_alpaca_emb_cos_sim": 0.7585639953613281,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008046173848240298,
      "eval_python_code_alpaca_emb_top1_equal": 0.171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03347745514062371,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8754916191101074,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.97,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.036,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.026,
      "eval_python_code_alpaca_num_pred_words": 43.902,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.734140421068687,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3336501299400956,
      "eval_python_code_alpaca_runtime": 10.5101,
      "eval_python_code_alpaca_samples_per_second": 47.573,
      "eval_python_code_alpaca_steps_per_second": 0.095,
      "eval_python_code_alpaca_token_set_f1": 0.4796706126926143,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005703290595550568,
      "eval_python_code_alpaca_token_set_precision": 0.5477300841077309,
      "eval_python_code_alpaca_token_set_recall": 0.4519090648944119,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 172500
    },
    {
      "epoch": 33.12,
      "eval_wikibio_accuracy": 0.32640625,
      "eval_wikibio_bleu_score": 6.154386102374677,
      "eval_wikibio_bleu_score_sem": 0.21504046650238473,
      "eval_wikibio_emb_cos_sim": 0.7461612224578857,
      "eval_wikibio_emb_cos_sim_sem": 0.007946979264636384,
      "eval_wikibio_emb_top1_equal": 0.21875,
      "eval_wikibio_emb_top1_equal_sem": 0.03668319712192295,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.726470947265625,
      "eval_wikibio_n_ngrams_match_1": 10.404,
      "eval_wikibio_n_ngrams_match_2": 3.528,
      "eval_wikibio_n_ngrams_match_3": 1.28,
      "eval_wikibio_num_pred_words": 37.06,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 41.53227962860505,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3653571718231684,
      "eval_wikibio_runtime": 10.6177,
      "eval_wikibio_samples_per_second": 47.091,
      "eval_wikibio_steps_per_second": 0.094,
      "eval_wikibio_token_set_f1": 0.3282029153977967,
      "eval_wikibio_token_set_f1_sem": 0.005138240614149754,
      "eval_wikibio_token_set_precision": 0.33698672929848705,
      "eval_wikibio_token_set_recall": 0.33781476723709897,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 172500
    },
    {
      "epoch": 33.12,
      "eval_nq_accuracy": 0.53278125,
      "eval_nq_bleu_score": 11.941494776015876,
      "eval_nq_bleu_score_sem": 0.4903975075066142,
      "eval_nq_emb_cos_sim": 0.8348308801651001,
      "eval_nq_emb_cos_sim_sem": 0.007084477595630845,
      "eval_nq_emb_top1_equal": 0.2734375,
      "eval_nq_emb_top1_equal_sem": 0.03955156411760461,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.149851083755493,
      "eval_nq_n_ngrams_match_1": 23.2,
      "eval_nq_n_ngrams_match_2": 8.58,
      "eval_nq_n_ngrams_match_3": 4.014,
      "eval_nq_num_pred_words": 49.454,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.583580067490153,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4492532235681832,
      "eval_nq_runtime": 10.8861,
      "eval_nq_samples_per_second": 45.93,
      "eval_nq_steps_per_second": 0.092,
      "eval_nq_token_set_f1": 0.4636222701951168,
      "eval_nq_token_set_f1_sem": 0.004804832178364878,
      "eval_nq_token_set_precision": 0.4221118825741288,
      "eval_nq_token_set_recall": 0.522550337010352,
      "eval_nq_true_num_tokens": 64.0,
      "step": 172500
    },
    {
      "epoch": 33.12,
      "learning_rate": 0.001,
      "loss": 2.5112,
      "step": 172512
    },
    {
      "epoch": 33.13,
      "learning_rate": 0.001,
      "loss": 2.5123,
      "step": 172524
    },
    {
      "epoch": 33.13,
      "learning_rate": 0.001,
      "loss": 2.5158,
      "step": 172536
    },
    {
      "epoch": 33.13,
      "learning_rate": 0.001,
      "loss": 2.5197,
      "step": 172548
    },
    {
      "epoch": 33.13,
      "learning_rate": 0.001,
      "loss": 2.5128,
      "step": 172560
    },
    {
      "epoch": 33.14,
      "learning_rate": 0.001,
      "loss": 2.5151,
      "step": 172572
    },
    {
      "epoch": 33.14,
      "learning_rate": 0.001,
      "loss": 2.5117,
      "step": 172584
    },
    {
      "epoch": 33.14,
      "learning_rate": 0.001,
      "loss": 2.5111,
      "step": 172596
    },
    {
      "epoch": 33.14,
      "learning_rate": 0.001,
      "loss": 2.5152,
      "step": 172608
    },
    {
      "epoch": 33.15,
      "learning_rate": 0.001,
      "loss": 2.5134,
      "step": 172620
    },
    {
      "epoch": 33.15,
      "learning_rate": 0.001,
      "loss": 2.5141,
      "step": 172632
    },
    {
      "epoch": 33.15,
      "learning_rate": 0.001,
      "loss": 2.5086,
      "step": 172644
    },
    {
      "epoch": 33.15,
      "learning_rate": 0.001,
      "loss": 2.5204,
      "step": 172656
    },
    {
      "epoch": 33.15,
      "learning_rate": 0.001,
      "loss": 2.5109,
      "step": 172668
    },
    {
      "epoch": 33.16,
      "learning_rate": 0.001,
      "loss": 2.5128,
      "step": 172680
    },
    {
      "epoch": 33.16,
      "learning_rate": 0.001,
      "loss": 2.5167,
      "step": 172692
    },
    {
      "epoch": 33.16,
      "learning_rate": 0.001,
      "loss": 2.5133,
      "step": 172704
    },
    {
      "epoch": 33.16,
      "learning_rate": 0.001,
      "loss": 2.5143,
      "step": 172716
    },
    {
      "epoch": 33.17,
      "learning_rate": 0.001,
      "loss": 2.5111,
      "step": 172728
    },
    {
      "epoch": 33.17,
      "learning_rate": 0.001,
      "loss": 2.5167,
      "step": 172740
    },
    {
      "epoch": 33.17,
      "learning_rate": 0.001,
      "loss": 2.5156,
      "step": 172752
    },
    {
      "epoch": 33.17,
      "learning_rate": 0.001,
      "loss": 2.5035,
      "step": 172764
    },
    {
      "epoch": 33.18,
      "learning_rate": 0.001,
      "loss": 2.5288,
      "step": 172776
    },
    {
      "epoch": 33.18,
      "learning_rate": 0.001,
      "loss": 2.5025,
      "step": 172788
    },
    {
      "epoch": 33.18,
      "learning_rate": 0.001,
      "loss": 2.5339,
      "step": 172800
    },
    {
      "epoch": 33.18,
      "learning_rate": 0.001,
      "loss": 2.5219,
      "step": 172812
    },
    {
      "epoch": 33.18,
      "learning_rate": 0.001,
      "loss": 2.5235,
      "step": 172824
    },
    {
      "epoch": 33.19,
      "learning_rate": 0.001,
      "loss": 2.5185,
      "step": 172836
    },
    {
      "epoch": 33.19,
      "learning_rate": 0.001,
      "loss": 2.5173,
      "step": 172848
    },
    {
      "epoch": 33.19,
      "learning_rate": 0.001,
      "loss": 2.504,
      "step": 172860
    },
    {
      "epoch": 33.19,
      "learning_rate": 0.001,
      "loss": 2.5206,
      "step": 172872
    },
    {
      "epoch": 33.2,
      "learning_rate": 0.001,
      "loss": 2.5154,
      "step": 172884
    },
    {
      "epoch": 33.2,
      "learning_rate": 0.001,
      "loss": 2.5117,
      "step": 172896
    },
    {
      "epoch": 33.2,
      "learning_rate": 0.001,
      "loss": 2.5095,
      "step": 172908
    },
    {
      "epoch": 33.2,
      "learning_rate": 0.001,
      "loss": 2.5172,
      "step": 172920
    },
    {
      "epoch": 33.21,
      "learning_rate": 0.001,
      "loss": 2.5236,
      "step": 172932
    },
    {
      "epoch": 33.21,
      "learning_rate": 0.001,
      "loss": 2.516,
      "step": 172944
    },
    {
      "epoch": 33.21,
      "learning_rate": 0.001,
      "loss": 2.5201,
      "step": 172956
    },
    {
      "epoch": 33.21,
      "learning_rate": 0.001,
      "loss": 2.5225,
      "step": 172968
    },
    {
      "epoch": 33.21,
      "learning_rate": 0.001,
      "loss": 2.5055,
      "step": 172980
    },
    {
      "epoch": 33.22,
      "learning_rate": 0.001,
      "loss": 2.5101,
      "step": 172992
    },
    {
      "epoch": 33.22,
      "learning_rate": 0.001,
      "loss": 2.5084,
      "step": 173004
    },
    {
      "epoch": 33.22,
      "learning_rate": 0.001,
      "loss": 2.5229,
      "step": 173016
    },
    {
      "epoch": 33.22,
      "learning_rate": 0.001,
      "loss": 2.5254,
      "step": 173028
    },
    {
      "epoch": 33.23,
      "learning_rate": 0.001,
      "loss": 2.5126,
      "step": 173040
    },
    {
      "epoch": 33.23,
      "learning_rate": 0.001,
      "loss": 2.528,
      "step": 173052
    },
    {
      "epoch": 33.23,
      "learning_rate": 0.001,
      "loss": 2.5121,
      "step": 173064
    },
    {
      "epoch": 33.23,
      "learning_rate": 0.001,
      "loss": 2.5196,
      "step": 173076
    },
    {
      "epoch": 33.24,
      "learning_rate": 0.001,
      "loss": 2.5082,
      "step": 173088
    },
    {
      "epoch": 33.24,
      "learning_rate": 0.001,
      "loss": 2.52,
      "step": 173100
    },
    {
      "epoch": 33.24,
      "learning_rate": 0.001,
      "loss": 2.5241,
      "step": 173112
    },
    {
      "epoch": 33.24,
      "learning_rate": 0.001,
      "loss": 2.5194,
      "step": 173124
    },
    {
      "epoch": 33.24,
      "eval_ag_news_accuracy": 0.32746875,
      "eval_ag_news_bleu_score": 4.899918911197241,
      "eval_ag_news_bleu_score_sem": 0.1538101148685072,
      "eval_ag_news_emb_cos_sim": 0.814886212348938,
      "eval_ag_news_emb_cos_sim_sem": 0.007141201985370445,
      "eval_ag_news_emb_top1_equal": 0.2109375,
      "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.494994640350342,
      "eval_ag_news_n_ngrams_match_1": 14.312,
      "eval_ag_news_n_ngrams_match_2": 3.136,
      "eval_ag_news_n_ngrams_match_3": 0.88,
      "eval_ag_news_num_pred_words": 46.688,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.950111351478306,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35478775454072764,
      "eval_ag_news_runtime": 10.5804,
      "eval_ag_news_samples_per_second": 47.257,
      "eval_ag_news_steps_per_second": 0.095,
      "eval_ag_news_token_set_f1": 0.35651615031167716,
      "eval_ag_news_token_set_f1_sem": 0.004362219936826826,
      "eval_ag_news_token_set_precision": 0.34187248033445666,
      "eval_ag_news_token_set_recall": 0.3866185172661851,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 173125
    },
    {
      "epoch": 33.24,
      "eval_anthropic_toxic_prompts_accuracy": 0.1155625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.22189957256252,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11831276614066606,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6785842180252075,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008799396641591184,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.140625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030847557647994725,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2247121334075928,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.42,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.006,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.752,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.432,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.146334324242083,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2187582190487244,
      "eval_anthropic_toxic_prompts_runtime": 10.3065,
      "eval_anthropic_toxic_prompts_samples_per_second": 48.513,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.097,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3664681323025575,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006596368235438082,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4523588585250943,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.33454868325187875,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 173125
    },
    {
      "epoch": 33.24,
      "eval_arxiv_accuracy": 0.35078125,
      "eval_arxiv_bleu_score": 4.580869799666467,
      "eval_arxiv_bleu_score_sem": 0.134405210250314,
      "eval_arxiv_emb_cos_sim": 0.7647984623908997,
      "eval_arxiv_emb_cos_sim_sem": 0.008891873412669166,
      "eval_arxiv_emb_top1_equal": 0.2578125,
      "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.350621223449707,
      "eval_arxiv_n_ngrams_match_1": 15.376,
      "eval_arxiv_n_ngrams_match_2": 3.124,
      "eval_arxiv_n_ngrams_match_3": 0.728,
      "eval_arxiv_num_pred_words": 40.592,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.520445711293767,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36878700185813307,
      "eval_arxiv_runtime": 11.3099,
      "eval_arxiv_samples_per_second": 44.209,
      "eval_arxiv_steps_per_second": 0.088,
      "eval_arxiv_token_set_f1": 0.35959364145006717,
      "eval_arxiv_token_set_f1_sem": 0.004325707467276858,
      "eval_arxiv_token_set_precision": 0.31183180094137114,
      "eval_arxiv_token_set_recall": 0.4392956192196279,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 173125
    },
    {
      "epoch": 33.24,
      "eval_python_code_alpaca_accuracy": 0.16321875,
      "eval_python_code_alpaca_bleu_score": 4.789714930698208,
      "eval_python_code_alpaca_bleu_score_sem": 0.15492731405422203,
      "eval_python_code_alpaca_emb_cos_sim": 0.7496691942214966,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008979071949515666,
      "eval_python_code_alpaca_emb_top1_equal": 0.171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03347745514062371,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.852346897125244,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.786,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.95,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.056,
      "eval_python_code_alpaca_num_pred_words": 42.616,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.328402133250716,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3365390822665639,
      "eval_python_code_alpaca_runtime": 10.1107,
      "eval_python_code_alpaca_samples_per_second": 49.453,
      "eval_python_code_alpaca_steps_per_second": 0.099,
      "eval_python_code_alpaca_token_set_f1": 0.47571060384262936,
      "eval_python_code_alpaca_token_set_f1_sem": 0.00581499096546508,
      "eval_python_code_alpaca_token_set_precision": 0.5364937773593781,
      "eval_python_code_alpaca_token_set_recall": 0.45230257993175715,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 173125
    },
    {
      "epoch": 33.24,
      "eval_wikibio_accuracy": 0.3283125,
      "eval_wikibio_bleu_score": 6.253845379841241,
      "eval_wikibio_bleu_score_sem": 0.22551707085226777,
      "eval_wikibio_emb_cos_sim": 0.7445352673530579,
      "eval_wikibio_emb_cos_sim_sem": 0.009703821253974903,
      "eval_wikibio_emb_top1_equal": 0.171875,
      "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6684389114379883,
      "eval_wikibio_n_ngrams_match_1": 10.174,
      "eval_wikibio_n_ngrams_match_2": 3.44,
      "eval_wikibio_n_ngrams_match_3": 1.29,
      "eval_wikibio_num_pred_words": 35.898,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 39.1906779625486,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.35983269969875353,
      "eval_wikibio_runtime": 10.2797,
      "eval_wikibio_samples_per_second": 48.64,
      "eval_wikibio_steps_per_second": 0.097,
      "eval_wikibio_token_set_f1": 0.3271597122141529,
      "eval_wikibio_token_set_f1_sem": 0.005313285045508303,
      "eval_wikibio_token_set_precision": 0.33182161640259683,
      "eval_wikibio_token_set_recall": 0.33768863930955123,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 173125
    },
    {
      "epoch": 33.24,
      "eval_nq_accuracy": 0.5326875,
      "eval_nq_bleu_score": 11.935532159876267,
      "eval_nq_bleu_score_sem": 0.47433546106004976,
      "eval_nq_emb_cos_sim": 0.8320028781890869,
      "eval_nq_emb_cos_sim_sem": 0.007365523976758122,
      "eval_nq_emb_top1_equal": 0.2890625,
      "eval_nq_emb_top1_equal_sem": 0.04022626667363519,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.14473295211792,
      "eval_nq_n_ngrams_match_1": 23.232,
      "eval_nq_n_ngrams_match_2": 8.596,
      "eval_nq_n_ngrams_match_3": 3.97,
      "eval_nq_num_pred_words": 49.198,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.539760407831926,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4508228084144806,
      "eval_nq_runtime": 10.5952,
      "eval_nq_samples_per_second": 47.191,
      "eval_nq_steps_per_second": 0.094,
      "eval_nq_token_set_f1": 0.4640798159498465,
      "eval_nq_token_set_f1_sem": 0.004961801434360699,
      "eval_nq_token_set_precision": 0.4216964755775736,
      "eval_nq_token_set_recall": 0.524348383098691,
      "eval_nq_true_num_tokens": 64.0,
      "step": 173125
    },
    {
      "epoch": 33.24,
      "learning_rate": 0.001,
      "loss": 2.5149,
      "step": 173136
    },
    {
      "epoch": 33.25,
      "learning_rate": 0.001,
      "loss": 2.5126,
      "step": 173148
    },
    {
      "epoch": 33.25,
      "learning_rate": 0.001,
      "loss": 2.5194,
      "step": 173160
    },
    {
      "epoch": 33.25,
      "learning_rate": 0.001,
      "loss": 2.5171,
      "step": 173172
    },
    {
      "epoch": 33.25,
      "learning_rate": 0.001,
      "loss": 2.5234,
      "step": 173184
    },
    {
      "epoch": 33.26,
      "learning_rate": 0.001,
      "loss": 2.5134,
      "step": 173196
    },
    {
      "epoch": 33.26,
      "learning_rate": 0.001,
      "loss": 2.531,
      "step": 173208
    },
    {
      "epoch": 33.26,
      "learning_rate": 0.001,
      "loss": 2.5174,
      "step": 173220
    },
    {
      "epoch": 33.26,
      "learning_rate": 0.001,
      "loss": 2.5135,
      "step": 173232
    },
    {
      "epoch": 33.26,
      "learning_rate": 0.001,
      "loss": 2.5233,
      "step": 173244
    },
    {
      "epoch": 33.27,
      "learning_rate": 0.001,
      "loss": 2.5205,
      "step": 173256
    },
    {
      "epoch": 33.27,
      "learning_rate": 0.001,
      "loss": 2.5203,
      "step": 173268
    },
    {
      "epoch": 33.27,
      "learning_rate": 0.001,
      "loss": 2.5111,
      "step": 173280
    },
    {
      "epoch": 33.27,
      "learning_rate": 0.001,
      "loss": 2.507,
      "step": 173292
    },
    {
      "epoch": 33.28,
      "learning_rate": 0.001,
      "loss": 2.5174,
      "step": 173304
    },
    {
      "epoch": 33.28,
      "learning_rate": 0.001,
      "loss": 2.5068,
      "step": 173316
    },
    {
      "epoch": 33.28,
      "learning_rate": 0.001,
      "loss": 2.5192,
      "step": 173328
    },
    {
      "epoch": 33.28,
      "learning_rate": 0.001,
      "loss": 2.5169,
      "step": 173340
    },
    {
      "epoch": 33.29,
      "learning_rate": 0.001,
      "loss": 2.5146,
      "step": 173352
    },
    {
      "epoch": 33.29,
      "learning_rate": 0.001,
      "loss": 2.5099,
      "step": 173364
    },
    {
      "epoch": 33.29,
      "learning_rate": 0.001,
      "loss": 2.5135,
      "step": 173376
    },
    {
      "epoch": 33.29,
      "learning_rate": 0.001,
      "loss": 2.5132,
      "step": 173388
    },
    {
      "epoch": 33.29,
      "learning_rate": 0.001,
      "loss": 2.5178,
      "step": 173400
    },
    {
      "epoch": 33.3,
      "learning_rate": 0.001,
      "loss": 2.5113,
      "step": 173412
    },
    {
      "epoch": 33.3,
      "learning_rate": 0.001,
      "loss": 2.5163,
      "step": 173424
    },
    {
      "epoch": 33.3,
      "learning_rate": 0.001,
      "loss": 2.5238,
      "step": 173436
    },
    {
      "epoch": 33.3,
      "learning_rate": 0.001,
      "loss": 2.5191,
      "step": 173448
    },
    {
      "epoch": 33.31,
      "learning_rate": 0.001,
      "loss": 2.521,
      "step": 173460
    },
    {
      "epoch": 33.31,
      "learning_rate": 0.001,
      "loss": 2.523,
      "step": 173472
    },
    {
      "epoch": 33.31,
      "learning_rate": 0.001,
      "loss": 2.5183,
      "step": 173484
    },
    {
      "epoch": 33.31,
      "learning_rate": 0.001,
      "loss": 2.5207,
      "step": 173496
    },
    {
      "epoch": 33.32,
      "learning_rate": 0.001,
      "loss": 2.5136,
      "step": 173508
    },
    {
      "epoch": 33.32,
      "learning_rate": 0.001,
      "loss": 2.5165,
      "step": 173520
    },
    {
      "epoch": 33.32,
      "learning_rate": 0.001,
      "loss": 2.5105,
      "step": 173532
    },
    {
      "epoch": 33.32,
      "learning_rate": 0.001,
      "loss": 2.5165,
      "step": 173544
    },
    {
      "epoch": 33.32,
      "learning_rate": 0.001,
      "loss": 2.5268,
      "step": 173556
    },
    {
      "epoch": 33.33,
      "learning_rate": 0.001,
      "loss": 2.5096,
      "step": 173568
    },
    {
      "epoch": 33.33,
      "learning_rate": 0.001,
      "loss": 2.527,
      "step": 173580
    },
    {
      "epoch": 33.33,
      "learning_rate": 0.001,
      "loss": 2.525,
      "step": 173592
    },
    {
      "epoch": 33.33,
      "learning_rate": 0.001,
      "loss": 2.5186,
      "step": 173604
    },
    {
      "epoch": 33.34,
      "learning_rate": 0.001,
      "loss": 2.5194,
      "step": 173616
    },
    {
      "epoch": 33.34,
      "learning_rate": 0.001,
      "loss": 2.5137,
      "step": 173628
    },
    {
      "epoch": 33.34,
      "learning_rate": 0.001,
      "loss": 2.5254,
      "step": 173640
    },
    {
      "epoch": 33.34,
      "learning_rate": 0.001,
      "loss": 2.5147,
      "step": 173652
    },
    {
      "epoch": 33.35,
      "learning_rate": 0.001,
      "loss": 2.5149,
      "step": 173664
    },
    {
      "epoch": 33.35,
      "learning_rate": 0.001,
      "loss": 2.5134,
      "step": 173676
    },
    {
      "epoch": 33.35,
      "learning_rate": 0.001,
      "loss": 2.5199,
      "step": 173688
    },
    {
      "epoch": 33.35,
      "learning_rate": 0.001,
      "loss": 2.5087,
      "step": 173700
    },
    {
      "epoch": 33.35,
      "learning_rate": 0.001,
      "loss": 2.5222,
      "step": 173712
    },
    {
      "epoch": 33.36,
      "learning_rate": 0.001,
      "loss": 2.5128,
      "step": 173724
    },
    {
      "epoch": 33.36,
      "learning_rate": 0.001,
      "loss": 2.5279,
      "step": 173736
    },
    {
      "epoch": 33.36,
      "learning_rate": 0.001,
      "loss": 2.5175,
      "step": 173748
    },
    {
      "epoch": 33.36,
      "eval_ag_news_accuracy": 0.32675,
      "eval_ag_news_bleu_score": 4.965034493484933,
      "eval_ag_news_bleu_score_sem": 0.15584535953177964,
      "eval_ag_news_emb_cos_sim": 0.8194319009780884,
      "eval_ag_news_emb_cos_sim_sem": 0.006557308107179123,
      "eval_ag_news_emb_top1_equal": 0.234375,
      "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.509377956390381,
      "eval_ag_news_n_ngrams_match_1": 14.434,
      "eval_ag_news_n_ngrams_match_2": 3.226,
      "eval_ag_news_n_ngrams_match_3": 0.918,
      "eval_ag_news_num_pred_words": 47.338,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.42746797258263,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35893226864744426,
      "eval_ag_news_runtime": 10.6405,
      "eval_ag_news_samples_per_second": 46.99,
      "eval_ag_news_steps_per_second": 0.094,
      "eval_ag_news_token_set_f1": 0.356872847590079,
      "eval_ag_news_token_set_f1_sem": 0.0041785754917403415,
      "eval_ag_news_token_set_precision": 0.34607354411306496,
      "eval_ag_news_token_set_recall": 0.3816919589951471,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 173750
    },
    {
      "epoch": 33.36,
      "eval_anthropic_toxic_prompts_accuracy": 0.11678125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.134337231638774,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11876808220698369,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6827963590621948,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008412511621318956,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2089335918426514,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.31,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.942,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.738,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.884,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.752675689614016,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2180556985886582,
      "eval_anthropic_toxic_prompts_runtime": 10.4927,
      "eval_anthropic_toxic_prompts_samples_per_second": 47.652,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.095,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36476377372728713,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006297880303976672,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44839266035232345,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3328900841915803,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 173750
    },
    {
      "epoch": 33.36,
      "eval_arxiv_accuracy": 0.3509375,
      "eval_arxiv_bleu_score": 4.434420430612437,
      "eval_arxiv_bleu_score_sem": 0.12258376722549405,
      "eval_arxiv_emb_cos_sim": 0.7793766856193542,
      "eval_arxiv_emb_cos_sim_sem": 0.006540039094546292,
      "eval_arxiv_emb_top1_equal": 0.328125,
      "eval_arxiv_emb_top1_equal_sem": 0.041664103776406315,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.35927677154541,
      "eval_arxiv_n_ngrams_match_1": 15.562,
      "eval_arxiv_n_ngrams_match_2": 3.024,
      "eval_arxiv_n_ngrams_match_3": 0.674,
      "eval_arxiv_num_pred_words": 41.224,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.76837724462782,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.37095354797061697,
      "eval_arxiv_runtime": 10.351,
      "eval_arxiv_samples_per_second": 48.305,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.3624521232998928,
      "eval_arxiv_token_set_f1_sem": 0.0041198916159214645,
      "eval_arxiv_token_set_precision": 0.3152315986668038,
      "eval_arxiv_token_set_recall": 0.4412362093246703,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 173750
    },
    {
      "epoch": 33.36,
      "eval_python_code_alpaca_accuracy": 0.16228125,
      "eval_python_code_alpaca_bleu_score": 4.8026209937932345,
      "eval_python_code_alpaca_bleu_score_sem": 0.15575429695317172,
      "eval_python_code_alpaca_emb_cos_sim": 0.7655423283576965,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007853884778809914,
      "eval_python_code_alpaca_emb_top1_equal": 0.1640625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8448219299316406,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.094,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.068,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.104,
      "eval_python_code_alpaca_num_pred_words": 44.546,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.198495858902863,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3385839956346942,
      "eval_python_code_alpaca_runtime": 10.2532,
      "eval_python_code_alpaca_samples_per_second": 48.765,
      "eval_python_code_alpaca_steps_per_second": 0.098,
      "eval_python_code_alpaca_token_set_f1": 0.48839037843729227,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005526841539587616,
      "eval_python_code_alpaca_token_set_precision": 0.5533478780694027,
      "eval_python_code_alpaca_token_set_recall": 0.45824260790696586,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 173750
    },
    {
      "epoch": 33.36,
      "eval_wikibio_accuracy": 0.326625,
      "eval_wikibio_bleu_score": 6.135505009485993,
      "eval_wikibio_bleu_score_sem": 0.20818576480427142,
      "eval_wikibio_emb_cos_sim": 0.7598357200622559,
      "eval_wikibio_emb_cos_sim_sem": 0.007382571718436271,
      "eval_wikibio_emb_top1_equal": 0.2421875,
      "eval_wikibio_emb_top1_equal_sem": 0.038014990119662626,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.7037172317504883,
      "eval_wikibio_n_ngrams_match_1": 10.618,
      "eval_wikibio_n_ngrams_match_2": 3.62,
      "eval_wikibio_n_ngrams_match_3": 1.302,
      "eval_wikibio_num_pred_words": 37.622,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 40.59793615725575,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3699478244719296,
      "eval_wikibio_runtime": 10.1056,
      "eval_wikibio_samples_per_second": 49.477,
      "eval_wikibio_steps_per_second": 0.099,
      "eval_wikibio_token_set_f1": 0.3317120549497993,
      "eval_wikibio_token_set_f1_sem": 0.004876042724365511,
      "eval_wikibio_token_set_precision": 0.3431560172842182,
      "eval_wikibio_token_set_recall": 0.33365987201723285,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 173750
    },
    {
      "epoch": 33.36,
      "eval_nq_accuracy": 0.53240625,
      "eval_nq_bleu_score": 11.973528795799655,
      "eval_nq_bleu_score_sem": 0.4804154515319206,
      "eval_nq_emb_cos_sim": 0.8428808450698853,
      "eval_nq_emb_cos_sim_sem": 0.006753537906945273,
      "eval_nq_emb_top1_equal": 0.296875,
      "eval_nq_emb_top1_equal_sem": 0.04054163310179599,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1437206268310547,
      "eval_nq_n_ngrams_match_1": 23.466,
      "eval_nq_n_ngrams_match_2": 8.726,
      "eval_nq_n_ngrams_match_3": 3.968,
      "eval_nq_num_pred_words": 49.554,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.531119766734959,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45280346244420744,
      "eval_nq_runtime": 11.604,
      "eval_nq_samples_per_second": 43.088,
      "eval_nq_steps_per_second": 0.086,
      "eval_nq_token_set_f1": 0.4666227583664684,
      "eval_nq_token_set_f1_sem": 0.004900215885111777,
      "eval_nq_token_set_precision": 0.4270406572741068,
      "eval_nq_token_set_recall": 0.5212560592669406,
      "eval_nq_true_num_tokens": 64.0,
      "step": 173750
    },
    {
      "epoch": 33.36,
      "learning_rate": 0.001,
      "loss": 2.5178,
      "step": 173760
    },
    {
      "epoch": 33.37,
      "learning_rate": 0.001,
      "loss": 2.527,
      "step": 173772
    },
    {
      "epoch": 33.37,
      "learning_rate": 0.001,
      "loss": 2.5099,
      "step": 173784
    },
    {
      "epoch": 33.37,
      "learning_rate": 0.001,
      "loss": 2.5145,
      "step": 173796
    },
    {
      "epoch": 33.37,
      "learning_rate": 0.001,
      "loss": 2.5155,
      "step": 173808
    },
    {
      "epoch": 33.38,
      "learning_rate": 0.001,
      "loss": 2.5312,
      "step": 173820
    },
    {
      "epoch": 33.38,
      "learning_rate": 0.001,
      "loss": 2.5204,
      "step": 173832
    },
    {
      "epoch": 33.38,
      "learning_rate": 0.001,
      "loss": 2.5096,
      "step": 173844
    },
    {
      "epoch": 33.38,
      "learning_rate": 0.001,
      "loss": 2.5188,
      "step": 173856
    },
    {
      "epoch": 33.38,
      "learning_rate": 0.001,
      "loss": 2.5158,
      "step": 173868
    },
    {
      "epoch": 33.39,
      "learning_rate": 0.001,
      "loss": 2.5215,
      "step": 173880
    },
    {
      "epoch": 33.39,
      "learning_rate": 0.001,
      "loss": 2.5116,
      "step": 173892
    },
    {
      "epoch": 33.39,
      "learning_rate": 0.001,
      "loss": 2.5131,
      "step": 173904
    },
    {
      "epoch": 33.39,
      "learning_rate": 0.001,
      "loss": 2.5228,
      "step": 173916
    },
    {
      "epoch": 33.4,
      "learning_rate": 0.001,
      "loss": 2.5165,
      "step": 173928
    },
    {
      "epoch": 33.4,
      "learning_rate": 0.001,
      "loss": 2.5179,
      "step": 173940
    },
    {
      "epoch": 33.4,
      "learning_rate": 0.001,
      "loss": 2.5245,
      "step": 173952
    },
    {
      "epoch": 33.4,
      "learning_rate": 0.001,
      "loss": 2.5197,
      "step": 173964
    },
    {
      "epoch": 33.41,
      "learning_rate": 0.001,
      "loss": 2.5269,
      "step": 173976
    },
    {
      "epoch": 33.41,
      "learning_rate": 0.001,
      "loss": 2.512,
      "step": 173988
    },
    {
      "epoch": 33.41,
      "learning_rate": 0.001,
      "loss": 2.5339,
      "step": 174000
    },
    {
      "epoch": 33.41,
      "learning_rate": 0.001,
      "loss": 2.5234,
      "step": 174012
    },
    {
      "epoch": 33.41,
      "learning_rate": 0.001,
      "loss": 2.5243,
      "step": 174024
    },
    {
      "epoch": 33.42,
      "learning_rate": 0.001,
      "loss": 2.5229,
      "step": 174036
    },
    {
      "epoch": 33.42,
      "learning_rate": 0.001,
      "loss": 2.5149,
      "step": 174048
    },
    {
      "epoch": 33.42,
      "learning_rate": 0.001,
      "loss": 2.5113,
      "step": 174060
    },
    {
      "epoch": 33.42,
      "learning_rate": 0.001,
      "loss": 2.5149,
      "step": 174072
    },
    {
      "epoch": 33.43,
      "learning_rate": 0.001,
      "loss": 2.5085,
      "step": 174084
    },
    {
      "epoch": 33.43,
      "learning_rate": 0.001,
      "loss": 2.5135,
      "step": 174096
    },
    {
      "epoch": 33.43,
      "learning_rate": 0.001,
      "loss": 2.5136,
      "step": 174108
    },
    {
      "epoch": 33.43,
      "learning_rate": 0.001,
      "loss": 2.5229,
      "step": 174120
    },
    {
      "epoch": 33.44,
      "learning_rate": 0.001,
      "loss": 2.5267,
      "step": 174132
    },
    {
      "epoch": 33.44,
      "learning_rate": 0.001,
      "loss": 2.5174,
      "step": 174144
    },
    {
      "epoch": 33.44,
      "learning_rate": 0.001,
      "loss": 2.5138,
      "step": 174156
    },
    {
      "epoch": 33.44,
      "learning_rate": 0.001,
      "loss": 2.511,
      "step": 174168
    },
    {
      "epoch": 33.44,
      "learning_rate": 0.001,
      "loss": 2.524,
      "step": 174180
    },
    {
      "epoch": 33.45,
      "learning_rate": 0.001,
      "loss": 2.514,
      "step": 174192
    },
    {
      "epoch": 33.45,
      "learning_rate": 0.001,
      "loss": 2.5159,
      "step": 174204
    },
    {
      "epoch": 33.45,
      "learning_rate": 0.001,
      "loss": 2.5198,
      "step": 174216
    },
    {
      "epoch": 33.45,
      "learning_rate": 0.001,
      "loss": 2.5243,
      "step": 174228
    },
    {
      "epoch": 33.46,
      "learning_rate": 0.001,
      "loss": 2.5196,
      "step": 174240
    },
    {
      "epoch": 33.46,
      "learning_rate": 0.001,
      "loss": 2.5193,
      "step": 174252
    },
    {
      "epoch": 33.46,
      "learning_rate": 0.001,
      "loss": 2.5128,
      "step": 174264
    },
    {
      "epoch": 33.46,
      "learning_rate": 0.001,
      "loss": 2.5174,
      "step": 174276
    },
    {
      "epoch": 33.47,
      "learning_rate": 0.001,
      "loss": 2.518,
      "step": 174288
    },
    {
      "epoch": 33.47,
      "learning_rate": 0.001,
      "loss": 2.5129,
      "step": 174300
    },
    {
      "epoch": 33.47,
      "learning_rate": 0.001,
      "loss": 2.52,
      "step": 174312
    },
    {
      "epoch": 33.47,
      "learning_rate": 0.001,
      "loss": 2.5093,
      "step": 174324
    },
    {
      "epoch": 33.47,
      "learning_rate": 0.001,
      "loss": 2.5085,
      "step": 174336
    },
    {
      "epoch": 33.48,
      "learning_rate": 0.001,
      "loss": 2.5194,
      "step": 174348
    },
    {
      "epoch": 33.48,
      "learning_rate": 0.001,
      "loss": 2.5222,
      "step": 174360
    },
    {
      "epoch": 33.48,
      "learning_rate": 0.001,
      "loss": 2.5363,
      "step": 174372
    },
    {
      "epoch": 33.48,
      "eval_ag_news_accuracy": 0.32703125,
      "eval_ag_news_bleu_score": 4.906854796195355,
      "eval_ag_news_bleu_score_sem": 0.1534652775521715,
      "eval_ag_news_emb_cos_sim": 0.821617066860199,
      "eval_ag_news_emb_cos_sim_sem": 0.006243848823913444,
      "eval_ag_news_emb_top1_equal": 0.28125,
      "eval_ag_news_emb_top1_equal_sem": 0.039896367485272234,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5055830478668213,
      "eval_ag_news_n_ngrams_match_1": 14.448,
      "eval_ag_news_n_ngrams_match_2": 3.238,
      "eval_ag_news_n_ngrams_match_3": 0.926,
      "eval_ag_news_num_pred_words": 46.97,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.300854185273344,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3578224676505867,
      "eval_ag_news_runtime": 10.4357,
      "eval_ag_news_samples_per_second": 47.912,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.3567334792661159,
      "eval_ag_news_token_set_f1_sem": 0.004295701854635979,
      "eval_ag_news_token_set_precision": 0.3452890637362383,
      "eval_ag_news_token_set_recall": 0.38267987839055256,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 174375
    },
    {
      "epoch": 33.48,
      "eval_anthropic_toxic_prompts_accuracy": 0.11584375,
      "eval_anthropic_toxic_prompts_bleu_score": 3.165334139202723,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11743674992686823,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6743996143341064,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008732528690795126,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.140625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030847557647994725,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2037386894226074,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.244,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.906,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.73,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.942,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.624421377522857,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21463547049141593,
      "eval_anthropic_toxic_prompts_runtime": 10.1653,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.187,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.098,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3632020696418693,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006562672979982537,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4453997781712462,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.33430944384037714,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 174375
    },
    {
      "epoch": 33.48,
      "eval_arxiv_accuracy": 0.34984375,
      "eval_arxiv_bleu_score": 4.493597327860133,
      "eval_arxiv_bleu_score_sem": 0.12371786867659083,
      "eval_arxiv_emb_cos_sim": 0.7805147171020508,
      "eval_arxiv_emb_cos_sim_sem": 0.006847012372952848,
      "eval_arxiv_emb_top1_equal": 0.2890625,
      "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3685925006866455,
      "eval_arxiv_n_ngrams_match_1": 15.398,
      "eval_arxiv_n_ngrams_match_2": 3.024,
      "eval_arxiv_n_ngrams_match_3": 0.694,
      "eval_arxiv_num_pred_words": 41.036,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.037627840491965,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3680955459281964,
      "eval_arxiv_runtime": 10.1808,
      "eval_arxiv_samples_per_second": 49.112,
      "eval_arxiv_steps_per_second": 0.098,
      "eval_arxiv_token_set_f1": 0.36035618965034755,
      "eval_arxiv_token_set_f1_sem": 0.004020412666983252,
      "eval_arxiv_token_set_precision": 0.31368359341177604,
      "eval_arxiv_token_set_recall": 0.4378062231189222,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 174375
    },
    {
      "epoch": 33.48,
      "eval_python_code_alpaca_accuracy": 0.16334375,
      "eval_python_code_alpaca_bleu_score": 4.86654463528897,
      "eval_python_code_alpaca_bleu_score_sem": 0.15655118955654437,
      "eval_python_code_alpaca_emb_cos_sim": 0.7589184045791626,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007545616801900665,
      "eval_python_code_alpaca_emb_top1_equal": 0.1171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8595287799835205,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.852,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.054,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.082,
      "eval_python_code_alpaca_num_pred_words": 43.25,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.453300653915925,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3357447579434674,
      "eval_python_code_alpaca_runtime": 9.8106,
      "eval_python_code_alpaca_samples_per_second": 50.965,
      "eval_python_code_alpaca_steps_per_second": 0.102,
      "eval_python_code_alpaca_token_set_f1": 0.4799576337029294,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005466198786460032,
      "eval_python_code_alpaca_token_set_precision": 0.5423816177033097,
      "eval_python_code_alpaca_token_set_recall": 0.4552521696149737,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 174375
    },
    {
      "epoch": 33.48,
      "eval_wikibio_accuracy": 0.325375,
      "eval_wikibio_bleu_score": 6.0644674525720985,
      "eval_wikibio_bleu_score_sem": 0.21806740005371358,
      "eval_wikibio_emb_cos_sim": 0.7400978803634644,
      "eval_wikibio_emb_cos_sim_sem": 0.010156600237715882,
      "eval_wikibio_emb_top1_equal": 0.1796875,
      "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6715779304504395,
      "eval_wikibio_n_ngrams_match_1": 9.984,
      "eval_wikibio_n_ngrams_match_2": 3.378,
      "eval_wikibio_n_ngrams_match_3": 1.254,
      "eval_wikibio_num_pred_words": 35.596,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 39.31389152947537,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3534394322253904,
      "eval_wikibio_runtime": 10.7865,
      "eval_wikibio_samples_per_second": 46.354,
      "eval_wikibio_steps_per_second": 0.093,
      "eval_wikibio_token_set_f1": 0.31820052923439396,
      "eval_wikibio_token_set_f1_sem": 0.005603772208673508,
      "eval_wikibio_token_set_precision": 0.32455164773925194,
      "eval_wikibio_token_set_recall": 0.32870129898104994,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 174375
    },
    {
      "epoch": 33.48,
      "eval_nq_accuracy": 0.53415625,
      "eval_nq_bleu_score": 12.095765188391509,
      "eval_nq_bleu_score_sem": 0.49374744347138455,
      "eval_nq_emb_cos_sim": 0.8349359631538391,
      "eval_nq_emb_cos_sim_sem": 0.007024838487898914,
      "eval_nq_emb_top1_equal": 0.3203125,
      "eval_nq_emb_top1_equal_sem": 0.041403754790620424,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1431634426116943,
      "eval_nq_n_ngrams_match_1": 23.498,
      "eval_nq_n_ngrams_match_2": 8.78,
      "eval_nq_n_ngrams_match_3": 4.044,
      "eval_nq_num_pred_words": 49.158,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.526367685442755,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45607291509430736,
      "eval_nq_runtime": 10.3651,
      "eval_nq_samples_per_second": 48.239,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.4686542732313417,
      "eval_nq_token_set_f1_sem": 0.005069800159174794,
      "eval_nq_token_set_precision": 0.42843504562473606,
      "eval_nq_token_set_recall": 0.5235385607916649,
      "eval_nq_true_num_tokens": 64.0,
      "step": 174375
    },
    {
      "epoch": 33.48,
      "learning_rate": 0.001,
      "loss": 2.517,
      "step": 174384
    },
    {
      "epoch": 33.49,
      "learning_rate": 0.001,
      "loss": 2.5174,
      "step": 174396
    },
    {
      "epoch": 33.49,
      "learning_rate": 0.001,
      "loss": 2.5205,
      "step": 174408
    },
    {
      "epoch": 33.49,
      "learning_rate": 0.001,
      "loss": 2.5222,
      "step": 174420
    },
    {
      "epoch": 33.49,
      "learning_rate": 0.001,
      "loss": 2.518,
      "step": 174432
    },
    {
      "epoch": 33.5,
      "learning_rate": 0.001,
      "loss": 2.5231,
      "step": 174444
    },
    {
      "epoch": 33.5,
      "learning_rate": 0.001,
      "loss": 2.5167,
      "step": 174456
    },
    {
      "epoch": 33.5,
      "learning_rate": 0.001,
      "loss": 2.5268,
      "step": 174468
    },
    {
      "epoch": 33.5,
      "learning_rate": 0.001,
      "loss": 2.518,
      "step": 174480
    },
    {
      "epoch": 33.5,
      "learning_rate": 0.001,
      "loss": 2.5192,
      "step": 174492
    },
    {
      "epoch": 33.51,
      "learning_rate": 0.001,
      "loss": 2.5165,
      "step": 174504
    },
    {
      "epoch": 33.51,
      "learning_rate": 0.001,
      "loss": 2.5162,
      "step": 174516
    },
    {
      "epoch": 33.51,
      "learning_rate": 0.001,
      "loss": 2.515,
      "step": 174528
    },
    {
      "epoch": 33.51,
      "learning_rate": 0.001,
      "loss": 2.5316,
      "step": 174540
    },
    {
      "epoch": 33.52,
      "learning_rate": 0.001,
      "loss": 2.5279,
      "step": 174552
    },
    {
      "epoch": 33.52,
      "learning_rate": 0.001,
      "loss": 2.5113,
      "step": 174564
    },
    {
      "epoch": 33.52,
      "learning_rate": 0.001,
      "loss": 2.5202,
      "step": 174576
    },
    {
      "epoch": 33.52,
      "learning_rate": 0.001,
      "loss": 2.5189,
      "step": 174588
    },
    {
      "epoch": 33.53,
      "learning_rate": 0.001,
      "loss": 2.5181,
      "step": 174600
    },
    {
      "epoch": 33.53,
      "learning_rate": 0.001,
      "loss": 2.5233,
      "step": 174612
    },
    {
      "epoch": 33.53,
      "learning_rate": 0.001,
      "loss": 2.5246,
      "step": 174624
    },
    {
      "epoch": 33.53,
      "learning_rate": 0.001,
      "loss": 2.5143,
      "step": 174636
    },
    {
      "epoch": 33.53,
      "learning_rate": 0.001,
      "loss": 2.5034,
      "step": 174648
    },
    {
      "epoch": 33.54,
      "learning_rate": 0.001,
      "loss": 2.5189,
      "step": 174660
    },
    {
      "epoch": 33.54,
      "learning_rate": 0.001,
      "loss": 2.5203,
      "step": 174672
    },
    {
      "epoch": 33.54,
      "learning_rate": 0.001,
      "loss": 2.5258,
      "step": 174684
    },
    {
      "epoch": 33.54,
      "learning_rate": 0.001,
      "loss": 2.5231,
      "step": 174696
    },
    {
      "epoch": 33.55,
      "learning_rate": 0.001,
      "loss": 2.5215,
      "step": 174708
    },
    {
      "epoch": 33.55,
      "learning_rate": 0.001,
      "loss": 2.5235,
      "step": 174720
    },
    {
      "epoch": 33.55,
      "learning_rate": 0.001,
      "loss": 2.5218,
      "step": 174732
    },
    {
      "epoch": 33.55,
      "learning_rate": 0.001,
      "loss": 2.5186,
      "step": 174744
    },
    {
      "epoch": 33.56,
      "learning_rate": 0.001,
      "loss": 2.5246,
      "step": 174756
    },
    {
      "epoch": 33.56,
      "learning_rate": 0.001,
      "loss": 2.5262,
      "step": 174768
    },
    {
      "epoch": 33.56,
      "learning_rate": 0.001,
      "loss": 2.527,
      "step": 174780
    },
    {
      "epoch": 33.56,
      "learning_rate": 0.001,
      "loss": 2.5262,
      "step": 174792
    },
    {
      "epoch": 33.56,
      "learning_rate": 0.001,
      "loss": 2.5246,
      "step": 174804
    },
    {
      "epoch": 33.57,
      "learning_rate": 0.001,
      "loss": 2.515,
      "step": 174816
    },
    {
      "epoch": 33.57,
      "learning_rate": 0.001,
      "loss": 2.5221,
      "step": 174828
    },
    {
      "epoch": 33.57,
      "learning_rate": 0.001,
      "loss": 2.5201,
      "step": 174840
    },
    {
      "epoch": 33.57,
      "learning_rate": 0.001,
      "loss": 2.5164,
      "step": 174852
    },
    {
      "epoch": 33.58,
      "learning_rate": 0.001,
      "loss": 2.5302,
      "step": 174864
    },
    {
      "epoch": 33.58,
      "learning_rate": 0.001,
      "loss": 2.522,
      "step": 174876
    },
    {
      "epoch": 33.58,
      "learning_rate": 0.001,
      "loss": 2.5232,
      "step": 174888
    },
    {
      "epoch": 33.58,
      "learning_rate": 0.001,
      "loss": 2.5201,
      "step": 174900
    },
    {
      "epoch": 33.59,
      "learning_rate": 0.001,
      "loss": 2.5237,
      "step": 174912
    },
    {
      "epoch": 33.59,
      "learning_rate": 0.001,
      "loss": 2.5219,
      "step": 174924
    },
    {
      "epoch": 33.59,
      "learning_rate": 0.001,
      "loss": 2.5263,
      "step": 174936
    },
    {
      "epoch": 33.59,
      "learning_rate": 0.001,
      "loss": 2.5191,
      "step": 174948
    },
    {
      "epoch": 33.59,
      "learning_rate": 0.001,
      "loss": 2.5167,
      "step": 174960
    },
    {
      "epoch": 33.6,
      "learning_rate": 0.001,
      "loss": 2.5175,
      "step": 174972
    },
    {
      "epoch": 33.6,
      "learning_rate": 0.001,
      "loss": 2.5094,
      "step": 174984
    },
    {
      "epoch": 33.6,
      "learning_rate": 0.001,
      "loss": 2.5127,
      "step": 174996
    },
    {
      "epoch": 33.6,
      "eval_ag_news_accuracy": 0.327,
      "eval_ag_news_bleu_score": 5.029946715449138,
      "eval_ag_news_bleu_score_sem": 0.156727994509488,
      "eval_ag_news_emb_cos_sim": 0.8179185390472412,
      "eval_ag_news_emb_cos_sim_sem": 0.006700141801149018,
      "eval_ag_news_emb_top1_equal": 0.2109375,
      "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4984447956085205,
      "eval_ag_news_n_ngrams_match_1": 14.454,
      "eval_ag_news_n_ngrams_match_2": 3.238,
      "eval_ag_news_n_ngrams_match_3": 0.922,
      "eval_ag_news_num_pred_words": 46.614,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.063990689148795,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3587236885662435,
      "eval_ag_news_runtime": 10.2337,
      "eval_ag_news_samples_per_second": 48.858,
      "eval_ag_news_steps_per_second": 0.098,
      "eval_ag_news_token_set_f1": 0.3615319419488924,
      "eval_ag_news_token_set_f1_sem": 0.00426277962036405,
      "eval_ag_news_token_set_precision": 0.3477667685348069,
      "eval_ag_news_token_set_recall": 0.39019556812546885,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 175000
    },
    {
      "epoch": 33.6,
      "eval_anthropic_toxic_prompts_accuracy": 0.11571875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1958276110554644,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12273843427219648,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6793828010559082,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009504407391512655,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2424569129943848,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.29,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.964,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.728,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.772,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.59653300941431,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2163873068690737,
      "eval_anthropic_toxic_prompts_runtime": 9.8575,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.723,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.101,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3563779747433364,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006569863064058106,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44309226529630547,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3245975095000013,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 175000
    },
    {
      "epoch": 33.6,
      "eval_arxiv_accuracy": 0.3494375,
      "eval_arxiv_bleu_score": 4.624642327957384,
      "eval_arxiv_bleu_score_sem": 0.13825152891220188,
      "eval_arxiv_emb_cos_sim": 0.779121458530426,
      "eval_arxiv_emb_cos_sim_sem": 0.0069886106374257686,
      "eval_arxiv_emb_top1_equal": 0.3359375,
      "eval_arxiv_emb_top1_equal_sem": 0.04191137143408563,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3809525966644287,
      "eval_arxiv_n_ngrams_match_1": 15.512,
      "eval_arxiv_n_ngrams_match_2": 3.106,
      "eval_arxiv_n_ngrams_match_3": 0.766,
      "eval_arxiv_num_pred_words": 40.718,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 29.398762942228352,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3727240665286775,
      "eval_arxiv_runtime": 10.3388,
      "eval_arxiv_samples_per_second": 48.362,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.36257993640903136,
      "eval_arxiv_token_set_f1_sem": 0.004277633830519311,
      "eval_arxiv_token_set_precision": 0.31370067286713244,
      "eval_arxiv_token_set_recall": 0.44458743740003237,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 175000
    },
    {
      "epoch": 33.6,
      "eval_python_code_alpaca_accuracy": 0.16403125,
      "eval_python_code_alpaca_bleu_score": 4.810836075629939,
      "eval_python_code_alpaca_bleu_score_sem": 0.1453644665987554,
      "eval_python_code_alpaca_emb_cos_sim": 0.7554128170013428,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009746200568697115,
      "eval_python_code_alpaca_emb_top1_equal": 0.1640625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.856598138809204,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.954,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.094,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.08,
      "eval_python_code_alpaca_num_pred_words": 43.588,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.402226169440514,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33799230051064105,
      "eval_python_code_alpaca_runtime": 9.9203,
      "eval_python_code_alpaca_samples_per_second": 50.401,
      "eval_python_code_alpaca_steps_per_second": 0.101,
      "eval_python_code_alpaca_token_set_f1": 0.4858620096599266,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005469811336237878,
      "eval_python_code_alpaca_token_set_precision": 0.5479346926760923,
      "eval_python_code_alpaca_token_set_recall": 0.46111429326864967,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 175000
    },
    {
      "epoch": 33.6,
      "eval_wikibio_accuracy": 0.3253125,
      "eval_wikibio_bleu_score": 6.021814688120567,
      "eval_wikibio_bleu_score_sem": 0.20294861340148074,
      "eval_wikibio_emb_cos_sim": 0.7521679401397705,
      "eval_wikibio_emb_cos_sim_sem": 0.008066229162686404,
      "eval_wikibio_emb_top1_equal": 0.234375,
      "eval_wikibio_emb_top1_equal_sem": 0.03758909358128201,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6546378135681152,
      "eval_wikibio_n_ngrams_match_1": 10.244,
      "eval_wikibio_n_ngrams_match_2": 3.438,
      "eval_wikibio_n_ngrams_match_3": 1.236,
      "eval_wikibio_num_pred_words": 35.88,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 38.65351879953629,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36374613330735894,
      "eval_wikibio_runtime": 9.9831,
      "eval_wikibio_samples_per_second": 50.085,
      "eval_wikibio_steps_per_second": 0.1,
      "eval_wikibio_token_set_f1": 0.32760757770848686,
      "eval_wikibio_token_set_f1_sem": 0.005100098169329544,
      "eval_wikibio_token_set_precision": 0.33468626927912914,
      "eval_wikibio_token_set_recall": 0.3363332916495507,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 175000
    },
    {
      "epoch": 33.6,
      "eval_nq_accuracy": 0.5323125,
      "eval_nq_bleu_score": 12.059236473751525,
      "eval_nq_bleu_score_sem": 0.5007571574064967,
      "eval_nq_emb_cos_sim": 0.838355541229248,
      "eval_nq_emb_cos_sim_sem": 0.006863833575320251,
      "eval_nq_emb_top1_equal": 0.3203125,
      "eval_nq_emb_top1_equal_sem": 0.041403754790620424,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1420786380767822,
      "eval_nq_n_ngrams_match_1": 23.284,
      "eval_nq_n_ngrams_match_2": 8.682,
      "eval_nq_n_ngrams_match_3": 4.052,
      "eval_nq_num_pred_words": 49.132,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.517123258216118,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45210621431924725,
      "eval_nq_runtime": 11.0339,
      "eval_nq_samples_per_second": 45.315,
      "eval_nq_steps_per_second": 0.091,
      "eval_nq_token_set_f1": 0.4652564768416831,
      "eval_nq_token_set_f1_sem": 0.005004650064946198,
      "eval_nq_token_set_precision": 0.42371902368203906,
      "eval_nq_token_set_recall": 0.5252669319679266,
      "eval_nq_true_num_tokens": 64.0,
      "step": 175000
    },
    {
      "epoch": 33.6,
      "learning_rate": 0.001,
      "loss": 2.5234,
      "step": 175008
    },
    {
      "epoch": 33.61,
      "learning_rate": 0.001,
      "loss": 2.5273,
      "step": 175020
    },
    {
      "epoch": 33.61,
      "learning_rate": 0.001,
      "loss": 2.5185,
      "step": 175032
    },
    {
      "epoch": 33.61,
      "learning_rate": 0.001,
      "loss": 2.5203,
      "step": 175044
    },
    {
      "epoch": 33.61,
      "learning_rate": 0.001,
      "loss": 2.5205,
      "step": 175056
    },
    {
      "epoch": 33.62,
      "learning_rate": 0.001,
      "loss": 2.5138,
      "step": 175068
    },
    {
      "epoch": 33.62,
      "learning_rate": 0.001,
      "loss": 2.5188,
      "step": 175080
    },
    {
      "epoch": 33.62,
      "learning_rate": 0.001,
      "loss": 2.5048,
      "step": 175092
    },
    {
      "epoch": 33.62,
      "learning_rate": 0.001,
      "loss": 2.5126,
      "step": 175104
    },
    {
      "epoch": 33.62,
      "learning_rate": 0.001,
      "loss": 2.5155,
      "step": 175116
    },
    {
      "epoch": 33.63,
      "learning_rate": 0.001,
      "loss": 2.5185,
      "step": 175128
    },
    {
      "epoch": 33.63,
      "learning_rate": 0.001,
      "loss": 2.5144,
      "step": 175140
    },
    {
      "epoch": 33.63,
      "learning_rate": 0.001,
      "loss": 2.5278,
      "step": 175152
    },
    {
      "epoch": 33.63,
      "learning_rate": 0.001,
      "loss": 2.5108,
      "step": 175164
    },
    {
      "epoch": 33.64,
      "learning_rate": 0.001,
      "loss": 2.5152,
      "step": 175176
    },
    {
      "epoch": 33.64,
      "learning_rate": 0.001,
      "loss": 2.5177,
      "step": 175188
    },
    {
      "epoch": 33.64,
      "learning_rate": 0.001,
      "loss": 2.5062,
      "step": 175200
    },
    {
      "epoch": 33.64,
      "learning_rate": 0.001,
      "loss": 2.5205,
      "step": 175212
    },
    {
      "epoch": 33.65,
      "learning_rate": 0.001,
      "loss": 2.5198,
      "step": 175224
    },
    {
      "epoch": 33.65,
      "learning_rate": 0.001,
      "loss": 2.5177,
      "step": 175236
    },
    {
      "epoch": 33.65,
      "learning_rate": 0.001,
      "loss": 2.5198,
      "step": 175248
    },
    {
      "epoch": 33.65,
      "learning_rate": 0.001,
      "loss": 2.5199,
      "step": 175260
    },
    {
      "epoch": 33.65,
      "learning_rate": 0.001,
      "loss": 2.5203,
      "step": 175272
    },
    {
      "epoch": 33.66,
      "learning_rate": 0.001,
      "loss": 2.5143,
      "step": 175284
    },
    {
      "epoch": 33.66,
      "learning_rate": 0.001,
      "loss": 2.5171,
      "step": 175296
    },
    {
      "epoch": 33.66,
      "learning_rate": 0.001,
      "loss": 2.5205,
      "step": 175308
    },
    {
      "epoch": 33.66,
      "learning_rate": 0.001,
      "loss": 2.5192,
      "step": 175320
    },
    {
      "epoch": 33.67,
      "learning_rate": 0.001,
      "loss": 2.5217,
      "step": 175332
    },
    {
      "epoch": 33.67,
      "learning_rate": 0.001,
      "loss": 2.5147,
      "step": 175344
    },
    {
      "epoch": 33.67,
      "learning_rate": 0.001,
      "loss": 2.5221,
      "step": 175356
    },
    {
      "epoch": 33.67,
      "learning_rate": 0.001,
      "loss": 2.5154,
      "step": 175368
    },
    {
      "epoch": 33.68,
      "learning_rate": 0.001,
      "loss": 2.5251,
      "step": 175380
    },
    {
      "epoch": 33.68,
      "learning_rate": 0.001,
      "loss": 2.5294,
      "step": 175392
    },
    {
      "epoch": 33.68,
      "learning_rate": 0.001,
      "loss": 2.5215,
      "step": 175404
    },
    {
      "epoch": 33.68,
      "learning_rate": 0.001,
      "loss": 2.525,
      "step": 175416
    },
    {
      "epoch": 33.68,
      "learning_rate": 0.001,
      "loss": 2.5191,
      "step": 175428
    },
    {
      "epoch": 33.69,
      "learning_rate": 0.001,
      "loss": 2.5176,
      "step": 175440
    },
    {
      "epoch": 33.69,
      "learning_rate": 0.001,
      "loss": 2.519,
      "step": 175452
    },
    {
      "epoch": 33.69,
      "learning_rate": 0.001,
      "loss": 2.5254,
      "step": 175464
    },
    {
      "epoch": 33.69,
      "learning_rate": 0.001,
      "loss": 2.5235,
      "step": 175476
    },
    {
      "epoch": 33.7,
      "learning_rate": 0.001,
      "loss": 2.5262,
      "step": 175488
    },
    {
      "epoch": 33.7,
      "learning_rate": 0.001,
      "loss": 2.5173,
      "step": 175500
    },
    {
      "epoch": 33.7,
      "learning_rate": 0.001,
      "loss": 2.5245,
      "step": 175512
    },
    {
      "epoch": 33.7,
      "learning_rate": 0.001,
      "loss": 2.5244,
      "step": 175524
    },
    {
      "epoch": 33.71,
      "learning_rate": 0.001,
      "loss": 2.5276,
      "step": 175536
    },
    {
      "epoch": 33.71,
      "learning_rate": 0.001,
      "loss": 2.5195,
      "step": 175548
    },
    {
      "epoch": 33.71,
      "learning_rate": 0.001,
      "loss": 2.5128,
      "step": 175560
    },
    {
      "epoch": 33.71,
      "learning_rate": 0.001,
      "loss": 2.5237,
      "step": 175572
    },
    {
      "epoch": 33.71,
      "learning_rate": 0.001,
      "loss": 2.5129,
      "step": 175584
    },
    {
      "epoch": 33.72,
      "learning_rate": 0.001,
      "loss": 2.5206,
      "step": 175596
    },
    {
      "epoch": 33.72,
      "learning_rate": 0.001,
      "loss": 2.5264,
      "step": 175608
    },
    {
      "epoch": 33.72,
      "learning_rate": 0.001,
      "loss": 2.5301,
      "step": 175620
    },
    {
      "epoch": 33.72,
      "eval_ag_news_accuracy": 0.32684375,
      "eval_ag_news_bleu_score": 4.855373379192692,
      "eval_ag_news_bleu_score_sem": 0.14698318106273012,
      "eval_ag_news_emb_cos_sim": 0.8197398781776428,
      "eval_ag_news_emb_cos_sim_sem": 0.006081692116990319,
      "eval_ag_news_emb_top1_equal": 0.2265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03714537682851538,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4985544681549072,
      "eval_ag_news_n_ngrams_match_1": 14.406,
      "eval_ag_news_n_ngrams_match_2": 3.152,
      "eval_ag_news_n_ngrams_match_3": 0.878,
      "eval_ag_news_num_pred_words": 46.648,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.06761710005661,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.359196732256724,
      "eval_ag_news_runtime": 10.7933,
      "eval_ag_news_samples_per_second": 46.325,
      "eval_ag_news_steps_per_second": 0.093,
      "eval_ag_news_token_set_f1": 0.3588689648036829,
      "eval_ag_news_token_set_f1_sem": 0.004311211936397491,
      "eval_ag_news_token_set_precision": 0.34603148979227266,
      "eval_ag_news_token_set_recall": 0.3859730557537426,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 175625
    },
    {
      "epoch": 33.72,
      "eval_anthropic_toxic_prompts_accuracy": 0.11371875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.301903666975883,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12393022775966903,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.683284342288971,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008835675858535952,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.238818883895874,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.312,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.02,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.78,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.702,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.503581260608513,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21895779495330242,
      "eval_anthropic_toxic_prompts_runtime": 9.8158,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.938,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.102,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3550808478742661,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006314207547264958,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4456959496262504,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3198729423060755,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 175625
    },
    {
      "epoch": 33.72,
      "eval_arxiv_accuracy": 0.34953125,
      "eval_arxiv_bleu_score": 4.331818024119469,
      "eval_arxiv_bleu_score_sem": 0.12788145618654687,
      "eval_arxiv_emb_cos_sim": 0.7702836394309998,
      "eval_arxiv_emb_cos_sim_sem": 0.007456664561029406,
      "eval_arxiv_emb_top1_equal": 0.2578125,
      "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.35262393951416,
      "eval_arxiv_n_ngrams_match_1": 15.19,
      "eval_arxiv_n_ngrams_match_2": 2.916,
      "eval_arxiv_n_ngrams_match_3": 0.644,
      "eval_arxiv_num_pred_words": 40.098,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.57762130021002,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36624769462691664,
      "eval_arxiv_runtime": 9.9939,
      "eval_arxiv_samples_per_second": 50.031,
      "eval_arxiv_steps_per_second": 0.1,
      "eval_arxiv_token_set_f1": 0.357499176963368,
      "eval_arxiv_token_set_f1_sem": 0.004310979759387343,
      "eval_arxiv_token_set_precision": 0.3097404851695088,
      "eval_arxiv_token_set_recall": 0.4439020351908504,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 175625
    },
    {
      "epoch": 33.72,
      "eval_python_code_alpaca_accuracy": 0.163375,
      "eval_python_code_alpaca_bleu_score": 4.969876834086027,
      "eval_python_code_alpaca_bleu_score_sem": 0.15598222238486606,
      "eval_python_code_alpaca_emb_cos_sim": 0.7668463587760925,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008543491514145162,
      "eval_python_code_alpaca_emb_top1_equal": 0.1171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.864105463027954,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.132,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.104,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.106,
      "eval_python_code_alpaca_num_pred_words": 43.472,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.53336194692209,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3447611111215496,
      "eval_python_code_alpaca_runtime": 10.0089,
      "eval_python_code_alpaca_samples_per_second": 49.956,
      "eval_python_code_alpaca_steps_per_second": 0.1,
      "eval_python_code_alpaca_token_set_f1": 0.48897683964688116,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005347935129504301,
      "eval_python_code_alpaca_token_set_precision": 0.5561481516871388,
      "eval_python_code_alpaca_token_set_recall": 0.4571091325355401,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 175625
    },
    {
      "epoch": 33.72,
      "eval_wikibio_accuracy": 0.325375,
      "eval_wikibio_bleu_score": 6.221671890476333,
      "eval_wikibio_bleu_score_sem": 0.22448468653591203,
      "eval_wikibio_emb_cos_sim": 0.7493537068367004,
      "eval_wikibio_emb_cos_sim_sem": 0.008246055277303212,
      "eval_wikibio_emb_top1_equal": 0.2109375,
      "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6775736808776855,
      "eval_wikibio_n_ngrams_match_1": 10.252,
      "eval_wikibio_n_ngrams_match_2": 3.5,
      "eval_wikibio_n_ngrams_match_3": 1.326,
      "eval_wikibio_num_pred_words": 35.868,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 39.550315873823486,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36321856530567737,
      "eval_wikibio_runtime": 9.9853,
      "eval_wikibio_samples_per_second": 50.073,
      "eval_wikibio_steps_per_second": 0.1,
      "eval_wikibio_token_set_f1": 0.3252007598847834,
      "eval_wikibio_token_set_f1_sem": 0.005536074977434512,
      "eval_wikibio_token_set_precision": 0.3323976521740242,
      "eval_wikibio_token_set_recall": 0.33309783714640956,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 175625
    },
    {
      "epoch": 33.72,
      "eval_nq_accuracy": 0.533,
      "eval_nq_bleu_score": 12.243123045485252,
      "eval_nq_bleu_score_sem": 0.4840684804050873,
      "eval_nq_emb_cos_sim": 0.8390569686889648,
      "eval_nq_emb_cos_sim_sem": 0.007764245017472323,
      "eval_nq_emb_top1_equal": 0.265625,
      "eval_nq_emb_top1_equal_sem": 0.03919146934646163,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1423661708831787,
      "eval_nq_n_ngrams_match_1": 23.632,
      "eval_nq_n_ngrams_match_2": 8.802,
      "eval_nq_n_ngrams_match_3": 4.058,
      "eval_nq_num_pred_words": 49.206,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.519572562679796,
      "eval_nq_pred_num_tokens": 62.9921875,
      "eval_nq_rouge_score": 0.4584658117502919,
      "eval_nq_runtime": 10.4933,
      "eval_nq_samples_per_second": 47.649,
      "eval_nq_steps_per_second": 0.095,
      "eval_nq_token_set_f1": 0.4710025770467909,
      "eval_nq_token_set_f1_sem": 0.004918792561906478,
      "eval_nq_token_set_precision": 0.42980353589918835,
      "eval_nq_token_set_recall": 0.5285762419652889,
      "eval_nq_true_num_tokens": 64.0,
      "step": 175625
    },
    {
      "epoch": 33.72,
      "learning_rate": 0.001,
      "loss": 2.5136,
      "step": 175632
    },
    {
      "epoch": 33.73,
      "learning_rate": 0.001,
      "loss": 2.522,
      "step": 175644
    },
    {
      "epoch": 33.73,
      "learning_rate": 0.001,
      "loss": 2.5258,
      "step": 175656
    },
    {
      "epoch": 33.73,
      "learning_rate": 0.001,
      "loss": 2.5232,
      "step": 175668
    },
    {
      "epoch": 33.73,
      "learning_rate": 0.001,
      "loss": 2.5174,
      "step": 175680
    },
    {
      "epoch": 33.74,
      "learning_rate": 0.001,
      "loss": 2.5215,
      "step": 175692
    },
    {
      "epoch": 33.74,
      "learning_rate": 0.001,
      "loss": 2.5222,
      "step": 175704
    },
    {
      "epoch": 33.74,
      "learning_rate": 0.001,
      "loss": 2.5195,
      "step": 175716
    },
    {
      "epoch": 33.74,
      "learning_rate": 0.001,
      "loss": 2.5267,
      "step": 175728
    },
    {
      "epoch": 33.74,
      "learning_rate": 0.001,
      "loss": 2.5165,
      "step": 175740
    },
    {
      "epoch": 33.75,
      "learning_rate": 0.001,
      "loss": 2.5208,
      "step": 175752
    },
    {
      "epoch": 33.75,
      "learning_rate": 0.001,
      "loss": 2.5211,
      "step": 175764
    },
    {
      "epoch": 33.75,
      "learning_rate": 0.001,
      "loss": 2.5285,
      "step": 175776
    },
    {
      "epoch": 33.75,
      "learning_rate": 0.001,
      "loss": 2.5247,
      "step": 175788
    },
    {
      "epoch": 33.76,
      "learning_rate": 0.001,
      "loss": 2.5144,
      "step": 175800
    },
    {
      "epoch": 33.76,
      "learning_rate": 0.001,
      "loss": 2.512,
      "step": 175812
    },
    {
      "epoch": 33.76,
      "learning_rate": 0.001,
      "loss": 2.5152,
      "step": 175824
    },
    {
      "epoch": 33.76,
      "learning_rate": 0.001,
      "loss": 2.5216,
      "step": 175836
    },
    {
      "epoch": 33.76,
      "learning_rate": 0.001,
      "loss": 2.5169,
      "step": 175848
    },
    {
      "epoch": 33.77,
      "learning_rate": 0.001,
      "loss": 2.5137,
      "step": 175860
    },
    {
      "epoch": 33.77,
      "learning_rate": 0.001,
      "loss": 2.5185,
      "step": 175872
    },
    {
      "epoch": 33.77,
      "learning_rate": 0.001,
      "loss": 2.5158,
      "step": 175884
    },
    {
      "epoch": 33.77,
      "learning_rate": 0.001,
      "loss": 2.5165,
      "step": 175896
    },
    {
      "epoch": 33.78,
      "learning_rate": 0.001,
      "loss": 2.5142,
      "step": 175908
    },
    {
      "epoch": 33.78,
      "learning_rate": 0.001,
      "loss": 2.5094,
      "step": 175920
    },
    {
      "epoch": 33.78,
      "learning_rate": 0.001,
      "loss": 2.5178,
      "step": 175932
    },
    {
      "epoch": 33.78,
      "learning_rate": 0.001,
      "loss": 2.5248,
      "step": 175944
    },
    {
      "epoch": 33.79,
      "learning_rate": 0.001,
      "loss": 2.5165,
      "step": 175956
    },
    {
      "epoch": 33.79,
      "learning_rate": 0.001,
      "loss": 2.5083,
      "step": 175968
    },
    {
      "epoch": 33.79,
      "learning_rate": 0.001,
      "loss": 2.5243,
      "step": 175980
    },
    {
      "epoch": 33.79,
      "learning_rate": 0.001,
      "loss": 2.5194,
      "step": 175992
    },
    {
      "epoch": 33.79,
      "learning_rate": 0.001,
      "loss": 2.5314,
      "step": 176004
    },
    {
      "epoch": 33.8,
      "learning_rate": 0.001,
      "loss": 2.5198,
      "step": 176016
    },
    {
      "epoch": 33.8,
      "learning_rate": 0.001,
      "loss": 2.5204,
      "step": 176028
    },
    {
      "epoch": 33.8,
      "learning_rate": 0.001,
      "loss": 2.5191,
      "step": 176040
    },
    {
      "epoch": 33.8,
      "learning_rate": 0.001,
      "loss": 2.5157,
      "step": 176052
    },
    {
      "epoch": 33.81,
      "learning_rate": 0.001,
      "loss": 2.5281,
      "step": 176064
    },
    {
      "epoch": 33.81,
      "learning_rate": 0.001,
      "loss": 2.5202,
      "step": 176076
    },
    {
      "epoch": 33.81,
      "learning_rate": 0.001,
      "loss": 2.5195,
      "step": 176088
    },
    {
      "epoch": 33.81,
      "learning_rate": 0.001,
      "loss": 2.5224,
      "step": 176100
    },
    {
      "epoch": 33.82,
      "learning_rate": 0.001,
      "loss": 2.5124,
      "step": 176112
    },
    {
      "epoch": 33.82,
      "learning_rate": 0.001,
      "loss": 2.5166,
      "step": 176124
    },
    {
      "epoch": 33.82,
      "learning_rate": 0.001,
      "loss": 2.5163,
      "step": 176136
    },
    {
      "epoch": 33.82,
      "learning_rate": 0.001,
      "loss": 2.5309,
      "step": 176148
    },
    {
      "epoch": 33.82,
      "learning_rate": 0.001,
      "loss": 2.5094,
      "step": 176160
    },
    {
      "epoch": 33.83,
      "learning_rate": 0.001,
      "loss": 2.5286,
      "step": 176172
    },
    {
      "epoch": 33.83,
      "learning_rate": 0.001,
      "loss": 2.5173,
      "step": 176184
    },
    {
      "epoch": 33.83,
      "learning_rate": 0.001,
      "loss": 2.5141,
      "step": 176196
    },
    {
      "epoch": 33.83,
      "learning_rate": 0.001,
      "loss": 2.524,
      "step": 176208
    },
    {
      "epoch": 33.84,
      "learning_rate": 0.001,
      "loss": 2.5194,
      "step": 176220
    },
    {
      "epoch": 33.84,
      "learning_rate": 0.001,
      "loss": 2.5331,
      "step": 176232
    },
    {
      "epoch": 33.84,
      "learning_rate": 0.001,
      "loss": 2.5205,
      "step": 176244
    },
    {
      "epoch": 33.84,
      "eval_ag_news_accuracy": 0.32653125,
      "eval_ag_news_bleu_score": 5.022316794581068,
      "eval_ag_news_bleu_score_sem": 0.1610563486577137,
      "eval_ag_news_emb_cos_sim": 0.8117334246635437,
      "eval_ag_news_emb_cos_sim_sem": 0.007804921336096892,
      "eval_ag_news_emb_top1_equal": 0.15625,
      "eval_ag_news_emb_top1_equal_sem": 0.03221922156442571,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4926795959472656,
      "eval_ag_news_n_ngrams_match_1": 14.332,
      "eval_ag_news_n_ngrams_match_2": 3.17,
      "eval_ag_news_n_ngrams_match_3": 0.962,
      "eval_ag_news_num_pred_words": 46.34,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.87391860943298,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35827660389767435,
      "eval_ag_news_runtime": 10.3068,
      "eval_ag_news_samples_per_second": 48.512,
      "eval_ag_news_steps_per_second": 0.097,
      "eval_ag_news_token_set_f1": 0.3569641546797145,
      "eval_ag_news_token_set_f1_sem": 0.004364326287541326,
      "eval_ag_news_token_set_precision": 0.34392343268458236,
      "eval_ag_news_token_set_recall": 0.38552168292441336,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 176250
    },
    {
      "epoch": 33.84,
      "eval_anthropic_toxic_prompts_accuracy": 0.11565625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.101782117487336,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11736410045728099,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6723465919494629,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008566576133327989,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.19783353805542,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.206,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.898,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.732,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.23,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.479438934898205,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21430135132486505,
      "eval_anthropic_toxic_prompts_runtime": 10.1328,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.345,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.099,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35964724783009494,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006470879887319756,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4402183209964053,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32932754974213424,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 176250
    },
    {
      "epoch": 33.84,
      "eval_arxiv_accuracy": 0.349875,
      "eval_arxiv_bleu_score": 4.333335033387184,
      "eval_arxiv_bleu_score_sem": 0.13070468884873127,
      "eval_arxiv_emb_cos_sim": 0.7708592414855957,
      "eval_arxiv_emb_cos_sim_sem": 0.008004529469188995,
      "eval_arxiv_emb_top1_equal": 0.2421875,
      "eval_arxiv_emb_top1_equal_sem": 0.038014990119662626,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.363433599472046,
      "eval_arxiv_n_ngrams_match_1": 15.024,
      "eval_arxiv_n_ngrams_match_2": 2.974,
      "eval_arxiv_n_ngrams_match_3": 0.684,
      "eval_arxiv_num_pred_words": 39.702,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.88821133084713,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3638282134047197,
      "eval_arxiv_runtime": 10.4254,
      "eval_arxiv_samples_per_second": 47.96,
      "eval_arxiv_steps_per_second": 0.096,
      "eval_arxiv_token_set_f1": 0.3550458552555655,
      "eval_arxiv_token_set_f1_sem": 0.004273149787825975,
      "eval_arxiv_token_set_precision": 0.3055640869524656,
      "eval_arxiv_token_set_recall": 0.4463338959390171,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 176250
    },
    {
      "epoch": 33.84,
      "eval_python_code_alpaca_accuracy": 0.1631875,
      "eval_python_code_alpaca_bleu_score": 4.882627397241164,
      "eval_python_code_alpaca_bleu_score_sem": 0.1474335988021144,
      "eval_python_code_alpaca_emb_cos_sim": 0.7622191905975342,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008958193454434582,
      "eval_python_code_alpaca_emb_top1_equal": 0.1796875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.034068008879424266,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.862119197845459,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.102,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.11,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.114,
      "eval_python_code_alpaca_num_pred_words": 43.886,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.498570604409352,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.34009863909271487,
      "eval_python_code_alpaca_runtime": 10.0054,
      "eval_python_code_alpaca_samples_per_second": 49.973,
      "eval_python_code_alpaca_steps_per_second": 0.1,
      "eval_python_code_alpaca_token_set_f1": 0.4912797406295639,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005359606041822242,
      "eval_python_code_alpaca_token_set_precision": 0.5515130652188471,
      "eval_python_code_alpaca_token_set_recall": 0.46187727605691214,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 176250
    },
    {
      "epoch": 33.84,
      "eval_wikibio_accuracy": 0.32375,
      "eval_wikibio_bleu_score": 5.862255681674758,
      "eval_wikibio_bleu_score_sem": 0.22913003328137857,
      "eval_wikibio_emb_cos_sim": 0.7266998291015625,
      "eval_wikibio_emb_cos_sim_sem": 0.010587300858282231,
      "eval_wikibio_emb_top1_equal": 0.140625,
      "eval_wikibio_emb_top1_equal_sem": 0.030847557647994725,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6650583744049072,
      "eval_wikibio_n_ngrams_match_1": 9.756,
      "eval_wikibio_n_ngrams_match_2": 3.254,
      "eval_wikibio_n_ngrams_match_3": 1.182,
      "eval_wikibio_num_pred_words": 34.968,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 39.05841610835043,
      "eval_wikibio_pred_num_tokens": 62.984375,
      "eval_wikibio_rouge_score": 0.346696465532531,
      "eval_wikibio_runtime": 10.0086,
      "eval_wikibio_samples_per_second": 49.957,
      "eval_wikibio_steps_per_second": 0.1,
      "eval_wikibio_token_set_f1": 0.3127898146298991,
      "eval_wikibio_token_set_f1_sem": 0.005959211422264519,
      "eval_wikibio_token_set_precision": 0.31750633023827374,
      "eval_wikibio_token_set_recall": 0.32692533528118073,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 176250
    },
    {
      "epoch": 33.84,
      "eval_nq_accuracy": 0.53265625,
      "eval_nq_bleu_score": 12.032671736687595,
      "eval_nq_bleu_score_sem": 0.4864826348076925,
      "eval_nq_emb_cos_sim": 0.8336796760559082,
      "eval_nq_emb_cos_sim_sem": 0.007405400163805492,
      "eval_nq_emb_top1_equal": 0.28125,
      "eval_nq_emb_top1_equal_sem": 0.039896367485272234,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.142033100128174,
      "eval_nq_n_ngrams_match_1": 23.406,
      "eval_nq_n_ngrams_match_2": 8.724,
      "eval_nq_n_ngrams_match_3": 4.014,
      "eval_nq_num_pred_words": 49.216,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.51673541472576,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4559461107007099,
      "eval_nq_runtime": 10.4438,
      "eval_nq_samples_per_second": 47.875,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.46876594569524405,
      "eval_nq_token_set_f1_sem": 0.004906758692664311,
      "eval_nq_token_set_precision": 0.42689880034459693,
      "eval_nq_token_set_recall": 0.5284977305367066,
      "eval_nq_true_num_tokens": 64.0,
      "step": 176250
    },
    {
      "epoch": 33.84,
      "learning_rate": 0.001,
      "loss": 2.5226,
      "step": 176256
    },
    {
      "epoch": 33.85,
      "learning_rate": 0.001,
      "loss": 2.5169,
      "step": 176268
    },
    {
      "epoch": 33.85,
      "learning_rate": 0.001,
      "loss": 2.5184,
      "step": 176280
    },
    {
      "epoch": 33.85,
      "learning_rate": 0.001,
      "loss": 2.523,
      "step": 176292
    },
    {
      "epoch": 33.85,
      "learning_rate": 0.001,
      "loss": 2.5268,
      "step": 176304
    },
    {
      "epoch": 33.85,
      "learning_rate": 0.001,
      "loss": 2.5155,
      "step": 176316
    },
    {
      "epoch": 33.86,
      "learning_rate": 0.001,
      "loss": 2.5157,
      "step": 176328
    },
    {
      "epoch": 33.86,
      "learning_rate": 0.001,
      "loss": 2.5185,
      "step": 176340
    },
    {
      "epoch": 33.86,
      "learning_rate": 0.001,
      "loss": 2.5177,
      "step": 176352
    },
    {
      "epoch": 33.86,
      "learning_rate": 0.001,
      "loss": 2.5274,
      "step": 176364
    },
    {
      "epoch": 33.87,
      "learning_rate": 0.001,
      "loss": 2.5086,
      "step": 176376
    },
    {
      "epoch": 33.87,
      "learning_rate": 0.001,
      "loss": 2.5209,
      "step": 176388
    },
    {
      "epoch": 33.87,
      "learning_rate": 0.001,
      "loss": 2.5249,
      "step": 176400
    },
    {
      "epoch": 33.87,
      "learning_rate": 0.001,
      "loss": 2.5138,
      "step": 176412
    },
    {
      "epoch": 33.88,
      "learning_rate": 0.001,
      "loss": 2.5178,
      "step": 176424
    },
    {
      "epoch": 33.88,
      "learning_rate": 0.001,
      "loss": 2.5167,
      "step": 176436
    },
    {
      "epoch": 33.88,
      "learning_rate": 0.001,
      "loss": 2.5321,
      "step": 176448
    },
    {
      "epoch": 33.88,
      "learning_rate": 0.001,
      "loss": 2.5125,
      "step": 176460
    },
    {
      "epoch": 33.88,
      "learning_rate": 0.001,
      "loss": 2.5189,
      "step": 176472
    },
    {
      "epoch": 33.89,
      "learning_rate": 0.001,
      "loss": 2.5238,
      "step": 176484
    },
    {
      "epoch": 33.89,
      "learning_rate": 0.001,
      "loss": 2.5233,
      "step": 176496
    },
    {
      "epoch": 33.89,
      "learning_rate": 0.001,
      "loss": 2.5068,
      "step": 176508
    },
    {
      "epoch": 33.89,
      "learning_rate": 0.001,
      "loss": 2.5136,
      "step": 176520
    },
    {
      "epoch": 33.9,
      "learning_rate": 0.001,
      "loss": 2.5203,
      "step": 176532
    },
    {
      "epoch": 33.9,
      "learning_rate": 0.001,
      "loss": 2.5197,
      "step": 176544
    },
    {
      "epoch": 33.9,
      "learning_rate": 0.001,
      "loss": 2.5165,
      "step": 176556
    },
    {
      "epoch": 33.9,
      "learning_rate": 0.001,
      "loss": 2.5261,
      "step": 176568
    },
    {
      "epoch": 33.91,
      "learning_rate": 0.001,
      "loss": 2.5204,
      "step": 176580
    },
    {
      "epoch": 33.91,
      "learning_rate": 0.001,
      "loss": 2.5228,
      "step": 176592
    },
    {
      "epoch": 33.91,
      "learning_rate": 0.001,
      "loss": 2.5232,
      "step": 176604
    },
    {
      "epoch": 33.91,
      "learning_rate": 0.001,
      "loss": 2.5244,
      "step": 176616
    },
    {
      "epoch": 33.91,
      "learning_rate": 0.001,
      "loss": 2.5166,
      "step": 176628
    },
    {
      "epoch": 33.92,
      "learning_rate": 0.001,
      "loss": 2.5224,
      "step": 176640
    },
    {
      "epoch": 33.92,
      "learning_rate": 0.001,
      "loss": 2.5254,
      "step": 176652
    },
    {
      "epoch": 33.92,
      "learning_rate": 0.001,
      "loss": 2.5185,
      "step": 176664
    },
    {
      "epoch": 33.92,
      "learning_rate": 0.001,
      "loss": 2.5281,
      "step": 176676
    },
    {
      "epoch": 33.93,
      "learning_rate": 0.001,
      "loss": 2.5231,
      "step": 176688
    },
    {
      "epoch": 33.93,
      "learning_rate": 0.001,
      "loss": 2.5285,
      "step": 176700
    },
    {
      "epoch": 33.93,
      "learning_rate": 0.001,
      "loss": 2.5111,
      "step": 176712
    },
    {
      "epoch": 33.93,
      "learning_rate": 0.001,
      "loss": 2.5205,
      "step": 176724
    },
    {
      "epoch": 33.94,
      "learning_rate": 0.001,
      "loss": 2.5184,
      "step": 176736
    },
    {
      "epoch": 33.94,
      "learning_rate": 0.001,
      "loss": 2.5198,
      "step": 176748
    },
    {
      "epoch": 33.94,
      "learning_rate": 0.001,
      "loss": 2.5285,
      "step": 176760
    },
    {
      "epoch": 33.94,
      "learning_rate": 0.001,
      "loss": 2.5292,
      "step": 176772
    },
    {
      "epoch": 33.94,
      "learning_rate": 0.001,
      "loss": 2.5165,
      "step": 176784
    },
    {
      "epoch": 33.95,
      "learning_rate": 0.001,
      "loss": 2.5315,
      "step": 176796
    },
    {
      "epoch": 33.95,
      "learning_rate": 0.001,
      "loss": 2.5208,
      "step": 176808
    },
    {
      "epoch": 33.95,
      "learning_rate": 0.001,
      "loss": 2.5221,
      "step": 176820
    },
    {
      "epoch": 33.95,
      "learning_rate": 0.001,
      "loss": 2.516,
      "step": 176832
    },
    {
      "epoch": 33.96,
      "learning_rate": 0.001,
      "loss": 2.5194,
      "step": 176844
    },
    {
      "epoch": 33.96,
      "learning_rate": 0.001,
      "loss": 2.5233,
      "step": 176856
    },
    {
      "epoch": 33.96,
      "learning_rate": 0.001,
      "loss": 2.5263,
      "step": 176868
    },
    {
      "epoch": 33.96,
      "eval_ag_news_accuracy": 0.3281875,
      "eval_ag_news_bleu_score": 4.925127791586091,
      "eval_ag_news_bleu_score_sem": 0.15694905014873736,
      "eval_ag_news_emb_cos_sim": 0.8147540092468262,
      "eval_ag_news_emb_cos_sim_sem": 0.006866652145814417,
      "eval_ag_news_emb_top1_equal": 0.234375,
      "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.496150016784668,
      "eval_ag_news_n_ngrams_match_1": 14.384,
      "eval_ag_news_n_ngrams_match_2": 3.19,
      "eval_ag_news_n_ngrams_match_3": 0.906,
      "eval_ag_news_num_pred_words": 46.64,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.988203134579116,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3561577784761314,
      "eval_ag_news_runtime": 10.2941,
      "eval_ag_news_samples_per_second": 48.572,
      "eval_ag_news_steps_per_second": 0.097,
      "eval_ag_news_token_set_f1": 0.3600030201437264,
      "eval_ag_news_token_set_f1_sem": 0.00448956066480172,
      "eval_ag_news_token_set_precision": 0.3439573073316057,
      "eval_ag_news_token_set_recall": 0.39386164250470707,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 176875
    },
    {
      "epoch": 33.96,
      "eval_anthropic_toxic_prompts_accuracy": 0.1148125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1461440631615756,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1162112442430475,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6758161783218384,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008089936106429272,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2554593086242676,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.216,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.936,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.702,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.86,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.931522370968956,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21552557670242806,
      "eval_anthropic_toxic_prompts_runtime": 10.0285,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.858,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.1,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36147746707622597,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006401626003123946,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4391229271077412,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.33594247230708824,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 176875
    },
    {
      "epoch": 33.96,
      "eval_arxiv_accuracy": 0.3485,
      "eval_arxiv_bleu_score": 4.475956653190206,
      "eval_arxiv_bleu_score_sem": 0.1307632635874717,
      "eval_arxiv_emb_cos_sim": 0.7735968232154846,
      "eval_arxiv_emb_cos_sim_sem": 0.007195954692853144,
      "eval_arxiv_emb_top1_equal": 0.2421875,
      "eval_arxiv_emb_top1_equal_sem": 0.038014990119662626,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3651084899902344,
      "eval_arxiv_n_ngrams_match_1": 15.386,
      "eval_arxiv_n_ngrams_match_2": 3.042,
      "eval_arxiv_n_ngrams_match_3": 0.698,
      "eval_arxiv_num_pred_words": 40.688,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.936636464170498,
      "eval_arxiv_pred_num_tokens": 62.9921875,
      "eval_arxiv_rouge_score": 0.36923529217214174,
      "eval_arxiv_runtime": 10.7478,
      "eval_arxiv_samples_per_second": 46.521,
      "eval_arxiv_steps_per_second": 0.093,
      "eval_arxiv_token_set_f1": 0.36256383516191165,
      "eval_arxiv_token_set_f1_sem": 0.004078160752144186,
      "eval_arxiv_token_set_precision": 0.3135278337275999,
      "eval_arxiv_token_set_recall": 0.4472464725676267,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 176875
    },
    {
      "epoch": 33.96,
      "eval_python_code_alpaca_accuracy": 0.1638125,
      "eval_python_code_alpaca_bleu_score": 4.7387667116625645,
      "eval_python_code_alpaca_bleu_score_sem": 0.14172375487910976,
      "eval_python_code_alpaca_emb_cos_sim": 0.7565352916717529,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009347629676130734,
      "eval_python_code_alpaca_emb_top1_equal": 0.078125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.023813825516515504,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.86161208152771,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.958,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.994,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.052,
      "eval_python_code_alpaca_num_pred_words": 44.364,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.489699043365377,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3354474476398809,
      "eval_python_code_alpaca_runtime": 9.7477,
      "eval_python_code_alpaca_samples_per_second": 51.294,
      "eval_python_code_alpaca_steps_per_second": 0.103,
      "eval_python_code_alpaca_token_set_f1": 0.4876155805606222,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0053307150896694236,
      "eval_python_code_alpaca_token_set_precision": 0.5457988485903871,
      "eval_python_code_alpaca_token_set_recall": 0.4602532240033791,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 176875
    },
    {
      "epoch": 33.96,
      "eval_wikibio_accuracy": 0.3266875,
      "eval_wikibio_bleu_score": 5.686736670012528,
      "eval_wikibio_bleu_score_sem": 0.20163567985024022,
      "eval_wikibio_emb_cos_sim": 0.7425272464752197,
      "eval_wikibio_emb_cos_sim_sem": 0.010202726670688516,
      "eval_wikibio_emb_top1_equal": 0.15625,
      "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.669208288192749,
      "eval_wikibio_n_ngrams_match_1": 9.97,
      "eval_wikibio_n_ngrams_match_2": 3.244,
      "eval_wikibio_n_ngrams_match_3": 1.132,
      "eval_wikibio_num_pred_words": 35.54,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 39.22084196142814,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.35275426820844197,
      "eval_wikibio_runtime": 10.0048,
      "eval_wikibio_samples_per_second": 49.976,
      "eval_wikibio_steps_per_second": 0.1,
      "eval_wikibio_token_set_f1": 0.316749368723803,
      "eval_wikibio_token_set_f1_sem": 0.0054844982113824185,
      "eval_wikibio_token_set_precision": 0.32155038904725625,
      "eval_wikibio_token_set_recall": 0.3311863377336578,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 176875
    },
    {
      "epoch": 33.96,
      "eval_nq_accuracy": 0.5338125,
      "eval_nq_bleu_score": 11.958889977560315,
      "eval_nq_bleu_score_sem": 0.49241591516146427,
      "eval_nq_emb_cos_sim": 0.826998233795166,
      "eval_nq_emb_cos_sim_sem": 0.007967327631388542,
      "eval_nq_emb_top1_equal": 0.296875,
      "eval_nq_emb_top1_equal_sem": 0.04054163310179599,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1436386108398438,
      "eval_nq_n_ngrams_match_1": 23.222,
      "eval_nq_n_ngrams_match_2": 8.592,
      "eval_nq_n_ngrams_match_3": 4.026,
      "eval_nq_num_pred_words": 48.864,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.53042010718318,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45182487939989135,
      "eval_nq_runtime": 10.4291,
      "eval_nq_samples_per_second": 47.943,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.46485089366770055,
      "eval_nq_token_set_f1_sem": 0.005049191163962152,
      "eval_nq_token_set_precision": 0.42159101839303115,
      "eval_nq_token_set_recall": 0.5278695553695189,
      "eval_nq_true_num_tokens": 64.0,
      "step": 176875
    },
    {
      "epoch": 33.96,
      "learning_rate": 0.001,
      "loss": 2.5192,
      "step": 176880
    },
    {
      "epoch": 33.97,
      "learning_rate": 0.001,
      "loss": 2.5177,
      "step": 176892
    },
    {
      "epoch": 33.97,
      "learning_rate": 0.001,
      "loss": 2.5186,
      "step": 176904
    },
    {
      "epoch": 33.97,
      "learning_rate": 0.001,
      "loss": 2.5316,
      "step": 176916
    },
    {
      "epoch": 33.97,
      "learning_rate": 0.001,
      "loss": 2.5152,
      "step": 176928
    },
    {
      "epoch": 33.97,
      "learning_rate": 0.001,
      "loss": 2.528,
      "step": 176940
    },
    {
      "epoch": 33.98,
      "learning_rate": 0.001,
      "loss": 2.5187,
      "step": 176952
    },
    {
      "epoch": 33.98,
      "learning_rate": 0.001,
      "loss": 2.5273,
      "step": 176964
    },
    {
      "epoch": 33.98,
      "learning_rate": 0.001,
      "loss": 2.5193,
      "step": 176976
    },
    {
      "epoch": 33.98,
      "learning_rate": 0.001,
      "loss": 2.5289,
      "step": 176988
    },
    {
      "epoch": 33.99,
      "learning_rate": 0.001,
      "loss": 2.5216,
      "step": 177000
    },
    {
      "epoch": 33.99,
      "learning_rate": 0.001,
      "loss": 2.5218,
      "step": 177012
    },
    {
      "epoch": 33.99,
      "learning_rate": 0.001,
      "loss": 2.5223,
      "step": 177024
    },
    {
      "epoch": 33.99,
      "learning_rate": 0.001,
      "loss": 2.5195,
      "step": 177036
    },
    {
      "epoch": 34.0,
      "learning_rate": 0.001,
      "loss": 2.5112,
      "step": 177048
    },
    {
      "epoch": 34.0,
      "learning_rate": 0.001,
      "loss": 2.5228,
      "step": 177060
    },
    {
      "epoch": 34.0,
      "learning_rate": 0.001,
      "loss": 2.5093,
      "step": 177072
    },
    {
      "epoch": 34.0,
      "learning_rate": 0.001,
      "loss": 2.5099,
      "step": 177084
    },
    {
      "epoch": 34.0,
      "learning_rate": 0.001,
      "loss": 2.5055,
      "step": 177096
    },
    {
      "epoch": 34.01,
      "learning_rate": 0.001,
      "loss": 2.5073,
      "step": 177108
    },
    {
      "epoch": 34.01,
      "learning_rate": 0.001,
      "loss": 2.5044,
      "step": 177120
    },
    {
      "epoch": 34.01,
      "learning_rate": 0.001,
      "loss": 2.501,
      "step": 177132
    },
    {
      "epoch": 34.01,
      "learning_rate": 0.001,
      "loss": 2.5081,
      "step": 177144
    },
    {
      "epoch": 34.02,
      "learning_rate": 0.001,
      "loss": 2.5133,
      "step": 177156
    },
    {
      "epoch": 34.02,
      "learning_rate": 0.001,
      "loss": 2.5015,
      "step": 177168
    },
    {
      "epoch": 34.02,
      "learning_rate": 0.001,
      "loss": 2.5029,
      "step": 177180
    },
    {
      "epoch": 34.02,
      "learning_rate": 0.001,
      "loss": 2.507,
      "step": 177192
    },
    {
      "epoch": 34.03,
      "learning_rate": 0.001,
      "loss": 2.5022,
      "step": 177204
    },
    {
      "epoch": 34.03,
      "learning_rate": 0.001,
      "loss": 2.5075,
      "step": 177216
    },
    {
      "epoch": 34.03,
      "learning_rate": 0.001,
      "loss": 2.505,
      "step": 177228
    },
    {
      "epoch": 34.03,
      "learning_rate": 0.001,
      "loss": 2.504,
      "step": 177240
    },
    {
      "epoch": 34.03,
      "learning_rate": 0.001,
      "loss": 2.5059,
      "step": 177252
    },
    {
      "epoch": 34.04,
      "learning_rate": 0.001,
      "loss": 2.5041,
      "step": 177264
    },
    {
      "epoch": 34.04,
      "learning_rate": 0.001,
      "loss": 2.4997,
      "step": 177276
    },
    {
      "epoch": 34.04,
      "learning_rate": 0.001,
      "loss": 2.5064,
      "step": 177288
    },
    {
      "epoch": 34.04,
      "learning_rate": 0.001,
      "loss": 2.5048,
      "step": 177300
    },
    {
      "epoch": 34.05,
      "learning_rate": 0.001,
      "loss": 2.507,
      "step": 177312
    },
    {
      "epoch": 34.05,
      "learning_rate": 0.001,
      "loss": 2.5025,
      "step": 177324
    },
    {
      "epoch": 34.05,
      "learning_rate": 0.001,
      "loss": 2.5081,
      "step": 177336
    },
    {
      "epoch": 34.05,
      "learning_rate": 0.001,
      "loss": 2.505,
      "step": 177348
    },
    {
      "epoch": 34.06,
      "learning_rate": 0.001,
      "loss": 2.5084,
      "step": 177360
    },
    {
      "epoch": 34.06,
      "learning_rate": 0.001,
      "loss": 2.5178,
      "step": 177372
    },
    {
      "epoch": 34.06,
      "learning_rate": 0.001,
      "loss": 2.5039,
      "step": 177384
    },
    {
      "epoch": 34.06,
      "learning_rate": 0.001,
      "loss": 2.5012,
      "step": 177396
    },
    {
      "epoch": 34.06,
      "learning_rate": 0.001,
      "loss": 2.5051,
      "step": 177408
    },
    {
      "epoch": 34.07,
      "learning_rate": 0.001,
      "loss": 2.5089,
      "step": 177420
    },
    {
      "epoch": 34.07,
      "learning_rate": 0.001,
      "loss": 2.5111,
      "step": 177432
    },
    {
      "epoch": 34.07,
      "learning_rate": 0.001,
      "loss": 2.5061,
      "step": 177444
    },
    {
      "epoch": 34.07,
      "learning_rate": 0.001,
      "loss": 2.5029,
      "step": 177456
    },
    {
      "epoch": 34.08,
      "learning_rate": 0.001,
      "loss": 2.5039,
      "step": 177468
    },
    {
      "epoch": 34.08,
      "learning_rate": 0.001,
      "loss": 2.5067,
      "step": 177480
    },
    {
      "epoch": 34.08,
      "learning_rate": 0.001,
      "loss": 2.5231,
      "step": 177492
    },
    {
      "epoch": 34.08,
      "eval_ag_news_accuracy": 0.3284375,
      "eval_ag_news_bleu_score": 4.9737798732915905,
      "eval_ag_news_bleu_score_sem": 0.16287397778811172,
      "eval_ag_news_emb_cos_sim": 0.8183648586273193,
      "eval_ag_news_emb_cos_sim_sem": 0.006624846456284306,
      "eval_ag_news_emb_top1_equal": 0.1796875,
      "eval_ag_news_emb_top1_equal_sem": 0.034068008879424266,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.50203800201416,
      "eval_ag_news_n_ngrams_match_1": 14.354,
      "eval_ag_news_n_ngrams_match_2": 3.19,
      "eval_ag_news_n_ngrams_match_3": 0.952,
      "eval_ag_news_num_pred_words": 46.748,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.183010134950294,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35565082562172046,
      "eval_ag_news_runtime": 10.3408,
      "eval_ag_news_samples_per_second": 48.352,
      "eval_ag_news_steps_per_second": 0.097,
      "eval_ag_news_token_set_f1": 0.3544618815318247,
      "eval_ag_news_token_set_f1_sem": 0.0045137701411784685,
      "eval_ag_news_token_set_precision": 0.34262575645219906,
      "eval_ag_news_token_set_recall": 0.3806784263670829,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 177500
    },
    {
      "epoch": 34.08,
      "eval_anthropic_toxic_prompts_accuracy": 0.11478125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.071852098732515,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11282711917908927,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.673026978969574,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00923623621406095,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2352213859558105,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.16,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.884,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.668,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.088,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.411997015399496,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21148223741805833,
      "eval_anthropic_toxic_prompts_runtime": 9.8661,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.679,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.101,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3574934733935281,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006395347285549812,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.43499871099049386,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3298577965119157,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 177500
    },
    {
      "epoch": 34.08,
      "eval_arxiv_accuracy": 0.3490625,
      "eval_arxiv_bleu_score": 4.496504092248576,
      "eval_arxiv_bleu_score_sem": 0.13611944155457134,
      "eval_arxiv_emb_cos_sim": 0.7688542604446411,
      "eval_arxiv_emb_cos_sim_sem": 0.007601529864806429,
      "eval_arxiv_emb_top1_equal": 0.2890625,
      "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3651444911956787,
      "eval_arxiv_n_ngrams_match_1": 15.204,
      "eval_arxiv_n_ngrams_match_2": 3.032,
      "eval_arxiv_n_ngrams_match_3": 0.71,
      "eval_arxiv_num_pred_words": 40.39,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.937678236717133,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3635921477805558,
      "eval_arxiv_runtime": 10.1523,
      "eval_arxiv_samples_per_second": 49.25,
      "eval_arxiv_steps_per_second": 0.098,
      "eval_arxiv_token_set_f1": 0.3564108125492474,
      "eval_arxiv_token_set_f1_sem": 0.004195536513763176,
      "eval_arxiv_token_set_precision": 0.3090003919823345,
      "eval_arxiv_token_set_recall": 0.4416617371632523,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 177500
    },
    {
      "epoch": 34.08,
      "eval_python_code_alpaca_accuracy": 0.16446875,
      "eval_python_code_alpaca_bleu_score": 4.789573884652132,
      "eval_python_code_alpaca_bleu_score_sem": 0.14805177833650254,
      "eval_python_code_alpaca_emb_cos_sim": 0.7696361541748047,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007880690174462844,
      "eval_python_code_alpaca_emb_top1_equal": 0.109375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.837475061416626,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.036,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.09,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.108,
      "eval_python_code_alpaca_num_pred_words": 44.442,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.07260379366368,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33634832860525293,
      "eval_python_code_alpaca_runtime": 9.8934,
      "eval_python_code_alpaca_samples_per_second": 50.539,
      "eval_python_code_alpaca_steps_per_second": 0.101,
      "eval_python_code_alpaca_token_set_f1": 0.49063320634887125,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0054646803767432,
      "eval_python_code_alpaca_token_set_precision": 0.5487526530466039,
      "eval_python_code_alpaca_token_set_recall": 0.4669653512892527,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 177500
    },
    {
      "epoch": 34.08,
      "eval_wikibio_accuracy": 0.325,
      "eval_wikibio_bleu_score": 5.767346121615908,
      "eval_wikibio_bleu_score_sem": 0.21379359768355632,
      "eval_wikibio_emb_cos_sim": 0.7263277769088745,
      "eval_wikibio_emb_cos_sim_sem": 0.011270721885053998,
      "eval_wikibio_emb_top1_equal": 0.203125,
      "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6769955158233643,
      "eval_wikibio_n_ngrams_match_1": 9.558,
      "eval_wikibio_n_ngrams_match_2": 3.196,
      "eval_wikibio_n_ngrams_match_3": 1.15,
      "eval_wikibio_num_pred_words": 35.01,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 39.52745587236166,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3406878190148007,
      "eval_wikibio_runtime": 9.9711,
      "eval_wikibio_samples_per_second": 50.145,
      "eval_wikibio_steps_per_second": 0.1,
      "eval_wikibio_token_set_f1": 0.30822373431140476,
      "eval_wikibio_token_set_f1_sem": 0.0058045013490851845,
      "eval_wikibio_token_set_precision": 0.3104517335616784,
      "eval_wikibio_token_set_recall": 0.3290589404976437,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 177500
    },
    {
      "epoch": 34.08,
      "eval_nq_accuracy": 0.53259375,
      "eval_nq_bleu_score": 11.95770555791102,
      "eval_nq_bleu_score_sem": 0.4855122276489385,
      "eval_nq_emb_cos_sim": 0.8312137126922607,
      "eval_nq_emb_cos_sim_sem": 0.0075484274698607344,
      "eval_nq_emb_top1_equal": 0.25,
      "eval_nq_emb_top1_equal_sem": 0.03842366440207048,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1430771350860596,
      "eval_nq_n_ngrams_match_1": 23.364,
      "eval_nq_n_ngrams_match_2": 8.626,
      "eval_nq_n_ngrams_match_3": 3.964,
      "eval_nq_num_pred_words": 49.032,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.525631827500668,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45439529064672496,
      "eval_nq_runtime": 10.8732,
      "eval_nq_samples_per_second": 45.985,
      "eval_nq_steps_per_second": 0.092,
      "eval_nq_token_set_f1": 0.46639776023598045,
      "eval_nq_token_set_f1_sem": 0.00483672933589931,
      "eval_nq_token_set_precision": 0.4247555007065695,
      "eval_nq_token_set_recall": 0.5265175502371234,
      "eval_nq_true_num_tokens": 64.0,
      "step": 177500
    },
    {
      "epoch": 34.08,
      "learning_rate": 0.001,
      "loss": 2.5086,
      "step": 177504
    },
    {
      "epoch": 34.09,
      "learning_rate": 0.001,
      "loss": 2.5041,
      "step": 177516
    },
    {
      "epoch": 34.09,
      "learning_rate": 0.001,
      "loss": 2.5062,
      "step": 177528
    },
    {
      "epoch": 34.09,
      "learning_rate": 0.001,
      "loss": 2.499,
      "step": 177540
    },
    {
      "epoch": 34.09,
      "learning_rate": 0.001,
      "loss": 2.5103,
      "step": 177552
    },
    {
      "epoch": 34.09,
      "learning_rate": 0.001,
      "loss": 2.5129,
      "step": 177564
    },
    {
      "epoch": 34.1,
      "learning_rate": 0.001,
      "loss": 2.5155,
      "step": 177576
    },
    {
      "epoch": 34.1,
      "learning_rate": 0.001,
      "loss": 2.5001,
      "step": 177588
    },
    {
      "epoch": 34.1,
      "learning_rate": 0.001,
      "loss": 2.5078,
      "step": 177600
    },
    {
      "epoch": 34.1,
      "learning_rate": 0.001,
      "loss": 2.5151,
      "step": 177612
    },
    {
      "epoch": 34.11,
      "learning_rate": 0.001,
      "loss": 2.5145,
      "step": 177624
    },
    {
      "epoch": 34.11,
      "learning_rate": 0.001,
      "loss": 2.5124,
      "step": 177636
    },
    {
      "epoch": 34.11,
      "learning_rate": 0.001,
      "loss": 2.5091,
      "step": 177648
    },
    {
      "epoch": 34.11,
      "learning_rate": 0.001,
      "loss": 2.5003,
      "step": 177660
    },
    {
      "epoch": 34.12,
      "learning_rate": 0.001,
      "loss": 2.5025,
      "step": 177672
    },
    {
      "epoch": 34.12,
      "learning_rate": 0.001,
      "loss": 2.5122,
      "step": 177684
    },
    {
      "epoch": 34.12,
      "learning_rate": 0.001,
      "loss": 2.5131,
      "step": 177696
    },
    {
      "epoch": 34.12,
      "learning_rate": 0.001,
      "loss": 2.5149,
      "step": 177708
    },
    {
      "epoch": 34.12,
      "learning_rate": 0.001,
      "loss": 2.505,
      "step": 177720
    },
    {
      "epoch": 34.13,
      "learning_rate": 0.001,
      "loss": 2.5087,
      "step": 177732
    },
    {
      "epoch": 34.13,
      "learning_rate": 0.001,
      "loss": 2.5151,
      "step": 177744
    },
    {
      "epoch": 34.13,
      "learning_rate": 0.001,
      "loss": 2.5049,
      "step": 177756
    },
    {
      "epoch": 34.13,
      "learning_rate": 0.001,
      "loss": 2.4998,
      "step": 177768
    },
    {
      "epoch": 34.14,
      "learning_rate": 0.001,
      "loss": 2.5213,
      "step": 177780
    },
    {
      "epoch": 34.14,
      "learning_rate": 0.001,
      "loss": 2.5087,
      "step": 177792
    },
    {
      "epoch": 34.14,
      "learning_rate": 0.001,
      "loss": 2.5168,
      "step": 177804
    },
    {
      "epoch": 34.14,
      "learning_rate": 0.001,
      "loss": 2.515,
      "step": 177816
    },
    {
      "epoch": 34.15,
      "learning_rate": 0.001,
      "loss": 2.497,
      "step": 177828
    },
    {
      "epoch": 34.15,
      "learning_rate": 0.001,
      "loss": 2.5162,
      "step": 177840
    },
    {
      "epoch": 34.15,
      "learning_rate": 0.001,
      "loss": 2.5117,
      "step": 177852
    },
    {
      "epoch": 34.15,
      "learning_rate": 0.001,
      "loss": 2.5175,
      "step": 177864
    },
    {
      "epoch": 34.15,
      "learning_rate": 0.001,
      "loss": 2.5143,
      "step": 177876
    },
    {
      "epoch": 34.16,
      "learning_rate": 0.001,
      "loss": 2.5139,
      "step": 177888
    },
    {
      "epoch": 34.16,
      "learning_rate": 0.001,
      "loss": 2.5095,
      "step": 177900
    },
    {
      "epoch": 34.16,
      "learning_rate": 0.001,
      "loss": 2.5202,
      "step": 177912
    },
    {
      "epoch": 34.16,
      "learning_rate": 0.001,
      "loss": 2.5148,
      "step": 177924
    },
    {
      "epoch": 34.17,
      "learning_rate": 0.001,
      "loss": 2.5181,
      "step": 177936
    },
    {
      "epoch": 34.17,
      "learning_rate": 0.001,
      "loss": 2.5074,
      "step": 177948
    },
    {
      "epoch": 34.17,
      "learning_rate": 0.001,
      "loss": 2.5127,
      "step": 177960
    },
    {
      "epoch": 34.17,
      "learning_rate": 0.001,
      "loss": 2.5178,
      "step": 177972
    },
    {
      "epoch": 34.18,
      "learning_rate": 0.001,
      "loss": 2.5098,
      "step": 177984
    },
    {
      "epoch": 34.18,
      "learning_rate": 0.001,
      "loss": 2.5083,
      "step": 177996
    },
    {
      "epoch": 34.18,
      "learning_rate": 0.001,
      "loss": 2.5024,
      "step": 178008
    },
    {
      "epoch": 34.18,
      "learning_rate": 0.001,
      "loss": 2.5019,
      "step": 178020
    },
    {
      "epoch": 34.18,
      "learning_rate": 0.001,
      "loss": 2.5042,
      "step": 178032
    },
    {
      "epoch": 34.19,
      "learning_rate": 0.001,
      "loss": 2.5016,
      "step": 178044
    },
    {
      "epoch": 34.19,
      "learning_rate": 0.001,
      "loss": 2.5119,
      "step": 178056
    },
    {
      "epoch": 34.19,
      "learning_rate": 0.001,
      "loss": 2.5181,
      "step": 178068
    },
    {
      "epoch": 34.19,
      "learning_rate": 0.001,
      "loss": 2.5107,
      "step": 178080
    },
    {
      "epoch": 34.2,
      "learning_rate": 0.001,
      "loss": 2.5086,
      "step": 178092
    },
    {
      "epoch": 34.2,
      "learning_rate": 0.001,
      "loss": 2.5104,
      "step": 178104
    },
    {
      "epoch": 34.2,
      "learning_rate": 0.001,
      "loss": 2.5109,
      "step": 178116
    },
    {
      "epoch": 34.2,
      "eval_ag_news_accuracy": 0.327,
      "eval_ag_news_bleu_score": 4.914052912154212,
      "eval_ag_news_bleu_score_sem": 0.15733180499147514,
      "eval_ag_news_emb_cos_sim": 0.8192166090011597,
      "eval_ag_news_emb_cos_sim_sem": 0.00686424516563774,
      "eval_ag_news_emb_top1_equal": 0.2578125,
      "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5027143955230713,
      "eval_ag_news_n_ngrams_match_1": 14.314,
      "eval_ag_news_n_ngrams_match_2": 3.288,
      "eval_ag_news_n_ngrams_match_3": 0.962,
      "eval_ag_news_num_pred_words": 47.322,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.205462500072706,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35335455342206823,
      "eval_ag_news_runtime": 10.6415,
      "eval_ag_news_samples_per_second": 46.986,
      "eval_ag_news_steps_per_second": 0.094,
      "eval_ag_news_token_set_f1": 0.3532114353394258,
      "eval_ag_news_token_set_f1_sem": 0.004458795364485328,
      "eval_ag_news_token_set_precision": 0.34206116848091156,
      "eval_ag_news_token_set_recall": 0.3817815058903693,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 178125
    },
    {
      "epoch": 34.2,
      "eval_anthropic_toxic_prompts_accuracy": 0.11540625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.194002793452142,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1215497287537557,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6790552139282227,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008896723592970458,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.218350648880005,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.374,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.02,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.752,
      "eval_anthropic_toxic_prompts_num_pred_words": 48.418,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.98687404731438,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 62.9453125,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2158776896291413,
      "eval_anthropic_toxic_prompts_runtime": 10.4954,
      "eval_anthropic_toxic_prompts_samples_per_second": 47.64,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.095,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35868913066859653,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006494699480647151,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4506319484675694,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3226648135822138,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 178125
    },
    {
      "epoch": 34.2,
      "eval_arxiv_accuracy": 0.3518125,
      "eval_arxiv_bleu_score": 4.451697055242033,
      "eval_arxiv_bleu_score_sem": 0.12911423376119463,
      "eval_arxiv_emb_cos_sim": 0.7778573036193848,
      "eval_arxiv_emb_cos_sim_sem": 0.007019332397267671,
      "eval_arxiv_emb_top1_equal": 0.25,
      "eval_arxiv_emb_top1_equal_sem": 0.03842366440207048,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3617093563079834,
      "eval_arxiv_n_ngrams_match_1": 15.358,
      "eval_arxiv_n_ngrams_match_2": 3.064,
      "eval_arxiv_n_ngrams_match_3": 0.676,
      "eval_arxiv_num_pred_words": 41.464,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.83844394780286,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3649077414622171,
      "eval_arxiv_runtime": 10.3311,
      "eval_arxiv_samples_per_second": 48.398,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.35651892697480697,
      "eval_arxiv_token_set_f1_sem": 0.004187807776777443,
      "eval_arxiv_token_set_precision": 0.312031586661256,
      "eval_arxiv_token_set_recall": 0.430126470096774,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 178125
    },
    {
      "epoch": 34.2,
      "eval_python_code_alpaca_accuracy": 0.16640625,
      "eval_python_code_alpaca_bleu_score": 4.853427273184483,
      "eval_python_code_alpaca_bleu_score_sem": 0.15388962455015562,
      "eval_python_code_alpaca_emb_cos_sim": 0.7732113599777222,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.006844760809280055,
      "eval_python_code_alpaca_emb_top1_equal": 0.171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03347745514062371,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8228678703308105,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.13,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.188,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.154,
      "eval_python_code_alpaca_num_pred_words": 45.034,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 16.825033562543794,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3365812517837736,
      "eval_python_code_alpaca_runtime": 9.9033,
      "eval_python_code_alpaca_samples_per_second": 50.488,
      "eval_python_code_alpaca_steps_per_second": 0.101,
      "eval_python_code_alpaca_token_set_f1": 0.4955358063801914,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005506109246406978,
      "eval_python_code_alpaca_token_set_precision": 0.5595442026847773,
      "eval_python_code_alpaca_token_set_recall": 0.4605743329054844,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 178125
    },
    {
      "epoch": 34.2,
      "eval_wikibio_accuracy": 0.327875,
      "eval_wikibio_bleu_score": 5.955574538064505,
      "eval_wikibio_bleu_score_sem": 0.2046288521089413,
      "eval_wikibio_emb_cos_sim": 0.7384626865386963,
      "eval_wikibio_emb_cos_sim_sem": 0.009992279553539526,
      "eval_wikibio_emb_top1_equal": 0.2265625,
      "eval_wikibio_emb_top1_equal_sem": 0.03714537682851538,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.701720952987671,
      "eval_wikibio_n_ngrams_match_1": 9.982,
      "eval_wikibio_n_ngrams_match_2": 3.394,
      "eval_wikibio_n_ngrams_match_3": 1.244,
      "eval_wikibio_num_pred_words": 36.434,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 40.51697219969297,
      "eval_wikibio_pred_num_tokens": 62.8828125,
      "eval_wikibio_rouge_score": 0.35804891031961483,
      "eval_wikibio_runtime": 10.0943,
      "eval_wikibio_samples_per_second": 49.533,
      "eval_wikibio_steps_per_second": 0.099,
      "eval_wikibio_token_set_f1": 0.31604749809136645,
      "eval_wikibio_token_set_f1_sem": 0.005386330428306996,
      "eval_wikibio_token_set_precision": 0.3246392858455307,
      "eval_wikibio_token_set_recall": 0.32454571621705997,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 178125
    },
    {
      "epoch": 34.2,
      "eval_nq_accuracy": 0.53396875,
      "eval_nq_bleu_score": 11.914236254970884,
      "eval_nq_bleu_score_sem": 0.4796626034235853,
      "eval_nq_emb_cos_sim": 0.8397883772850037,
      "eval_nq_emb_cos_sim_sem": 0.0069352573370225325,
      "eval_nq_emb_top1_equal": 0.3125,
      "eval_nq_emb_top1_equal_sem": 0.041130074229814934,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.144157648086548,
      "eval_nq_n_ngrams_match_1": 23.282,
      "eval_nq_n_ngrams_match_2": 8.626,
      "eval_nq_n_ngrams_match_3": 3.986,
      "eval_nq_num_pred_words": 49.268,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.534848862193819,
      "eval_nq_pred_num_tokens": 62.984375,
      "eval_nq_rouge_score": 0.4510546538336935,
      "eval_nq_runtime": 10.4038,
      "eval_nq_samples_per_second": 48.06,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.46602718976386026,
      "eval_nq_token_set_f1_sem": 0.005039947097998777,
      "eval_nq_token_set_precision": 0.42424083976081683,
      "eval_nq_token_set_recall": 0.526921316066187,
      "eval_nq_true_num_tokens": 64.0,
      "step": 178125
    },
    {
      "epoch": 34.2,
      "learning_rate": 0.001,
      "loss": 2.5192,
      "step": 178128
    },
    {
      "epoch": 34.21,
      "learning_rate": 0.001,
      "loss": 2.5108,
      "step": 178140
    },
    {
      "epoch": 34.21,
      "learning_rate": 0.001,
      "loss": 2.5115,
      "step": 178152
    },
    {
      "epoch": 34.21,
      "learning_rate": 0.001,
      "loss": 2.5131,
      "step": 178164
    },
    {
      "epoch": 34.21,
      "learning_rate": 0.001,
      "loss": 2.507,
      "step": 178176
    },
    {
      "epoch": 34.21,
      "learning_rate": 0.001,
      "loss": 2.5078,
      "step": 178188
    },
    {
      "epoch": 34.22,
      "learning_rate": 0.001,
      "loss": 2.499,
      "step": 178200
    },
    {
      "epoch": 34.22,
      "learning_rate": 0.001,
      "loss": 2.5191,
      "step": 178212
    },
    {
      "epoch": 34.22,
      "learning_rate": 0.001,
      "loss": 2.5108,
      "step": 178224
    },
    {
      "epoch": 34.22,
      "learning_rate": 0.001,
      "loss": 2.5117,
      "step": 178236
    },
    {
      "epoch": 34.23,
      "learning_rate": 0.001,
      "loss": 2.503,
      "step": 178248
    },
    {
      "epoch": 34.23,
      "learning_rate": 0.001,
      "loss": 2.5124,
      "step": 178260
    },
    {
      "epoch": 34.23,
      "learning_rate": 0.001,
      "loss": 2.5081,
      "step": 178272
    },
    {
      "epoch": 34.23,
      "learning_rate": 0.001,
      "loss": 2.5047,
      "step": 178284
    },
    {
      "epoch": 34.24,
      "learning_rate": 0.001,
      "loss": 2.5065,
      "step": 178296
    },
    {
      "epoch": 34.24,
      "learning_rate": 0.001,
      "loss": 2.5124,
      "step": 178308
    },
    {
      "epoch": 34.24,
      "learning_rate": 0.001,
      "loss": 2.5129,
      "step": 178320
    },
    {
      "epoch": 34.24,
      "learning_rate": 0.001,
      "loss": 2.5129,
      "step": 178332
    },
    {
      "epoch": 34.24,
      "learning_rate": 0.001,
      "loss": 2.5109,
      "step": 178344
    },
    {
      "epoch": 34.25,
      "learning_rate": 0.001,
      "loss": 2.5185,
      "step": 178356
    },
    {
      "epoch": 34.25,
      "learning_rate": 0.001,
      "loss": 2.5088,
      "step": 178368
    },
    {
      "epoch": 34.25,
      "learning_rate": 0.001,
      "loss": 2.5128,
      "step": 178380
    },
    {
      "epoch": 34.25,
      "learning_rate": 0.001,
      "loss": 2.5264,
      "step": 178392
    },
    {
      "epoch": 34.26,
      "learning_rate": 0.001,
      "loss": 2.5049,
      "step": 178404
    },
    {
      "epoch": 34.26,
      "learning_rate": 0.001,
      "loss": 2.5154,
      "step": 178416
    },
    {
      "epoch": 34.26,
      "learning_rate": 0.001,
      "loss": 2.5163,
      "step": 178428
    },
    {
      "epoch": 34.26,
      "learning_rate": 0.001,
      "loss": 2.511,
      "step": 178440
    },
    {
      "epoch": 34.26,
      "learning_rate": 0.001,
      "loss": 2.5105,
      "step": 178452
    },
    {
      "epoch": 34.27,
      "learning_rate": 0.001,
      "loss": 2.5074,
      "step": 178464
    },
    {
      "epoch": 34.27,
      "learning_rate": 0.001,
      "loss": 2.5129,
      "step": 178476
    },
    {
      "epoch": 34.27,
      "learning_rate": 0.001,
      "loss": 2.5071,
      "step": 178488
    },
    {
      "epoch": 34.27,
      "learning_rate": 0.001,
      "loss": 2.5071,
      "step": 178500
    },
    {
      "epoch": 34.28,
      "learning_rate": 0.001,
      "loss": 2.5101,
      "step": 178512
    },
    {
      "epoch": 34.28,
      "learning_rate": 0.001,
      "loss": 2.5093,
      "step": 178524
    },
    {
      "epoch": 34.28,
      "learning_rate": 0.001,
      "loss": 2.51,
      "step": 178536
    },
    {
      "epoch": 34.28,
      "learning_rate": 0.001,
      "loss": 2.5236,
      "step": 178548
    },
    {
      "epoch": 34.29,
      "learning_rate": 0.001,
      "loss": 2.5128,
      "step": 178560
    },
    {
      "epoch": 34.29,
      "learning_rate": 0.001,
      "loss": 2.5192,
      "step": 178572
    },
    {
      "epoch": 34.29,
      "learning_rate": 0.001,
      "loss": 2.5071,
      "step": 178584
    },
    {
      "epoch": 34.29,
      "learning_rate": 0.001,
      "loss": 2.516,
      "step": 178596
    },
    {
      "epoch": 34.29,
      "learning_rate": 0.001,
      "loss": 2.5118,
      "step": 178608
    },
    {
      "epoch": 34.3,
      "learning_rate": 0.001,
      "loss": 2.5141,
      "step": 178620
    },
    {
      "epoch": 34.3,
      "learning_rate": 0.001,
      "loss": 2.5178,
      "step": 178632
    },
    {
      "epoch": 34.3,
      "learning_rate": 0.001,
      "loss": 2.5086,
      "step": 178644
    },
    {
      "epoch": 34.3,
      "learning_rate": 0.001,
      "loss": 2.5208,
      "step": 178656
    },
    {
      "epoch": 34.31,
      "learning_rate": 0.001,
      "loss": 2.5165,
      "step": 178668
    },
    {
      "epoch": 34.31,
      "learning_rate": 0.001,
      "loss": 2.5107,
      "step": 178680
    },
    {
      "epoch": 34.31,
      "learning_rate": 0.001,
      "loss": 2.5052,
      "step": 178692
    },
    {
      "epoch": 34.31,
      "learning_rate": 0.001,
      "loss": 2.5231,
      "step": 178704
    },
    {
      "epoch": 34.32,
      "learning_rate": 0.001,
      "loss": 2.5257,
      "step": 178716
    },
    {
      "epoch": 34.32,
      "learning_rate": 0.001,
      "loss": 2.4984,
      "step": 178728
    },
    {
      "epoch": 34.32,
      "learning_rate": 0.001,
      "loss": 2.5098,
      "step": 178740
    },
    {
      "epoch": 34.32,
      "eval_ag_news_accuracy": 0.3275625,
      "eval_ag_news_bleu_score": 4.813433813581762,
      "eval_ag_news_bleu_score_sem": 0.1592644508946142,
      "eval_ag_news_emb_cos_sim": 0.820677638053894,
      "eval_ag_news_emb_cos_sim_sem": 0.006047129067033354,
      "eval_ag_news_emb_top1_equal": 0.234375,
      "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4964663982391357,
      "eval_ag_news_n_ngrams_match_1": 14.3,
      "eval_ag_news_n_ngrams_match_2": 3.108,
      "eval_ag_news_n_ngrams_match_3": 0.894,
      "eval_ag_news_num_pred_words": 47.088,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.99864164145504,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3539372001697687,
      "eval_ag_news_runtime": 10.6254,
      "eval_ag_news_samples_per_second": 47.057,
      "eval_ag_news_steps_per_second": 0.094,
      "eval_ag_news_token_set_f1": 0.35522350129310926,
      "eval_ag_news_token_set_f1_sem": 0.004463897894500045,
      "eval_ag_news_token_set_precision": 0.3422590908037412,
      "eval_ag_news_token_set_recall": 0.3831179052121366,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 178750
    },
    {
      "epoch": 34.32,
      "eval_anthropic_toxic_prompts_accuracy": 0.11690625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.048533923277592,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12074231629676362,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6788101196289062,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00922757174396942,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.140625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030847557647994725,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2201855182647705,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.2,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.876,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.676,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.39,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.032763785487692,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21349213177239149,
      "eval_anthropic_toxic_prompts_runtime": 9.6998,
      "eval_anthropic_toxic_prompts_samples_per_second": 51.548,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.103,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3580416059249323,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006788697257763802,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4383428301480111,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.33051103029251566,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 178750
    },
    {
      "epoch": 34.32,
      "eval_arxiv_accuracy": 0.35159375,
      "eval_arxiv_bleu_score": 4.4936295243756685,
      "eval_arxiv_bleu_score_sem": 0.1313267234231422,
      "eval_arxiv_emb_cos_sim": 0.7800787687301636,
      "eval_arxiv_emb_cos_sim_sem": 0.007207780176582989,
      "eval_arxiv_emb_top1_equal": 0.2734375,
      "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3529574871063232,
      "eval_arxiv_n_ngrams_match_1": 15.434,
      "eval_arxiv_n_ngrams_match_2": 3.048,
      "eval_arxiv_n_ngrams_match_3": 0.698,
      "eval_arxiv_num_pred_words": 41.006,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.5871548868485,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36792086512289285,
      "eval_arxiv_runtime": 10.0689,
      "eval_arxiv_samples_per_second": 49.658,
      "eval_arxiv_steps_per_second": 0.099,
      "eval_arxiv_token_set_f1": 0.35973660019467957,
      "eval_arxiv_token_set_f1_sem": 0.004231735129604242,
      "eval_arxiv_token_set_precision": 0.31408898428209076,
      "eval_arxiv_token_set_recall": 0.4372305467644753,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 178750
    },
    {
      "epoch": 34.32,
      "eval_python_code_alpaca_accuracy": 0.16346875,
      "eval_python_code_alpaca_bleu_score": 4.642145102517188,
      "eval_python_code_alpaca_bleu_score_sem": 0.14502167212739653,
      "eval_python_code_alpaca_emb_cos_sim": 0.7653725743293762,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008972558285787208,
      "eval_python_code_alpaca_emb_top1_equal": 0.140625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8381497859954834,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.966,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.026,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.026,
      "eval_python_code_alpaca_num_pred_words": 44.538,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.084126986121788,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33300856539045176,
      "eval_python_code_alpaca_runtime": 10.5922,
      "eval_python_code_alpaca_samples_per_second": 47.204,
      "eval_python_code_alpaca_steps_per_second": 0.094,
      "eval_python_code_alpaca_token_set_f1": 0.4863844161296202,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0055966509281519805,
      "eval_python_code_alpaca_token_set_precision": 0.5473867815551546,
      "eval_python_code_alpaca_token_set_recall": 0.4620974978021023,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 178750
    },
    {
      "epoch": 34.32,
      "eval_wikibio_accuracy": 0.3239375,
      "eval_wikibio_bleu_score": 5.974667265588757,
      "eval_wikibio_bleu_score_sem": 0.21420723370734615,
      "eval_wikibio_emb_cos_sim": 0.7359843254089355,
      "eval_wikibio_emb_cos_sim_sem": 0.00951995496880162,
      "eval_wikibio_emb_top1_equal": 0.2109375,
      "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.714965343475342,
      "eval_wikibio_n_ngrams_match_1": 10.092,
      "eval_wikibio_n_ngrams_match_2": 3.33,
      "eval_wikibio_n_ngrams_match_3": 1.256,
      "eval_wikibio_num_pred_words": 36.452,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 41.05716416111395,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3585082625405733,
      "eval_wikibio_runtime": 9.933,
      "eval_wikibio_samples_per_second": 50.337,
      "eval_wikibio_steps_per_second": 0.101,
      "eval_wikibio_token_set_f1": 0.32100820650657813,
      "eval_wikibio_token_set_f1_sem": 0.0053061485193654934,
      "eval_wikibio_token_set_precision": 0.32732396678358794,
      "eval_wikibio_token_set_recall": 0.3337773079073864,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 178750
    },
    {
      "epoch": 34.32,
      "eval_nq_accuracy": 0.53465625,
      "eval_nq_bleu_score": 12.097115223885114,
      "eval_nq_bleu_score_sem": 0.46804934255176084,
      "eval_nq_emb_cos_sim": 0.8387157917022705,
      "eval_nq_emb_cos_sim_sem": 0.006997872407386038,
      "eval_nq_emb_top1_equal": 0.3359375,
      "eval_nq_emb_top1_equal_sem": 0.04191137143408563,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.140587091445923,
      "eval_nq_n_ngrams_match_1": 23.49,
      "eval_nq_n_ngrams_match_2": 8.72,
      "eval_nq_n_ngrams_match_3": 4.028,
      "eval_nq_num_pred_words": 49.282,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.504429041077529,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4554707610765809,
      "eval_nq_runtime": 10.4593,
      "eval_nq_samples_per_second": 47.804,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.4678595729521041,
      "eval_nq_token_set_f1_sem": 0.0048572389473561395,
      "eval_nq_token_set_precision": 0.42781714349962374,
      "eval_nq_token_set_recall": 0.5237862459531191,
      "eval_nq_true_num_tokens": 64.0,
      "step": 178750
    },
    {
      "epoch": 34.32,
      "learning_rate": 0.001,
      "loss": 2.5141,
      "step": 178752
    },
    {
      "epoch": 34.32,
      "learning_rate": 0.001,
      "loss": 2.5143,
      "step": 178764
    },
    {
      "epoch": 34.33,
      "learning_rate": 0.001,
      "loss": 2.5182,
      "step": 178776
    },
    {
      "epoch": 34.33,
      "learning_rate": 0.001,
      "loss": 2.5145,
      "step": 178788
    },
    {
      "epoch": 34.33,
      "learning_rate": 0.001,
      "loss": 2.5244,
      "step": 178800
    },
    {
      "epoch": 34.33,
      "learning_rate": 0.001,
      "loss": 2.5234,
      "step": 178812
    },
    {
      "epoch": 34.34,
      "learning_rate": 0.001,
      "loss": 2.528,
      "step": 178824
    },
    {
      "epoch": 34.34,
      "learning_rate": 0.001,
      "loss": 2.5132,
      "step": 178836
    },
    {
      "epoch": 34.34,
      "learning_rate": 0.001,
      "loss": 2.5204,
      "step": 178848
    },
    {
      "epoch": 34.34,
      "learning_rate": 0.001,
      "loss": 2.512,
      "step": 178860
    },
    {
      "epoch": 34.35,
      "learning_rate": 0.001,
      "loss": 2.5124,
      "step": 178872
    },
    {
      "epoch": 34.35,
      "learning_rate": 0.001,
      "loss": 2.5181,
      "step": 178884
    },
    {
      "epoch": 34.35,
      "learning_rate": 0.001,
      "loss": 2.5196,
      "step": 178896
    },
    {
      "epoch": 34.35,
      "learning_rate": 0.001,
      "loss": 2.5086,
      "step": 178908
    },
    {
      "epoch": 34.35,
      "learning_rate": 0.001,
      "loss": 2.5123,
      "step": 178920
    },
    {
      "epoch": 34.36,
      "learning_rate": 0.001,
      "loss": 2.5197,
      "step": 178932
    },
    {
      "epoch": 34.36,
      "learning_rate": 0.001,
      "loss": 2.503,
      "step": 178944
    },
    {
      "epoch": 34.36,
      "learning_rate": 0.001,
      "loss": 2.5163,
      "step": 178956
    },
    {
      "epoch": 34.36,
      "learning_rate": 0.001,
      "loss": 2.5215,
      "step": 178968
    },
    {
      "epoch": 34.37,
      "learning_rate": 0.001,
      "loss": 2.5138,
      "step": 178980
    },
    {
      "epoch": 34.37,
      "learning_rate": 0.001,
      "loss": 2.5164,
      "step": 178992
    },
    {
      "epoch": 34.37,
      "learning_rate": 0.001,
      "loss": 2.5093,
      "step": 179004
    },
    {
      "epoch": 34.37,
      "learning_rate": 0.001,
      "loss": 2.5188,
      "step": 179016
    },
    {
      "epoch": 34.38,
      "learning_rate": 0.001,
      "loss": 2.5167,
      "step": 179028
    },
    {
      "epoch": 34.38,
      "learning_rate": 0.001,
      "loss": 2.5147,
      "step": 179040
    },
    {
      "epoch": 34.38,
      "learning_rate": 0.001,
      "loss": 2.5178,
      "step": 179052
    },
    {
      "epoch": 34.38,
      "learning_rate": 0.001,
      "loss": 2.5128,
      "step": 179064
    },
    {
      "epoch": 34.38,
      "learning_rate": 0.001,
      "loss": 2.5145,
      "step": 179076
    },
    {
      "epoch": 34.39,
      "learning_rate": 0.001,
      "loss": 2.5105,
      "step": 179088
    },
    {
      "epoch": 34.39,
      "learning_rate": 0.001,
      "loss": 2.5213,
      "step": 179100
    },
    {
      "epoch": 34.39,
      "learning_rate": 0.001,
      "loss": 2.5132,
      "step": 179112
    },
    {
      "epoch": 34.39,
      "learning_rate": 0.001,
      "loss": 2.5167,
      "step": 179124
    },
    {
      "epoch": 34.4,
      "learning_rate": 0.001,
      "loss": 2.5203,
      "step": 179136
    },
    {
      "epoch": 34.4,
      "learning_rate": 0.001,
      "loss": 2.5184,
      "step": 179148
    },
    {
      "epoch": 34.4,
      "learning_rate": 0.001,
      "loss": 2.5088,
      "step": 179160
    },
    {
      "epoch": 34.4,
      "learning_rate": 0.001,
      "loss": 2.5188,
      "step": 179172
    },
    {
      "epoch": 34.41,
      "learning_rate": 0.001,
      "loss": 2.5073,
      "step": 179184
    },
    {
      "epoch": 34.41,
      "learning_rate": 0.001,
      "loss": 2.5213,
      "step": 179196
    },
    {
      "epoch": 34.41,
      "learning_rate": 0.001,
      "loss": 2.5138,
      "step": 179208
    },
    {
      "epoch": 34.41,
      "learning_rate": 0.001,
      "loss": 2.5036,
      "step": 179220
    },
    {
      "epoch": 34.41,
      "learning_rate": 0.001,
      "loss": 2.5082,
      "step": 179232
    },
    {
      "epoch": 34.42,
      "learning_rate": 0.001,
      "loss": 2.5158,
      "step": 179244
    },
    {
      "epoch": 34.42,
      "learning_rate": 0.001,
      "loss": 2.5123,
      "step": 179256
    },
    {
      "epoch": 34.42,
      "learning_rate": 0.001,
      "loss": 2.5088,
      "step": 179268
    },
    {
      "epoch": 34.42,
      "learning_rate": 0.001,
      "loss": 2.507,
      "step": 179280
    },
    {
      "epoch": 34.43,
      "learning_rate": 0.001,
      "loss": 2.5164,
      "step": 179292
    },
    {
      "epoch": 34.43,
      "learning_rate": 0.001,
      "loss": 2.5144,
      "step": 179304
    },
    {
      "epoch": 34.43,
      "learning_rate": 0.001,
      "loss": 2.5115,
      "step": 179316
    },
    {
      "epoch": 34.43,
      "learning_rate": 0.001,
      "loss": 2.5145,
      "step": 179328
    },
    {
      "epoch": 34.44,
      "learning_rate": 0.001,
      "loss": 2.5048,
      "step": 179340
    },
    {
      "epoch": 34.44,
      "learning_rate": 0.001,
      "loss": 2.515,
      "step": 179352
    },
    {
      "epoch": 34.44,
      "learning_rate": 0.001,
      "loss": 2.5126,
      "step": 179364
    },
    {
      "epoch": 34.44,
      "eval_ag_news_accuracy": 0.3274375,
      "eval_ag_news_bleu_score": 4.811342964802341,
      "eval_ag_news_bleu_score_sem": 0.15607395767645005,
      "eval_ag_news_emb_cos_sim": 0.813970148563385,
      "eval_ag_news_emb_cos_sim_sem": 0.007021656400836342,
      "eval_ag_news_emb_top1_equal": 0.2421875,
      "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5022685527801514,
      "eval_ag_news_n_ngrams_match_1": 14.39,
      "eval_ag_news_n_ngrams_match_2": 3.154,
      "eval_ag_news_n_ngrams_match_3": 0.908,
      "eval_ag_news_num_pred_words": 47.056,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.190661385321725,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35371846728922646,
      "eval_ag_news_runtime": 10.5534,
      "eval_ag_news_samples_per_second": 47.378,
      "eval_ag_news_steps_per_second": 0.095,
      "eval_ag_news_token_set_f1": 0.35611744869577433,
      "eval_ag_news_token_set_f1_sem": 0.004472964504265946,
      "eval_ag_news_token_set_precision": 0.34333100902908364,
      "eval_ag_news_token_set_recall": 0.3830182962470944,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 179375
    },
    {
      "epoch": 34.44,
      "eval_anthropic_toxic_prompts_accuracy": 0.11471875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1958231375226434,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12481154158203375,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6836678385734558,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008170488612101212,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.221111297607422,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.398,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.96,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.724,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.68,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.055949331783708,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2187404411794168,
      "eval_anthropic_toxic_prompts_runtime": 9.8943,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.534,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.101,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3609242226719977,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006679386336990643,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44935726362851275,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.33146817955536045,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 179375
    },
    {
      "epoch": 34.44,
      "eval_arxiv_accuracy": 0.353125,
      "eval_arxiv_bleu_score": 4.540336935215935,
      "eval_arxiv_bleu_score_sem": 0.13556716701654822,
      "eval_arxiv_emb_cos_sim": 0.7734513282775879,
      "eval_arxiv_emb_cos_sim_sem": 0.006722537582776494,
      "eval_arxiv_emb_top1_equal": 0.296875,
      "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.344017267227173,
      "eval_arxiv_n_ngrams_match_1": 15.438,
      "eval_arxiv_n_ngrams_match_2": 3.038,
      "eval_arxiv_n_ngrams_match_3": 0.734,
      "eval_arxiv_num_pred_words": 40.814,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.332718489811967,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3697917113353645,
      "eval_arxiv_runtime": 10.3213,
      "eval_arxiv_samples_per_second": 48.444,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.3596673818683494,
      "eval_arxiv_token_set_f1_sem": 0.0043135187632635935,
      "eval_arxiv_token_set_precision": 0.31372170099630187,
      "eval_arxiv_token_set_recall": 0.439029757604981,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 179375
    },
    {
      "epoch": 34.44,
      "eval_python_code_alpaca_accuracy": 0.162625,
      "eval_python_code_alpaca_bleu_score": 4.640023344215875,
      "eval_python_code_alpaca_bleu_score_sem": 0.13450912984635363,
      "eval_python_code_alpaca_emb_cos_sim": 0.765434741973877,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008636512901531246,
      "eval_python_code_alpaca_emb_top1_equal": 0.1484375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8486969470977783,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.112,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.016,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.002,
      "eval_python_code_alpaca_num_pred_words": 44.112,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.265269616761977,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3407851142617668,
      "eval_python_code_alpaca_runtime": 10.9587,
      "eval_python_code_alpaca_samples_per_second": 45.626,
      "eval_python_code_alpaca_steps_per_second": 0.091,
      "eval_python_code_alpaca_token_set_f1": 0.4863034955772844,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005305979070785445,
      "eval_python_code_alpaca_token_set_precision": 0.5548519354276719,
      "eval_python_code_alpaca_token_set_recall": 0.45395270458790493,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 179375
    },
    {
      "epoch": 34.44,
      "eval_wikibio_accuracy": 0.32678125,
      "eval_wikibio_bleu_score": 6.004373304213732,
      "eval_wikibio_bleu_score_sem": 0.2179632308351529,
      "eval_wikibio_emb_cos_sim": 0.7409726977348328,
      "eval_wikibio_emb_cos_sim_sem": 0.009032686033544061,
      "eval_wikibio_emb_top1_equal": 0.203125,
      "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.68875789642334,
      "eval_wikibio_n_ngrams_match_1": 10.002,
      "eval_wikibio_n_ngrams_match_2": 3.382,
      "eval_wikibio_n_ngrams_match_3": 1.234,
      "eval_wikibio_num_pred_words": 35.824,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 39.99513798788961,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.35291988254204026,
      "eval_wikibio_runtime": 10.0767,
      "eval_wikibio_samples_per_second": 49.62,
      "eval_wikibio_steps_per_second": 0.099,
      "eval_wikibio_token_set_f1": 0.3189733917969741,
      "eval_wikibio_token_set_f1_sem": 0.0054931123466055,
      "eval_wikibio_token_set_precision": 0.3260878276320435,
      "eval_wikibio_token_set_recall": 0.3301078009495688,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 179375
    },
    {
      "epoch": 34.44,
      "eval_nq_accuracy": 0.53575,
      "eval_nq_bleu_score": 11.920051862616583,
      "eval_nq_bleu_score_sem": 0.4998006212207493,
      "eval_nq_emb_cos_sim": 0.8287443518638611,
      "eval_nq_emb_cos_sim_sem": 0.00734434781955943,
      "eval_nq_emb_top1_equal": 0.3515625,
      "eval_nq_emb_top1_equal_sem": 0.04236756101983345,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.137392044067383,
      "eval_nq_n_ngrams_match_1": 23.38,
      "eval_nq_n_ngrams_match_2": 8.586,
      "eval_nq_n_ngrams_match_3": 3.978,
      "eval_nq_num_pred_words": 49.148,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.477300349170047,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4534801780466444,
      "eval_nq_runtime": 10.6294,
      "eval_nq_samples_per_second": 47.039,
      "eval_nq_steps_per_second": 0.094,
      "eval_nq_token_set_f1": 0.46627282890371513,
      "eval_nq_token_set_f1_sem": 0.005003064160241319,
      "eval_nq_token_set_precision": 0.425000019638213,
      "eval_nq_token_set_recall": 0.5246687772022757,
      "eval_nq_true_num_tokens": 64.0,
      "step": 179375
    },
    {
      "epoch": 34.44,
      "learning_rate": 0.001,
      "loss": 2.5008,
      "step": 179376
    },
    {
      "epoch": 34.44,
      "learning_rate": 0.001,
      "loss": 2.5218,
      "step": 179388
    },
    {
      "epoch": 34.45,
      "learning_rate": 0.001,
      "loss": 2.5155,
      "step": 179400
    },
    {
      "epoch": 34.45,
      "learning_rate": 0.001,
      "loss": 2.509,
      "step": 179412
    },
    {
      "epoch": 34.45,
      "learning_rate": 0.001,
      "loss": 2.5205,
      "step": 179424
    },
    {
      "epoch": 34.45,
      "learning_rate": 0.001,
      "loss": 2.4998,
      "step": 179436
    },
    {
      "epoch": 34.46,
      "learning_rate": 0.001,
      "loss": 2.5138,
      "step": 179448
    },
    {
      "epoch": 34.46,
      "learning_rate": 0.001,
      "loss": 2.5071,
      "step": 179460
    },
    {
      "epoch": 34.46,
      "learning_rate": 0.001,
      "loss": 2.5175,
      "step": 179472
    },
    {
      "epoch": 34.46,
      "learning_rate": 0.001,
      "loss": 2.5156,
      "step": 179484
    },
    {
      "epoch": 34.47,
      "learning_rate": 0.001,
      "loss": 2.5221,
      "step": 179496
    },
    {
      "epoch": 34.47,
      "learning_rate": 0.001,
      "loss": 2.5198,
      "step": 179508
    },
    {
      "epoch": 34.47,
      "learning_rate": 0.001,
      "loss": 2.527,
      "step": 179520
    },
    {
      "epoch": 34.47,
      "learning_rate": 0.001,
      "loss": 2.5128,
      "step": 179532
    },
    {
      "epoch": 34.47,
      "learning_rate": 0.001,
      "loss": 2.516,
      "step": 179544
    },
    {
      "epoch": 34.48,
      "learning_rate": 0.001,
      "loss": 2.5151,
      "step": 179556
    },
    {
      "epoch": 34.48,
      "learning_rate": 0.001,
      "loss": 2.5129,
      "step": 179568
    },
    {
      "epoch": 34.48,
      "learning_rate": 0.001,
      "loss": 2.513,
      "step": 179580
    },
    {
      "epoch": 34.48,
      "learning_rate": 0.001,
      "loss": 2.5008,
      "step": 179592
    },
    {
      "epoch": 34.49,
      "learning_rate": 0.001,
      "loss": 2.5182,
      "step": 179604
    },
    {
      "epoch": 34.49,
      "learning_rate": 0.001,
      "loss": 2.519,
      "step": 179616
    },
    {
      "epoch": 34.49,
      "learning_rate": 0.001,
      "loss": 2.5099,
      "step": 179628
    },
    {
      "epoch": 34.49,
      "learning_rate": 0.001,
      "loss": 2.5205,
      "step": 179640
    },
    {
      "epoch": 34.5,
      "learning_rate": 0.001,
      "loss": 2.5122,
      "step": 179652
    },
    {
      "epoch": 34.5,
      "learning_rate": 0.001,
      "loss": 2.5161,
      "step": 179664
    },
    {
      "epoch": 34.5,
      "learning_rate": 0.001,
      "loss": 2.5199,
      "step": 179676
    },
    {
      "epoch": 34.5,
      "learning_rate": 0.001,
      "loss": 2.5222,
      "step": 179688
    },
    {
      "epoch": 34.5,
      "learning_rate": 0.001,
      "loss": 2.5111,
      "step": 179700
    },
    {
      "epoch": 34.51,
      "learning_rate": 0.001,
      "loss": 2.5173,
      "step": 179712
    },
    {
      "epoch": 34.51,
      "learning_rate": 0.001,
      "loss": 2.5133,
      "step": 179724
    },
    {
      "epoch": 34.51,
      "learning_rate": 0.001,
      "loss": 2.5187,
      "step": 179736
    },
    {
      "epoch": 34.51,
      "learning_rate": 0.001,
      "loss": 2.5102,
      "step": 179748
    },
    {
      "epoch": 34.52,
      "learning_rate": 0.001,
      "loss": 2.5088,
      "step": 179760
    },
    {
      "epoch": 34.52,
      "learning_rate": 0.001,
      "loss": 2.518,
      "step": 179772
    },
    {
      "epoch": 34.52,
      "learning_rate": 0.001,
      "loss": 2.5069,
      "step": 179784
    },
    {
      "epoch": 34.52,
      "learning_rate": 0.001,
      "loss": 2.5095,
      "step": 179796
    },
    {
      "epoch": 34.53,
      "learning_rate": 0.001,
      "loss": 2.5111,
      "step": 179808
    },
    {
      "epoch": 34.53,
      "learning_rate": 0.001,
      "loss": 2.5168,
      "step": 179820
    },
    {
      "epoch": 34.53,
      "learning_rate": 0.001,
      "loss": 2.5137,
      "step": 179832
    },
    {
      "epoch": 34.53,
      "learning_rate": 0.001,
      "loss": 2.5197,
      "step": 179844
    },
    {
      "epoch": 34.53,
      "learning_rate": 0.001,
      "loss": 2.5245,
      "step": 179856
    },
    {
      "epoch": 34.54,
      "learning_rate": 0.001,
      "loss": 2.5119,
      "step": 179868
    },
    {
      "epoch": 34.54,
      "learning_rate": 0.001,
      "loss": 2.5241,
      "step": 179880
    },
    {
      "epoch": 34.54,
      "learning_rate": 0.001,
      "loss": 2.5108,
      "step": 179892
    },
    {
      "epoch": 34.54,
      "learning_rate": 0.001,
      "loss": 2.5159,
      "step": 179904
    },
    {
      "epoch": 34.55,
      "learning_rate": 0.001,
      "loss": 2.5069,
      "step": 179916
    },
    {
      "epoch": 34.55,
      "learning_rate": 0.001,
      "loss": 2.5197,
      "step": 179928
    },
    {
      "epoch": 34.55,
      "learning_rate": 0.001,
      "loss": 2.5141,
      "step": 179940
    },
    {
      "epoch": 34.55,
      "learning_rate": 0.001,
      "loss": 2.5155,
      "step": 179952
    },
    {
      "epoch": 34.56,
      "learning_rate": 0.001,
      "loss": 2.5094,
      "step": 179964
    },
    {
      "epoch": 34.56,
      "learning_rate": 0.001,
      "loss": 2.5117,
      "step": 179976
    },
    {
      "epoch": 34.56,
      "learning_rate": 0.001,
      "loss": 2.5194,
      "step": 179988
    },
    {
      "epoch": 34.56,
      "learning_rate": 0.001,
      "loss": 2.5166,
      "step": 180000
    },
    {
      "epoch": 34.56,
      "eval_ag_news_accuracy": 0.3271875,
      "eval_ag_news_bleu_score": 4.992180508673132,
      "eval_ag_news_bleu_score_sem": 0.16178611761678383,
      "eval_ag_news_emb_cos_sim": 0.8197941184043884,
      "eval_ag_news_emb_cos_sim_sem": 0.006158953217211359,
      "eval_ag_news_emb_top1_equal": 0.2265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03714537682851538,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4940409660339355,
      "eval_ag_news_n_ngrams_match_1": 14.482,
      "eval_ag_news_n_ngrams_match_2": 3.212,
      "eval_ag_news_n_ngrams_match_3": 0.936,
      "eval_ag_news_num_pred_words": 46.568,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.91870265577338,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35836368312617006,
      "eval_ag_news_runtime": 10.7648,
      "eval_ag_news_samples_per_second": 46.448,
      "eval_ag_news_steps_per_second": 0.093,
      "eval_ag_news_token_set_f1": 0.36173706655885796,
      "eval_ag_news_token_set_f1_sem": 0.004467765586337585,
      "eval_ag_news_token_set_precision": 0.34649607846024855,
      "eval_ag_news_token_set_recall": 0.3927891325694287,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 180000
    },
    {
      "epoch": 34.56,
      "eval_anthropic_toxic_prompts_accuracy": 0.11428125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2153284243726326,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12134144941431634,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6742618680000305,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009172288908157794,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2468583583831787,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.342,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.96,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.762,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.61,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.7094430530119,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21596396352962755,
      "eval_anthropic_toxic_prompts_runtime": 9.8169,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.933,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.102,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3637917754044938,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006451456352919871,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.45173319944318713,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3291250054970117,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 180000
    },
    {
      "epoch": 34.56,
      "eval_arxiv_accuracy": 0.34884375,
      "eval_arxiv_bleu_score": 4.394816127528442,
      "eval_arxiv_bleu_score_sem": 0.1325831490118087,
      "eval_arxiv_emb_cos_sim": 0.7746796607971191,
      "eval_arxiv_emb_cos_sim_sem": 0.00746157664451445,
      "eval_arxiv_emb_top1_equal": 0.328125,
      "eval_arxiv_emb_top1_equal_sem": 0.041664103776406315,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.359733819961548,
      "eval_arxiv_n_ngrams_match_1": 15.32,
      "eval_arxiv_n_ngrams_match_2": 3.034,
      "eval_arxiv_n_ngrams_match_3": 0.666,
      "eval_arxiv_num_pred_words": 40.032,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.781528791100133,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36849553182008965,
      "eval_arxiv_runtime": 10.2349,
      "eval_arxiv_samples_per_second": 48.852,
      "eval_arxiv_steps_per_second": 0.098,
      "eval_arxiv_token_set_f1": 0.36144979084583245,
      "eval_arxiv_token_set_f1_sem": 0.004300922900143846,
      "eval_arxiv_token_set_precision": 0.31109290396632283,
      "eval_arxiv_token_set_recall": 0.4515905152525916,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 180000
    },
    {
      "epoch": 34.56,
      "eval_python_code_alpaca_accuracy": 0.16290625,
      "eval_python_code_alpaca_bleu_score": 4.97443298424783,
      "eval_python_code_alpaca_bleu_score_sem": 0.15618642989283169,
      "eval_python_code_alpaca_emb_cos_sim": 0.7671642899513245,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008481689746249016,
      "eval_python_code_alpaca_emb_top1_equal": 0.140625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8598783016204834,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.152,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.146,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.112,
      "eval_python_code_alpaca_num_pred_words": 43.632,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.45940202634961,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.34560691396560517,
      "eval_python_code_alpaca_runtime": 9.7684,
      "eval_python_code_alpaca_samples_per_second": 51.185,
      "eval_python_code_alpaca_steps_per_second": 0.102,
      "eval_python_code_alpaca_token_set_f1": 0.49139150141678084,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005344616504036387,
      "eval_python_code_alpaca_token_set_precision": 0.5600456416206923,
      "eval_python_code_alpaca_token_set_recall": 0.4590994112023471,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 180000
    },
    {
      "epoch": 34.56,
      "eval_wikibio_accuracy": 0.3230625,
      "eval_wikibio_bleu_score": 6.376735555836328,
      "eval_wikibio_bleu_score_sem": 0.23321904107344474,
      "eval_wikibio_emb_cos_sim": 0.7401601076126099,
      "eval_wikibio_emb_cos_sim_sem": 0.009658130118922047,
      "eval_wikibio_emb_top1_equal": 0.1796875,
      "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6513254642486572,
      "eval_wikibio_n_ngrams_match_1": 10.168,
      "eval_wikibio_n_ngrams_match_2": 3.53,
      "eval_wikibio_n_ngrams_match_3": 1.334,
      "eval_wikibio_num_pred_words": 35.93,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 38.52569665551027,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3621634892428316,
      "eval_wikibio_runtime": 11.2877,
      "eval_wikibio_samples_per_second": 44.296,
      "eval_wikibio_steps_per_second": 0.089,
      "eval_wikibio_token_set_f1": 0.32420980005857425,
      "eval_wikibio_token_set_f1_sem": 0.005499022169025263,
      "eval_wikibio_token_set_precision": 0.32933575716206726,
      "eval_wikibio_token_set_recall": 0.33502409467732436,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 180000
    },
    {
      "epoch": 34.56,
      "eval_nq_accuracy": 0.53325,
      "eval_nq_bleu_score": 12.11989823526993,
      "eval_nq_bleu_score_sem": 0.4859000116476797,
      "eval_nq_emb_cos_sim": 0.8346042037010193,
      "eval_nq_emb_cos_sim_sem": 0.00727023983044261,
      "eval_nq_emb_top1_equal": 0.28125,
      "eval_nq_emb_top1_equal_sem": 0.039896367485272234,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1411452293395996,
      "eval_nq_n_ngrams_match_1": 23.364,
      "eval_nq_n_ngrams_match_2": 8.772,
      "eval_nq_n_ngrams_match_3": 4.034,
      "eval_nq_num_pred_words": 48.902,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.509177010076892,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4529333986672015,
      "eval_nq_runtime": 10.2379,
      "eval_nq_samples_per_second": 48.838,
      "eval_nq_steps_per_second": 0.098,
      "eval_nq_token_set_f1": 0.46787318736595207,
      "eval_nq_token_set_f1_sem": 0.005036197557146816,
      "eval_nq_token_set_precision": 0.4226296979372575,
      "eval_nq_token_set_recall": 0.5339993638198486,
      "eval_nq_true_num_tokens": 64.0,
      "step": 180000
    },
    {
      "epoch": 34.56,
      "learning_rate": 0.001,
      "loss": 2.5169,
      "step": 180012
    },
    {
      "epoch": 34.57,
      "learning_rate": 0.001,
      "loss": 2.5085,
      "step": 180024
    },
    {
      "epoch": 34.57,
      "learning_rate": 0.001,
      "loss": 2.5166,
      "step": 180036
    },
    {
      "epoch": 34.57,
      "learning_rate": 0.001,
      "loss": 2.4932,
      "step": 180048
    },
    {
      "epoch": 34.57,
      "learning_rate": 0.001,
      "loss": 2.5128,
      "step": 180060
    },
    {
      "epoch": 34.58,
      "learning_rate": 0.001,
      "loss": 2.5274,
      "step": 180072
    },
    {
      "epoch": 34.58,
      "learning_rate": 0.001,
      "loss": 2.5147,
      "step": 180084
    },
    {
      "epoch": 34.58,
      "learning_rate": 0.001,
      "loss": 2.5157,
      "step": 180096
    },
    {
      "epoch": 34.58,
      "learning_rate": 0.001,
      "loss": 2.5154,
      "step": 180108
    },
    {
      "epoch": 34.59,
      "learning_rate": 0.001,
      "loss": 2.5112,
      "step": 180120
    },
    {
      "epoch": 34.59,
      "learning_rate": 0.001,
      "loss": 2.5094,
      "step": 180132
    },
    {
      "epoch": 34.59,
      "learning_rate": 0.001,
      "loss": 2.5127,
      "step": 180144
    },
    {
      "epoch": 34.59,
      "learning_rate": 0.001,
      "loss": 2.5154,
      "step": 180156
    },
    {
      "epoch": 34.59,
      "learning_rate": 0.001,
      "loss": 2.5207,
      "step": 180168
    },
    {
      "epoch": 34.6,
      "learning_rate": 0.001,
      "loss": 2.521,
      "step": 180180
    },
    {
      "epoch": 34.6,
      "learning_rate": 0.001,
      "loss": 2.5098,
      "step": 180192
    },
    {
      "epoch": 34.6,
      "learning_rate": 0.001,
      "loss": 2.5104,
      "step": 180204
    },
    {
      "epoch": 34.6,
      "learning_rate": 0.001,
      "loss": 2.5258,
      "step": 180216
    },
    {
      "epoch": 34.61,
      "learning_rate": 0.001,
      "loss": 2.5183,
      "step": 180228
    },
    {
      "epoch": 34.61,
      "learning_rate": 0.001,
      "loss": 2.522,
      "step": 180240
    },
    {
      "epoch": 34.61,
      "learning_rate": 0.001,
      "loss": 2.5175,
      "step": 180252
    },
    {
      "epoch": 34.61,
      "learning_rate": 0.001,
      "loss": 2.5131,
      "step": 180264
    },
    {
      "epoch": 34.62,
      "learning_rate": 0.001,
      "loss": 2.5204,
      "step": 180276
    },
    {
      "epoch": 34.62,
      "learning_rate": 0.001,
      "loss": 2.5062,
      "step": 180288
    },
    {
      "epoch": 34.62,
      "learning_rate": 0.001,
      "loss": 2.5252,
      "step": 180300
    },
    {
      "epoch": 34.62,
      "learning_rate": 0.001,
      "loss": 2.5223,
      "step": 180312
    },
    {
      "epoch": 34.62,
      "learning_rate": 0.001,
      "loss": 2.5179,
      "step": 180324
    },
    {
      "epoch": 34.63,
      "learning_rate": 0.001,
      "loss": 2.5237,
      "step": 180336
    },
    {
      "epoch": 34.63,
      "learning_rate": 0.001,
      "loss": 2.5218,
      "step": 180348
    },
    {
      "epoch": 34.63,
      "learning_rate": 0.001,
      "loss": 2.5135,
      "step": 180360
    },
    {
      "epoch": 34.63,
      "learning_rate": 0.001,
      "loss": 2.5211,
      "step": 180372
    },
    {
      "epoch": 34.64,
      "learning_rate": 0.001,
      "loss": 2.5127,
      "step": 180384
    },
    {
      "epoch": 34.64,
      "learning_rate": 0.001,
      "loss": 2.522,
      "step": 180396
    },
    {
      "epoch": 34.64,
      "learning_rate": 0.001,
      "loss": 2.5122,
      "step": 180408
    },
    {
      "epoch": 34.64,
      "learning_rate": 0.001,
      "loss": 2.5021,
      "step": 180420
    },
    {
      "epoch": 34.65,
      "learning_rate": 0.001,
      "loss": 2.5175,
      "step": 180432
    },
    {
      "epoch": 34.65,
      "learning_rate": 0.001,
      "loss": 2.528,
      "step": 180444
    },
    {
      "epoch": 34.65,
      "learning_rate": 0.001,
      "loss": 2.5303,
      "step": 180456
    },
    {
      "epoch": 34.65,
      "learning_rate": 0.001,
      "loss": 2.5141,
      "step": 180468
    },
    {
      "epoch": 34.65,
      "learning_rate": 0.001,
      "loss": 2.5175,
      "step": 180480
    },
    {
      "epoch": 34.66,
      "learning_rate": 0.001,
      "loss": 2.5235,
      "step": 180492
    },
    {
      "epoch": 34.66,
      "learning_rate": 0.001,
      "loss": 2.5173,
      "step": 180504
    },
    {
      "epoch": 34.66,
      "learning_rate": 0.001,
      "loss": 2.515,
      "step": 180516
    },
    {
      "epoch": 34.66,
      "learning_rate": 0.001,
      "loss": 2.5169,
      "step": 180528
    },
    {
      "epoch": 34.67,
      "learning_rate": 0.001,
      "loss": 2.5148,
      "step": 180540
    },
    {
      "epoch": 34.67,
      "learning_rate": 0.001,
      "loss": 2.513,
      "step": 180552
    },
    {
      "epoch": 34.67,
      "learning_rate": 0.001,
      "loss": 2.516,
      "step": 180564
    },
    {
      "epoch": 34.67,
      "learning_rate": 0.001,
      "loss": 2.5124,
      "step": 180576
    },
    {
      "epoch": 34.68,
      "learning_rate": 0.001,
      "loss": 2.5273,
      "step": 180588
    },
    {
      "epoch": 34.68,
      "learning_rate": 0.001,
      "loss": 2.5182,
      "step": 180600
    },
    {
      "epoch": 34.68,
      "learning_rate": 0.001,
      "loss": 2.5195,
      "step": 180612
    },
    {
      "epoch": 34.68,
      "learning_rate": 0.001,
      "loss": 2.5204,
      "step": 180624
    },
    {
      "epoch": 34.68,
      "eval_ag_news_accuracy": 0.329,
      "eval_ag_news_bleu_score": 4.974508433046372,
      "eval_ag_news_bleu_score_sem": 0.15502555477679023,
      "eval_ag_news_emb_cos_sim": 0.8153427839279175,
      "eval_ag_news_emb_cos_sim_sem": 0.007401230918525453,
      "eval_ag_news_emb_top1_equal": 0.1875,
      "eval_ag_news_emb_top1_equal_sem": 0.034634623208270626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.494140148162842,
      "eval_ag_news_n_ngrams_match_1": 14.436,
      "eval_ag_news_n_ngrams_match_2": 3.22,
      "eval_ag_news_n_ngrams_match_3": 0.956,
      "eval_ag_news_num_pred_words": 46.872,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.92196776470116,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3557681546184433,
      "eval_ag_news_runtime": 10.1567,
      "eval_ag_news_samples_per_second": 49.229,
      "eval_ag_news_steps_per_second": 0.098,
      "eval_ag_news_token_set_f1": 0.35821993120855017,
      "eval_ag_news_token_set_f1_sem": 0.004680047693740569,
      "eval_ag_news_token_set_precision": 0.3448182084746686,
      "eval_ag_news_token_set_recall": 0.38495225114313913,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 180625
    },
    {
      "epoch": 34.68,
      "eval_anthropic_toxic_prompts_accuracy": 0.114625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.163946604846592,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12060720723547808,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6742329001426697,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009851039537278948,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.224412679672241,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.316,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.948,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.732,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.326,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.138805287853586,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 62.96875,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21603344633175534,
      "eval_anthropic_toxic_prompts_runtime": 9.7285,
      "eval_anthropic_toxic_prompts_samples_per_second": 51.395,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.103,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36265401707493905,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0066885456780596975,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44656755152218747,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.33113894436021063,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 180625
    },
    {
      "epoch": 34.68,
      "eval_arxiv_accuracy": 0.351625,
      "eval_arxiv_bleu_score": 4.364824424679262,
      "eval_arxiv_bleu_score_sem": 0.12660735233163145,
      "eval_arxiv_emb_cos_sim": 0.7752806544303894,
      "eval_arxiv_emb_cos_sim_sem": 0.007239973124855725,
      "eval_arxiv_emb_top1_equal": 0.2734375,
      "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.351532220840454,
      "eval_arxiv_n_ngrams_match_1": 15.272,
      "eval_arxiv_n_ngrams_match_2": 2.934,
      "eval_arxiv_n_ngrams_match_3": 0.656,
      "eval_arxiv_num_pred_words": 40.126,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.546439601304957,
      "eval_arxiv_pred_num_tokens": 62.8984375,
      "eval_arxiv_rouge_score": 0.3687655812504964,
      "eval_arxiv_runtime": 10.4478,
      "eval_arxiv_samples_per_second": 47.857,
      "eval_arxiv_steps_per_second": 0.096,
      "eval_arxiv_token_set_f1": 0.3607652695099407,
      "eval_arxiv_token_set_f1_sem": 0.004163745905764404,
      "eval_arxiv_token_set_precision": 0.31325242155882815,
      "eval_arxiv_token_set_recall": 0.44207455899773457,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 180625
    },
    {
      "epoch": 34.68,
      "eval_python_code_alpaca_accuracy": 0.16125,
      "eval_python_code_alpaca_bleu_score": 4.794661149080865,
      "eval_python_code_alpaca_bleu_score_sem": 0.15499046591979487,
      "eval_python_code_alpaca_emb_cos_sim": 0.7546582221984863,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008590295589416512,
      "eval_python_code_alpaca_emb_top1_equal": 0.15625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8581385612487793,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.852,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.982,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.046,
      "eval_python_code_alpaca_num_pred_words": 42.858,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.42905360661869,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3393293773582692,
      "eval_python_code_alpaca_runtime": 9.8051,
      "eval_python_code_alpaca_samples_per_second": 50.994,
      "eval_python_code_alpaca_steps_per_second": 0.102,
      "eval_python_code_alpaca_token_set_f1": 0.47472739527568747,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005446944661341495,
      "eval_python_code_alpaca_token_set_precision": 0.5384051703684122,
      "eval_python_code_alpaca_token_set_recall": 0.4493811862598187,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 180625
    },
    {
      "epoch": 34.68,
      "eval_wikibio_accuracy": 0.3265625,
      "eval_wikibio_bleu_score": 6.277221085514228,
      "eval_wikibio_bleu_score_sem": 0.223908910217878,
      "eval_wikibio_emb_cos_sim": 0.7418359518051147,
      "eval_wikibio_emb_cos_sim_sem": 0.00923359808476197,
      "eval_wikibio_emb_top1_equal": 0.203125,
      "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.661391019821167,
      "eval_wikibio_n_ngrams_match_1": 10.098,
      "eval_wikibio_n_ngrams_match_2": 3.454,
      "eval_wikibio_n_ngrams_match_3": 1.328,
      "eval_wikibio_num_pred_words": 36.168,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 38.915437384090936,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3576883852165406,
      "eval_wikibio_runtime": 10.2675,
      "eval_wikibio_samples_per_second": 48.697,
      "eval_wikibio_steps_per_second": 0.097,
      "eval_wikibio_token_set_f1": 0.3213830432880749,
      "eval_wikibio_token_set_f1_sem": 0.005467455556351841,
      "eval_wikibio_token_set_precision": 0.32636548310109453,
      "eval_wikibio_token_set_recall": 0.334278212354833,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 180625
    },
    {
      "epoch": 34.68,
      "eval_nq_accuracy": 0.53634375,
      "eval_nq_bleu_score": 11.923921213060266,
      "eval_nq_bleu_score_sem": 0.49461021323551146,
      "eval_nq_emb_cos_sim": 0.8370808959007263,
      "eval_nq_emb_cos_sim_sem": 0.007509415951752346,
      "eval_nq_emb_top1_equal": 0.3515625,
      "eval_nq_emb_top1_equal_sem": 0.04236756101983345,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.14229154586792,
      "eval_nq_n_ngrams_match_1": 23.246,
      "eval_nq_n_ngrams_match_2": 8.548,
      "eval_nq_n_ngrams_match_3": 3.97,
      "eval_nq_num_pred_words": 49.252,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.518936813169011,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45085028307263575,
      "eval_nq_runtime": 10.204,
      "eval_nq_samples_per_second": 49.001,
      "eval_nq_steps_per_second": 0.098,
      "eval_nq_token_set_f1": 0.46316763725279503,
      "eval_nq_token_set_f1_sem": 0.0048679272921271196,
      "eval_nq_token_set_precision": 0.4227747784835111,
      "eval_nq_token_set_recall": 0.5203570583988628,
      "eval_nq_true_num_tokens": 64.0,
      "step": 180625
    },
    {
      "epoch": 34.68,
      "learning_rate": 0.001,
      "loss": 2.515,
      "step": 180636
    },
    {
      "epoch": 34.69,
      "learning_rate": 0.001,
      "loss": 2.5223,
      "step": 180648
    },
    {
      "epoch": 34.69,
      "learning_rate": 0.001,
      "loss": 2.5151,
      "step": 180660
    },
    {
      "epoch": 34.69,
      "learning_rate": 0.001,
      "loss": 2.5236,
      "step": 180672
    },
    {
      "epoch": 34.69,
      "learning_rate": 0.001,
      "loss": 2.5125,
      "step": 180684
    },
    {
      "epoch": 34.7,
      "learning_rate": 0.001,
      "loss": 2.5169,
      "step": 180696
    },
    {
      "epoch": 34.7,
      "learning_rate": 0.001,
      "loss": 2.5111,
      "step": 180708
    },
    {
      "epoch": 34.7,
      "learning_rate": 0.001,
      "loss": 2.5226,
      "step": 180720
    },
    {
      "epoch": 34.7,
      "learning_rate": 0.001,
      "loss": 2.5164,
      "step": 180732
    },
    {
      "epoch": 34.71,
      "learning_rate": 0.001,
      "loss": 2.518,
      "step": 180744
    },
    {
      "epoch": 34.71,
      "learning_rate": 0.001,
      "loss": 2.51,
      "step": 180756
    },
    {
      "epoch": 34.71,
      "learning_rate": 0.001,
      "loss": 2.522,
      "step": 180768
    },
    {
      "epoch": 34.71,
      "learning_rate": 0.001,
      "loss": 2.5232,
      "step": 180780
    },
    {
      "epoch": 34.71,
      "learning_rate": 0.001,
      "loss": 2.5133,
      "step": 180792
    },
    {
      "epoch": 34.72,
      "learning_rate": 0.001,
      "loss": 2.516,
      "step": 180804
    },
    {
      "epoch": 34.72,
      "learning_rate": 0.001,
      "loss": 2.5202,
      "step": 180816
    },
    {
      "epoch": 34.72,
      "learning_rate": 0.001,
      "loss": 2.5161,
      "step": 180828
    },
    {
      "epoch": 34.72,
      "learning_rate": 0.001,
      "loss": 2.522,
      "step": 180840
    },
    {
      "epoch": 34.73,
      "learning_rate": 0.001,
      "loss": 2.5156,
      "step": 180852
    },
    {
      "epoch": 34.73,
      "learning_rate": 0.001,
      "loss": 2.5186,
      "step": 180864
    },
    {
      "epoch": 34.73,
      "learning_rate": 0.001,
      "loss": 2.5211,
      "step": 180876
    },
    {
      "epoch": 34.73,
      "learning_rate": 0.001,
      "loss": 2.5186,
      "step": 180888
    },
    {
      "epoch": 34.74,
      "learning_rate": 0.001,
      "loss": 2.5192,
      "step": 180900
    },
    {
      "epoch": 34.74,
      "learning_rate": 0.001,
      "loss": 2.5123,
      "step": 180912
    },
    {
      "epoch": 34.74,
      "learning_rate": 0.001,
      "loss": 2.5111,
      "step": 180924
    },
    {
      "epoch": 34.74,
      "learning_rate": 0.001,
      "loss": 2.5197,
      "step": 180936
    },
    {
      "epoch": 34.74,
      "learning_rate": 0.001,
      "loss": 2.5149,
      "step": 180948
    },
    {
      "epoch": 34.75,
      "learning_rate": 0.001,
      "loss": 2.5057,
      "step": 180960
    },
    {
      "epoch": 34.75,
      "learning_rate": 0.001,
      "loss": 2.5225,
      "step": 180972
    },
    {
      "epoch": 34.75,
      "learning_rate": 0.001,
      "loss": 2.5175,
      "step": 180984
    },
    {
      "epoch": 34.75,
      "learning_rate": 0.001,
      "loss": 2.5112,
      "step": 180996
    },
    {
      "epoch": 34.76,
      "learning_rate": 0.001,
      "loss": 2.5115,
      "step": 181008
    },
    {
      "epoch": 34.76,
      "learning_rate": 0.001,
      "loss": 2.514,
      "step": 181020
    },
    {
      "epoch": 34.76,
      "learning_rate": 0.001,
      "loss": 2.5215,
      "step": 181032
    },
    {
      "epoch": 34.76,
      "learning_rate": 0.001,
      "loss": 2.5179,
      "step": 181044
    },
    {
      "epoch": 34.76,
      "learning_rate": 0.001,
      "loss": 2.5186,
      "step": 181056
    },
    {
      "epoch": 34.77,
      "learning_rate": 0.001,
      "loss": 2.5081,
      "step": 181068
    },
    {
      "epoch": 34.77,
      "learning_rate": 0.001,
      "loss": 2.515,
      "step": 181080
    },
    {
      "epoch": 34.77,
      "learning_rate": 0.001,
      "loss": 2.5162,
      "step": 181092
    },
    {
      "epoch": 34.77,
      "learning_rate": 0.001,
      "loss": 2.5275,
      "step": 181104
    },
    {
      "epoch": 34.78,
      "learning_rate": 0.001,
      "loss": 2.514,
      "step": 181116
    },
    {
      "epoch": 34.78,
      "learning_rate": 0.001,
      "loss": 2.5203,
      "step": 181128
    },
    {
      "epoch": 34.78,
      "learning_rate": 0.001,
      "loss": 2.5096,
      "step": 181140
    },
    {
      "epoch": 34.78,
      "learning_rate": 0.001,
      "loss": 2.5142,
      "step": 181152
    },
    {
      "epoch": 34.79,
      "learning_rate": 0.001,
      "loss": 2.5157,
      "step": 181164
    },
    {
      "epoch": 34.79,
      "learning_rate": 0.001,
      "loss": 2.5154,
      "step": 181176
    },
    {
      "epoch": 34.79,
      "learning_rate": 0.001,
      "loss": 2.52,
      "step": 181188
    },
    {
      "epoch": 34.79,
      "learning_rate": 0.001,
      "loss": 2.5137,
      "step": 181200
    },
    {
      "epoch": 34.79,
      "learning_rate": 0.001,
      "loss": 2.5077,
      "step": 181212
    },
    {
      "epoch": 34.8,
      "learning_rate": 0.001,
      "loss": 2.5249,
      "step": 181224
    },
    {
      "epoch": 34.8,
      "learning_rate": 0.001,
      "loss": 2.5194,
      "step": 181236
    },
    {
      "epoch": 34.8,
      "learning_rate": 0.001,
      "loss": 2.5076,
      "step": 181248
    },
    {
      "epoch": 34.8,
      "eval_ag_news_accuracy": 0.328875,
      "eval_ag_news_bleu_score": 4.804993873614055,
      "eval_ag_news_bleu_score_sem": 0.14381557000213738,
      "eval_ag_news_emb_cos_sim": 0.8208189010620117,
      "eval_ag_news_emb_cos_sim_sem": 0.006431708522983135,
      "eval_ag_news_emb_top1_equal": 0.234375,
      "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4985673427581787,
      "eval_ag_news_n_ngrams_match_1": 14.176,
      "eval_ag_news_n_ngrams_match_2": 3.086,
      "eval_ag_news_n_ngrams_match_3": 0.866,
      "eval_ag_news_num_pred_words": 46.326,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.068042835248484,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3571379081987875,
      "eval_ag_news_runtime": 10.3061,
      "eval_ag_news_samples_per_second": 48.515,
      "eval_ag_news_steps_per_second": 0.097,
      "eval_ag_news_token_set_f1": 0.3529043986975882,
      "eval_ag_news_token_set_f1_sem": 0.004098616217864085,
      "eval_ag_news_token_set_precision": 0.33827297152610164,
      "eval_ag_news_token_set_recall": 0.38201161879811407,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 181250
    },
    {
      "epoch": 34.8,
      "eval_anthropic_toxic_prompts_accuracy": 0.11496875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2071442026162926,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12430579377141336,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6753900051116943,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00914498565285683,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.21734356880188,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.218,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.918,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.716,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.356,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.961722930966527,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.216366667957075,
      "eval_anthropic_toxic_prompts_runtime": 9.8046,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.996,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.102,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35854985319278126,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006436377657620959,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.43671869039213135,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3316968474751847,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 181250
    },
    {
      "epoch": 34.8,
      "eval_arxiv_accuracy": 0.349,
      "eval_arxiv_bleu_score": 4.429891031934787,
      "eval_arxiv_bleu_score_sem": 0.1314076453950146,
      "eval_arxiv_emb_cos_sim": 0.7752380967140198,
      "eval_arxiv_emb_cos_sim_sem": 0.007095265871760618,
      "eval_arxiv_emb_top1_equal": 0.3359375,
      "eval_arxiv_emb_top1_equal_sem": 0.04191137143408563,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3631441593170166,
      "eval_arxiv_n_ngrams_match_1": 15.454,
      "eval_arxiv_n_ngrams_match_2": 3.038,
      "eval_arxiv_n_ngrams_match_3": 0.688,
      "eval_arxiv_num_pred_words": 40.742,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.87985113242794,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3671230992011829,
      "eval_arxiv_runtime": 10.0578,
      "eval_arxiv_samples_per_second": 49.713,
      "eval_arxiv_steps_per_second": 0.099,
      "eval_arxiv_token_set_f1": 0.3610990832431798,
      "eval_arxiv_token_set_f1_sem": 0.004241424562178454,
      "eval_arxiv_token_set_precision": 0.3151974495500728,
      "eval_arxiv_token_set_recall": 0.4407001617443411,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 181250
    },
    {
      "epoch": 34.8,
      "eval_python_code_alpaca_accuracy": 0.16215625,
      "eval_python_code_alpaca_bleu_score": 4.676850928545665,
      "eval_python_code_alpaca_bleu_score_sem": 0.14177454527915884,
      "eval_python_code_alpaca_emb_cos_sim": 0.7616323232650757,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007801763615181115,
      "eval_python_code_alpaca_emb_top1_equal": 0.1171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.868537664413452,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.852,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.988,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.004,
      "eval_python_code_alpaca_num_pred_words": 43.496,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.611245809017923,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3383870860287026,
      "eval_python_code_alpaca_runtime": 10.3995,
      "eval_python_code_alpaca_samples_per_second": 48.079,
      "eval_python_code_alpaca_steps_per_second": 0.096,
      "eval_python_code_alpaca_token_set_f1": 0.4870405216309066,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005466785875407218,
      "eval_python_code_alpaca_token_set_precision": 0.5389805849457641,
      "eval_python_code_alpaca_token_set_recall": 0.4661176088440191,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 181250
    },
    {
      "epoch": 34.8,
      "eval_wikibio_accuracy": 0.32778125,
      "eval_wikibio_bleu_score": 6.141191043301738,
      "eval_wikibio_bleu_score_sem": 0.21718134250764892,
      "eval_wikibio_emb_cos_sim": 0.749444842338562,
      "eval_wikibio_emb_cos_sim_sem": 0.008318791518209903,
      "eval_wikibio_emb_top1_equal": 0.1640625,
      "eval_wikibio_emb_top1_equal_sem": 0.03286167651298939,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.652595281600952,
      "eval_wikibio_n_ngrams_match_1": 10.37,
      "eval_wikibio_n_ngrams_match_2": 3.472,
      "eval_wikibio_n_ngrams_match_3": 1.306,
      "eval_wikibio_num_pred_words": 36.18,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 38.57464832689593,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36860213465066394,
      "eval_wikibio_runtime": 9.9655,
      "eval_wikibio_samples_per_second": 50.173,
      "eval_wikibio_steps_per_second": 0.1,
      "eval_wikibio_token_set_f1": 0.3242861702515772,
      "eval_wikibio_token_set_f1_sem": 0.005358514615146788,
      "eval_wikibio_token_set_precision": 0.33549426531171667,
      "eval_wikibio_token_set_recall": 0.32899107763484203,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 181250
    },
    {
      "epoch": 34.8,
      "eval_nq_accuracy": 0.5345,
      "eval_nq_bleu_score": 11.753791075831005,
      "eval_nq_bleu_score_sem": 0.4679960419998606,
      "eval_nq_emb_cos_sim": 0.8319037556648254,
      "eval_nq_emb_cos_sim_sem": 0.007247675461421091,
      "eval_nq_emb_top1_equal": 0.34375,
      "eval_nq_emb_top1_equal_sem": 0.04214578430296913,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1426806449890137,
      "eval_nq_n_ngrams_match_1": 23.218,
      "eval_nq_n_ngrams_match_2": 8.532,
      "eval_nq_n_ngrams_match_3": 3.858,
      "eval_nq_num_pred_words": 48.954,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.522252168954852,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4518260088785709,
      "eval_nq_runtime": 10.4722,
      "eval_nq_samples_per_second": 47.746,
      "eval_nq_steps_per_second": 0.095,
      "eval_nq_token_set_f1": 0.46623278222821024,
      "eval_nq_token_set_f1_sem": 0.004872212076789529,
      "eval_nq_token_set_precision": 0.42553115163868954,
      "eval_nq_token_set_recall": 0.5245004882965694,
      "eval_nq_true_num_tokens": 64.0,
      "step": 181250
    },
    {
      "epoch": 34.8,
      "learning_rate": 0.001,
      "loss": 2.5184,
      "step": 181260
    },
    {
      "epoch": 34.81,
      "learning_rate": 0.001,
      "loss": 2.5167,
      "step": 181272
    },
    {
      "epoch": 34.81,
      "learning_rate": 0.001,
      "loss": 2.5238,
      "step": 181284
    },
    {
      "epoch": 34.81,
      "learning_rate": 0.001,
      "loss": 2.5144,
      "step": 181296
    },
    {
      "epoch": 34.81,
      "learning_rate": 0.001,
      "loss": 2.5123,
      "step": 181308
    },
    {
      "epoch": 34.82,
      "learning_rate": 0.001,
      "loss": 2.5191,
      "step": 181320
    },
    {
      "epoch": 34.82,
      "learning_rate": 0.001,
      "loss": 2.5187,
      "step": 181332
    },
    {
      "epoch": 34.82,
      "learning_rate": 0.001,
      "loss": 2.5217,
      "step": 181344
    },
    {
      "epoch": 34.82,
      "learning_rate": 0.001,
      "loss": 2.5241,
      "step": 181356
    },
    {
      "epoch": 34.82,
      "learning_rate": 0.001,
      "loss": 2.5128,
      "step": 181368
    },
    {
      "epoch": 34.83,
      "learning_rate": 0.001,
      "loss": 2.5068,
      "step": 181380
    },
    {
      "epoch": 34.83,
      "learning_rate": 0.001,
      "loss": 2.5218,
      "step": 181392
    },
    {
      "epoch": 34.83,
      "learning_rate": 0.001,
      "loss": 2.5089,
      "step": 181404
    },
    {
      "epoch": 34.83,
      "learning_rate": 0.001,
      "loss": 2.5084,
      "step": 181416
    },
    {
      "epoch": 34.84,
      "learning_rate": 0.001,
      "loss": 2.5212,
      "step": 181428
    },
    {
      "epoch": 34.84,
      "learning_rate": 0.001,
      "loss": 2.5157,
      "step": 181440
    },
    {
      "epoch": 34.84,
      "learning_rate": 0.001,
      "loss": 2.5114,
      "step": 181452
    },
    {
      "epoch": 34.84,
      "learning_rate": 0.001,
      "loss": 2.5131,
      "step": 181464
    },
    {
      "epoch": 34.85,
      "learning_rate": 0.001,
      "loss": 2.5266,
      "step": 181476
    },
    {
      "epoch": 34.85,
      "learning_rate": 0.001,
      "loss": 2.5217,
      "step": 181488
    },
    {
      "epoch": 34.85,
      "learning_rate": 0.001,
      "loss": 2.5217,
      "step": 181500
    },
    {
      "epoch": 34.85,
      "learning_rate": 0.001,
      "loss": 2.5219,
      "step": 181512
    },
    {
      "epoch": 34.85,
      "learning_rate": 0.001,
      "loss": 2.5183,
      "step": 181524
    },
    {
      "epoch": 34.86,
      "learning_rate": 0.001,
      "loss": 2.5175,
      "step": 181536
    },
    {
      "epoch": 34.86,
      "learning_rate": 0.001,
      "loss": 2.5149,
      "step": 181548
    },
    {
      "epoch": 34.86,
      "learning_rate": 0.001,
      "loss": 2.5052,
      "step": 181560
    },
    {
      "epoch": 34.86,
      "learning_rate": 0.001,
      "loss": 2.5146,
      "step": 181572
    },
    {
      "epoch": 34.87,
      "learning_rate": 0.001,
      "loss": 2.5101,
      "step": 181584
    },
    {
      "epoch": 34.87,
      "learning_rate": 0.001,
      "loss": 2.5099,
      "step": 181596
    },
    {
      "epoch": 34.87,
      "learning_rate": 0.001,
      "loss": 2.4997,
      "step": 181608
    },
    {
      "epoch": 34.87,
      "learning_rate": 0.001,
      "loss": 2.51,
      "step": 181620
    },
    {
      "epoch": 34.88,
      "learning_rate": 0.001,
      "loss": 2.5196,
      "step": 181632
    },
    {
      "epoch": 34.88,
      "learning_rate": 0.001,
      "loss": 2.5184,
      "step": 181644
    },
    {
      "epoch": 34.88,
      "learning_rate": 0.001,
      "loss": 2.5178,
      "step": 181656
    },
    {
      "epoch": 34.88,
      "learning_rate": 0.001,
      "loss": 2.5141,
      "step": 181668
    },
    {
      "epoch": 34.88,
      "learning_rate": 0.001,
      "loss": 2.5185,
      "step": 181680
    },
    {
      "epoch": 34.89,
      "learning_rate": 0.001,
      "loss": 2.5183,
      "step": 181692
    },
    {
      "epoch": 34.89,
      "learning_rate": 0.001,
      "loss": 2.5102,
      "step": 181704
    },
    {
      "epoch": 34.89,
      "learning_rate": 0.001,
      "loss": 2.5137,
      "step": 181716
    },
    {
      "epoch": 34.89,
      "learning_rate": 0.001,
      "loss": 2.5132,
      "step": 181728
    },
    {
      "epoch": 34.9,
      "learning_rate": 0.001,
      "loss": 2.5251,
      "step": 181740
    },
    {
      "epoch": 34.9,
      "learning_rate": 0.001,
      "loss": 2.5138,
      "step": 181752
    },
    {
      "epoch": 34.9,
      "learning_rate": 0.001,
      "loss": 2.5121,
      "step": 181764
    },
    {
      "epoch": 34.9,
      "learning_rate": 0.001,
      "loss": 2.5196,
      "step": 181776
    },
    {
      "epoch": 34.91,
      "learning_rate": 0.001,
      "loss": 2.5111,
      "step": 181788
    },
    {
      "epoch": 34.91,
      "learning_rate": 0.001,
      "loss": 2.5176,
      "step": 181800
    },
    {
      "epoch": 34.91,
      "learning_rate": 0.001,
      "loss": 2.5046,
      "step": 181812
    },
    {
      "epoch": 34.91,
      "learning_rate": 0.001,
      "loss": 2.5191,
      "step": 181824
    },
    {
      "epoch": 34.91,
      "learning_rate": 0.001,
      "loss": 2.5207,
      "step": 181836
    },
    {
      "epoch": 34.92,
      "learning_rate": 0.001,
      "loss": 2.5126,
      "step": 181848
    },
    {
      "epoch": 34.92,
      "learning_rate": 0.001,
      "loss": 2.5222,
      "step": 181860
    },
    {
      "epoch": 34.92,
      "learning_rate": 0.001,
      "loss": 2.5127,
      "step": 181872
    },
    {
      "epoch": 34.92,
      "eval_ag_news_accuracy": 0.32778125,
      "eval_ag_news_bleu_score": 4.9602767765405025,
      "eval_ag_news_bleu_score_sem": 0.15656288994688125,
      "eval_ag_news_emb_cos_sim": 0.8185689449310303,
      "eval_ag_news_emb_cos_sim_sem": 0.006665678849957266,
      "eval_ag_news_emb_top1_equal": 0.2421875,
      "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.49713397026062,
      "eval_ag_news_n_ngrams_match_1": 14.414,
      "eval_ag_news_n_ngrams_match_2": 3.222,
      "eval_ag_news_n_ngrams_match_3": 0.946,
      "eval_ag_news_num_pred_words": 47.056,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.02067796596033,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35595681717589284,
      "eval_ag_news_runtime": 10.3168,
      "eval_ag_news_samples_per_second": 48.465,
      "eval_ag_news_steps_per_second": 0.097,
      "eval_ag_news_token_set_f1": 0.3572590160496617,
      "eval_ag_news_token_set_f1_sem": 0.004391761844769307,
      "eval_ag_news_token_set_precision": 0.34403067172163065,
      "eval_ag_news_token_set_recall": 0.38612976103485114,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 181875
    },
    {
      "epoch": 34.92,
      "eval_anthropic_toxic_prompts_accuracy": 0.1145,
      "eval_anthropic_toxic_prompts_bleu_score": 3.207184484553967,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12655300506028005,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.682706356048584,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009426976386808556,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2380993366241455,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.296,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.96,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.776,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.36,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.485236828927505,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21522310366020758,
      "eval_anthropic_toxic_prompts_runtime": 9.8297,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.866,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.102,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3624188922080917,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0067680241486070365,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44539145807318636,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3331691113548435,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 181875
    },
    {
      "epoch": 34.92,
      "eval_arxiv_accuracy": 0.35278125,
      "eval_arxiv_bleu_score": 4.539418752884601,
      "eval_arxiv_bleu_score_sem": 0.13076241325439472,
      "eval_arxiv_emb_cos_sim": 0.7864891290664673,
      "eval_arxiv_emb_cos_sim_sem": 0.006223616360994691,
      "eval_arxiv_emb_top1_equal": 0.3125,
      "eval_arxiv_emb_top1_equal_sem": 0.041130074229814934,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3372349739074707,
      "eval_arxiv_n_ngrams_match_1": 15.746,
      "eval_arxiv_n_ngrams_match_2": 3.096,
      "eval_arxiv_n_ngrams_match_3": 0.708,
      "eval_arxiv_num_pred_words": 41.472,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.14120785722712,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.37258023760745,
      "eval_arxiv_runtime": 10.0854,
      "eval_arxiv_samples_per_second": 49.576,
      "eval_arxiv_steps_per_second": 0.099,
      "eval_arxiv_token_set_f1": 0.3657891911442688,
      "eval_arxiv_token_set_f1_sem": 0.0041102939618479824,
      "eval_arxiv_token_set_precision": 0.3195849110747359,
      "eval_arxiv_token_set_recall": 0.4405098954874597,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 181875
    },
    {
      "epoch": 34.92,
      "eval_python_code_alpaca_accuracy": 0.1626875,
      "eval_python_code_alpaca_bleu_score": 4.934295158768371,
      "eval_python_code_alpaca_bleu_score_sem": 0.15448891617669977,
      "eval_python_code_alpaca_emb_cos_sim": 0.7720087766647339,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007939166951138185,
      "eval_python_code_alpaca_emb_top1_equal": 0.15625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.867623805999756,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.168,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.194,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.178,
      "eval_python_code_alpaca_num_pred_words": 45.24,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.595158975523308,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33922738357496696,
      "eval_python_code_alpaca_runtime": 9.7722,
      "eval_python_code_alpaca_samples_per_second": 51.166,
      "eval_python_code_alpaca_steps_per_second": 0.102,
      "eval_python_code_alpaca_token_set_f1": 0.4959238966829353,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005318135492879365,
      "eval_python_code_alpaca_token_set_precision": 0.5570481958937105,
      "eval_python_code_alpaca_token_set_recall": 0.47168103921943716,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 181875
    },
    {
      "epoch": 34.92,
      "eval_wikibio_accuracy": 0.3291875,
      "eval_wikibio_bleu_score": 6.196995746032902,
      "eval_wikibio_bleu_score_sem": 0.2258332542701892,
      "eval_wikibio_emb_cos_sim": 0.7374675869941711,
      "eval_wikibio_emb_cos_sim_sem": 0.00925153025171434,
      "eval_wikibio_emb_top1_equal": 0.1796875,
      "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.61423921585083,
      "eval_wikibio_n_ngrams_match_1": 10.22,
      "eval_wikibio_n_ngrams_match_2": 3.418,
      "eval_wikibio_n_ngrams_match_3": 1.268,
      "eval_wikibio_num_pred_words": 36.276,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.12309251928056,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3555967650424833,
      "eval_wikibio_runtime": 10.0263,
      "eval_wikibio_samples_per_second": 49.869,
      "eval_wikibio_steps_per_second": 0.1,
      "eval_wikibio_token_set_f1": 0.3236343940470751,
      "eval_wikibio_token_set_f1_sem": 0.005430149620092611,
      "eval_wikibio_token_set_precision": 0.3309549762143529,
      "eval_wikibio_token_set_recall": 0.33478350801841644,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 181875
    },
    {
      "epoch": 34.92,
      "eval_nq_accuracy": 0.5350625,
      "eval_nq_bleu_score": 12.173006305095079,
      "eval_nq_bleu_score_sem": 0.4833641743203409,
      "eval_nq_emb_cos_sim": 0.8393230438232422,
      "eval_nq_emb_cos_sim_sem": 0.0063963762930408456,
      "eval_nq_emb_top1_equal": 0.328125,
      "eval_nq_emb_top1_equal_sem": 0.041664103776406315,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1397511959075928,
      "eval_nq_n_ngrams_match_1": 23.508,
      "eval_nq_n_ngrams_match_2": 8.796,
      "eval_nq_n_ngrams_match_3": 4.08,
      "eval_nq_num_pred_words": 49.38,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.497323197071449,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4554898941582331,
      "eval_nq_runtime": 10.8463,
      "eval_nq_samples_per_second": 46.099,
      "eval_nq_steps_per_second": 0.092,
      "eval_nq_token_set_f1": 0.4684268262234774,
      "eval_nq_token_set_f1_sem": 0.004860853452374327,
      "eval_nq_token_set_precision": 0.42800084347651546,
      "eval_nq_token_set_recall": 0.5256222382886034,
      "eval_nq_true_num_tokens": 64.0,
      "step": 181875
    },
    {
      "epoch": 34.92,
      "learning_rate": 0.001,
      "loss": 2.5133,
      "step": 181884
    },
    {
      "epoch": 34.93,
      "learning_rate": 0.001,
      "loss": 2.5116,
      "step": 181896
    },
    {
      "epoch": 34.93,
      "learning_rate": 0.001,
      "loss": 2.5164,
      "step": 181908
    },
    {
      "epoch": 34.93,
      "learning_rate": 0.001,
      "loss": 2.5176,
      "step": 181920
    },
    {
      "epoch": 34.93,
      "learning_rate": 0.001,
      "loss": 2.5229,
      "step": 181932
    },
    {
      "epoch": 34.94,
      "learning_rate": 0.001,
      "loss": 2.5277,
      "step": 181944
    },
    {
      "epoch": 34.94,
      "learning_rate": 0.001,
      "loss": 2.5176,
      "step": 181956
    },
    {
      "epoch": 34.94,
      "learning_rate": 0.001,
      "loss": 2.5166,
      "step": 181968
    },
    {
      "epoch": 34.94,
      "learning_rate": 0.001,
      "loss": 2.5242,
      "step": 181980
    },
    {
      "epoch": 34.94,
      "learning_rate": 0.001,
      "loss": 2.5183,
      "step": 181992
    },
    {
      "epoch": 34.95,
      "learning_rate": 0.001,
      "loss": 2.5092,
      "step": 182004
    },
    {
      "epoch": 34.95,
      "learning_rate": 0.001,
      "loss": 2.5139,
      "step": 182016
    },
    {
      "epoch": 34.95,
      "learning_rate": 0.001,
      "loss": 2.5112,
      "step": 182028
    },
    {
      "epoch": 34.95,
      "learning_rate": 0.001,
      "loss": 2.5121,
      "step": 182040
    },
    {
      "epoch": 34.96,
      "learning_rate": 0.001,
      "loss": 2.5204,
      "step": 182052
    },
    {
      "epoch": 34.96,
      "learning_rate": 0.001,
      "loss": 2.516,
      "step": 182064
    },
    {
      "epoch": 34.96,
      "learning_rate": 0.001,
      "loss": 2.5255,
      "step": 182076
    },
    {
      "epoch": 34.96,
      "learning_rate": 0.001,
      "loss": 2.5172,
      "step": 182088
    },
    {
      "epoch": 34.97,
      "learning_rate": 0.001,
      "loss": 2.5211,
      "step": 182100
    },
    {
      "epoch": 34.97,
      "learning_rate": 0.001,
      "loss": 2.5204,
      "step": 182112
    },
    {
      "epoch": 34.97,
      "learning_rate": 0.001,
      "loss": 2.52,
      "step": 182124
    },
    {
      "epoch": 34.97,
      "learning_rate": 0.001,
      "loss": 2.5217,
      "step": 182136
    },
    {
      "epoch": 34.97,
      "learning_rate": 0.001,
      "loss": 2.5166,
      "step": 182148
    },
    {
      "epoch": 34.98,
      "learning_rate": 0.001,
      "loss": 2.5132,
      "step": 182160
    },
    {
      "epoch": 34.98,
      "learning_rate": 0.001,
      "loss": 2.517,
      "step": 182172
    },
    {
      "epoch": 34.98,
      "learning_rate": 0.001,
      "loss": 2.5076,
      "step": 182184
    },
    {
      "epoch": 34.98,
      "learning_rate": 0.001,
      "loss": 2.5245,
      "step": 182196
    },
    {
      "epoch": 34.99,
      "learning_rate": 0.001,
      "loss": 2.5254,
      "step": 182208
    },
    {
      "epoch": 34.99,
      "learning_rate": 0.001,
      "loss": 2.5158,
      "step": 182220
    },
    {
      "epoch": 34.99,
      "learning_rate": 0.001,
      "loss": 2.5181,
      "step": 182232
    },
    {
      "epoch": 34.99,
      "learning_rate": 0.001,
      "loss": 2.509,
      "step": 182244
    },
    {
      "epoch": 35.0,
      "learning_rate": 0.001,
      "loss": 2.5102,
      "step": 182256
    },
    {
      "epoch": 35.0,
      "learning_rate": 0.001,
      "loss": 2.5118,
      "step": 182268
    },
    {
      "epoch": 35.0,
      "learning_rate": 0.001,
      "loss": 2.5058,
      "step": 182280
    },
    {
      "epoch": 35.0,
      "learning_rate": 0.001,
      "loss": 2.5103,
      "step": 182292
    },
    {
      "epoch": 35.0,
      "learning_rate": 0.001,
      "loss": 2.5078,
      "step": 182304
    },
    {
      "epoch": 35.01,
      "learning_rate": 0.001,
      "loss": 2.5089,
      "step": 182316
    },
    {
      "epoch": 35.01,
      "learning_rate": 0.001,
      "loss": 2.507,
      "step": 182328
    },
    {
      "epoch": 35.01,
      "learning_rate": 0.001,
      "loss": 2.5069,
      "step": 182340
    },
    {
      "epoch": 35.01,
      "learning_rate": 0.001,
      "loss": 2.5024,
      "step": 182352
    },
    {
      "epoch": 35.02,
      "learning_rate": 0.001,
      "loss": 2.4935,
      "step": 182364
    },
    {
      "epoch": 35.02,
      "learning_rate": 0.001,
      "loss": 2.5007,
      "step": 182376
    },
    {
      "epoch": 35.02,
      "learning_rate": 0.001,
      "loss": 2.5044,
      "step": 182388
    },
    {
      "epoch": 35.02,
      "learning_rate": 0.001,
      "loss": 2.5037,
      "step": 182400
    },
    {
      "epoch": 35.03,
      "learning_rate": 0.001,
      "loss": 2.4962,
      "step": 182412
    },
    {
      "epoch": 35.03,
      "learning_rate": 0.001,
      "loss": 2.4991,
      "step": 182424
    },
    {
      "epoch": 35.03,
      "learning_rate": 0.001,
      "loss": 2.5073,
      "step": 182436
    },
    {
      "epoch": 35.03,
      "learning_rate": 0.001,
      "loss": 2.5096,
      "step": 182448
    },
    {
      "epoch": 35.03,
      "learning_rate": 0.001,
      "loss": 2.4871,
      "step": 182460
    },
    {
      "epoch": 35.04,
      "learning_rate": 0.001,
      "loss": 2.5005,
      "step": 182472
    },
    {
      "epoch": 35.04,
      "learning_rate": 0.001,
      "loss": 2.4967,
      "step": 182484
    },
    {
      "epoch": 35.04,
      "learning_rate": 0.001,
      "loss": 2.5009,
      "step": 182496
    },
    {
      "epoch": 35.04,
      "eval_ag_news_accuracy": 0.329375,
      "eval_ag_news_bleu_score": 5.013241895185381,
      "eval_ag_news_bleu_score_sem": 0.16971217796875318,
      "eval_ag_news_emb_cos_sim": 0.8215794563293457,
      "eval_ag_news_emb_cos_sim_sem": 0.006490493498488212,
      "eval_ag_news_emb_top1_equal": 0.2890625,
      "eval_ag_news_emb_top1_equal_sem": 0.04022626667363519,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.495105743408203,
      "eval_ag_news_n_ngrams_match_1": 14.534,
      "eval_ag_news_n_ngrams_match_2": 3.246,
      "eval_ag_news_n_ngrams_match_3": 0.944,
      "eval_ag_news_num_pred_words": 46.54,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.95377241298012,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3634637101165499,
      "eval_ag_news_runtime": 10.4592,
      "eval_ag_news_samples_per_second": 47.805,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.35981091616312516,
      "eval_ag_news_token_set_f1_sem": 0.004489789468071795,
      "eval_ag_news_token_set_precision": 0.34730553931234,
      "eval_ag_news_token_set_recall": 0.38580613549033604,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 182500
    },
    {
      "epoch": 35.04,
      "eval_anthropic_toxic_prompts_accuracy": 0.1156875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1064264684772644,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11985823692510499,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6748046278953552,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010383487259962153,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2069003582000732,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.29,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.888,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.67,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.848,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.70239884626327,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21973808663468183,
      "eval_anthropic_toxic_prompts_runtime": 9.6768,
      "eval_anthropic_toxic_prompts_samples_per_second": 51.67,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.103,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35507274023612245,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00657076088930049,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4436292879843375,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.321794609671025,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 182500
    },
    {
      "epoch": 35.04,
      "eval_arxiv_accuracy": 0.3505625,
      "eval_arxiv_bleu_score": 4.392728758966296,
      "eval_arxiv_bleu_score_sem": 0.12192842762713288,
      "eval_arxiv_emb_cos_sim": 0.779847264289856,
      "eval_arxiv_emb_cos_sim_sem": 0.007890435316091972,
      "eval_arxiv_emb_top1_equal": 0.375,
      "eval_arxiv_emb_top1_equal_sem": 0.04295896296396028,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.362316370010376,
      "eval_arxiv_n_ngrams_match_1": 15.514,
      "eval_arxiv_n_ngrams_match_2": 3.05,
      "eval_arxiv_n_ngrams_match_3": 0.624,
      "eval_arxiv_num_pred_words": 41.092,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.855954592497817,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.370596800073038,
      "eval_arxiv_runtime": 10.6079,
      "eval_arxiv_samples_per_second": 47.134,
      "eval_arxiv_steps_per_second": 0.094,
      "eval_arxiv_token_set_f1": 0.3603772299266208,
      "eval_arxiv_token_set_f1_sem": 0.004226949685001713,
      "eval_arxiv_token_set_precision": 0.3158418822046445,
      "eval_arxiv_token_set_recall": 0.4332084098977886,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 182500
    },
    {
      "epoch": 35.04,
      "eval_python_code_alpaca_accuracy": 0.16078125,
      "eval_python_code_alpaca_bleu_score": 4.838796129483069,
      "eval_python_code_alpaca_bleu_score_sem": 0.1462466692264986,
      "eval_python_code_alpaca_emb_cos_sim": 0.7695719003677368,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007619172272175286,
      "eval_python_code_alpaca_emb_top1_equal": 0.15625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8930504322052,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.094,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.0,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.066,
      "eval_python_code_alpaca_num_pred_words": 43.838,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.048280772926603,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.34592551572692587,
      "eval_python_code_alpaca_runtime": 9.7173,
      "eval_python_code_alpaca_samples_per_second": 51.455,
      "eval_python_code_alpaca_steps_per_second": 0.103,
      "eval_python_code_alpaca_token_set_f1": 0.49022607803130974,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0054169132953773656,
      "eval_python_code_alpaca_token_set_precision": 0.5523526157981145,
      "eval_python_code_alpaca_token_set_recall": 0.46187599070232777,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 182500
    },
    {
      "epoch": 35.04,
      "eval_wikibio_accuracy": 0.326875,
      "eval_wikibio_bleu_score": 6.176190741503038,
      "eval_wikibio_bleu_score_sem": 0.22174135625377686,
      "eval_wikibio_emb_cos_sim": 0.7572555541992188,
      "eval_wikibio_emb_cos_sim_sem": 0.008205068125670876,
      "eval_wikibio_emb_top1_equal": 0.2109375,
      "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.68223237991333,
      "eval_wikibio_n_ngrams_match_1": 10.12,
      "eval_wikibio_n_ngrams_match_2": 3.424,
      "eval_wikibio_n_ngrams_match_3": 1.294,
      "eval_wikibio_num_pred_words": 35.302,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 39.7349987491894,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.35591152715847985,
      "eval_wikibio_runtime": 9.7287,
      "eval_wikibio_samples_per_second": 51.394,
      "eval_wikibio_steps_per_second": 0.103,
      "eval_wikibio_token_set_f1": 0.32301632706906014,
      "eval_wikibio_token_set_f1_sem": 0.005584873057938655,
      "eval_wikibio_token_set_precision": 0.3286334257346614,
      "eval_wikibio_token_set_recall": 0.3364926135384171,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 182500
    },
    {
      "epoch": 35.04,
      "eval_nq_accuracy": 0.53509375,
      "eval_nq_bleu_score": 12.156841420824348,
      "eval_nq_bleu_score_sem": 0.4956232439593376,
      "eval_nq_emb_cos_sim": 0.8391368389129639,
      "eval_nq_emb_cos_sim_sem": 0.0072206975271070705,
      "eval_nq_emb_top1_equal": 0.328125,
      "eval_nq_emb_top1_equal_sem": 0.041664103776406315,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.135446786880493,
      "eval_nq_n_ngrams_match_1": 23.586,
      "eval_nq_n_ngrams_match_2": 8.792,
      "eval_nq_n_ngrams_match_3": 4.07,
      "eval_nq_num_pred_words": 49.326,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.460825848505804,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45521639355119436,
      "eval_nq_runtime": 10.1561,
      "eval_nq_samples_per_second": 49.232,
      "eval_nq_steps_per_second": 0.098,
      "eval_nq_token_set_f1": 0.46914761397712873,
      "eval_nq_token_set_f1_sem": 0.005000811715255126,
      "eval_nq_token_set_precision": 0.4286275655618775,
      "eval_nq_token_set_recall": 0.5261076186952025,
      "eval_nq_true_num_tokens": 64.0,
      "step": 182500
    },
    {
      "epoch": 35.04,
      "learning_rate": 0.001,
      "loss": 2.5014,
      "step": 182508
    },
    {
      "epoch": 35.05,
      "learning_rate": 0.001,
      "loss": 2.5049,
      "step": 182520
    },
    {
      "epoch": 35.05,
      "learning_rate": 0.001,
      "loss": 2.5012,
      "step": 182532
    },
    {
      "epoch": 35.05,
      "learning_rate": 0.001,
      "loss": 2.5047,
      "step": 182544
    },
    {
      "epoch": 35.05,
      "learning_rate": 0.001,
      "loss": 2.5032,
      "step": 182556
    },
    {
      "epoch": 35.06,
      "learning_rate": 0.001,
      "loss": 2.5044,
      "step": 182568
    },
    {
      "epoch": 35.06,
      "learning_rate": 0.001,
      "loss": 2.5096,
      "step": 182580
    },
    {
      "epoch": 35.06,
      "learning_rate": 0.001,
      "loss": 2.5007,
      "step": 182592
    },
    {
      "epoch": 35.06,
      "learning_rate": 0.001,
      "loss": 2.502,
      "step": 182604
    },
    {
      "epoch": 35.06,
      "learning_rate": 0.001,
      "loss": 2.4997,
      "step": 182616
    },
    {
      "epoch": 35.07,
      "learning_rate": 0.001,
      "loss": 2.5056,
      "step": 182628
    },
    {
      "epoch": 35.07,
      "learning_rate": 0.001,
      "loss": 2.5058,
      "step": 182640
    },
    {
      "epoch": 35.07,
      "learning_rate": 0.001,
      "loss": 2.5113,
      "step": 182652
    },
    {
      "epoch": 35.07,
      "learning_rate": 0.001,
      "loss": 2.4952,
      "step": 182664
    },
    {
      "epoch": 35.08,
      "learning_rate": 0.001,
      "loss": 2.5039,
      "step": 182676
    },
    {
      "epoch": 35.08,
      "learning_rate": 0.001,
      "loss": 2.5095,
      "step": 182688
    },
    {
      "epoch": 35.08,
      "learning_rate": 0.001,
      "loss": 2.5158,
      "step": 182700
    },
    {
      "epoch": 35.08,
      "learning_rate": 0.001,
      "loss": 2.5089,
      "step": 182712
    },
    {
      "epoch": 35.09,
      "learning_rate": 0.001,
      "loss": 2.5077,
      "step": 182724
    },
    {
      "epoch": 35.09,
      "learning_rate": 0.001,
      "loss": 2.5095,
      "step": 182736
    },
    {
      "epoch": 35.09,
      "learning_rate": 0.001,
      "loss": 2.5098,
      "step": 182748
    },
    {
      "epoch": 35.09,
      "learning_rate": 0.001,
      "loss": 2.5083,
      "step": 182760
    },
    {
      "epoch": 35.09,
      "learning_rate": 0.001,
      "loss": 2.4921,
      "step": 182772
    },
    {
      "epoch": 35.1,
      "learning_rate": 0.001,
      "loss": 2.504,
      "step": 182784
    },
    {
      "epoch": 35.1,
      "learning_rate": 0.001,
      "loss": 2.5044,
      "step": 182796
    },
    {
      "epoch": 35.1,
      "learning_rate": 0.001,
      "loss": 2.5107,
      "step": 182808
    },
    {
      "epoch": 35.1,
      "learning_rate": 0.001,
      "loss": 2.5012,
      "step": 182820
    },
    {
      "epoch": 35.11,
      "learning_rate": 0.001,
      "loss": 2.5006,
      "step": 182832
    },
    {
      "epoch": 35.11,
      "learning_rate": 0.001,
      "loss": 2.5147,
      "step": 182844
    },
    {
      "epoch": 35.11,
      "learning_rate": 0.001,
      "loss": 2.5035,
      "step": 182856
    },
    {
      "epoch": 35.11,
      "learning_rate": 0.001,
      "loss": 2.5038,
      "step": 182868
    },
    {
      "epoch": 35.12,
      "learning_rate": 0.001,
      "loss": 2.502,
      "step": 182880
    },
    {
      "epoch": 35.12,
      "learning_rate": 0.001,
      "loss": 2.5058,
      "step": 182892
    },
    {
      "epoch": 35.12,
      "learning_rate": 0.001,
      "loss": 2.4899,
      "step": 182904
    },
    {
      "epoch": 35.12,
      "learning_rate": 0.001,
      "loss": 2.5128,
      "step": 182916
    },
    {
      "epoch": 35.12,
      "learning_rate": 0.001,
      "loss": 2.5001,
      "step": 182928
    },
    {
      "epoch": 35.13,
      "learning_rate": 0.001,
      "loss": 2.4961,
      "step": 182940
    },
    {
      "epoch": 35.13,
      "learning_rate": 0.001,
      "loss": 2.5109,
      "step": 182952
    },
    {
      "epoch": 35.13,
      "learning_rate": 0.001,
      "loss": 2.5069,
      "step": 182964
    },
    {
      "epoch": 35.13,
      "learning_rate": 0.001,
      "loss": 2.5125,
      "step": 182976
    },
    {
      "epoch": 35.14,
      "learning_rate": 0.001,
      "loss": 2.5077,
      "step": 182988
    },
    {
      "epoch": 35.14,
      "learning_rate": 0.001,
      "loss": 2.5055,
      "step": 183000
    },
    {
      "epoch": 35.14,
      "learning_rate": 0.001,
      "loss": 2.5068,
      "step": 183012
    },
    {
      "epoch": 35.14,
      "learning_rate": 0.001,
      "loss": 2.514,
      "step": 183024
    },
    {
      "epoch": 35.15,
      "learning_rate": 0.001,
      "loss": 2.5102,
      "step": 183036
    },
    {
      "epoch": 35.15,
      "learning_rate": 0.001,
      "loss": 2.5125,
      "step": 183048
    },
    {
      "epoch": 35.15,
      "learning_rate": 0.001,
      "loss": 2.5059,
      "step": 183060
    },
    {
      "epoch": 35.15,
      "learning_rate": 0.001,
      "loss": 2.5064,
      "step": 183072
    },
    {
      "epoch": 35.15,
      "learning_rate": 0.001,
      "loss": 2.5094,
      "step": 183084
    },
    {
      "epoch": 35.16,
      "learning_rate": 0.001,
      "loss": 2.5066,
      "step": 183096
    },
    {
      "epoch": 35.16,
      "learning_rate": 0.001,
      "loss": 2.5166,
      "step": 183108
    },
    {
      "epoch": 35.16,
      "learning_rate": 0.001,
      "loss": 2.5088,
      "step": 183120
    },
    {
      "epoch": 35.16,
      "eval_ag_news_accuracy": 0.32628125,
      "eval_ag_news_bleu_score": 5.1006828275326095,
      "eval_ag_news_bleu_score_sem": 0.1590254434094689,
      "eval_ag_news_emb_cos_sim": 0.8179818391799927,
      "eval_ag_news_emb_cos_sim_sem": 0.008325306499027376,
      "eval_ag_news_emb_top1_equal": 0.234375,
      "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5029258728027344,
      "eval_ag_news_n_ngrams_match_1": 14.602,
      "eval_ag_news_n_ngrams_match_2": 3.306,
      "eval_ag_news_n_ngrams_match_3": 1.024,
      "eval_ag_news_num_pred_words": 47.074,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.21248544352249,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3591954083076998,
      "eval_ag_news_runtime": 10.2184,
      "eval_ag_news_samples_per_second": 48.931,
      "eval_ag_news_steps_per_second": 0.098,
      "eval_ag_news_token_set_f1": 0.36251884270403273,
      "eval_ag_news_token_set_f1_sem": 0.00444502850364034,
      "eval_ag_news_token_set_precision": 0.34961905853337977,
      "eval_ag_news_token_set_recall": 0.38992204685461157,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 183125
    },
    {
      "epoch": 35.16,
      "eval_anthropic_toxic_prompts_accuracy": 0.1145625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.074671202479418,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12037268882944767,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.669966459274292,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009768134710624496,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2112460136413574,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.118,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.884,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.692,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.11,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.80998054742067,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21178055775520543,
      "eval_anthropic_toxic_prompts_runtime": 9.791,
      "eval_anthropic_toxic_prompts_samples_per_second": 51.067,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.102,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3474581877934118,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006651313609107666,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4330612943305178,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.31454256515335116,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 183125
    },
    {
      "epoch": 35.16,
      "eval_arxiv_accuracy": 0.35140625,
      "eval_arxiv_bleu_score": 4.41712638921239,
      "eval_arxiv_bleu_score_sem": 0.11733751366108557,
      "eval_arxiv_emb_cos_sim": 0.7801276445388794,
      "eval_arxiv_emb_cos_sim_sem": 0.006798413105352739,
      "eval_arxiv_emb_top1_equal": 0.25,
      "eval_arxiv_emb_top1_equal_sem": 0.03842366440207048,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.349822998046875,
      "eval_arxiv_n_ngrams_match_1": 15.456,
      "eval_arxiv_n_ngrams_match_2": 3.054,
      "eval_arxiv_n_ngrams_match_3": 0.652,
      "eval_arxiv_num_pred_words": 40.494,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.497689050707514,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.370027573022506,
      "eval_arxiv_runtime": 11.0212,
      "eval_arxiv_samples_per_second": 45.367,
      "eval_arxiv_steps_per_second": 0.091,
      "eval_arxiv_token_set_f1": 0.36386754280687483,
      "eval_arxiv_token_set_f1_sem": 0.004277710225902754,
      "eval_arxiv_token_set_precision": 0.3144962474657806,
      "eval_arxiv_token_set_recall": 0.44736028810800754,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 183125
    },
    {
      "epoch": 35.16,
      "eval_python_code_alpaca_accuracy": 0.16290625,
      "eval_python_code_alpaca_bleu_score": 4.710107004037594,
      "eval_python_code_alpaca_bleu_score_sem": 0.13745478952729376,
      "eval_python_code_alpaca_emb_cos_sim": 0.7673460841178894,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007453236178320849,
      "eval_python_code_alpaca_emb_top1_equal": 0.1328125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8564159870147705,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.212,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.016,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.984,
      "eval_python_code_alpaca_num_pred_words": 44.2,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.399056611395718,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.34731626653215175,
      "eval_python_code_alpaca_runtime": 9.9789,
      "eval_python_code_alpaca_samples_per_second": 50.106,
      "eval_python_code_alpaca_steps_per_second": 0.1,
      "eval_python_code_alpaca_token_set_f1": 0.490336283181496,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005118987669945117,
      "eval_python_code_alpaca_token_set_precision": 0.5582848326943095,
      "eval_python_code_alpaca_token_set_recall": 0.4597384872252604,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 183125
    },
    {
      "epoch": 35.16,
      "eval_wikibio_accuracy": 0.3280625,
      "eval_wikibio_bleu_score": 5.945074091605398,
      "eval_wikibio_bleu_score_sem": 0.2225386624734864,
      "eval_wikibio_emb_cos_sim": 0.7341788411140442,
      "eval_wikibio_emb_cos_sim_sem": 0.010411692721239051,
      "eval_wikibio_emb_top1_equal": 0.171875,
      "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6910667419433594,
      "eval_wikibio_n_ngrams_match_1": 9.96,
      "eval_wikibio_n_ngrams_match_2": 3.326,
      "eval_wikibio_n_ngrams_match_3": 1.23,
      "eval_wikibio_num_pred_words": 35.734,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 40.08758726753929,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3544598126452069,
      "eval_wikibio_runtime": 9.8404,
      "eval_wikibio_samples_per_second": 50.811,
      "eval_wikibio_steps_per_second": 0.102,
      "eval_wikibio_token_set_f1": 0.3165713037120058,
      "eval_wikibio_token_set_f1_sem": 0.005705833550776621,
      "eval_wikibio_token_set_precision": 0.32312242072404657,
      "eval_wikibio_token_set_recall": 0.3292619377690935,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 183125
    },
    {
      "epoch": 35.16,
      "eval_nq_accuracy": 0.53375,
      "eval_nq_bleu_score": 11.87979100615898,
      "eval_nq_bleu_score_sem": 0.47699644080441067,
      "eval_nq_emb_cos_sim": 0.8350943922996521,
      "eval_nq_emb_cos_sim_sem": 0.007061478587234057,
      "eval_nq_emb_top1_equal": 0.2734375,
      "eval_nq_emb_top1_equal_sem": 0.03955156411760461,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.142853021621704,
      "eval_nq_n_ngrams_match_1": 23.356,
      "eval_nq_n_ngrams_match_2": 8.68,
      "eval_nq_n_ngrams_match_3": 3.97,
      "eval_nq_num_pred_words": 49.004,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.523721332707789,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.452184831196799,
      "eval_nq_runtime": 10.3145,
      "eval_nq_samples_per_second": 48.476,
      "eval_nq_steps_per_second": 0.097,
      "eval_nq_token_set_f1": 0.4675832325458132,
      "eval_nq_token_set_f1_sem": 0.004837720111850746,
      "eval_nq_token_set_precision": 0.4255649798242282,
      "eval_nq_token_set_recall": 0.5265182351335549,
      "eval_nq_true_num_tokens": 64.0,
      "step": 183125
    },
    {
      "epoch": 35.16,
      "learning_rate": 0.001,
      "loss": 2.4987,
      "step": 183132
    },
    {
      "epoch": 35.17,
      "learning_rate": 0.001,
      "loss": 2.5073,
      "step": 183144
    },
    {
      "epoch": 35.17,
      "learning_rate": 0.001,
      "loss": 2.5013,
      "step": 183156
    },
    {
      "epoch": 35.17,
      "learning_rate": 0.001,
      "loss": 2.5088,
      "step": 183168
    },
    {
      "epoch": 35.17,
      "learning_rate": 0.001,
      "loss": 2.5091,
      "step": 183180
    },
    {
      "epoch": 35.18,
      "learning_rate": 0.001,
      "loss": 2.5127,
      "step": 183192
    },
    {
      "epoch": 35.18,
      "learning_rate": 0.001,
      "loss": 2.5173,
      "step": 183204
    },
    {
      "epoch": 35.18,
      "learning_rate": 0.001,
      "loss": 2.5148,
      "step": 183216
    },
    {
      "epoch": 35.18,
      "learning_rate": 0.001,
      "loss": 2.5057,
      "step": 183228
    },
    {
      "epoch": 35.18,
      "learning_rate": 0.001,
      "loss": 2.5058,
      "step": 183240
    },
    {
      "epoch": 35.19,
      "learning_rate": 0.001,
      "loss": 2.5034,
      "step": 183252
    },
    {
      "epoch": 35.19,
      "learning_rate": 0.001,
      "loss": 2.5071,
      "step": 183264
    },
    {
      "epoch": 35.19,
      "learning_rate": 0.001,
      "loss": 2.4994,
      "step": 183276
    },
    {
      "epoch": 35.19,
      "learning_rate": 0.001,
      "loss": 2.516,
      "step": 183288
    },
    {
      "epoch": 35.2,
      "learning_rate": 0.001,
      "loss": 2.5139,
      "step": 183300
    },
    {
      "epoch": 35.2,
      "learning_rate": 0.001,
      "loss": 2.5091,
      "step": 183312
    },
    {
      "epoch": 35.2,
      "learning_rate": 0.001,
      "loss": 2.5091,
      "step": 183324
    },
    {
      "epoch": 35.2,
      "learning_rate": 0.001,
      "loss": 2.5116,
      "step": 183336
    },
    {
      "epoch": 35.21,
      "learning_rate": 0.001,
      "loss": 2.5125,
      "step": 183348
    },
    {
      "epoch": 35.21,
      "learning_rate": 0.001,
      "loss": 2.5065,
      "step": 183360
    },
    {
      "epoch": 35.21,
      "learning_rate": 0.001,
      "loss": 2.5118,
      "step": 183372
    },
    {
      "epoch": 35.21,
      "learning_rate": 0.001,
      "loss": 2.5144,
      "step": 183384
    },
    {
      "epoch": 35.21,
      "learning_rate": 0.001,
      "loss": 2.5127,
      "step": 183396
    },
    {
      "epoch": 35.22,
      "learning_rate": 0.001,
      "loss": 2.5042,
      "step": 183408
    },
    {
      "epoch": 35.22,
      "learning_rate": 0.001,
      "loss": 2.5142,
      "step": 183420
    },
    {
      "epoch": 35.22,
      "learning_rate": 0.001,
      "loss": 2.5164,
      "step": 183432
    },
    {
      "epoch": 35.22,
      "learning_rate": 0.001,
      "loss": 2.5086,
      "step": 183444
    },
    {
      "epoch": 35.23,
      "learning_rate": 0.001,
      "loss": 2.5106,
      "step": 183456
    },
    {
      "epoch": 35.23,
      "learning_rate": 0.001,
      "loss": 2.5171,
      "step": 183468
    },
    {
      "epoch": 35.23,
      "learning_rate": 0.001,
      "loss": 2.505,
      "step": 183480
    },
    {
      "epoch": 35.23,
      "learning_rate": 0.001,
      "loss": 2.5016,
      "step": 183492
    },
    {
      "epoch": 35.24,
      "learning_rate": 0.001,
      "loss": 2.5034,
      "step": 183504
    },
    {
      "epoch": 35.24,
      "learning_rate": 0.001,
      "loss": 2.5169,
      "step": 183516
    },
    {
      "epoch": 35.24,
      "learning_rate": 0.001,
      "loss": 2.5131,
      "step": 183528
    },
    {
      "epoch": 35.24,
      "learning_rate": 0.001,
      "loss": 2.5069,
      "step": 183540
    },
    {
      "epoch": 35.24,
      "learning_rate": 0.001,
      "loss": 2.5073,
      "step": 183552
    },
    {
      "epoch": 35.25,
      "learning_rate": 0.001,
      "loss": 2.5138,
      "step": 183564
    },
    {
      "epoch": 35.25,
      "learning_rate": 0.001,
      "loss": 2.5062,
      "step": 183576
    },
    {
      "epoch": 35.25,
      "learning_rate": 0.001,
      "loss": 2.5,
      "step": 183588
    },
    {
      "epoch": 35.25,
      "learning_rate": 0.001,
      "loss": 2.5107,
      "step": 183600
    },
    {
      "epoch": 35.26,
      "learning_rate": 0.001,
      "loss": 2.5081,
      "step": 183612
    },
    {
      "epoch": 35.26,
      "learning_rate": 0.001,
      "loss": 2.5059,
      "step": 183624
    },
    {
      "epoch": 35.26,
      "learning_rate": 0.001,
      "loss": 2.5097,
      "step": 183636
    },
    {
      "epoch": 35.26,
      "learning_rate": 0.001,
      "loss": 2.5032,
      "step": 183648
    },
    {
      "epoch": 35.26,
      "learning_rate": 0.001,
      "loss": 2.497,
      "step": 183660
    },
    {
      "epoch": 35.27,
      "learning_rate": 0.001,
      "loss": 2.5126,
      "step": 183672
    },
    {
      "epoch": 35.27,
      "learning_rate": 0.001,
      "loss": 2.513,
      "step": 183684
    },
    {
      "epoch": 35.27,
      "learning_rate": 0.001,
      "loss": 2.5149,
      "step": 183696
    },
    {
      "epoch": 35.27,
      "learning_rate": 0.001,
      "loss": 2.5054,
      "step": 183708
    },
    {
      "epoch": 35.28,
      "learning_rate": 0.001,
      "loss": 2.5105,
      "step": 183720
    },
    {
      "epoch": 35.28,
      "learning_rate": 0.001,
      "loss": 2.507,
      "step": 183732
    },
    {
      "epoch": 35.28,
      "learning_rate": 0.001,
      "loss": 2.5093,
      "step": 183744
    },
    {
      "epoch": 35.28,
      "eval_ag_news_accuracy": 0.32928125,
      "eval_ag_news_bleu_score": 5.0350591745732345,
      "eval_ag_news_bleu_score_sem": 0.16332784797736272,
      "eval_ag_news_emb_cos_sim": 0.8193514943122864,
      "eval_ag_news_emb_cos_sim_sem": 0.00701078251392053,
      "eval_ag_news_emb_top1_equal": 0.2578125,
      "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4954283237457275,
      "eval_ag_news_n_ngrams_match_1": 14.414,
      "eval_ag_news_n_ngrams_match_2": 3.292,
      "eval_ag_news_n_ngrams_match_3": 0.93,
      "eval_ag_news_num_pred_words": 46.472,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.96440436674523,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3595132850810327,
      "eval_ag_news_runtime": 10.8547,
      "eval_ag_news_samples_per_second": 46.063,
      "eval_ag_news_steps_per_second": 0.092,
      "eval_ag_news_token_set_f1": 0.3593461006832739,
      "eval_ag_news_token_set_f1_sem": 0.004323817170071455,
      "eval_ag_news_token_set_precision": 0.3478113187945974,
      "eval_ag_news_token_set_recall": 0.385787166846377,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 183750
    },
    {
      "epoch": 35.28,
      "eval_anthropic_toxic_prompts_accuracy": 0.1151875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.0494604625327564,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11380085826770461,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6774520874023438,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008644036772534948,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2130579948425293,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.158,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.86,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.68,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.292,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.854976519390522,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21342173094804487,
      "eval_anthropic_toxic_prompts_runtime": 9.8464,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.78,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.102,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35934005578858935,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006353934031636077,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.43421591494584305,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.33496714228531227,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 183750
    },
    {
      "epoch": 35.28,
      "eval_arxiv_accuracy": 0.352125,
      "eval_arxiv_bleu_score": 4.59033915940503,
      "eval_arxiv_bleu_score_sem": 0.1384014495808851,
      "eval_arxiv_emb_cos_sim": 0.7802977561950684,
      "eval_arxiv_emb_cos_sim_sem": 0.0076035937432944465,
      "eval_arxiv_emb_top1_equal": 0.3203125,
      "eval_arxiv_emb_top1_equal_sem": 0.041403754790620424,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3495640754699707,
      "eval_arxiv_n_ngrams_match_1": 15.722,
      "eval_arxiv_n_ngrams_match_2": 3.176,
      "eval_arxiv_n_ngrams_match_3": 0.718,
      "eval_arxiv_num_pred_words": 41.162,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.49031131079562,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.37267638148118504,
      "eval_arxiv_runtime": 10.0572,
      "eval_arxiv_samples_per_second": 49.716,
      "eval_arxiv_steps_per_second": 0.099,
      "eval_arxiv_token_set_f1": 0.3699975210100182,
      "eval_arxiv_token_set_f1_sem": 0.004402498100178867,
      "eval_arxiv_token_set_precision": 0.32354111806907515,
      "eval_arxiv_token_set_recall": 0.4518563857565413,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 183750
    },
    {
      "epoch": 35.28,
      "eval_python_code_alpaca_accuracy": 0.1634375,
      "eval_python_code_alpaca_bleu_score": 4.707011325951003,
      "eval_python_code_alpaca_bleu_score_sem": 0.15056426823941502,
      "eval_python_code_alpaca_emb_cos_sim": 0.7669054865837097,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.00794338953852337,
      "eval_python_code_alpaca_emb_top1_equal": 0.1484375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.854538679122925,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.994,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.014,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.002,
      "eval_python_code_alpaca_num_pred_words": 44.276,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.366423865548054,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3374696312915223,
      "eval_python_code_alpaca_runtime": 9.9467,
      "eval_python_code_alpaca_samples_per_second": 50.268,
      "eval_python_code_alpaca_steps_per_second": 0.101,
      "eval_python_code_alpaca_token_set_f1": 0.48702681374330326,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005447475876108956,
      "eval_python_code_alpaca_token_set_precision": 0.5451984645091623,
      "eval_python_code_alpaca_token_set_recall": 0.46242735125039724,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 183750
    },
    {
      "epoch": 35.28,
      "eval_wikibio_accuracy": 0.32721875,
      "eval_wikibio_bleu_score": 6.370350377797937,
      "eval_wikibio_bleu_score_sem": 0.24008709359603292,
      "eval_wikibio_emb_cos_sim": 0.7414562702178955,
      "eval_wikibio_emb_cos_sim_sem": 0.009254724192578057,
      "eval_wikibio_emb_top1_equal": 0.15625,
      "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.651345729827881,
      "eval_wikibio_n_ngrams_match_1": 10.288,
      "eval_wikibio_n_ngrams_match_2": 3.514,
      "eval_wikibio_n_ngrams_match_3": 1.342,
      "eval_wikibio_num_pred_words": 35.738,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 38.52647740897916,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3627555634804943,
      "eval_wikibio_runtime": 9.8323,
      "eval_wikibio_samples_per_second": 50.853,
      "eval_wikibio_steps_per_second": 0.102,
      "eval_wikibio_token_set_f1": 0.3269860424070768,
      "eval_wikibio_token_set_f1_sem": 0.005464955681678214,
      "eval_wikibio_token_set_precision": 0.33299037134432585,
      "eval_wikibio_token_set_recall": 0.33610429493305144,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 183750
    },
    {
      "epoch": 35.28,
      "eval_nq_accuracy": 0.534875,
      "eval_nq_bleu_score": 12.090322151045951,
      "eval_nq_bleu_score_sem": 0.48937771440493366,
      "eval_nq_emb_cos_sim": 0.838043212890625,
      "eval_nq_emb_cos_sim_sem": 0.007222411718461349,
      "eval_nq_emb_top1_equal": 0.3203125,
      "eval_nq_emb_top1_equal_sem": 0.041403754790620424,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1370575428009033,
      "eval_nq_n_ngrams_match_1": 23.492,
      "eval_nq_n_ngrams_match_2": 8.674,
      "eval_nq_n_ngrams_match_3": 4.038,
      "eval_nq_num_pred_words": 49.254,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.474465155681264,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4544349350026854,
      "eval_nq_runtime": 10.787,
      "eval_nq_samples_per_second": 46.352,
      "eval_nq_steps_per_second": 0.093,
      "eval_nq_token_set_f1": 0.4681845048577129,
      "eval_nq_token_set_f1_sem": 0.005061039467446109,
      "eval_nq_token_set_precision": 0.42737175885008416,
      "eval_nq_token_set_recall": 0.5270632761557374,
      "eval_nq_true_num_tokens": 64.0,
      "step": 183750
    },
    {
      "epoch": 35.28,
      "learning_rate": 0.001,
      "loss": 2.5075,
      "step": 183756
    },
    {
      "epoch": 35.29,
      "learning_rate": 0.001,
      "loss": 2.5042,
      "step": 183768
    },
    {
      "epoch": 35.29,
      "learning_rate": 0.001,
      "loss": 2.5067,
      "step": 183780
    },
    {
      "epoch": 35.29,
      "learning_rate": 0.001,
      "loss": 2.4981,
      "step": 183792
    },
    {
      "epoch": 35.29,
      "learning_rate": 0.001,
      "loss": 2.5107,
      "step": 183804
    },
    {
      "epoch": 35.29,
      "learning_rate": 0.001,
      "loss": 2.5061,
      "step": 183816
    },
    {
      "epoch": 35.3,
      "learning_rate": 0.001,
      "loss": 2.5034,
      "step": 183828
    },
    {
      "epoch": 35.3,
      "learning_rate": 0.001,
      "loss": 2.5054,
      "step": 183840
    },
    {
      "epoch": 35.3,
      "learning_rate": 0.001,
      "loss": 2.4997,
      "step": 183852
    },
    {
      "epoch": 35.3,
      "learning_rate": 0.001,
      "loss": 2.5014,
      "step": 183864
    },
    {
      "epoch": 35.31,
      "learning_rate": 0.001,
      "loss": 2.5104,
      "step": 183876
    },
    {
      "epoch": 35.31,
      "learning_rate": 0.001,
      "loss": 2.499,
      "step": 183888
    },
    {
      "epoch": 35.31,
      "learning_rate": 0.001,
      "loss": 2.5132,
      "step": 183900
    },
    {
      "epoch": 35.31,
      "learning_rate": 0.001,
      "loss": 2.5055,
      "step": 183912
    },
    {
      "epoch": 35.32,
      "learning_rate": 0.001,
      "loss": 2.5074,
      "step": 183924
    },
    {
      "epoch": 35.32,
      "learning_rate": 0.001,
      "loss": 2.5087,
      "step": 183936
    },
    {
      "epoch": 35.32,
      "learning_rate": 0.001,
      "loss": 2.5033,
      "step": 183948
    },
    {
      "epoch": 35.32,
      "learning_rate": 0.001,
      "loss": 2.5033,
      "step": 183960
    },
    {
      "epoch": 35.32,
      "learning_rate": 0.001,
      "loss": 2.5086,
      "step": 183972
    },
    {
      "epoch": 35.33,
      "learning_rate": 0.001,
      "loss": 2.502,
      "step": 183984
    },
    {
      "epoch": 35.33,
      "learning_rate": 0.001,
      "loss": 2.5009,
      "step": 183996
    },
    {
      "epoch": 35.33,
      "learning_rate": 0.001,
      "loss": 2.5084,
      "step": 184008
    },
    {
      "epoch": 35.33,
      "learning_rate": 0.001,
      "loss": 2.496,
      "step": 184020
    },
    {
      "epoch": 35.34,
      "learning_rate": 0.001,
      "loss": 2.5064,
      "step": 184032
    },
    {
      "epoch": 35.34,
      "learning_rate": 0.001,
      "loss": 2.5076,
      "step": 184044
    },
    {
      "epoch": 35.34,
      "learning_rate": 0.001,
      "loss": 2.5025,
      "step": 184056
    },
    {
      "epoch": 35.34,
      "learning_rate": 0.001,
      "loss": 2.5021,
      "step": 184068
    },
    {
      "epoch": 35.35,
      "learning_rate": 0.001,
      "loss": 2.5157,
      "step": 184080
    },
    {
      "epoch": 35.35,
      "learning_rate": 0.001,
      "loss": 2.5101,
      "step": 184092
    },
    {
      "epoch": 35.35,
      "learning_rate": 0.001,
      "loss": 2.5027,
      "step": 184104
    },
    {
      "epoch": 35.35,
      "learning_rate": 0.001,
      "loss": 2.5012,
      "step": 184116
    },
    {
      "epoch": 35.35,
      "learning_rate": 0.001,
      "loss": 2.5191,
      "step": 184128
    },
    {
      "epoch": 35.36,
      "learning_rate": 0.001,
      "loss": 2.5076,
      "step": 184140
    },
    {
      "epoch": 35.36,
      "learning_rate": 0.001,
      "loss": 2.4954,
      "step": 184152
    },
    {
      "epoch": 35.36,
      "learning_rate": 0.001,
      "loss": 2.5009,
      "step": 184164
    },
    {
      "epoch": 35.36,
      "learning_rate": 0.001,
      "loss": 2.5116,
      "step": 184176
    },
    {
      "epoch": 35.37,
      "learning_rate": 0.001,
      "loss": 2.5036,
      "step": 184188
    },
    {
      "epoch": 35.37,
      "learning_rate": 0.001,
      "loss": 2.5058,
      "step": 184200
    },
    {
      "epoch": 35.37,
      "learning_rate": 0.001,
      "loss": 2.5067,
      "step": 184212
    },
    {
      "epoch": 35.37,
      "learning_rate": 0.001,
      "loss": 2.5132,
      "step": 184224
    },
    {
      "epoch": 35.38,
      "learning_rate": 0.001,
      "loss": 2.5076,
      "step": 184236
    },
    {
      "epoch": 35.38,
      "learning_rate": 0.001,
      "loss": 2.4969,
      "step": 184248
    },
    {
      "epoch": 35.38,
      "learning_rate": 0.001,
      "loss": 2.5083,
      "step": 184260
    },
    {
      "epoch": 35.38,
      "learning_rate": 0.001,
      "loss": 2.5178,
      "step": 184272
    },
    {
      "epoch": 35.38,
      "learning_rate": 0.001,
      "loss": 2.5092,
      "step": 184284
    },
    {
      "epoch": 35.39,
      "learning_rate": 0.001,
      "loss": 2.5021,
      "step": 184296
    },
    {
      "epoch": 35.39,
      "learning_rate": 0.001,
      "loss": 2.5071,
      "step": 184308
    },
    {
      "epoch": 35.39,
      "learning_rate": 0.001,
      "loss": 2.5105,
      "step": 184320
    },
    {
      "epoch": 35.39,
      "learning_rate": 0.001,
      "loss": 2.508,
      "step": 184332
    },
    {
      "epoch": 35.4,
      "learning_rate": 0.001,
      "loss": 2.5087,
      "step": 184344
    },
    {
      "epoch": 35.4,
      "learning_rate": 0.001,
      "loss": 2.4968,
      "step": 184356
    },
    {
      "epoch": 35.4,
      "learning_rate": 0.001,
      "loss": 2.5004,
      "step": 184368
    },
    {
      "epoch": 35.4,
      "eval_ag_news_accuracy": 0.3275,
      "eval_ag_news_bleu_score": 5.02233286554183,
      "eval_ag_news_bleu_score_sem": 0.15801342207363395,
      "eval_ag_news_emb_cos_sim": 0.815298318862915,
      "eval_ag_news_emb_cos_sim_sem": 0.007963512683053798,
      "eval_ag_news_emb_top1_equal": 0.25,
      "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4890224933624268,
      "eval_ag_news_n_ngrams_match_1": 14.428,
      "eval_ag_news_n_ngrams_match_2": 3.282,
      "eval_ag_news_n_ngrams_match_3": 0.974,
      "eval_ag_news_num_pred_words": 46.588,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.75391488342903,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.357313787349157,
      "eval_ag_news_runtime": 10.1978,
      "eval_ag_news_samples_per_second": 49.03,
      "eval_ag_news_steps_per_second": 0.098,
      "eval_ag_news_token_set_f1": 0.3594765000825628,
      "eval_ag_news_token_set_f1_sem": 0.004534021721079182,
      "eval_ag_news_token_set_precision": 0.3451295688016079,
      "eval_ag_news_token_set_recall": 0.39205604054841836,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 184375
    },
    {
      "epoch": 35.4,
      "eval_anthropic_toxic_prompts_accuracy": 0.11596875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.240663514343173,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12656691333969075,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.67909836769104,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.0089736218351676,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.232971429824829,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.18,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.938,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.77,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.806,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.354885410310242,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21533841683439475,
      "eval_anthropic_toxic_prompts_runtime": 9.8612,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.704,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.101,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.359768815340191,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006654633228710937,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4406971070022834,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32991240145405437,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 184375
    },
    {
      "epoch": 35.4,
      "eval_arxiv_accuracy": 0.35146875,
      "eval_arxiv_bleu_score": 4.525680555908677,
      "eval_arxiv_bleu_score_sem": 0.13355438782488488,
      "eval_arxiv_emb_cos_sim": 0.7820273041725159,
      "eval_arxiv_emb_cos_sim_sem": 0.006359780316187738,
      "eval_arxiv_emb_top1_equal": 0.3125,
      "eval_arxiv_emb_top1_equal_sem": 0.041130074229814934,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.33813214302063,
      "eval_arxiv_n_ngrams_match_1": 15.652,
      "eval_arxiv_n_ngrams_match_2": 3.116,
      "eval_arxiv_n_ngrams_match_3": 0.708,
      "eval_arxiv_num_pred_words": 40.882,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.166466608715197,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.37272214258905817,
      "eval_arxiv_runtime": 9.9784,
      "eval_arxiv_samples_per_second": 50.108,
      "eval_arxiv_steps_per_second": 0.1,
      "eval_arxiv_token_set_f1": 0.36720845480659464,
      "eval_arxiv_token_set_f1_sem": 0.004259083778969454,
      "eval_arxiv_token_set_precision": 0.3189695068701229,
      "eval_arxiv_token_set_recall": 0.4496757211632873,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 184375
    },
    {
      "epoch": 35.4,
      "eval_python_code_alpaca_accuracy": 0.1609375,
      "eval_python_code_alpaca_bleu_score": 4.6095404941324825,
      "eval_python_code_alpaca_bleu_score_sem": 0.13289128628504768,
      "eval_python_code_alpaca_emb_cos_sim": 0.7650666832923889,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007235849977691245,
      "eval_python_code_alpaca_emb_top1_equal": 0.1015625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.026804565886848545,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.85518217086792,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.892,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.0,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.996,
      "eval_python_code_alpaca_num_pred_words": 44.232,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.3776026122755,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33486988706002796,
      "eval_python_code_alpaca_runtime": 10.4916,
      "eval_python_code_alpaca_samples_per_second": 47.657,
      "eval_python_code_alpaca_steps_per_second": 0.095,
      "eval_python_code_alpaca_token_set_f1": 0.48908110645062036,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005396519787064682,
      "eval_python_code_alpaca_token_set_precision": 0.5410059954155667,
      "eval_python_code_alpaca_token_set_recall": 0.4670958976323572,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 184375
    },
    {
      "epoch": 35.4,
      "eval_wikibio_accuracy": 0.332375,
      "eval_wikibio_bleu_score": 6.125300218373875,
      "eval_wikibio_bleu_score_sem": 0.2167732904141481,
      "eval_wikibio_emb_cos_sim": 0.7407878041267395,
      "eval_wikibio_emb_cos_sim_sem": 0.009500050461047842,
      "eval_wikibio_emb_top1_equal": 0.15625,
      "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6234383583068848,
      "eval_wikibio_n_ngrams_match_1": 10.216,
      "eval_wikibio_n_ngrams_match_2": 3.444,
      "eval_wikibio_n_ngrams_match_3": 1.282,
      "eval_wikibio_num_pred_words": 36.448,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.46616871981944,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.35692812588875417,
      "eval_wikibio_runtime": 10.0085,
      "eval_wikibio_samples_per_second": 49.957,
      "eval_wikibio_steps_per_second": 0.1,
      "eval_wikibio_token_set_f1": 0.3241375753432324,
      "eval_wikibio_token_set_f1_sem": 0.0054715503618298755,
      "eval_wikibio_token_set_precision": 0.33067655336441815,
      "eval_wikibio_token_set_recall": 0.3367545729514021,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 184375
    },
    {
      "epoch": 35.4,
      "eval_nq_accuracy": 0.53440625,
      "eval_nq_bleu_score": 12.143635553439868,
      "eval_nq_bleu_score_sem": 0.4866815622986901,
      "eval_nq_emb_cos_sim": 0.8312341570854187,
      "eval_nq_emb_cos_sim_sem": 0.007448146946363158,
      "eval_nq_emb_top1_equal": 0.28125,
      "eval_nq_emb_top1_equal_sem": 0.039896367485272234,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1362648010253906,
      "eval_nq_n_ngrams_match_1": 23.462,
      "eval_nq_n_ngrams_match_2": 8.722,
      "eval_nq_n_ngrams_match_3": 4.032,
      "eval_nq_num_pred_words": 49.122,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.467749755268137,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4550932441780584,
      "eval_nq_runtime": 10.1267,
      "eval_nq_samples_per_second": 49.375,
      "eval_nq_steps_per_second": 0.099,
      "eval_nq_token_set_f1": 0.46999534890370204,
      "eval_nq_token_set_f1_sem": 0.004968356697842554,
      "eval_nq_token_set_precision": 0.4265905046927454,
      "eval_nq_token_set_recall": 0.5309044132825025,
      "eval_nq_true_num_tokens": 64.0,
      "step": 184375
    },
    {
      "epoch": 35.4,
      "learning_rate": 0.001,
      "loss": 2.5042,
      "step": 184380
    },
    {
      "epoch": 35.41,
      "learning_rate": 0.001,
      "loss": 2.5063,
      "step": 184392
    },
    {
      "epoch": 35.41,
      "learning_rate": 0.001,
      "loss": 2.5048,
      "step": 184404
    },
    {
      "epoch": 35.41,
      "learning_rate": 0.001,
      "loss": 2.5215,
      "step": 184416
    },
    {
      "epoch": 35.41,
      "learning_rate": 0.001,
      "loss": 2.5133,
      "step": 184428
    },
    {
      "epoch": 35.41,
      "learning_rate": 0.001,
      "loss": 2.5071,
      "step": 184440
    },
    {
      "epoch": 35.42,
      "learning_rate": 0.001,
      "loss": 2.5024,
      "step": 184452
    },
    {
      "epoch": 35.42,
      "learning_rate": 0.001,
      "loss": 2.5143,
      "step": 184464
    },
    {
      "epoch": 35.42,
      "learning_rate": 0.001,
      "loss": 2.5082,
      "step": 184476
    },
    {
      "epoch": 35.42,
      "learning_rate": 0.001,
      "loss": 2.5059,
      "step": 184488
    },
    {
      "epoch": 35.43,
      "learning_rate": 0.001,
      "loss": 2.5098,
      "step": 184500
    },
    {
      "epoch": 35.43,
      "learning_rate": 0.001,
      "loss": 2.5083,
      "step": 184512
    },
    {
      "epoch": 35.43,
      "learning_rate": 0.001,
      "loss": 2.5104,
      "step": 184524
    },
    {
      "epoch": 35.43,
      "learning_rate": 0.001,
      "loss": 2.5124,
      "step": 184536
    },
    {
      "epoch": 35.44,
      "learning_rate": 0.001,
      "loss": 2.5095,
      "step": 184548
    },
    {
      "epoch": 35.44,
      "learning_rate": 0.001,
      "loss": 2.4981,
      "step": 184560
    },
    {
      "epoch": 35.44,
      "learning_rate": 0.001,
      "loss": 2.5137,
      "step": 184572
    },
    {
      "epoch": 35.44,
      "learning_rate": 0.001,
      "loss": 2.5085,
      "step": 184584
    },
    {
      "epoch": 35.44,
      "learning_rate": 0.001,
      "loss": 2.5138,
      "step": 184596
    },
    {
      "epoch": 35.45,
      "learning_rate": 0.001,
      "loss": 2.5107,
      "step": 184608
    },
    {
      "epoch": 35.45,
      "learning_rate": 0.001,
      "loss": 2.5088,
      "step": 184620
    },
    {
      "epoch": 35.45,
      "learning_rate": 0.001,
      "loss": 2.5136,
      "step": 184632
    },
    {
      "epoch": 35.45,
      "learning_rate": 0.001,
      "loss": 2.5105,
      "step": 184644
    },
    {
      "epoch": 35.46,
      "learning_rate": 0.001,
      "loss": 2.5042,
      "step": 184656
    },
    {
      "epoch": 35.46,
      "learning_rate": 0.001,
      "loss": 2.5034,
      "step": 184668
    },
    {
      "epoch": 35.46,
      "learning_rate": 0.001,
      "loss": 2.5175,
      "step": 184680
    },
    {
      "epoch": 35.46,
      "learning_rate": 0.001,
      "loss": 2.5097,
      "step": 184692
    },
    {
      "epoch": 35.47,
      "learning_rate": 0.001,
      "loss": 2.507,
      "step": 184704
    },
    {
      "epoch": 35.47,
      "learning_rate": 0.001,
      "loss": 2.508,
      "step": 184716
    },
    {
      "epoch": 35.47,
      "learning_rate": 0.001,
      "loss": 2.5081,
      "step": 184728
    },
    {
      "epoch": 35.47,
      "learning_rate": 0.001,
      "loss": 2.494,
      "step": 184740
    },
    {
      "epoch": 35.47,
      "learning_rate": 0.001,
      "loss": 2.5043,
      "step": 184752
    },
    {
      "epoch": 35.48,
      "learning_rate": 0.001,
      "loss": 2.5199,
      "step": 184764
    },
    {
      "epoch": 35.48,
      "learning_rate": 0.001,
      "loss": 2.5124,
      "step": 184776
    },
    {
      "epoch": 35.48,
      "learning_rate": 0.001,
      "loss": 2.5142,
      "step": 184788
    },
    {
      "epoch": 35.48,
      "learning_rate": 0.001,
      "loss": 2.5085,
      "step": 184800
    },
    {
      "epoch": 35.49,
      "learning_rate": 0.001,
      "loss": 2.5007,
      "step": 184812
    },
    {
      "epoch": 35.49,
      "learning_rate": 0.001,
      "loss": 2.5139,
      "step": 184824
    },
    {
      "epoch": 35.49,
      "learning_rate": 0.001,
      "loss": 2.5112,
      "step": 184836
    },
    {
      "epoch": 35.49,
      "learning_rate": 0.001,
      "loss": 2.5148,
      "step": 184848
    },
    {
      "epoch": 35.5,
      "learning_rate": 0.001,
      "loss": 2.5113,
      "step": 184860
    },
    {
      "epoch": 35.5,
      "learning_rate": 0.001,
      "loss": 2.514,
      "step": 184872
    },
    {
      "epoch": 35.5,
      "learning_rate": 0.001,
      "loss": 2.5068,
      "step": 184884
    },
    {
      "epoch": 35.5,
      "learning_rate": 0.001,
      "loss": 2.5002,
      "step": 184896
    },
    {
      "epoch": 35.5,
      "learning_rate": 0.001,
      "loss": 2.5087,
      "step": 184908
    },
    {
      "epoch": 35.51,
      "learning_rate": 0.001,
      "loss": 2.51,
      "step": 184920
    },
    {
      "epoch": 35.51,
      "learning_rate": 0.001,
      "loss": 2.507,
      "step": 184932
    },
    {
      "epoch": 35.51,
      "learning_rate": 0.001,
      "loss": 2.5098,
      "step": 184944
    },
    {
      "epoch": 35.51,
      "learning_rate": 0.001,
      "loss": 2.5083,
      "step": 184956
    },
    {
      "epoch": 35.52,
      "learning_rate": 0.001,
      "loss": 2.5049,
      "step": 184968
    },
    {
      "epoch": 35.52,
      "learning_rate": 0.001,
      "loss": 2.5107,
      "step": 184980
    },
    {
      "epoch": 35.52,
      "learning_rate": 0.001,
      "loss": 2.5111,
      "step": 184992
    },
    {
      "epoch": 35.52,
      "eval_ag_news_accuracy": 0.32815625,
      "eval_ag_news_bleu_score": 5.118036734372337,
      "eval_ag_news_bleu_score_sem": 0.16270480864003506,
      "eval_ag_news_emb_cos_sim": 0.8269373178482056,
      "eval_ag_news_emb_cos_sim_sem": 0.006093308183566626,
      "eval_ag_news_emb_top1_equal": 0.234375,
      "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4824256896972656,
      "eval_ag_news_n_ngrams_match_1": 14.522,
      "eval_ag_news_n_ngrams_match_2": 3.398,
      "eval_ag_news_n_ngrams_match_3": 0.94,
      "eval_ag_news_num_pred_words": 46.446,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.53855486256443,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.36297070002481496,
      "eval_ag_news_runtime": 10.445,
      "eval_ag_news_samples_per_second": 47.87,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.36068298264861176,
      "eval_ag_news_token_set_f1_sem": 0.004496430395808484,
      "eval_ag_news_token_set_precision": 0.3475763128841663,
      "eval_ag_news_token_set_recall": 0.3881483723768435,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 185000
    },
    {
      "epoch": 35.52,
      "eval_anthropic_toxic_prompts_accuracy": 0.115875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.315489923441029,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.13205011448401568,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6768086552619934,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00871899625970059,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2032787799835205,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.292,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.99,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.794,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.874,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.61309897753817,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21726773142373662,
      "eval_anthropic_toxic_prompts_runtime": 9.8208,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.912,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.102,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36199767072267736,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006773983183416234,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4412133415317718,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.33268008669339616,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 185000
    },
    {
      "epoch": 35.52,
      "eval_arxiv_accuracy": 0.35125,
      "eval_arxiv_bleu_score": 4.4631570942365615,
      "eval_arxiv_bleu_score_sem": 0.13282769479159373,
      "eval_arxiv_emb_cos_sim": 0.7777361869812012,
      "eval_arxiv_emb_cos_sim_sem": 0.006621751297096788,
      "eval_arxiv_emb_top1_equal": 0.3046875,
      "eval_arxiv_emb_top1_equal_sem": 0.04084279867618665,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3531644344329834,
      "eval_arxiv_n_ngrams_match_1": 15.354,
      "eval_arxiv_n_ngrams_match_2": 3.01,
      "eval_arxiv_n_ngrams_match_3": 0.678,
      "eval_arxiv_num_pred_words": 40.414,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.593071534325226,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36850896703069475,
      "eval_arxiv_runtime": 10.28,
      "eval_arxiv_samples_per_second": 48.638,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.3603864486751362,
      "eval_arxiv_token_set_f1_sem": 0.004118420268863841,
      "eval_arxiv_token_set_precision": 0.31294214276877663,
      "eval_arxiv_token_set_recall": 0.44204019759506297,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 185000
    },
    {
      "epoch": 35.52,
      "eval_python_code_alpaca_accuracy": 0.160875,
      "eval_python_code_alpaca_bleu_score": 4.581966012971516,
      "eval_python_code_alpaca_bleu_score_sem": 0.14082114601207388,
      "eval_python_code_alpaca_emb_cos_sim": 0.7654900550842285,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007408754130984648,
      "eval_python_code_alpaca_emb_top1_equal": 0.09375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.025864720141013958,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8690898418426514,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.854,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.892,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.95,
      "eval_python_code_alpaca_num_pred_words": 43.172,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.620973026781638,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33969685596733734,
      "eval_python_code_alpaca_runtime": 10.9417,
      "eval_python_code_alpaca_samples_per_second": 45.697,
      "eval_python_code_alpaca_steps_per_second": 0.091,
      "eval_python_code_alpaca_token_set_f1": 0.4823213908180107,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005541826362876887,
      "eval_python_code_alpaca_token_set_precision": 0.5359328765540199,
      "eval_python_code_alpaca_token_set_recall": 0.45833553577148034,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 185000
    },
    {
      "epoch": 35.52,
      "eval_wikibio_accuracy": 0.32946875,
      "eval_wikibio_bleu_score": 6.163874855344158,
      "eval_wikibio_bleu_score_sem": 0.2188857488939065,
      "eval_wikibio_emb_cos_sim": 0.7442362904548645,
      "eval_wikibio_emb_cos_sim_sem": 0.009862612139917728,
      "eval_wikibio_emb_top1_equal": 0.203125,
      "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.5925068855285645,
      "eval_wikibio_n_ngrams_match_1": 10.242,
      "eval_wikibio_n_ngrams_match_2": 3.472,
      "eval_wikibio_n_ngrams_match_3": 1.23,
      "eval_wikibio_num_pred_words": 35.656,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 36.32502455833161,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3661457900991682,
      "eval_wikibio_runtime": 9.8336,
      "eval_wikibio_samples_per_second": 50.846,
      "eval_wikibio_steps_per_second": 0.102,
      "eval_wikibio_token_set_f1": 0.32827031754490676,
      "eval_wikibio_token_set_f1_sem": 0.005304280767558676,
      "eval_wikibio_token_set_precision": 0.3328358761016736,
      "eval_wikibio_token_set_recall": 0.34035115610550265,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 185000
    },
    {
      "epoch": 35.52,
      "eval_nq_accuracy": 0.53575,
      "eval_nq_bleu_score": 11.732349230505335,
      "eval_nq_bleu_score_sem": 0.4648652582328716,
      "eval_nq_emb_cos_sim": 0.8390904664993286,
      "eval_nq_emb_cos_sim_sem": 0.006988886567574614,
      "eval_nq_emb_top1_equal": 0.2578125,
      "eval_nq_emb_top1_equal_sem": 0.038815656435002115,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.132225275039673,
      "eval_nq_n_ngrams_match_1": 23.304,
      "eval_nq_n_ngrams_match_2": 8.6,
      "eval_nq_n_ngrams_match_3": 3.892,
      "eval_nq_num_pred_words": 48.854,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.433613054555531,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4529422530628724,
      "eval_nq_runtime": 10.4006,
      "eval_nq_samples_per_second": 48.074,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.4695926394503976,
      "eval_nq_token_set_f1_sem": 0.005010782784351347,
      "eval_nq_token_set_precision": 0.4266045585364064,
      "eval_nq_token_set_recall": 0.5310916596797804,
      "eval_nq_true_num_tokens": 64.0,
      "step": 185000
    },
    {
      "epoch": 35.52,
      "learning_rate": 0.001,
      "loss": 2.5151,
      "step": 185004
    },
    {
      "epoch": 35.53,
      "learning_rate": 0.001,
      "loss": 2.52,
      "step": 185016
    },
    {
      "epoch": 35.53,
      "learning_rate": 0.001,
      "loss": 2.4974,
      "step": 185028
    },
    {
      "epoch": 35.53,
      "learning_rate": 0.001,
      "loss": 2.5116,
      "step": 185040
    },
    {
      "epoch": 35.53,
      "learning_rate": 0.001,
      "loss": 2.5083,
      "step": 185052
    },
    {
      "epoch": 35.53,
      "learning_rate": 0.001,
      "loss": 2.5142,
      "step": 185064
    },
    {
      "epoch": 35.54,
      "learning_rate": 0.001,
      "loss": 2.5086,
      "step": 185076
    },
    {
      "epoch": 35.54,
      "learning_rate": 0.001,
      "loss": 2.5127,
      "step": 185088
    },
    {
      "epoch": 35.54,
      "learning_rate": 0.001,
      "loss": 2.5179,
      "step": 185100
    },
    {
      "epoch": 35.54,
      "learning_rate": 0.001,
      "loss": 2.5185,
      "step": 185112
    },
    {
      "epoch": 35.55,
      "learning_rate": 0.001,
      "loss": 2.5165,
      "step": 185124
    },
    {
      "epoch": 35.55,
      "learning_rate": 0.001,
      "loss": 2.5162,
      "step": 185136
    },
    {
      "epoch": 35.55,
      "learning_rate": 0.001,
      "loss": 2.5125,
      "step": 185148
    },
    {
      "epoch": 35.55,
      "learning_rate": 0.001,
      "loss": 2.5092,
      "step": 185160
    },
    {
      "epoch": 35.56,
      "learning_rate": 0.001,
      "loss": 2.513,
      "step": 185172
    },
    {
      "epoch": 35.56,
      "learning_rate": 0.001,
      "loss": 2.5016,
      "step": 185184
    },
    {
      "epoch": 35.56,
      "learning_rate": 0.001,
      "loss": 2.5107,
      "step": 185196
    },
    {
      "epoch": 35.56,
      "learning_rate": 0.001,
      "loss": 2.5073,
      "step": 185208
    },
    {
      "epoch": 35.56,
      "learning_rate": 0.001,
      "loss": 2.511,
      "step": 185220
    },
    {
      "epoch": 35.57,
      "learning_rate": 0.001,
      "loss": 2.5204,
      "step": 185232
    },
    {
      "epoch": 35.57,
      "learning_rate": 0.001,
      "loss": 2.5122,
      "step": 185244
    },
    {
      "epoch": 35.57,
      "learning_rate": 0.001,
      "loss": 2.5163,
      "step": 185256
    },
    {
      "epoch": 35.57,
      "learning_rate": 0.001,
      "loss": 2.5114,
      "step": 185268
    },
    {
      "epoch": 35.58,
      "learning_rate": 0.001,
      "loss": 2.5109,
      "step": 185280
    },
    {
      "epoch": 35.58,
      "learning_rate": 0.001,
      "loss": 2.5131,
      "step": 185292
    },
    {
      "epoch": 35.58,
      "learning_rate": 0.001,
      "loss": 2.4937,
      "step": 185304
    },
    {
      "epoch": 35.58,
      "learning_rate": 0.001,
      "loss": 2.5162,
      "step": 185316
    },
    {
      "epoch": 35.59,
      "learning_rate": 0.001,
      "loss": 2.5137,
      "step": 185328
    },
    {
      "epoch": 35.59,
      "learning_rate": 0.001,
      "loss": 2.5083,
      "step": 185340
    },
    {
      "epoch": 35.59,
      "learning_rate": 0.001,
      "loss": 2.5097,
      "step": 185352
    },
    {
      "epoch": 35.59,
      "learning_rate": 0.001,
      "loss": 2.4987,
      "step": 185364
    },
    {
      "epoch": 35.59,
      "learning_rate": 0.001,
      "loss": 2.514,
      "step": 185376
    },
    {
      "epoch": 35.6,
      "learning_rate": 0.001,
      "loss": 2.5093,
      "step": 185388
    },
    {
      "epoch": 35.6,
      "learning_rate": 0.001,
      "loss": 2.5179,
      "step": 185400
    },
    {
      "epoch": 35.6,
      "learning_rate": 0.001,
      "loss": 2.5088,
      "step": 185412
    },
    {
      "epoch": 35.6,
      "learning_rate": 0.001,
      "loss": 2.5215,
      "step": 185424
    },
    {
      "epoch": 35.61,
      "learning_rate": 0.001,
      "loss": 2.5098,
      "step": 185436
    },
    {
      "epoch": 35.61,
      "learning_rate": 0.001,
      "loss": 2.5054,
      "step": 185448
    },
    {
      "epoch": 35.61,
      "learning_rate": 0.001,
      "loss": 2.5115,
      "step": 185460
    },
    {
      "epoch": 35.61,
      "learning_rate": 0.001,
      "loss": 2.5121,
      "step": 185472
    },
    {
      "epoch": 35.62,
      "learning_rate": 0.001,
      "loss": 2.5103,
      "step": 185484
    },
    {
      "epoch": 35.62,
      "learning_rate": 0.001,
      "loss": 2.5063,
      "step": 185496
    },
    {
      "epoch": 35.62,
      "learning_rate": 0.001,
      "loss": 2.5157,
      "step": 185508
    },
    {
      "epoch": 35.62,
      "learning_rate": 0.001,
      "loss": 2.5114,
      "step": 185520
    },
    {
      "epoch": 35.62,
      "learning_rate": 0.001,
      "loss": 2.5048,
      "step": 185532
    },
    {
      "epoch": 35.63,
      "learning_rate": 0.001,
      "loss": 2.5122,
      "step": 185544
    },
    {
      "epoch": 35.63,
      "learning_rate": 0.001,
      "loss": 2.52,
      "step": 185556
    },
    {
      "epoch": 35.63,
      "learning_rate": 0.001,
      "loss": 2.5124,
      "step": 185568
    },
    {
      "epoch": 35.63,
      "learning_rate": 0.001,
      "loss": 2.5102,
      "step": 185580
    },
    {
      "epoch": 35.64,
      "learning_rate": 0.001,
      "loss": 2.5158,
      "step": 185592
    },
    {
      "epoch": 35.64,
      "learning_rate": 0.001,
      "loss": 2.5191,
      "step": 185604
    },
    {
      "epoch": 35.64,
      "learning_rate": 0.001,
      "loss": 2.5181,
      "step": 185616
    },
    {
      "epoch": 35.64,
      "eval_ag_news_accuracy": 0.32784375,
      "eval_ag_news_bleu_score": 4.969652937819136,
      "eval_ag_news_bleu_score_sem": 0.15497264702360217,
      "eval_ag_news_emb_cos_sim": 0.8214932680130005,
      "eval_ag_news_emb_cos_sim_sem": 0.0066276590998779,
      "eval_ag_news_emb_top1_equal": 0.21875,
      "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.499420404434204,
      "eval_ag_news_n_ngrams_match_1": 14.482,
      "eval_ag_news_n_ngrams_match_2": 3.21,
      "eval_ag_news_n_ngrams_match_3": 0.892,
      "eval_ag_news_num_pred_words": 46.66,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.09626395075704,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.36044240578901077,
      "eval_ag_news_runtime": 10.3922,
      "eval_ag_news_samples_per_second": 48.113,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.3616891505286832,
      "eval_ag_news_token_set_f1_sem": 0.004351432475538731,
      "eval_ag_news_token_set_precision": 0.3477839170834988,
      "eval_ag_news_token_set_recall": 0.3910548282202589,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 185625
    },
    {
      "epoch": 35.64,
      "eval_anthropic_toxic_prompts_accuracy": 0.11646875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.330412032871209,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.13440409642053766,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6757363080978394,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009961537378817876,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1328125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030114394778901498,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2004940509796143,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.346,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.018,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.788,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.116,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.544653512206903,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2154228106433061,
      "eval_anthropic_toxic_prompts_runtime": 9.7583,
      "eval_anthropic_toxic_prompts_samples_per_second": 51.238,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.102,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3609275836280132,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006777635325448888,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44523539864379863,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32516753286760575,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 185625
    },
    {
      "epoch": 35.64,
      "eval_arxiv_accuracy": 0.3505625,
      "eval_arxiv_bleu_score": 4.55711877997823,
      "eval_arxiv_bleu_score_sem": 0.13947960612331903,
      "eval_arxiv_emb_cos_sim": 0.777479887008667,
      "eval_arxiv_emb_cos_sim_sem": 0.0074299316052723555,
      "eval_arxiv_emb_top1_equal": 0.2734375,
      "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3542287349700928,
      "eval_arxiv_n_ngrams_match_1": 15.428,
      "eval_arxiv_n_ngrams_match_2": 3.048,
      "eval_arxiv_n_ngrams_match_3": 0.72,
      "eval_arxiv_num_pred_words": 40.398,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.623519355659003,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.37035073746534186,
      "eval_arxiv_runtime": 9.9105,
      "eval_arxiv_samples_per_second": 50.451,
      "eval_arxiv_steps_per_second": 0.101,
      "eval_arxiv_token_set_f1": 0.36150438516886774,
      "eval_arxiv_token_set_f1_sem": 0.004284446126411858,
      "eval_arxiv_token_set_precision": 0.31371889064225117,
      "eval_arxiv_token_set_recall": 0.4444403661489109,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 185625
    },
    {
      "epoch": 35.64,
      "eval_python_code_alpaca_accuracy": 0.16165625,
      "eval_python_code_alpaca_bleu_score": 5.122092272355227,
      "eval_python_code_alpaca_bleu_score_sem": 0.16315070949104568,
      "eval_python_code_alpaca_emb_cos_sim": 0.7703523635864258,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007096247103077512,
      "eval_python_code_alpaca_emb_top1_equal": 0.1484375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.857260227203369,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.246,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.25,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.208,
      "eval_python_code_alpaca_num_pred_words": 44.29,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.413751796490857,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.34227950149125086,
      "eval_python_code_alpaca_runtime": 9.7595,
      "eval_python_code_alpaca_samples_per_second": 51.232,
      "eval_python_code_alpaca_steps_per_second": 0.102,
      "eval_python_code_alpaca_token_set_f1": 0.4877912364045303,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005282165823117606,
      "eval_python_code_alpaca_token_set_precision": 0.5613433088662251,
      "eval_python_code_alpaca_token_set_recall": 0.4497674515220422,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 185625
    },
    {
      "epoch": 35.64,
      "eval_wikibio_accuracy": 0.325875,
      "eval_wikibio_bleu_score": 6.1927008019799725,
      "eval_wikibio_bleu_score_sem": 0.21627049446623878,
      "eval_wikibio_emb_cos_sim": 0.7465753555297852,
      "eval_wikibio_emb_cos_sim_sem": 0.009358737346346879,
      "eval_wikibio_emb_top1_equal": 0.21875,
      "eval_wikibio_emb_top1_equal_sem": 0.03668319712192295,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6515841484069824,
      "eval_wikibio_n_ngrams_match_1": 10.244,
      "eval_wikibio_n_ngrams_match_2": 3.436,
      "eval_wikibio_n_ngrams_match_3": 1.302,
      "eval_wikibio_num_pred_words": 36.276,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 38.535663932056174,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36233505883510564,
      "eval_wikibio_runtime": 9.9574,
      "eval_wikibio_samples_per_second": 50.214,
      "eval_wikibio_steps_per_second": 0.1,
      "eval_wikibio_token_set_f1": 0.32679210392980357,
      "eval_wikibio_token_set_f1_sem": 0.0052421915807275565,
      "eval_wikibio_token_set_precision": 0.33289120139715345,
      "eval_wikibio_token_set_recall": 0.3387257390609709,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 185625
    },
    {
      "epoch": 35.64,
      "eval_nq_accuracy": 0.5338125,
      "eval_nq_bleu_score": 11.953313877716289,
      "eval_nq_bleu_score_sem": 0.47472028892851664,
      "eval_nq_emb_cos_sim": 0.8371002674102783,
      "eval_nq_emb_cos_sim_sem": 0.0070204506058422255,
      "eval_nq_emb_top1_equal": 0.3203125,
      "eval_nq_emb_top1_equal_sem": 0.041403754790620424,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1374948024749756,
      "eval_nq_n_ngrams_match_1": 23.538,
      "eval_nq_n_ngrams_match_2": 8.624,
      "eval_nq_n_ngrams_match_3": 3.966,
      "eval_nq_num_pred_words": 49.504,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.478171507813284,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4566189836674177,
      "eval_nq_runtime": 10.8201,
      "eval_nq_samples_per_second": 46.21,
      "eval_nq_steps_per_second": 0.092,
      "eval_nq_token_set_f1": 0.4696661307912864,
      "eval_nq_token_set_f1_sem": 0.00488059178251212,
      "eval_nq_token_set_precision": 0.4286679982147181,
      "eval_nq_token_set_recall": 0.5257964283648926,
      "eval_nq_true_num_tokens": 64.0,
      "step": 185625
    },
    {
      "epoch": 35.64,
      "learning_rate": 0.001,
      "loss": 2.5138,
      "step": 185628
    },
    {
      "epoch": 35.65,
      "learning_rate": 0.001,
      "loss": 2.5078,
      "step": 185640
    },
    {
      "epoch": 35.65,
      "learning_rate": 0.001,
      "loss": 2.5142,
      "step": 185652
    },
    {
      "epoch": 35.65,
      "learning_rate": 0.001,
      "loss": 2.5154,
      "step": 185664
    },
    {
      "epoch": 35.65,
      "learning_rate": 0.001,
      "loss": 2.5086,
      "step": 185676
    },
    {
      "epoch": 35.65,
      "learning_rate": 0.001,
      "loss": 2.5159,
      "step": 185688
    },
    {
      "epoch": 35.66,
      "learning_rate": 0.001,
      "loss": 2.5166,
      "step": 185700
    },
    {
      "epoch": 35.66,
      "learning_rate": 0.001,
      "loss": 2.5097,
      "step": 185712
    },
    {
      "epoch": 35.66,
      "learning_rate": 0.001,
      "loss": 2.5176,
      "step": 185724
    },
    {
      "epoch": 35.66,
      "learning_rate": 0.001,
      "loss": 2.504,
      "step": 185736
    },
    {
      "epoch": 35.67,
      "learning_rate": 0.001,
      "loss": 2.5159,
      "step": 185748
    },
    {
      "epoch": 35.67,
      "learning_rate": 0.001,
      "loss": 2.5124,
      "step": 185760
    },
    {
      "epoch": 35.67,
      "learning_rate": 0.001,
      "loss": 2.5165,
      "step": 185772
    },
    {
      "epoch": 35.67,
      "learning_rate": 0.001,
      "loss": 2.5126,
      "step": 185784
    },
    {
      "epoch": 35.68,
      "learning_rate": 0.001,
      "loss": 2.5222,
      "step": 185796
    },
    {
      "epoch": 35.68,
      "learning_rate": 0.001,
      "loss": 2.5071,
      "step": 185808
    },
    {
      "epoch": 35.68,
      "learning_rate": 0.001,
      "loss": 2.5119,
      "step": 185820
    },
    {
      "epoch": 35.68,
      "learning_rate": 0.001,
      "loss": 2.5164,
      "step": 185832
    },
    {
      "epoch": 35.68,
      "learning_rate": 0.001,
      "loss": 2.5096,
      "step": 185844
    },
    {
      "epoch": 35.69,
      "learning_rate": 0.001,
      "loss": 2.5057,
      "step": 185856
    },
    {
      "epoch": 35.69,
      "learning_rate": 0.001,
      "loss": 2.5155,
      "step": 185868
    },
    {
      "epoch": 35.69,
      "learning_rate": 0.001,
      "loss": 2.5149,
      "step": 185880
    },
    {
      "epoch": 35.69,
      "learning_rate": 0.001,
      "loss": 2.5158,
      "step": 185892
    },
    {
      "epoch": 35.7,
      "learning_rate": 0.001,
      "loss": 2.5159,
      "step": 185904
    },
    {
      "epoch": 35.7,
      "learning_rate": 0.001,
      "loss": 2.5155,
      "step": 185916
    },
    {
      "epoch": 35.7,
      "learning_rate": 0.001,
      "loss": 2.5094,
      "step": 185928
    },
    {
      "epoch": 35.7,
      "learning_rate": 0.001,
      "loss": 2.5172,
      "step": 185940
    },
    {
      "epoch": 35.71,
      "learning_rate": 0.001,
      "loss": 2.5108,
      "step": 185952
    },
    {
      "epoch": 35.71,
      "learning_rate": 0.001,
      "loss": 2.5227,
      "step": 185964
    },
    {
      "epoch": 35.71,
      "learning_rate": 0.001,
      "loss": 2.5106,
      "step": 185976
    },
    {
      "epoch": 35.71,
      "learning_rate": 0.001,
      "loss": 2.5066,
      "step": 185988
    },
    {
      "epoch": 35.71,
      "learning_rate": 0.001,
      "loss": 2.5062,
      "step": 186000
    },
    {
      "epoch": 35.72,
      "learning_rate": 0.001,
      "loss": 2.5119,
      "step": 186012
    },
    {
      "epoch": 35.72,
      "learning_rate": 0.001,
      "loss": 2.5125,
      "step": 186024
    },
    {
      "epoch": 35.72,
      "learning_rate": 0.001,
      "loss": 2.5089,
      "step": 186036
    },
    {
      "epoch": 35.72,
      "learning_rate": 0.001,
      "loss": 2.5072,
      "step": 186048
    },
    {
      "epoch": 35.73,
      "learning_rate": 0.001,
      "loss": 2.513,
      "step": 186060
    },
    {
      "epoch": 35.73,
      "learning_rate": 0.001,
      "loss": 2.5207,
      "step": 186072
    },
    {
      "epoch": 35.73,
      "learning_rate": 0.001,
      "loss": 2.5199,
      "step": 186084
    },
    {
      "epoch": 35.73,
      "learning_rate": 0.001,
      "loss": 2.5227,
      "step": 186096
    },
    {
      "epoch": 35.74,
      "learning_rate": 0.001,
      "loss": 2.5217,
      "step": 186108
    },
    {
      "epoch": 35.74,
      "learning_rate": 0.001,
      "loss": 2.5154,
      "step": 186120
    },
    {
      "epoch": 35.74,
      "learning_rate": 0.001,
      "loss": 2.5166,
      "step": 186132
    },
    {
      "epoch": 35.74,
      "learning_rate": 0.001,
      "loss": 2.5025,
      "step": 186144
    },
    {
      "epoch": 35.74,
      "learning_rate": 0.001,
      "loss": 2.5107,
      "step": 186156
    },
    {
      "epoch": 35.75,
      "learning_rate": 0.001,
      "loss": 2.5156,
      "step": 186168
    },
    {
      "epoch": 35.75,
      "learning_rate": 0.001,
      "loss": 2.5046,
      "step": 186180
    },
    {
      "epoch": 35.75,
      "learning_rate": 0.001,
      "loss": 2.5146,
      "step": 186192
    },
    {
      "epoch": 35.75,
      "learning_rate": 0.001,
      "loss": 2.5045,
      "step": 186204
    },
    {
      "epoch": 35.76,
      "learning_rate": 0.001,
      "loss": 2.5123,
      "step": 186216
    },
    {
      "epoch": 35.76,
      "learning_rate": 0.001,
      "loss": 2.5078,
      "step": 186228
    },
    {
      "epoch": 35.76,
      "learning_rate": 0.001,
      "loss": 2.508,
      "step": 186240
    },
    {
      "epoch": 35.76,
      "eval_ag_news_accuracy": 0.3259375,
      "eval_ag_news_bleu_score": 5.027623565530976,
      "eval_ag_news_bleu_score_sem": 0.15916542707684467,
      "eval_ag_news_emb_cos_sim": 0.8101160526275635,
      "eval_ag_news_emb_cos_sim_sem": 0.007841746486438778,
      "eval_ag_news_emb_top1_equal": 0.2109375,
      "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4996113777160645,
      "eval_ag_news_n_ngrams_match_1": 14.296,
      "eval_ag_news_n_ngrams_match_2": 3.234,
      "eval_ag_news_n_ngrams_match_3": 0.952,
      "eval_ag_news_num_pred_words": 46.12,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.10258505646297,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3566406920462549,
      "eval_ag_news_runtime": 10.2616,
      "eval_ag_news_samples_per_second": 48.725,
      "eval_ag_news_steps_per_second": 0.097,
      "eval_ag_news_token_set_f1": 0.3551098809060948,
      "eval_ag_news_token_set_f1_sem": 0.004530021713566501,
      "eval_ag_news_token_set_precision": 0.33988054363720377,
      "eval_ag_news_token_set_recall": 0.38557489037148485,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 186250
    },
    {
      "epoch": 35.76,
      "eval_anthropic_toxic_prompts_accuracy": 0.11596875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2895778757698726,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12464101902062759,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6802198886871338,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009006047249651653,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2073121070861816,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.324,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.038,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.746,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.104,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.712572125746945,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21619338215415287,
      "eval_anthropic_toxic_prompts_runtime": 11.82,
      "eval_anthropic_toxic_prompts_samples_per_second": 42.301,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.085,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3683553251888338,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0069471203334324945,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44061712802162034,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.34575818135423747,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 186250
    },
    {
      "epoch": 35.76,
      "eval_arxiv_accuracy": 0.35140625,
      "eval_arxiv_bleu_score": 4.339785672492532,
      "eval_arxiv_bleu_score_sem": 0.125440224720847,
      "eval_arxiv_emb_cos_sim": 0.7803007364273071,
      "eval_arxiv_emb_cos_sim_sem": 0.007555921706361578,
      "eval_arxiv_emb_top1_equal": 0.2421875,
      "eval_arxiv_emb_top1_equal_sem": 0.038014990119662626,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.361938953399658,
      "eval_arxiv_n_ngrams_match_1": 15.396,
      "eval_arxiv_n_ngrams_match_2": 2.982,
      "eval_arxiv_n_ngrams_match_3": 0.624,
      "eval_arxiv_num_pred_words": 40.98,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.84506593082663,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36655014803718444,
      "eval_arxiv_runtime": 10.1295,
      "eval_arxiv_samples_per_second": 49.361,
      "eval_arxiv_steps_per_second": 0.099,
      "eval_arxiv_token_set_f1": 0.3596421632721308,
      "eval_arxiv_token_set_f1_sem": 0.004136908934240977,
      "eval_arxiv_token_set_precision": 0.31221322105244453,
      "eval_arxiv_token_set_recall": 0.44213838864441857,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 186250
    },
    {
      "epoch": 35.76,
      "eval_python_code_alpaca_accuracy": 0.16075,
      "eval_python_code_alpaca_bleu_score": 4.822352781871232,
      "eval_python_code_alpaca_bleu_score_sem": 0.1522812408518182,
      "eval_python_code_alpaca_emb_cos_sim": 0.7608482837677002,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007524945748339595,
      "eval_python_code_alpaca_emb_top1_equal": 0.15625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8470802307128906,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.016,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.02,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.048,
      "eval_python_code_alpaca_num_pred_words": 43.718,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.237379124066262,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3399097134245904,
      "eval_python_code_alpaca_runtime": 10.9552,
      "eval_python_code_alpaca_samples_per_second": 45.64,
      "eval_python_code_alpaca_steps_per_second": 0.091,
      "eval_python_code_alpaca_token_set_f1": 0.4868519106804113,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005324954166205798,
      "eval_python_code_alpaca_token_set_precision": 0.549480602167133,
      "eval_python_code_alpaca_token_set_recall": 0.45788679034845914,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 186250
    },
    {
      "epoch": 35.76,
      "eval_wikibio_accuracy": 0.326,
      "eval_wikibio_bleu_score": 6.177749917935727,
      "eval_wikibio_bleu_score_sem": 0.22831022191086195,
      "eval_wikibio_emb_cos_sim": 0.7269377708435059,
      "eval_wikibio_emb_cos_sim_sem": 0.011205283634384772,
      "eval_wikibio_emb_top1_equal": 0.1953125,
      "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6449012756347656,
      "eval_wikibio_n_ngrams_match_1": 9.854,
      "eval_wikibio_n_ngrams_match_2": 3.342,
      "eval_wikibio_n_ngrams_match_3": 1.254,
      "eval_wikibio_num_pred_words": 34.882,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 38.27899359566418,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3545169627204853,
      "eval_wikibio_runtime": 9.8457,
      "eval_wikibio_samples_per_second": 50.783,
      "eval_wikibio_steps_per_second": 0.102,
      "eval_wikibio_token_set_f1": 0.31934678746931244,
      "eval_wikibio_token_set_f1_sem": 0.005682141274654576,
      "eval_wikibio_token_set_precision": 0.3211814485452031,
      "eval_wikibio_token_set_recall": 0.33600770913283734,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 186250
    },
    {
      "epoch": 35.76,
      "eval_nq_accuracy": 0.53353125,
      "eval_nq_bleu_score": 12.109511152050722,
      "eval_nq_bleu_score_sem": 0.48191282249374406,
      "eval_nq_emb_cos_sim": 0.8405352830886841,
      "eval_nq_emb_cos_sim_sem": 0.0069766053710452434,
      "eval_nq_emb_top1_equal": 0.359375,
      "eval_nq_emb_top1_equal_sem": 0.04257689651385297,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1373345851898193,
      "eval_nq_n_ngrams_match_1": 23.344,
      "eval_nq_n_ngrams_match_2": 8.656,
      "eval_nq_n_ngrams_match_3": 4.036,
      "eval_nq_num_pred_words": 49.212,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.476813267000946,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45299354503655653,
      "eval_nq_runtime": 10.1289,
      "eval_nq_samples_per_second": 49.364,
      "eval_nq_steps_per_second": 0.099,
      "eval_nq_token_set_f1": 0.46790660942990175,
      "eval_nq_token_set_f1_sem": 0.004916402100974587,
      "eval_nq_token_set_precision": 0.42637261787270686,
      "eval_nq_token_set_recall": 0.5273577632561018,
      "eval_nq_true_num_tokens": 64.0,
      "step": 186250
    },
    {
      "epoch": 35.76,
      "learning_rate": 0.001,
      "loss": 2.5122,
      "step": 186252
    },
    {
      "epoch": 35.76,
      "learning_rate": 0.001,
      "loss": 2.5061,
      "step": 186264
    },
    {
      "epoch": 35.77,
      "learning_rate": 0.001,
      "loss": 2.513,
      "step": 186276
    },
    {
      "epoch": 35.77,
      "learning_rate": 0.001,
      "loss": 2.5095,
      "step": 186288
    },
    {
      "epoch": 35.77,
      "learning_rate": 0.001,
      "loss": 2.5177,
      "step": 186300
    },
    {
      "epoch": 35.77,
      "learning_rate": 0.001,
      "loss": 2.5079,
      "step": 186312
    },
    {
      "epoch": 35.78,
      "learning_rate": 0.001,
      "loss": 2.5101,
      "step": 186324
    },
    {
      "epoch": 35.78,
      "learning_rate": 0.001,
      "loss": 2.5161,
      "step": 186336
    },
    {
      "epoch": 35.78,
      "learning_rate": 0.001,
      "loss": 2.5263,
      "step": 186348
    },
    {
      "epoch": 35.78,
      "learning_rate": 0.001,
      "loss": 2.5139,
      "step": 186360
    },
    {
      "epoch": 35.79,
      "learning_rate": 0.001,
      "loss": 2.5149,
      "step": 186372
    },
    {
      "epoch": 35.79,
      "learning_rate": 0.001,
      "loss": 2.5076,
      "step": 186384
    },
    {
      "epoch": 35.79,
      "learning_rate": 0.001,
      "loss": 2.5146,
      "step": 186396
    },
    {
      "epoch": 35.79,
      "learning_rate": 0.001,
      "loss": 2.5072,
      "step": 186408
    },
    {
      "epoch": 35.79,
      "learning_rate": 0.001,
      "loss": 2.5199,
      "step": 186420
    },
    {
      "epoch": 35.8,
      "learning_rate": 0.001,
      "loss": 2.5125,
      "step": 186432
    },
    {
      "epoch": 35.8,
      "learning_rate": 0.001,
      "loss": 2.5127,
      "step": 186444
    },
    {
      "epoch": 35.8,
      "learning_rate": 0.001,
      "loss": 2.5161,
      "step": 186456
    },
    {
      "epoch": 35.8,
      "learning_rate": 0.001,
      "loss": 2.5144,
      "step": 186468
    },
    {
      "epoch": 35.81,
      "learning_rate": 0.001,
      "loss": 2.5252,
      "step": 186480
    },
    {
      "epoch": 35.81,
      "learning_rate": 0.001,
      "loss": 2.5149,
      "step": 186492
    },
    {
      "epoch": 35.81,
      "learning_rate": 0.001,
      "loss": 2.5253,
      "step": 186504
    },
    {
      "epoch": 35.81,
      "learning_rate": 0.001,
      "loss": 2.5241,
      "step": 186516
    },
    {
      "epoch": 35.82,
      "learning_rate": 0.001,
      "loss": 2.5208,
      "step": 186528
    },
    {
      "epoch": 35.82,
      "learning_rate": 0.001,
      "loss": 2.5182,
      "step": 186540
    },
    {
      "epoch": 35.82,
      "learning_rate": 0.001,
      "loss": 2.5084,
      "step": 186552
    },
    {
      "epoch": 35.82,
      "learning_rate": 0.001,
      "loss": 2.5115,
      "step": 186564
    },
    {
      "epoch": 35.82,
      "learning_rate": 0.001,
      "loss": 2.5139,
      "step": 186576
    },
    {
      "epoch": 35.83,
      "learning_rate": 0.001,
      "loss": 2.5007,
      "step": 186588
    },
    {
      "epoch": 35.83,
      "learning_rate": 0.001,
      "loss": 2.508,
      "step": 186600
    },
    {
      "epoch": 35.83,
      "learning_rate": 0.001,
      "loss": 2.5087,
      "step": 186612
    },
    {
      "epoch": 35.83,
      "learning_rate": 0.001,
      "loss": 2.5115,
      "step": 186624
    },
    {
      "epoch": 35.84,
      "learning_rate": 0.001,
      "loss": 2.5213,
      "step": 186636
    },
    {
      "epoch": 35.84,
      "learning_rate": 0.001,
      "loss": 2.5258,
      "step": 186648
    },
    {
      "epoch": 35.84,
      "learning_rate": 0.001,
      "loss": 2.5216,
      "step": 186660
    },
    {
      "epoch": 35.84,
      "learning_rate": 0.001,
      "loss": 2.5166,
      "step": 186672
    },
    {
      "epoch": 35.85,
      "learning_rate": 0.001,
      "loss": 2.509,
      "step": 186684
    },
    {
      "epoch": 35.85,
      "learning_rate": 0.001,
      "loss": 2.5078,
      "step": 186696
    },
    {
      "epoch": 35.85,
      "learning_rate": 0.001,
      "loss": 2.5082,
      "step": 186708
    },
    {
      "epoch": 35.85,
      "learning_rate": 0.001,
      "loss": 2.5116,
      "step": 186720
    },
    {
      "epoch": 35.85,
      "learning_rate": 0.001,
      "loss": 2.5148,
      "step": 186732
    },
    {
      "epoch": 35.86,
      "learning_rate": 0.001,
      "loss": 2.5178,
      "step": 186744
    },
    {
      "epoch": 35.86,
      "learning_rate": 0.001,
      "loss": 2.5162,
      "step": 186756
    },
    {
      "epoch": 35.86,
      "learning_rate": 0.001,
      "loss": 2.517,
      "step": 186768
    },
    {
      "epoch": 35.86,
      "learning_rate": 0.001,
      "loss": 2.5159,
      "step": 186780
    },
    {
      "epoch": 35.87,
      "learning_rate": 0.001,
      "loss": 2.5106,
      "step": 186792
    },
    {
      "epoch": 35.87,
      "learning_rate": 0.001,
      "loss": 2.5136,
      "step": 186804
    },
    {
      "epoch": 35.87,
      "learning_rate": 0.001,
      "loss": 2.5191,
      "step": 186816
    },
    {
      "epoch": 35.87,
      "learning_rate": 0.001,
      "loss": 2.5063,
      "step": 186828
    },
    {
      "epoch": 35.88,
      "learning_rate": 0.001,
      "loss": 2.505,
      "step": 186840
    },
    {
      "epoch": 35.88,
      "learning_rate": 0.001,
      "loss": 2.5118,
      "step": 186852
    },
    {
      "epoch": 35.88,
      "learning_rate": 0.001,
      "loss": 2.5198,
      "step": 186864
    },
    {
      "epoch": 35.88,
      "eval_ag_news_accuracy": 0.327375,
      "eval_ag_news_bleu_score": 4.85270456280253,
      "eval_ag_news_bleu_score_sem": 0.14786794935780656,
      "eval_ag_news_emb_cos_sim": 0.8168831467628479,
      "eval_ag_news_emb_cos_sim_sem": 0.006567543205922189,
      "eval_ag_news_emb_top1_equal": 0.234375,
      "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.491161584854126,
      "eval_ag_news_n_ngrams_match_1": 14.208,
      "eval_ag_news_n_ngrams_match_2": 3.102,
      "eval_ag_news_n_ngrams_match_3": 0.87,
      "eval_ag_news_num_pred_words": 46.288,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.824053493783836,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35619190529604994,
      "eval_ag_news_runtime": 10.4026,
      "eval_ag_news_samples_per_second": 48.065,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.35646040856758254,
      "eval_ag_news_token_set_f1_sem": 0.004357326085723667,
      "eval_ag_news_token_set_precision": 0.34191584272141656,
      "eval_ag_news_token_set_recall": 0.38774763945336205,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 186875
    },
    {
      "epoch": 35.88,
      "eval_anthropic_toxic_prompts_accuracy": 0.11584375,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2547740254674387,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12535173891302592,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6843358874320984,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008709347924114135,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.242084264755249,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.31,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.0,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.792,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.954,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.58699628349279,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21758626830840716,
      "eval_anthropic_toxic_prompts_runtime": 9.8205,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.914,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.102,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35894941563738564,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006725005618732095,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4436699086519642,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32576846770951484,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 186875
    },
    {
      "epoch": 35.88,
      "eval_arxiv_accuracy": 0.351,
      "eval_arxiv_bleu_score": 4.507362704308387,
      "eval_arxiv_bleu_score_sem": 0.131581266589406,
      "eval_arxiv_emb_cos_sim": 0.7721332311630249,
      "eval_arxiv_emb_cos_sim_sem": 0.008831844446588508,
      "eval_arxiv_emb_top1_equal": 0.3515625,
      "eval_arxiv_emb_top1_equal_sem": 0.04236756101983345,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3566012382507324,
      "eval_arxiv_n_ngrams_match_1": 15.184,
      "eval_arxiv_n_ngrams_match_2": 3.02,
      "eval_arxiv_n_ngrams_match_3": 0.72,
      "eval_arxiv_num_pred_words": 40.072,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.69150937060883,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3638956432410369,
      "eval_arxiv_runtime": 10.1365,
      "eval_arxiv_samples_per_second": 49.326,
      "eval_arxiv_steps_per_second": 0.099,
      "eval_arxiv_token_set_f1": 0.3569405259993931,
      "eval_arxiv_token_set_f1_sem": 0.004359564774131006,
      "eval_arxiv_token_set_precision": 0.30743827002505114,
      "eval_arxiv_token_set_recall": 0.4419336365498523,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 186875
    },
    {
      "epoch": 35.88,
      "eval_python_code_alpaca_accuracy": 0.162875,
      "eval_python_code_alpaca_bleu_score": 4.895389372581267,
      "eval_python_code_alpaca_bleu_score_sem": 0.15536732980811435,
      "eval_python_code_alpaca_emb_cos_sim": 0.762061595916748,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008538410843266077,
      "eval_python_code_alpaca_emb_top1_equal": 0.109375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8486289978027344,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.888,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.038,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.04,
      "eval_python_code_alpaca_num_pred_words": 42.546,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.264096493719666,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3425739597371863,
      "eval_python_code_alpaca_runtime": 10.3168,
      "eval_python_code_alpaca_samples_per_second": 48.465,
      "eval_python_code_alpaca_steps_per_second": 0.097,
      "eval_python_code_alpaca_token_set_f1": 0.48025247901822893,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005285327007492674,
      "eval_python_code_alpaca_token_set_precision": 0.5378274946053501,
      "eval_python_code_alpaca_token_set_recall": 0.455818739862497,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 186875
    },
    {
      "epoch": 35.88,
      "eval_wikibio_accuracy": 0.3244375,
      "eval_wikibio_bleu_score": 6.360273716083152,
      "eval_wikibio_bleu_score_sem": 0.23722324752144258,
      "eval_wikibio_emb_cos_sim": 0.74052894115448,
      "eval_wikibio_emb_cos_sim_sem": 0.009436331670088957,
      "eval_wikibio_emb_top1_equal": 0.1875,
      "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6699306964874268,
      "eval_wikibio_n_ngrams_match_1": 10.164,
      "eval_wikibio_n_ngrams_match_2": 3.512,
      "eval_wikibio_n_ngrams_match_3": 1.348,
      "eval_wikibio_num_pred_words": 35.742,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 39.24918565961401,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3612554319589409,
      "eval_wikibio_runtime": 9.7816,
      "eval_wikibio_samples_per_second": 51.116,
      "eval_wikibio_steps_per_second": 0.102,
      "eval_wikibio_token_set_f1": 0.3226035239129618,
      "eval_wikibio_token_set_f1_sem": 0.005657038901180159,
      "eval_wikibio_token_set_precision": 0.32892531093457045,
      "eval_wikibio_token_set_recall": 0.33506598987721126,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 186875
    },
    {
      "epoch": 35.88,
      "eval_nq_accuracy": 0.534,
      "eval_nq_bleu_score": 12.111168040222063,
      "eval_nq_bleu_score_sem": 0.47666294936925724,
      "eval_nq_emb_cos_sim": 0.8338232636451721,
      "eval_nq_emb_cos_sim_sem": 0.007299238179307122,
      "eval_nq_emb_top1_equal": 0.3046875,
      "eval_nq_emb_top1_equal_sem": 0.04084279867618665,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.135673761367798,
      "eval_nq_n_ngrams_match_1": 23.444,
      "eval_nq_n_ngrams_match_2": 8.734,
      "eval_nq_n_ngrams_match_3": 4.062,
      "eval_nq_num_pred_words": 48.898,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.462746458071383,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4553753785058464,
      "eval_nq_runtime": 11.0148,
      "eval_nq_samples_per_second": 45.393,
      "eval_nq_steps_per_second": 0.091,
      "eval_nq_token_set_f1": 0.4683151672091482,
      "eval_nq_token_set_f1_sem": 0.004938079858210904,
      "eval_nq_token_set_precision": 0.4259236173799477,
      "eval_nq_token_set_recall": 0.5291033863196826,
      "eval_nq_true_num_tokens": 64.0,
      "step": 186875
    },
    {
      "epoch": 35.88,
      "learning_rate": 0.001,
      "loss": 2.5236,
      "step": 186876
    },
    {
      "epoch": 35.88,
      "learning_rate": 0.001,
      "loss": 2.512,
      "step": 186888
    },
    {
      "epoch": 35.89,
      "learning_rate": 0.001,
      "loss": 2.5178,
      "step": 186900
    },
    {
      "epoch": 35.89,
      "learning_rate": 0.001,
      "loss": 2.5125,
      "step": 186912
    },
    {
      "epoch": 35.89,
      "learning_rate": 0.001,
      "loss": 2.5088,
      "step": 186924
    },
    {
      "epoch": 35.89,
      "learning_rate": 0.001,
      "loss": 2.528,
      "step": 186936
    },
    {
      "epoch": 35.9,
      "learning_rate": 0.001,
      "loss": 2.512,
      "step": 186948
    },
    {
      "epoch": 35.9,
      "learning_rate": 0.001,
      "loss": 2.5203,
      "step": 186960
    },
    {
      "epoch": 35.9,
      "learning_rate": 0.001,
      "loss": 2.5071,
      "step": 186972
    },
    {
      "epoch": 35.9,
      "learning_rate": 0.001,
      "loss": 2.5211,
      "step": 186984
    },
    {
      "epoch": 35.91,
      "learning_rate": 0.001,
      "loss": 2.5137,
      "step": 186996
    },
    {
      "epoch": 35.91,
      "learning_rate": 0.001,
      "loss": 2.5137,
      "step": 187008
    },
    {
      "epoch": 35.91,
      "learning_rate": 0.001,
      "loss": 2.517,
      "step": 187020
    },
    {
      "epoch": 35.91,
      "learning_rate": 0.001,
      "loss": 2.5169,
      "step": 187032
    },
    {
      "epoch": 35.91,
      "learning_rate": 0.001,
      "loss": 2.5234,
      "step": 187044
    },
    {
      "epoch": 35.92,
      "learning_rate": 0.001,
      "loss": 2.515,
      "step": 187056
    },
    {
      "epoch": 35.92,
      "learning_rate": 0.001,
      "loss": 2.5161,
      "step": 187068
    },
    {
      "epoch": 35.92,
      "learning_rate": 0.001,
      "loss": 2.518,
      "step": 187080
    },
    {
      "epoch": 35.92,
      "learning_rate": 0.001,
      "loss": 2.5221,
      "step": 187092
    },
    {
      "epoch": 35.93,
      "learning_rate": 0.001,
      "loss": 2.529,
      "step": 187104
    },
    {
      "epoch": 35.93,
      "learning_rate": 0.001,
      "loss": 2.513,
      "step": 187116
    },
    {
      "epoch": 35.93,
      "learning_rate": 0.001,
      "loss": 2.526,
      "step": 187128
    },
    {
      "epoch": 35.93,
      "learning_rate": 0.001,
      "loss": 2.5097,
      "step": 187140
    },
    {
      "epoch": 35.94,
      "learning_rate": 0.001,
      "loss": 2.5113,
      "step": 187152
    },
    {
      "epoch": 35.94,
      "learning_rate": 0.001,
      "loss": 2.5213,
      "step": 187164
    },
    {
      "epoch": 35.94,
      "learning_rate": 0.001,
      "loss": 2.5153,
      "step": 187176
    },
    {
      "epoch": 35.94,
      "learning_rate": 0.001,
      "loss": 2.5151,
      "step": 187188
    },
    {
      "epoch": 35.94,
      "learning_rate": 0.001,
      "loss": 2.5025,
      "step": 187200
    },
    {
      "epoch": 35.95,
      "learning_rate": 0.001,
      "loss": 2.5183,
      "step": 187212
    },
    {
      "epoch": 35.95,
      "learning_rate": 0.001,
      "loss": 2.5121,
      "step": 187224
    },
    {
      "epoch": 35.95,
      "learning_rate": 0.001,
      "loss": 2.5118,
      "step": 187236
    },
    {
      "epoch": 35.95,
      "learning_rate": 0.001,
      "loss": 2.5157,
      "step": 187248
    },
    {
      "epoch": 35.96,
      "learning_rate": 0.001,
      "loss": 2.5094,
      "step": 187260
    },
    {
      "epoch": 35.96,
      "learning_rate": 0.001,
      "loss": 2.5196,
      "step": 187272
    },
    {
      "epoch": 35.96,
      "learning_rate": 0.001,
      "loss": 2.5102,
      "step": 187284
    },
    {
      "epoch": 35.96,
      "learning_rate": 0.001,
      "loss": 2.5223,
      "step": 187296
    },
    {
      "epoch": 35.97,
      "learning_rate": 0.001,
      "loss": 2.5229,
      "step": 187308
    },
    {
      "epoch": 35.97,
      "learning_rate": 0.001,
      "loss": 2.5307,
      "step": 187320
    },
    {
      "epoch": 35.97,
      "learning_rate": 0.001,
      "loss": 2.5098,
      "step": 187332
    },
    {
      "epoch": 35.97,
      "learning_rate": 0.001,
      "loss": 2.5167,
      "step": 187344
    },
    {
      "epoch": 35.97,
      "learning_rate": 0.001,
      "loss": 2.5165,
      "step": 187356
    },
    {
      "epoch": 35.98,
      "learning_rate": 0.001,
      "loss": 2.5094,
      "step": 187368
    },
    {
      "epoch": 35.98,
      "learning_rate": 0.001,
      "loss": 2.5084,
      "step": 187380
    },
    {
      "epoch": 35.98,
      "learning_rate": 0.001,
      "loss": 2.5241,
      "step": 187392
    },
    {
      "epoch": 35.98,
      "learning_rate": 0.001,
      "loss": 2.5245,
      "step": 187404
    },
    {
      "epoch": 35.99,
      "learning_rate": 0.001,
      "loss": 2.5195,
      "step": 187416
    },
    {
      "epoch": 35.99,
      "learning_rate": 0.001,
      "loss": 2.5113,
      "step": 187428
    },
    {
      "epoch": 35.99,
      "learning_rate": 0.001,
      "loss": 2.5196,
      "step": 187440
    },
    {
      "epoch": 35.99,
      "learning_rate": 0.001,
      "loss": 2.5169,
      "step": 187452
    },
    {
      "epoch": 36.0,
      "learning_rate": 0.001,
      "loss": 2.5213,
      "step": 187464
    },
    {
      "epoch": 36.0,
      "learning_rate": 0.001,
      "loss": 2.5282,
      "step": 187476
    },
    {
      "epoch": 36.0,
      "learning_rate": 0.001,
      "loss": 2.5234,
      "step": 187488
    },
    {
      "epoch": 36.0,
      "learning_rate": 0.001,
      "loss": 2.518,
      "step": 187500
    },
    {
      "epoch": 36.0,
      "eval_ag_news_accuracy": 0.328625,
      "eval_ag_news_bleu_score": 4.942493222193823,
      "eval_ag_news_bleu_score_sem": 0.14813384873838195,
      "eval_ag_news_emb_cos_sim": 0.8226406574249268,
      "eval_ag_news_emb_cos_sim_sem": 0.006169854104451997,
      "eval_ag_news_emb_top1_equal": 0.2421875,
      "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4921488761901855,
      "eval_ag_news_n_ngrams_match_1": 14.414,
      "eval_ag_news_n_ngrams_match_2": 3.212,
      "eval_ag_news_n_ngrams_match_3": 0.9,
      "eval_ag_news_num_pred_words": 46.91,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.85647640020623,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3612314634303404,
      "eval_ag_news_runtime": 10.4452,
      "eval_ag_news_samples_per_second": 47.869,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.35905100444952903,
      "eval_ag_news_token_set_f1_sem": 0.00432735548364872,
      "eval_ag_news_token_set_precision": 0.34633022241723005,
      "eval_ag_news_token_set_recall": 0.3861115140756779,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 187500
    },
    {
      "epoch": 36.0,
      "eval_anthropic_toxic_prompts_accuracy": 0.11640625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2528173812140118,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12860246335865586,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6813318133354187,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009151567805214119,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.185739278793335,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.24,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.936,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.75,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.844,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.185161375395612,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2164702131498088,
      "eval_anthropic_toxic_prompts_runtime": 9.9763,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.119,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.1,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35754420951045773,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006796119587194955,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4385873553342065,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32701514642386076,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 187500
    },
    {
      "epoch": 36.0,
      "eval_arxiv_accuracy": 0.3544375,
      "eval_arxiv_bleu_score": 4.416894392451446,
      "eval_arxiv_bleu_score_sem": 0.1332919077802899,
      "eval_arxiv_emb_cos_sim": 0.7817720174789429,
      "eval_arxiv_emb_cos_sim_sem": 0.0075332592142083685,
      "eval_arxiv_emb_top1_equal": 0.3125,
      "eval_arxiv_emb_top1_equal_sem": 0.041130074229814934,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.335984468460083,
      "eval_arxiv_n_ngrams_match_1": 15.264,
      "eval_arxiv_n_ngrams_match_2": 2.982,
      "eval_arxiv_n_ngrams_match_3": 0.676,
      "eval_arxiv_num_pred_words": 40.212,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.106039117438897,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36615235226363946,
      "eval_arxiv_runtime": 10.297,
      "eval_arxiv_samples_per_second": 48.558,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.36031862169647844,
      "eval_arxiv_token_set_f1_sem": 0.004229702155609524,
      "eval_arxiv_token_set_precision": 0.31063931808900175,
      "eval_arxiv_token_set_recall": 0.4487834177617892,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 187500
    },
    {
      "epoch": 36.0,
      "eval_python_code_alpaca_accuracy": 0.1636875,
      "eval_python_code_alpaca_bleu_score": 4.636028564665088,
      "eval_python_code_alpaca_bleu_score_sem": 0.1408475479289397,
      "eval_python_code_alpaca_emb_cos_sim": 0.7712357044219971,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.0077602219690730565,
      "eval_python_code_alpaca_emb_top1_equal": 0.15625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.817457437515259,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.062,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.034,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.0,
      "eval_python_code_alpaca_num_pred_words": 44.14,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 16.734248663094267,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33944866767766774,
      "eval_python_code_alpaca_runtime": 10.3069,
      "eval_python_code_alpaca_samples_per_second": 48.511,
      "eval_python_code_alpaca_steps_per_second": 0.097,
      "eval_python_code_alpaca_token_set_f1": 0.4891515935263797,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005444230842267233,
      "eval_python_code_alpaca_token_set_precision": 0.5536921405394953,
      "eval_python_code_alpaca_token_set_recall": 0.4588950020998636,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 187500
    },
    {
      "epoch": 36.0,
      "eval_wikibio_accuracy": 0.32615625,
      "eval_wikibio_bleu_score": 5.9866993447316315,
      "eval_wikibio_bleu_score_sem": 0.2187414030705155,
      "eval_wikibio_emb_cos_sim": 0.7428231239318848,
      "eval_wikibio_emb_cos_sim_sem": 0.008714659744115524,
      "eval_wikibio_emb_top1_equal": 0.2109375,
      "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6888046264648438,
      "eval_wikibio_n_ngrams_match_1": 10.026,
      "eval_wikibio_n_ngrams_match_2": 3.382,
      "eval_wikibio_n_ngrams_match_3": 1.234,
      "eval_wikibio_num_pred_words": 36.37,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 39.99700700601704,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.35881949450267603,
      "eval_wikibio_runtime": 9.8381,
      "eval_wikibio_samples_per_second": 50.823,
      "eval_wikibio_steps_per_second": 0.102,
      "eval_wikibio_token_set_f1": 0.32007353674278294,
      "eval_wikibio_token_set_f1_sem": 0.005424940031134632,
      "eval_wikibio_token_set_precision": 0.32790295232509387,
      "eval_wikibio_token_set_recall": 0.3280172324537126,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 187500
    },
    {
      "epoch": 36.0,
      "eval_nq_accuracy": 0.5345625,
      "eval_nq_bleu_score": 11.629154870964845,
      "eval_nq_bleu_score_sem": 0.47471034362165127,
      "eval_nq_emb_cos_sim": 0.8308244943618774,
      "eval_nq_emb_cos_sim_sem": 0.007572436026988261,
      "eval_nq_emb_top1_equal": 0.2421875,
      "eval_nq_emb_top1_equal_sem": 0.038014990119662626,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1325623989105225,
      "eval_nq_n_ngrams_match_1": 23.33,
      "eval_nq_n_ngrams_match_2": 8.426,
      "eval_nq_n_ngrams_match_3": 3.816,
      "eval_nq_num_pred_words": 49.362,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.436456706138213,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45329224349146213,
      "eval_nq_runtime": 10.6286,
      "eval_nq_samples_per_second": 47.043,
      "eval_nq_steps_per_second": 0.094,
      "eval_nq_token_set_f1": 0.4640732003403243,
      "eval_nq_token_set_f1_sem": 0.00508825611317226,
      "eval_nq_token_set_precision": 0.4242892311907854,
      "eval_nq_token_set_recall": 0.5206706593580744,
      "eval_nq_true_num_tokens": 64.0,
      "step": 187500
    },
    {
      "epoch": 36.0,
      "learning_rate": 0.001,
      "loss": 2.4937,
      "step": 187512
    },
    {
      "epoch": 36.01,
      "learning_rate": 0.001,
      "loss": 2.4931,
      "step": 187524
    },
    {
      "epoch": 36.01,
      "learning_rate": 0.001,
      "loss": 2.5052,
      "step": 187536
    },
    {
      "epoch": 36.01,
      "learning_rate": 0.001,
      "loss": 2.5067,
      "step": 187548
    },
    {
      "epoch": 36.01,
      "learning_rate": 0.001,
      "loss": 2.5043,
      "step": 187560
    },
    {
      "epoch": 36.02,
      "learning_rate": 0.001,
      "loss": 2.5012,
      "step": 187572
    },
    {
      "epoch": 36.02,
      "learning_rate": 0.001,
      "loss": 2.4939,
      "step": 187584
    },
    {
      "epoch": 36.02,
      "learning_rate": 0.001,
      "loss": 2.5131,
      "step": 187596
    },
    {
      "epoch": 36.02,
      "learning_rate": 0.001,
      "loss": 2.5016,
      "step": 187608
    },
    {
      "epoch": 36.03,
      "learning_rate": 0.001,
      "loss": 2.5018,
      "step": 187620
    },
    {
      "epoch": 36.03,
      "learning_rate": 0.001,
      "loss": 2.5032,
      "step": 187632
    },
    {
      "epoch": 36.03,
      "learning_rate": 0.001,
      "loss": 2.498,
      "step": 187644
    },
    {
      "epoch": 36.03,
      "learning_rate": 0.001,
      "loss": 2.4972,
      "step": 187656
    },
    {
      "epoch": 36.03,
      "learning_rate": 0.001,
      "loss": 2.5001,
      "step": 187668
    },
    {
      "epoch": 36.04,
      "learning_rate": 0.001,
      "loss": 2.5069,
      "step": 187680
    },
    {
      "epoch": 36.04,
      "learning_rate": 0.001,
      "loss": 2.4978,
      "step": 187692
    },
    {
      "epoch": 36.04,
      "learning_rate": 0.001,
      "loss": 2.5012,
      "step": 187704
    },
    {
      "epoch": 36.04,
      "learning_rate": 0.001,
      "loss": 2.4912,
      "step": 187716
    },
    {
      "epoch": 36.05,
      "learning_rate": 0.001,
      "loss": 2.5029,
      "step": 187728
    },
    {
      "epoch": 36.05,
      "learning_rate": 0.001,
      "loss": 2.4959,
      "step": 187740
    },
    {
      "epoch": 36.05,
      "learning_rate": 0.001,
      "loss": 2.4989,
      "step": 187752
    },
    {
      "epoch": 36.05,
      "learning_rate": 0.001,
      "loss": 2.4972,
      "step": 187764
    },
    {
      "epoch": 36.06,
      "learning_rate": 0.001,
      "loss": 2.5069,
      "step": 187776
    },
    {
      "epoch": 36.06,
      "learning_rate": 0.001,
      "loss": 2.5011,
      "step": 187788
    },
    {
      "epoch": 36.06,
      "learning_rate": 0.001,
      "loss": 2.5058,
      "step": 187800
    },
    {
      "epoch": 36.06,
      "learning_rate": 0.001,
      "loss": 2.5013,
      "step": 187812
    },
    {
      "epoch": 36.06,
      "learning_rate": 0.001,
      "loss": 2.5025,
      "step": 187824
    },
    {
      "epoch": 36.07,
      "learning_rate": 0.001,
      "loss": 2.4978,
      "step": 187836
    },
    {
      "epoch": 36.07,
      "learning_rate": 0.001,
      "loss": 2.5044,
      "step": 187848
    },
    {
      "epoch": 36.07,
      "learning_rate": 0.001,
      "loss": 2.4988,
      "step": 187860
    },
    {
      "epoch": 36.07,
      "learning_rate": 0.001,
      "loss": 2.5055,
      "step": 187872
    },
    {
      "epoch": 36.08,
      "learning_rate": 0.001,
      "loss": 2.4916,
      "step": 187884
    },
    {
      "epoch": 36.08,
      "learning_rate": 0.001,
      "loss": 2.5063,
      "step": 187896
    },
    {
      "epoch": 36.08,
      "learning_rate": 0.001,
      "loss": 2.5042,
      "step": 187908
    },
    {
      "epoch": 36.08,
      "learning_rate": 0.001,
      "loss": 2.4973,
      "step": 187920
    },
    {
      "epoch": 36.09,
      "learning_rate": 0.001,
      "loss": 2.5,
      "step": 187932
    },
    {
      "epoch": 36.09,
      "learning_rate": 0.001,
      "loss": 2.5052,
      "step": 187944
    },
    {
      "epoch": 36.09,
      "learning_rate": 0.001,
      "loss": 2.5033,
      "step": 187956
    },
    {
      "epoch": 36.09,
      "learning_rate": 0.001,
      "loss": 2.5041,
      "step": 187968
    },
    {
      "epoch": 36.09,
      "learning_rate": 0.001,
      "loss": 2.5087,
      "step": 187980
    },
    {
      "epoch": 36.1,
      "learning_rate": 0.001,
      "loss": 2.5016,
      "step": 187992
    },
    {
      "epoch": 36.1,
      "learning_rate": 0.001,
      "loss": 2.5043,
      "step": 188004
    },
    {
      "epoch": 36.1,
      "learning_rate": 0.001,
      "loss": 2.5067,
      "step": 188016
    },
    {
      "epoch": 36.1,
      "learning_rate": 0.001,
      "loss": 2.5089,
      "step": 188028
    },
    {
      "epoch": 36.11,
      "learning_rate": 0.001,
      "loss": 2.5135,
      "step": 188040
    },
    {
      "epoch": 36.11,
      "learning_rate": 0.001,
      "loss": 2.5056,
      "step": 188052
    },
    {
      "epoch": 36.11,
      "learning_rate": 0.001,
      "loss": 2.5115,
      "step": 188064
    },
    {
      "epoch": 36.11,
      "learning_rate": 0.001,
      "loss": 2.4977,
      "step": 188076
    },
    {
      "epoch": 36.12,
      "learning_rate": 0.001,
      "loss": 2.5,
      "step": 188088
    },
    {
      "epoch": 36.12,
      "learning_rate": 0.001,
      "loss": 2.5041,
      "step": 188100
    },
    {
      "epoch": 36.12,
      "learning_rate": 0.001,
      "loss": 2.4984,
      "step": 188112
    },
    {
      "epoch": 36.12,
      "learning_rate": 0.001,
      "loss": 2.5205,
      "step": 188124
    },
    {
      "epoch": 36.12,
      "eval_ag_news_accuracy": 0.32796875,
      "eval_ag_news_bleu_score": 4.995996668175454,
      "eval_ag_news_bleu_score_sem": 0.14697083637603395,
      "eval_ag_news_emb_cos_sim": 0.8210964202880859,
      "eval_ag_news_emb_cos_sim_sem": 0.006754837215259523,
      "eval_ag_news_emb_top1_equal": 0.2421875,
      "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.5001513957977295,
      "eval_ag_news_n_ngrams_match_1": 14.512,
      "eval_ag_news_n_ngrams_match_2": 3.316,
      "eval_ag_news_n_ngrams_match_3": 0.92,
      "eval_ag_news_num_pred_words": 46.662,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 33.12046587849239,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.36274309202833455,
      "eval_ag_news_runtime": 11.8057,
      "eval_ag_news_samples_per_second": 42.353,
      "eval_ag_news_steps_per_second": 0.085,
      "eval_ag_news_token_set_f1": 0.36278094313381254,
      "eval_ag_news_token_set_f1_sem": 0.004395849795954817,
      "eval_ag_news_token_set_precision": 0.348730167831629,
      "eval_ag_news_token_set_recall": 0.3932009532911382,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 188125
    },
    {
      "epoch": 36.12,
      "eval_anthropic_toxic_prompts_accuracy": 0.11634375,
      "eval_anthropic_toxic_prompts_bleu_score": 3.235428472490034,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12399404198003786,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.680637001991272,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008425199139809752,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.192927837371826,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.374,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.97,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.744,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.88,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.359644213352187,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2182232073664308,
      "eval_anthropic_toxic_prompts_runtime": 9.9854,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.073,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.1,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36169427325117526,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006905703115647926,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4472755540047718,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3277039674454963,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 188125
    },
    {
      "epoch": 36.12,
      "eval_arxiv_accuracy": 0.35090625,
      "eval_arxiv_bleu_score": 4.4174585397988295,
      "eval_arxiv_bleu_score_sem": 0.1278726088860232,
      "eval_arxiv_emb_cos_sim": 0.7842085361480713,
      "eval_arxiv_emb_cos_sim_sem": 0.006283408251052474,
      "eval_arxiv_emb_top1_equal": 0.2890625,
      "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.35910701751709,
      "eval_arxiv_n_ngrams_match_1": 15.288,
      "eval_arxiv_n_ngrams_match_2": 2.976,
      "eval_arxiv_n_ngrams_match_3": 0.65,
      "eval_arxiv_num_pred_words": 40.326,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.76349411117982,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36982629948667634,
      "eval_arxiv_runtime": 10.3197,
      "eval_arxiv_samples_per_second": 48.451,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.358834892982993,
      "eval_arxiv_token_set_f1_sem": 0.0041332722647101135,
      "eval_arxiv_token_set_precision": 0.31028017096869653,
      "eval_arxiv_token_set_recall": 0.4405731224541131,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 188125
    },
    {
      "epoch": 36.12,
      "eval_python_code_alpaca_accuracy": 0.1616875,
      "eval_python_code_alpaca_bleu_score": 4.6461539174688955,
      "eval_python_code_alpaca_bleu_score_sem": 0.14160669208012872,
      "eval_python_code_alpaca_emb_cos_sim": 0.7626280188560486,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008380994339711411,
      "eval_python_code_alpaca_emb_top1_equal": 0.1796875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.034068008879424266,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8524506092071533,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.03,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.01,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.026,
      "eval_python_code_alpaca_num_pred_words": 44.488,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.33019939110918,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33619768393109695,
      "eval_python_code_alpaca_runtime": 12.1766,
      "eval_python_code_alpaca_samples_per_second": 41.062,
      "eval_python_code_alpaca_steps_per_second": 0.082,
      "eval_python_code_alpaca_token_set_f1": 0.48191552425964873,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005149441772851338,
      "eval_python_code_alpaca_token_set_precision": 0.5498746585934386,
      "eval_python_code_alpaca_token_set_recall": 0.448166458051428,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 188125
    },
    {
      "epoch": 36.12,
      "eval_wikibio_accuracy": 0.32828125,
      "eval_wikibio_bleu_score": 5.842899620541409,
      "eval_wikibio_bleu_score_sem": 0.21367021975295392,
      "eval_wikibio_emb_cos_sim": 0.7363656759262085,
      "eval_wikibio_emb_cos_sim_sem": 0.009985318079545641,
      "eval_wikibio_emb_top1_equal": 0.1328125,
      "eval_wikibio_emb_top1_equal_sem": 0.030114394778901498,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.671426296234131,
      "eval_wikibio_n_ngrams_match_1": 9.642,
      "eval_wikibio_n_ngrams_match_2": 3.226,
      "eval_wikibio_n_ngrams_match_3": 1.222,
      "eval_wikibio_num_pred_words": 35.336,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 39.3079306502913,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.34369270005573715,
      "eval_wikibio_runtime": 11.0798,
      "eval_wikibio_samples_per_second": 45.127,
      "eval_wikibio_steps_per_second": 0.09,
      "eval_wikibio_token_set_f1": 0.30747333378935354,
      "eval_wikibio_token_set_f1_sem": 0.005869789107156018,
      "eval_wikibio_token_set_precision": 0.3125237180433861,
      "eval_wikibio_token_set_recall": 0.32153228529965044,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 188125
    },
    {
      "epoch": 36.12,
      "eval_nq_accuracy": 0.535125,
      "eval_nq_bleu_score": 11.948907734875544,
      "eval_nq_bleu_score_sem": 0.49290621709386756,
      "eval_nq_emb_cos_sim": 0.8342015743255615,
      "eval_nq_emb_cos_sim_sem": 0.0070560791808131065,
      "eval_nq_emb_top1_equal": 0.2890625,
      "eval_nq_emb_top1_equal_sem": 0.04022626667363519,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.133300542831421,
      "eval_nq_n_ngrams_match_1": 23.482,
      "eval_nq_n_ngrams_match_2": 8.656,
      "eval_nq_n_ngrams_match_3": 3.974,
      "eval_nq_num_pred_words": 49.194,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.442686324264292,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4538564307692001,
      "eval_nq_runtime": 29.1975,
      "eval_nq_samples_per_second": 17.125,
      "eval_nq_steps_per_second": 0.034,
      "eval_nq_token_set_f1": 0.4677455973523867,
      "eval_nq_token_set_f1_sem": 0.005077195308921663,
      "eval_nq_token_set_precision": 0.4260586515948146,
      "eval_nq_token_set_recall": 0.5266026745359195,
      "eval_nq_true_num_tokens": 64.0,
      "step": 188125
    },
    {
      "epoch": 36.12,
      "learning_rate": 0.001,
      "loss": 2.5031,
      "step": 188136
    },
    {
      "epoch": 36.13,
      "learning_rate": 0.001,
      "loss": 2.5042,
      "step": 188148
    },
    {
      "epoch": 36.13,
      "learning_rate": 0.001,
      "loss": 2.5017,
      "step": 188160
    },
    {
      "epoch": 36.13,
      "learning_rate": 0.001,
      "loss": 2.5008,
      "step": 188172
    },
    {
      "epoch": 36.13,
      "learning_rate": 0.001,
      "loss": 2.4904,
      "step": 188184
    },
    {
      "epoch": 36.14,
      "learning_rate": 0.001,
      "loss": 2.5065,
      "step": 188196
    },
    {
      "epoch": 36.14,
      "learning_rate": 0.001,
      "loss": 2.4946,
      "step": 188208
    },
    {
      "epoch": 36.14,
      "learning_rate": 0.001,
      "loss": 2.501,
      "step": 188220
    },
    {
      "epoch": 36.14,
      "learning_rate": 0.001,
      "loss": 2.4967,
      "step": 188232
    },
    {
      "epoch": 36.15,
      "learning_rate": 0.001,
      "loss": 2.4967,
      "step": 188244
    },
    {
      "epoch": 36.15,
      "learning_rate": 0.001,
      "loss": 2.4991,
      "step": 188256
    },
    {
      "epoch": 36.15,
      "learning_rate": 0.001,
      "loss": 2.4974,
      "step": 188268
    },
    {
      "epoch": 36.15,
      "learning_rate": 0.001,
      "loss": 2.5098,
      "step": 188280
    },
    {
      "epoch": 36.15,
      "learning_rate": 0.001,
      "loss": 2.5019,
      "step": 188292
    },
    {
      "epoch": 36.16,
      "learning_rate": 0.001,
      "loss": 2.5017,
      "step": 188304
    },
    {
      "epoch": 36.16,
      "learning_rate": 0.001,
      "loss": 2.4982,
      "step": 188316
    },
    {
      "epoch": 36.16,
      "learning_rate": 0.001,
      "loss": 2.5112,
      "step": 188328
    },
    {
      "epoch": 36.16,
      "learning_rate": 0.001,
      "loss": 2.5011,
      "step": 188340
    },
    {
      "epoch": 36.17,
      "learning_rate": 0.001,
      "loss": 2.5089,
      "step": 188352
    },
    {
      "epoch": 36.17,
      "learning_rate": 0.001,
      "loss": 2.5155,
      "step": 188364
    },
    {
      "epoch": 36.17,
      "learning_rate": 0.001,
      "loss": 2.5094,
      "step": 188376
    },
    {
      "epoch": 36.17,
      "learning_rate": 0.001,
      "loss": 2.5022,
      "step": 188388
    },
    {
      "epoch": 36.18,
      "learning_rate": 0.001,
      "loss": 2.5036,
      "step": 188400
    },
    {
      "epoch": 36.18,
      "learning_rate": 0.001,
      "loss": 2.4974,
      "step": 188412
    },
    {
      "epoch": 36.18,
      "learning_rate": 0.001,
      "loss": 2.5034,
      "step": 188424
    },
    {
      "epoch": 36.18,
      "learning_rate": 0.001,
      "loss": 2.5026,
      "step": 188436
    },
    {
      "epoch": 36.18,
      "learning_rate": 0.001,
      "loss": 2.5056,
      "step": 188448
    },
    {
      "epoch": 36.19,
      "learning_rate": 0.001,
      "loss": 2.5073,
      "step": 188460
    },
    {
      "epoch": 36.19,
      "learning_rate": 0.001,
      "loss": 2.5003,
      "step": 188472
    },
    {
      "epoch": 36.19,
      "learning_rate": 0.001,
      "loss": 2.51,
      "step": 188484
    },
    {
      "epoch": 36.19,
      "learning_rate": 0.001,
      "loss": 2.5124,
      "step": 188496
    },
    {
      "epoch": 36.2,
      "learning_rate": 0.001,
      "loss": 2.4965,
      "step": 188508
    },
    {
      "epoch": 36.2,
      "learning_rate": 0.001,
      "loss": 2.5096,
      "step": 188520
    },
    {
      "epoch": 36.2,
      "learning_rate": 0.001,
      "loss": 2.4936,
      "step": 188532
    },
    {
      "epoch": 36.2,
      "learning_rate": 0.001,
      "loss": 2.5,
      "step": 188544
    },
    {
      "epoch": 36.21,
      "learning_rate": 0.001,
      "loss": 2.5025,
      "step": 188556
    },
    {
      "epoch": 36.21,
      "learning_rate": 0.001,
      "loss": 2.5003,
      "step": 188568
    },
    {
      "epoch": 36.21,
      "learning_rate": 0.001,
      "loss": 2.503,
      "step": 188580
    },
    {
      "epoch": 36.21,
      "learning_rate": 0.001,
      "loss": 2.4987,
      "step": 188592
    },
    {
      "epoch": 36.21,
      "learning_rate": 0.001,
      "loss": 2.5062,
      "step": 188604
    },
    {
      "epoch": 36.22,
      "learning_rate": 0.001,
      "loss": 2.5054,
      "step": 188616
    },
    {
      "epoch": 36.22,
      "learning_rate": 0.001,
      "loss": 2.5043,
      "step": 188628
    },
    {
      "epoch": 36.22,
      "learning_rate": 0.001,
      "loss": 2.4962,
      "step": 188640
    },
    {
      "epoch": 36.22,
      "learning_rate": 0.001,
      "loss": 2.5071,
      "step": 188652
    },
    {
      "epoch": 36.23,
      "learning_rate": 0.001,
      "loss": 2.5136,
      "step": 188664
    },
    {
      "epoch": 36.23,
      "learning_rate": 0.001,
      "loss": 2.5091,
      "step": 188676
    },
    {
      "epoch": 36.23,
      "learning_rate": 0.001,
      "loss": 2.5005,
      "step": 188688
    },
    {
      "epoch": 36.23,
      "learning_rate": 0.001,
      "loss": 2.4998,
      "step": 188700
    },
    {
      "epoch": 36.24,
      "learning_rate": 0.001,
      "loss": 2.5134,
      "step": 188712
    },
    {
      "epoch": 36.24,
      "learning_rate": 0.001,
      "loss": 2.5077,
      "step": 188724
    },
    {
      "epoch": 36.24,
      "learning_rate": 0.001,
      "loss": 2.5066,
      "step": 188736
    },
    {
      "epoch": 36.24,
      "learning_rate": 0.001,
      "loss": 2.5074,
      "step": 188748
    },
    {
      "epoch": 36.24,
      "eval_ag_news_accuracy": 0.32878125,
      "eval_ag_news_bleu_score": 4.9238574272885,
      "eval_ag_news_bleu_score_sem": 0.1504874425649532,
      "eval_ag_news_emb_cos_sim": 0.8129588961601257,
      "eval_ag_news_emb_cos_sim_sem": 0.007488922705208432,
      "eval_ag_news_emb_top1_equal": 0.3046875,
      "eval_ag_news_emb_top1_equal_sem": 0.04084279867618665,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4919283390045166,
      "eval_ag_news_n_ngrams_match_1": 14.396,
      "eval_ag_news_n_ngrams_match_2": 3.168,
      "eval_ag_news_n_ngrams_match_3": 0.912,
      "eval_ag_news_num_pred_words": 46.464,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.84923112432568,
      "eval_ag_news_pred_num_tokens": 62.9296875,
      "eval_ag_news_rouge_score": 0.3576292001405687,
      "eval_ag_news_runtime": 25.9604,
      "eval_ag_news_samples_per_second": 19.26,
      "eval_ag_news_steps_per_second": 0.039,
      "eval_ag_news_token_set_f1": 0.3568604281270285,
      "eval_ag_news_token_set_f1_sem": 0.004526422508572503,
      "eval_ag_news_token_set_precision": 0.3437497553910095,
      "eval_ag_news_token_set_recall": 0.38662786716267344,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 188750
    },
    {
      "epoch": 36.24,
      "eval_anthropic_toxic_prompts_accuracy": 0.11684375,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1998125126454084,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12435613514143139,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6844603419303894,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.007683176871450635,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.1939547061920166,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.356,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.966,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.722,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.782,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.384671219993038,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.22081209373945576,
      "eval_anthropic_toxic_prompts_runtime": 17.5877,
      "eval_anthropic_toxic_prompts_samples_per_second": 28.429,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.057,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3585070034131307,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006516720530655714,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44482047006478886,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3269506017809585,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 188750
    },
    {
      "epoch": 36.24,
      "eval_arxiv_accuracy": 0.35125,
      "eval_arxiv_bleu_score": 4.439760727117616,
      "eval_arxiv_bleu_score_sem": 0.13083885801093512,
      "eval_arxiv_emb_cos_sim": 0.7760308980941772,
      "eval_arxiv_emb_cos_sim_sem": 0.007010127262135088,
      "eval_arxiv_emb_top1_equal": 0.328125,
      "eval_arxiv_emb_top1_equal_sem": 0.041664103776406315,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3531010150909424,
      "eval_arxiv_n_ngrams_match_1": 15.508,
      "eval_arxiv_n_ngrams_match_2": 3.024,
      "eval_arxiv_n_ngrams_match_3": 0.692,
      "eval_arxiv_num_pred_words": 40.642,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.591258238041224,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.37046020399137486,
      "eval_arxiv_runtime": 10.3045,
      "eval_arxiv_samples_per_second": 48.522,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.3615212256100413,
      "eval_arxiv_token_set_f1_sem": 0.004211571302865289,
      "eval_arxiv_token_set_precision": 0.3155036624832157,
      "eval_arxiv_token_set_recall": 0.4408505464993048,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 188750
    },
    {
      "epoch": 36.24,
      "eval_python_code_alpaca_accuracy": 0.161875,
      "eval_python_code_alpaca_bleu_score": 4.955579151969956,
      "eval_python_code_alpaca_bleu_score_sem": 0.16010913537652974,
      "eval_python_code_alpaca_emb_cos_sim": 0.771697998046875,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007489784739969381,
      "eval_python_code_alpaca_emb_top1_equal": 0.109375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.858457088470459,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.228,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.092,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.068,
      "eval_python_code_alpaca_num_pred_words": 43.306,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.434606118906725,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3494021093514086,
      "eval_python_code_alpaca_runtime": 10.4348,
      "eval_python_code_alpaca_samples_per_second": 47.917,
      "eval_python_code_alpaca_steps_per_second": 0.096,
      "eval_python_code_alpaca_token_set_f1": 0.4891469573994531,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005171675634925169,
      "eval_python_code_alpaca_token_set_precision": 0.5624987652833927,
      "eval_python_code_alpaca_token_set_recall": 0.45214848305592037,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 188750
    },
    {
      "epoch": 36.24,
      "eval_wikibio_accuracy": 0.32853125,
      "eval_wikibio_bleu_score": 6.027908547181792,
      "eval_wikibio_bleu_score_sem": 0.22614174946973598,
      "eval_wikibio_emb_cos_sim": 0.7355372905731201,
      "eval_wikibio_emb_cos_sim_sem": 0.010186502768189854,
      "eval_wikibio_emb_top1_equal": 0.234375,
      "eval_wikibio_emb_top1_equal_sem": 0.03758909358128201,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.676727294921875,
      "eval_wikibio_n_ngrams_match_1": 9.99,
      "eval_wikibio_n_ngrams_match_2": 3.356,
      "eval_wikibio_n_ngrams_match_3": 1.242,
      "eval_wikibio_num_pred_words": 35.632,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 39.51685520423793,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.35418379515377074,
      "eval_wikibio_runtime": 9.997,
      "eval_wikibio_samples_per_second": 50.015,
      "eval_wikibio_steps_per_second": 0.1,
      "eval_wikibio_token_set_f1": 0.3175005582880516,
      "eval_wikibio_token_set_f1_sem": 0.005811048717102578,
      "eval_wikibio_token_set_precision": 0.32546487710866784,
      "eval_wikibio_token_set_recall": 0.32806607835011986,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 188750
    },
    {
      "epoch": 36.24,
      "eval_nq_accuracy": 0.5355625,
      "eval_nq_bleu_score": 11.89094613002224,
      "eval_nq_bleu_score_sem": 0.4823002520918112,
      "eval_nq_emb_cos_sim": 0.8321588039398193,
      "eval_nq_emb_cos_sim_sem": 0.008446335125793457,
      "eval_nq_emb_top1_equal": 0.3046875,
      "eval_nq_emb_top1_equal_sem": 0.04084279867618665,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1361355781555176,
      "eval_nq_n_ngrams_match_1": 23.438,
      "eval_nq_n_ngrams_match_2": 8.616,
      "eval_nq_n_ngrams_match_3": 3.974,
      "eval_nq_num_pred_words": 49.038,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.466655599039921,
      "eval_nq_pred_num_tokens": 62.9921875,
      "eval_nq_rouge_score": 0.45389672735941045,
      "eval_nq_runtime": 10.3738,
      "eval_nq_samples_per_second": 48.198,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.467122751926627,
      "eval_nq_token_set_f1_sem": 0.005178249951963895,
      "eval_nq_token_set_precision": 0.4255444778971428,
      "eval_nq_token_set_recall": 0.5259872792433951,
      "eval_nq_true_num_tokens": 64.0,
      "step": 188750
    },
    {
      "epoch": 36.24,
      "learning_rate": 0.001,
      "loss": 2.5034,
      "step": 188760
    },
    {
      "epoch": 36.25,
      "learning_rate": 0.001,
      "loss": 2.4949,
      "step": 188772
    },
    {
      "epoch": 36.25,
      "learning_rate": 0.001,
      "loss": 2.5008,
      "step": 188784
    },
    {
      "epoch": 36.25,
      "learning_rate": 0.001,
      "loss": 2.5078,
      "step": 188796
    },
    {
      "epoch": 36.25,
      "learning_rate": 0.001,
      "loss": 2.5045,
      "step": 188808
    },
    {
      "epoch": 36.26,
      "learning_rate": 0.001,
      "loss": 2.5089,
      "step": 188820
    },
    {
      "epoch": 36.26,
      "learning_rate": 0.001,
      "loss": 2.5024,
      "step": 188832
    },
    {
      "epoch": 36.26,
      "learning_rate": 0.001,
      "loss": 2.5005,
      "step": 188844
    },
    {
      "epoch": 36.26,
      "learning_rate": 0.001,
      "loss": 2.5009,
      "step": 188856
    },
    {
      "epoch": 36.26,
      "learning_rate": 0.001,
      "loss": 2.5067,
      "step": 188868
    },
    {
      "epoch": 36.27,
      "learning_rate": 0.001,
      "loss": 2.4985,
      "step": 188880
    },
    {
      "epoch": 36.27,
      "learning_rate": 0.001,
      "loss": 2.5082,
      "step": 188892
    },
    {
      "epoch": 36.27,
      "learning_rate": 0.001,
      "loss": 2.4995,
      "step": 188904
    },
    {
      "epoch": 36.27,
      "learning_rate": 0.001,
      "loss": 2.5053,
      "step": 188916
    },
    {
      "epoch": 36.28,
      "learning_rate": 0.001,
      "loss": 2.5011,
      "step": 188928
    },
    {
      "epoch": 36.28,
      "learning_rate": 0.001,
      "loss": 2.503,
      "step": 188940
    },
    {
      "epoch": 36.28,
      "learning_rate": 0.001,
      "loss": 2.5032,
      "step": 188952
    },
    {
      "epoch": 36.28,
      "learning_rate": 0.001,
      "loss": 2.5133,
      "step": 188964
    },
    {
      "epoch": 36.29,
      "learning_rate": 0.001,
      "loss": 2.4994,
      "step": 188976
    },
    {
      "epoch": 36.29,
      "learning_rate": 0.001,
      "loss": 2.502,
      "step": 188988
    },
    {
      "epoch": 36.29,
      "learning_rate": 0.001,
      "loss": 2.4983,
      "step": 189000
    },
    {
      "epoch": 36.29,
      "learning_rate": 0.001,
      "loss": 2.4933,
      "step": 189012
    },
    {
      "epoch": 36.29,
      "learning_rate": 0.001,
      "loss": 2.5051,
      "step": 189024
    },
    {
      "epoch": 36.3,
      "learning_rate": 0.001,
      "loss": 2.5017,
      "step": 189036
    },
    {
      "epoch": 36.3,
      "learning_rate": 0.001,
      "loss": 2.5152,
      "step": 189048
    },
    {
      "epoch": 36.3,
      "learning_rate": 0.001,
      "loss": 2.4962,
      "step": 189060
    },
    {
      "epoch": 36.3,
      "learning_rate": 0.001,
      "loss": 2.4993,
      "step": 189072
    },
    {
      "epoch": 36.31,
      "learning_rate": 0.001,
      "loss": 2.5013,
      "step": 189084
    },
    {
      "epoch": 36.31,
      "learning_rate": 0.001,
      "loss": 2.5059,
      "step": 189096
    },
    {
      "epoch": 36.31,
      "learning_rate": 0.001,
      "loss": 2.5096,
      "step": 189108
    },
    {
      "epoch": 36.31,
      "learning_rate": 0.001,
      "loss": 2.5049,
      "step": 189120
    },
    {
      "epoch": 36.32,
      "learning_rate": 0.001,
      "loss": 2.5147,
      "step": 189132
    },
    {
      "epoch": 36.32,
      "learning_rate": 0.001,
      "loss": 2.4969,
      "step": 189144
    },
    {
      "epoch": 36.32,
      "learning_rate": 0.001,
      "loss": 2.5062,
      "step": 189156
    },
    {
      "epoch": 36.32,
      "learning_rate": 0.001,
      "loss": 2.4976,
      "step": 189168
    },
    {
      "epoch": 36.32,
      "learning_rate": 0.001,
      "loss": 2.5088,
      "step": 189180
    },
    {
      "epoch": 36.33,
      "learning_rate": 0.001,
      "loss": 2.508,
      "step": 189192
    },
    {
      "epoch": 36.33,
      "learning_rate": 0.001,
      "loss": 2.5101,
      "step": 189204
    },
    {
      "epoch": 36.33,
      "learning_rate": 0.001,
      "loss": 2.5071,
      "step": 189216
    },
    {
      "epoch": 36.33,
      "learning_rate": 0.001,
      "loss": 2.5042,
      "step": 189228
    },
    {
      "epoch": 36.34,
      "learning_rate": 0.001,
      "loss": 2.5048,
      "step": 189240
    },
    {
      "epoch": 36.34,
      "learning_rate": 0.001,
      "loss": 2.5105,
      "step": 189252
    },
    {
      "epoch": 36.34,
      "learning_rate": 0.001,
      "loss": 2.4981,
      "step": 189264
    },
    {
      "epoch": 36.34,
      "learning_rate": 0.001,
      "loss": 2.4955,
      "step": 189276
    },
    {
      "epoch": 36.35,
      "learning_rate": 0.001,
      "loss": 2.5062,
      "step": 189288
    },
    {
      "epoch": 36.35,
      "learning_rate": 0.001,
      "loss": 2.505,
      "step": 189300
    },
    {
      "epoch": 36.35,
      "learning_rate": 0.001,
      "loss": 2.5049,
      "step": 189312
    },
    {
      "epoch": 36.35,
      "learning_rate": 0.001,
      "loss": 2.5113,
      "step": 189324
    },
    {
      "epoch": 36.35,
      "learning_rate": 0.001,
      "loss": 2.5043,
      "step": 189336
    },
    {
      "epoch": 36.36,
      "learning_rate": 0.001,
      "loss": 2.5062,
      "step": 189348
    },
    {
      "epoch": 36.36,
      "learning_rate": 0.001,
      "loss": 2.5066,
      "step": 189360
    },
    {
      "epoch": 36.36,
      "learning_rate": 0.001,
      "loss": 2.512,
      "step": 189372
    },
    {
      "epoch": 36.36,
      "eval_ag_news_accuracy": 0.3275625,
      "eval_ag_news_bleu_score": 5.012270045746594,
      "eval_ag_news_bleu_score_sem": 0.1620868070547453,
      "eval_ag_news_emb_cos_sim": 0.8226636648178101,
      "eval_ag_news_emb_cos_sim_sem": 0.006504384836339599,
      "eval_ag_news_emb_top1_equal": 0.25,
      "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.477799654006958,
      "eval_ag_news_n_ngrams_match_1": 14.522,
      "eval_ag_news_n_ngrams_match_2": 3.214,
      "eval_ag_news_n_ngrams_match_3": 0.882,
      "eval_ag_news_num_pred_words": 47.096,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.38837797609544,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3610714619285975,
      "eval_ag_news_runtime": 10.3524,
      "eval_ag_news_samples_per_second": 48.298,
      "eval_ag_news_steps_per_second": 0.097,
      "eval_ag_news_token_set_f1": 0.3594347006204614,
      "eval_ag_news_token_set_f1_sem": 0.004396998571959723,
      "eval_ag_news_token_set_precision": 0.3482525184071479,
      "eval_ag_news_token_set_recall": 0.38506998092606903,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 189375
    },
    {
      "epoch": 36.36,
      "eval_anthropic_toxic_prompts_accuracy": 0.115875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1836737369434283,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12205984187178277,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6894845366477966,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00823496341215526,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.193580150604248,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.348,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.952,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.746,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.436,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.37553951540394,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21744424596820744,
      "eval_anthropic_toxic_prompts_runtime": 10.1426,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.297,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.099,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35701893695884623,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00641915069211616,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44348514081552026,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32330663031997403,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 189375
    },
    {
      "epoch": 36.36,
      "eval_arxiv_accuracy": 0.349625,
      "eval_arxiv_bleu_score": 4.359747482241014,
      "eval_arxiv_bleu_score_sem": 0.12076438492988241,
      "eval_arxiv_emb_cos_sim": 0.7828538417816162,
      "eval_arxiv_emb_cos_sim_sem": 0.00740250849487094,
      "eval_arxiv_emb_top1_equal": 0.28125,
      "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.336054801940918,
      "eval_arxiv_n_ngrams_match_1": 15.296,
      "eval_arxiv_n_ngrams_match_2": 3.092,
      "eval_arxiv_n_ngrams_match_3": 0.674,
      "eval_arxiv_num_pred_words": 40.256,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.108015982521596,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36834990281488467,
      "eval_arxiv_runtime": 10.1811,
      "eval_arxiv_samples_per_second": 49.111,
      "eval_arxiv_steps_per_second": 0.098,
      "eval_arxiv_token_set_f1": 0.36213893966739696,
      "eval_arxiv_token_set_f1_sem": 0.004160231156175127,
      "eval_arxiv_token_set_precision": 0.313709864831142,
      "eval_arxiv_token_set_recall": 0.4500851986397599,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 189375
    },
    {
      "epoch": 36.36,
      "eval_python_code_alpaca_accuracy": 0.16225,
      "eval_python_code_alpaca_bleu_score": 4.654539109037991,
      "eval_python_code_alpaca_bleu_score_sem": 0.14086512537791676,
      "eval_python_code_alpaca_emb_cos_sim": 0.772710919380188,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.006816303125456598,
      "eval_python_code_alpaca_emb_top1_equal": 0.1328125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8568575382232666,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.14,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.97,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.01,
      "eval_python_code_alpaca_num_pred_words": 44.272,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.406740882243863,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.34200476896428433,
      "eval_python_code_alpaca_runtime": 9.7461,
      "eval_python_code_alpaca_samples_per_second": 51.303,
      "eval_python_code_alpaca_steps_per_second": 0.103,
      "eval_python_code_alpaca_token_set_f1": 0.48530332167581197,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005289122832711609,
      "eval_python_code_alpaca_token_set_precision": 0.5561826087093645,
      "eval_python_code_alpaca_token_set_recall": 0.45205733189991626,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 189375
    },
    {
      "epoch": 36.36,
      "eval_wikibio_accuracy": 0.3265625,
      "eval_wikibio_bleu_score": 6.145622930661852,
      "eval_wikibio_bleu_score_sem": 0.2320444008990757,
      "eval_wikibio_emb_cos_sim": 0.7508326768875122,
      "eval_wikibio_emb_cos_sim_sem": 0.009131730469000156,
      "eval_wikibio_emb_top1_equal": 0.15625,
      "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.660064697265625,
      "eval_wikibio_n_ngrams_match_1": 10.158,
      "eval_wikibio_n_ngrams_match_2": 3.466,
      "eval_wikibio_n_ngrams_match_3": 1.3,
      "eval_wikibio_num_pred_words": 36.092,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 38.863857175288175,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36010979182625363,
      "eval_wikibio_runtime": 27.5499,
      "eval_wikibio_samples_per_second": 18.149,
      "eval_wikibio_steps_per_second": 0.036,
      "eval_wikibio_token_set_f1": 0.32075633576443446,
      "eval_wikibio_token_set_f1_sem": 0.005492914719916766,
      "eval_wikibio_token_set_precision": 0.32943771467184196,
      "eval_wikibio_token_set_recall": 0.326437197320436,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 189375
    },
    {
      "epoch": 36.36,
      "eval_nq_accuracy": 0.53471875,
      "eval_nq_bleu_score": 12.058839815591211,
      "eval_nq_bleu_score_sem": 0.4821687388836942,
      "eval_nq_emb_cos_sim": 0.8358081579208374,
      "eval_nq_emb_cos_sim_sem": 0.008123797806485237,
      "eval_nq_emb_top1_equal": 0.28125,
      "eval_nq_emb_top1_equal_sem": 0.039896367485272234,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.135204315185547,
      "eval_nq_n_ngrams_match_1": 23.682,
      "eval_nq_n_ngrams_match_2": 8.72,
      "eval_nq_n_ngrams_match_3": 4.04,
      "eval_nq_num_pred_words": 49.268,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.458774586418219,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4580690173697595,
      "eval_nq_runtime": 27.5464,
      "eval_nq_samples_per_second": 18.151,
      "eval_nq_steps_per_second": 0.036,
      "eval_nq_token_set_f1": 0.4711447883237337,
      "eval_nq_token_set_f1_sem": 0.004979566751032423,
      "eval_nq_token_set_precision": 0.4315833516092539,
      "eval_nq_token_set_recall": 0.5267383484018845,
      "eval_nq_true_num_tokens": 64.0,
      "step": 189375
    },
    {
      "epoch": 36.36,
      "learning_rate": 0.001,
      "loss": 2.5116,
      "step": 189384
    },
    {
      "epoch": 36.37,
      "learning_rate": 0.001,
      "loss": 2.502,
      "step": 189396
    },
    {
      "epoch": 36.37,
      "learning_rate": 0.001,
      "loss": 2.5149,
      "step": 189408
    },
    {
      "epoch": 36.37,
      "learning_rate": 0.001,
      "loss": 2.5086,
      "step": 189420
    },
    {
      "epoch": 36.37,
      "learning_rate": 0.001,
      "loss": 2.5042,
      "step": 189432
    },
    {
      "epoch": 36.38,
      "learning_rate": 0.001,
      "loss": 2.5099,
      "step": 189444
    },
    {
      "epoch": 36.38,
      "learning_rate": 0.001,
      "loss": 2.5007,
      "step": 189456
    },
    {
      "epoch": 36.38,
      "learning_rate": 0.001,
      "loss": 2.5036,
      "step": 189468
    },
    {
      "epoch": 36.38,
      "learning_rate": 0.001,
      "loss": 2.5078,
      "step": 189480
    },
    {
      "epoch": 36.38,
      "learning_rate": 0.001,
      "loss": 2.4987,
      "step": 189492
    },
    {
      "epoch": 36.39,
      "learning_rate": 0.001,
      "loss": 2.4949,
      "step": 189504
    },
    {
      "epoch": 36.39,
      "learning_rate": 0.001,
      "loss": 2.5013,
      "step": 189516
    },
    {
      "epoch": 36.39,
      "learning_rate": 0.001,
      "loss": 2.5056,
      "step": 189528
    },
    {
      "epoch": 36.39,
      "learning_rate": 0.001,
      "loss": 2.5063,
      "step": 189540
    },
    {
      "epoch": 36.4,
      "learning_rate": 0.001,
      "loss": 2.4983,
      "step": 189552
    },
    {
      "epoch": 36.4,
      "learning_rate": 0.001,
      "loss": 2.5043,
      "step": 189564
    },
    {
      "epoch": 36.4,
      "learning_rate": 0.001,
      "loss": 2.5006,
      "step": 189576
    },
    {
      "epoch": 36.4,
      "learning_rate": 0.001,
      "loss": 2.4997,
      "step": 189588
    },
    {
      "epoch": 36.41,
      "learning_rate": 0.001,
      "loss": 2.5068,
      "step": 189600
    },
    {
      "epoch": 36.41,
      "learning_rate": 0.001,
      "loss": 2.4994,
      "step": 189612
    },
    {
      "epoch": 36.41,
      "learning_rate": 0.001,
      "loss": 2.5047,
      "step": 189624
    },
    {
      "epoch": 36.41,
      "learning_rate": 0.001,
      "loss": 2.4994,
      "step": 189636
    },
    {
      "epoch": 36.41,
      "learning_rate": 0.001,
      "loss": 2.5098,
      "step": 189648
    },
    {
      "epoch": 36.42,
      "learning_rate": 0.001,
      "loss": 2.5064,
      "step": 189660
    },
    {
      "epoch": 36.42,
      "learning_rate": 0.001,
      "loss": 2.5046,
      "step": 189672
    },
    {
      "epoch": 36.42,
      "learning_rate": 0.001,
      "loss": 2.5021,
      "step": 189684
    },
    {
      "epoch": 36.42,
      "learning_rate": 0.001,
      "loss": 2.4965,
      "step": 189696
    },
    {
      "epoch": 36.43,
      "learning_rate": 0.001,
      "loss": 2.5092,
      "step": 189708
    },
    {
      "epoch": 36.43,
      "learning_rate": 0.001,
      "loss": 2.5095,
      "step": 189720
    },
    {
      "epoch": 36.43,
      "learning_rate": 0.001,
      "loss": 2.5084,
      "step": 189732
    },
    {
      "epoch": 36.43,
      "learning_rate": 0.001,
      "loss": 2.501,
      "step": 189744
    },
    {
      "epoch": 36.44,
      "learning_rate": 0.001,
      "loss": 2.5107,
      "step": 189756
    },
    {
      "epoch": 36.44,
      "learning_rate": 0.001,
      "loss": 2.506,
      "step": 189768
    },
    {
      "epoch": 36.44,
      "learning_rate": 0.001,
      "loss": 2.5162,
      "step": 189780
    },
    {
      "epoch": 36.44,
      "learning_rate": 0.001,
      "loss": 2.5033,
      "step": 189792
    },
    {
      "epoch": 36.44,
      "learning_rate": 0.001,
      "loss": 2.5057,
      "step": 189804
    },
    {
      "epoch": 36.45,
      "learning_rate": 0.001,
      "loss": 2.502,
      "step": 189816
    },
    {
      "epoch": 36.45,
      "learning_rate": 0.001,
      "loss": 2.5014,
      "step": 189828
    },
    {
      "epoch": 36.45,
      "learning_rate": 0.001,
      "loss": 2.4973,
      "step": 189840
    },
    {
      "epoch": 36.45,
      "learning_rate": 0.001,
      "loss": 2.5021,
      "step": 189852
    },
    {
      "epoch": 36.46,
      "learning_rate": 0.001,
      "loss": 2.5073,
      "step": 189864
    },
    {
      "epoch": 36.46,
      "learning_rate": 0.001,
      "loss": 2.4972,
      "step": 189876
    },
    {
      "epoch": 36.46,
      "learning_rate": 0.001,
      "loss": 2.5064,
      "step": 189888
    },
    {
      "epoch": 36.46,
      "learning_rate": 0.001,
      "loss": 2.5015,
      "step": 189900
    },
    {
      "epoch": 36.47,
      "learning_rate": 0.001,
      "loss": 2.5137,
      "step": 189912
    },
    {
      "epoch": 36.47,
      "learning_rate": 0.001,
      "loss": 2.4966,
      "step": 189924
    },
    {
      "epoch": 36.47,
      "learning_rate": 0.001,
      "loss": 2.5043,
      "step": 189936
    },
    {
      "epoch": 36.47,
      "learning_rate": 0.001,
      "loss": 2.507,
      "step": 189948
    },
    {
      "epoch": 36.47,
      "learning_rate": 0.001,
      "loss": 2.5023,
      "step": 189960
    },
    {
      "epoch": 36.48,
      "learning_rate": 0.001,
      "loss": 2.5074,
      "step": 189972
    },
    {
      "epoch": 36.48,
      "learning_rate": 0.001,
      "loss": 2.5055,
      "step": 189984
    },
    {
      "epoch": 36.48,
      "learning_rate": 0.001,
      "loss": 2.5131,
      "step": 189996
    },
    {
      "epoch": 36.48,
      "eval_ag_news_accuracy": 0.32859375,
      "eval_ag_news_bleu_score": 4.889326613839696,
      "eval_ag_news_bleu_score_sem": 0.150296906940068,
      "eval_ag_news_emb_cos_sim": 0.8097676038742065,
      "eval_ag_news_emb_cos_sim_sem": 0.0081402693217189,
      "eval_ag_news_emb_top1_equal": 0.2578125,
      "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.480982780456543,
      "eval_ag_news_n_ngrams_match_1": 14.2,
      "eval_ag_news_n_ngrams_match_2": 3.198,
      "eval_ag_news_n_ngrams_match_3": 0.874,
      "eval_ag_news_num_pred_words": 46.69,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.491638537213106,
      "eval_ag_news_pred_num_tokens": 62.9921875,
      "eval_ag_news_rouge_score": 0.3529091680922551,
      "eval_ag_news_runtime": 10.4144,
      "eval_ag_news_samples_per_second": 48.011,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.35523733325441376,
      "eval_ag_news_token_set_f1_sem": 0.004663478066243663,
      "eval_ag_news_token_set_precision": 0.34007220103033126,
      "eval_ag_news_token_set_recall": 0.38890395718823123,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 190000
    },
    {
      "epoch": 36.48,
      "eval_anthropic_toxic_prompts_accuracy": 0.1159375,
      "eval_anthropic_toxic_prompts_bleu_score": 3.18309835174893,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12002750812927747,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6776454448699951,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008478981811232193,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.1892497539520264,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.318,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.986,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.748,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.496,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.27021198028778,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21491906455286766,
      "eval_anthropic_toxic_prompts_runtime": 9.9927,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.036,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.1,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36750045574153034,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006832478995432572,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4443156260470236,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3403265840379068,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 190000
    },
    {
      "epoch": 36.48,
      "eval_arxiv_accuracy": 0.3525625,
      "eval_arxiv_bleu_score": 4.503671129384424,
      "eval_arxiv_bleu_score_sem": 0.13285956255150483,
      "eval_arxiv_emb_cos_sim": 0.7726466655731201,
      "eval_arxiv_emb_cos_sim_sem": 0.00748045843264733,
      "eval_arxiv_emb_top1_equal": 0.34375,
      "eval_arxiv_emb_top1_equal_sem": 0.04214578430296913,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3361704349517822,
      "eval_arxiv_n_ngrams_match_1": 15.408,
      "eval_arxiv_n_ngrams_match_2": 3.086,
      "eval_arxiv_n_ngrams_match_3": 0.726,
      "eval_arxiv_num_pred_words": 40.762,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.111266384962363,
      "eval_arxiv_pred_num_tokens": 62.8984375,
      "eval_arxiv_rouge_score": 0.36849865990809466,
      "eval_arxiv_runtime": 10.474,
      "eval_arxiv_samples_per_second": 47.737,
      "eval_arxiv_steps_per_second": 0.095,
      "eval_arxiv_token_set_f1": 0.3635061651643006,
      "eval_arxiv_token_set_f1_sem": 0.004016926991203057,
      "eval_arxiv_token_set_precision": 0.31478739769375236,
      "eval_arxiv_token_set_recall": 0.4511008585885091,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 190000
    },
    {
      "epoch": 36.48,
      "eval_python_code_alpaca_accuracy": 0.1620625,
      "eval_python_code_alpaca_bleu_score": 4.821750766070412,
      "eval_python_code_alpaca_bleu_score_sem": 0.14120862933157688,
      "eval_python_code_alpaca_emb_cos_sim": 0.7656441926956177,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007524264813318341,
      "eval_python_code_alpaca_emb_top1_equal": 0.09375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.025864720141013958,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8377509117126465,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.196,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.066,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.084,
      "eval_python_code_alpaca_num_pred_words": 44.234,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.07731392608985,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.34422328335384295,
      "eval_python_code_alpaca_runtime": 9.8998,
      "eval_python_code_alpaca_samples_per_second": 50.506,
      "eval_python_code_alpaca_steps_per_second": 0.101,
      "eval_python_code_alpaca_token_set_f1": 0.49656639966862104,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0053701727727626936,
      "eval_python_code_alpaca_token_set_precision": 0.5613374215543115,
      "eval_python_code_alpaca_token_set_recall": 0.4661012946282913,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 190000
    },
    {
      "epoch": 36.48,
      "eval_wikibio_accuracy": 0.33040625,
      "eval_wikibio_bleu_score": 5.821449665588072,
      "eval_wikibio_bleu_score_sem": 0.20508171945609774,
      "eval_wikibio_emb_cos_sim": 0.7408549189567566,
      "eval_wikibio_emb_cos_sim_sem": 0.009548500239044638,
      "eval_wikibio_emb_top1_equal": 0.2109375,
      "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.640425443649292,
      "eval_wikibio_n_ngrams_match_1": 9.918,
      "eval_wikibio_n_ngrams_match_2": 3.336,
      "eval_wikibio_n_ngrams_match_3": 1.192,
      "eval_wikibio_num_pred_words": 35.55,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 38.10804610326765,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.34884211846331337,
      "eval_wikibio_runtime": 10.2106,
      "eval_wikibio_samples_per_second": 48.969,
      "eval_wikibio_steps_per_second": 0.098,
      "eval_wikibio_token_set_f1": 0.3148316281668783,
      "eval_wikibio_token_set_f1_sem": 0.005733429946334979,
      "eval_wikibio_token_set_precision": 0.32222579695056164,
      "eval_wikibio_token_set_recall": 0.32237351169818707,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 190000
    },
    {
      "epoch": 36.48,
      "eval_nq_accuracy": 0.53625,
      "eval_nq_bleu_score": 12.089524617474838,
      "eval_nq_bleu_score_sem": 0.4878974414477028,
      "eval_nq_emb_cos_sim": 0.8411483764648438,
      "eval_nq_emb_cos_sim_sem": 0.006586143795550161,
      "eval_nq_emb_top1_equal": 0.34375,
      "eval_nq_emb_top1_equal_sem": 0.04214578430296913,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1337051391601562,
      "eval_nq_n_ngrams_match_1": 23.602,
      "eval_nq_n_ngrams_match_2": 8.768,
      "eval_nq_n_ngrams_match_3": 4.068,
      "eval_nq_num_pred_words": 49.082,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.44610289527519,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45755382537163897,
      "eval_nq_runtime": 11.0329,
      "eval_nq_samples_per_second": 45.319,
      "eval_nq_steps_per_second": 0.091,
      "eval_nq_token_set_f1": 0.471376407837602,
      "eval_nq_token_set_f1_sem": 0.004911739546606772,
      "eval_nq_token_set_precision": 0.4310105004624452,
      "eval_nq_token_set_recall": 0.5276862723635517,
      "eval_nq_true_num_tokens": 64.0,
      "step": 190000
    },
    {
      "epoch": 36.48,
      "learning_rate": 0.001,
      "loss": 2.5058,
      "step": 190008
    },
    {
      "epoch": 36.49,
      "learning_rate": 0.001,
      "loss": 2.5008,
      "step": 190020
    },
    {
      "epoch": 36.49,
      "learning_rate": 0.001,
      "loss": 2.4997,
      "step": 190032
    },
    {
      "epoch": 36.49,
      "learning_rate": 0.001,
      "loss": 2.5071,
      "step": 190044
    },
    {
      "epoch": 36.49,
      "learning_rate": 0.001,
      "loss": 2.4956,
      "step": 190056
    },
    {
      "epoch": 36.5,
      "learning_rate": 0.001,
      "loss": 2.4949,
      "step": 190068
    },
    {
      "epoch": 36.5,
      "learning_rate": 0.001,
      "loss": 2.4993,
      "step": 190080
    },
    {
      "epoch": 36.5,
      "learning_rate": 0.001,
      "loss": 2.5038,
      "step": 190092
    },
    {
      "epoch": 36.5,
      "learning_rate": 0.001,
      "loss": 2.5111,
      "step": 190104
    },
    {
      "epoch": 36.5,
      "learning_rate": 0.001,
      "loss": 2.5033,
      "step": 190116
    },
    {
      "epoch": 36.51,
      "learning_rate": 0.001,
      "loss": 2.5079,
      "step": 190128
    },
    {
      "epoch": 36.51,
      "learning_rate": 0.001,
      "loss": 2.5094,
      "step": 190140
    },
    {
      "epoch": 36.51,
      "learning_rate": 0.001,
      "loss": 2.5176,
      "step": 190152
    },
    {
      "epoch": 36.51,
      "learning_rate": 0.001,
      "loss": 2.5002,
      "step": 190164
    },
    {
      "epoch": 36.52,
      "learning_rate": 0.001,
      "loss": 2.5027,
      "step": 190176
    },
    {
      "epoch": 36.52,
      "learning_rate": 0.001,
      "loss": 2.5072,
      "step": 190188
    },
    {
      "epoch": 36.52,
      "learning_rate": 0.001,
      "loss": 2.5118,
      "step": 190200
    },
    {
      "epoch": 36.52,
      "learning_rate": 0.001,
      "loss": 2.5076,
      "step": 190212
    },
    {
      "epoch": 36.53,
      "learning_rate": 0.001,
      "loss": 2.5137,
      "step": 190224
    },
    {
      "epoch": 36.53,
      "learning_rate": 0.001,
      "loss": 2.5113,
      "step": 190236
    },
    {
      "epoch": 36.53,
      "learning_rate": 0.001,
      "loss": 2.4984,
      "step": 190248
    },
    {
      "epoch": 36.53,
      "learning_rate": 0.001,
      "loss": 2.5028,
      "step": 190260
    },
    {
      "epoch": 36.53,
      "learning_rate": 0.001,
      "loss": 2.4967,
      "step": 190272
    },
    {
      "epoch": 36.54,
      "learning_rate": 0.001,
      "loss": 2.4955,
      "step": 190284
    },
    {
      "epoch": 36.54,
      "learning_rate": 0.001,
      "loss": 2.5166,
      "step": 190296
    },
    {
      "epoch": 36.54,
      "learning_rate": 0.001,
      "loss": 2.511,
      "step": 190308
    },
    {
      "epoch": 36.54,
      "learning_rate": 0.001,
      "loss": 2.4976,
      "step": 190320
    },
    {
      "epoch": 36.55,
      "learning_rate": 0.001,
      "loss": 2.4976,
      "step": 190332
    },
    {
      "epoch": 36.55,
      "learning_rate": 0.001,
      "loss": 2.5017,
      "step": 190344
    },
    {
      "epoch": 36.55,
      "learning_rate": 0.001,
      "loss": 2.512,
      "step": 190356
    },
    {
      "epoch": 36.55,
      "learning_rate": 0.001,
      "loss": 2.499,
      "step": 190368
    },
    {
      "epoch": 36.56,
      "learning_rate": 0.001,
      "loss": 2.499,
      "step": 190380
    },
    {
      "epoch": 36.56,
      "learning_rate": 0.001,
      "loss": 2.5088,
      "step": 190392
    },
    {
      "epoch": 36.56,
      "learning_rate": 0.001,
      "loss": 2.5059,
      "step": 190404
    },
    {
      "epoch": 36.56,
      "learning_rate": 0.001,
      "loss": 2.4955,
      "step": 190416
    },
    {
      "epoch": 36.56,
      "learning_rate": 0.001,
      "loss": 2.5054,
      "step": 190428
    },
    {
      "epoch": 36.57,
      "learning_rate": 0.001,
      "loss": 2.5039,
      "step": 190440
    },
    {
      "epoch": 36.57,
      "learning_rate": 0.001,
      "loss": 2.4957,
      "step": 190452
    },
    {
      "epoch": 36.57,
      "learning_rate": 0.001,
      "loss": 2.5118,
      "step": 190464
    },
    {
      "epoch": 36.57,
      "learning_rate": 0.001,
      "loss": 2.4985,
      "step": 190476
    },
    {
      "epoch": 36.58,
      "learning_rate": 0.001,
      "loss": 2.5052,
      "step": 190488
    },
    {
      "epoch": 36.58,
      "learning_rate": 0.001,
      "loss": 2.5102,
      "step": 190500
    },
    {
      "epoch": 36.58,
      "learning_rate": 0.001,
      "loss": 2.5113,
      "step": 190512
    },
    {
      "epoch": 36.58,
      "learning_rate": 0.001,
      "loss": 2.4973,
      "step": 190524
    },
    {
      "epoch": 36.59,
      "learning_rate": 0.001,
      "loss": 2.5114,
      "step": 190536
    },
    {
      "epoch": 36.59,
      "learning_rate": 0.001,
      "loss": 2.4908,
      "step": 190548
    },
    {
      "epoch": 36.59,
      "learning_rate": 0.001,
      "loss": 2.502,
      "step": 190560
    },
    {
      "epoch": 36.59,
      "learning_rate": 0.001,
      "loss": 2.5057,
      "step": 190572
    },
    {
      "epoch": 36.59,
      "learning_rate": 0.001,
      "loss": 2.5068,
      "step": 190584
    },
    {
      "epoch": 36.6,
      "learning_rate": 0.001,
      "loss": 2.5031,
      "step": 190596
    },
    {
      "epoch": 36.6,
      "learning_rate": 0.001,
      "loss": 2.4883,
      "step": 190608
    },
    {
      "epoch": 36.6,
      "learning_rate": 0.001,
      "loss": 2.5021,
      "step": 190620
    },
    {
      "epoch": 36.6,
      "eval_ag_news_accuracy": 0.32790625,
      "eval_ag_news_bleu_score": 4.970183054805225,
      "eval_ag_news_bleu_score_sem": 0.15658988378414648,
      "eval_ag_news_emb_cos_sim": 0.8129646182060242,
      "eval_ag_news_emb_cos_sim_sem": 0.008132920623554321,
      "eval_ag_news_emb_top1_equal": 0.265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.488006830215454,
      "eval_ag_news_n_ngrams_match_1": 14.302,
      "eval_ag_news_n_ngrams_match_2": 3.23,
      "eval_ag_news_n_ngrams_match_3": 0.95,
      "eval_ag_news_num_pred_words": 46.01,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.720664827449426,
      "eval_ag_news_pred_num_tokens": 62.9921875,
      "eval_ag_news_rouge_score": 0.35718559401245165,
      "eval_ag_news_runtime": 10.6665,
      "eval_ag_news_samples_per_second": 46.876,
      "eval_ag_news_steps_per_second": 0.094,
      "eval_ag_news_token_set_f1": 0.35591973392216913,
      "eval_ag_news_token_set_f1_sem": 0.004579572732854059,
      "eval_ag_news_token_set_precision": 0.3409783392758831,
      "eval_ag_news_token_set_recall": 0.388332797829414,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 190625
    },
    {
      "epoch": 36.6,
      "eval_anthropic_toxic_prompts_accuracy": 0.11740625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2230847961552382,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12441745931148382,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6794794797897339,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.007912346408961574,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.187584161758423,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.202,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.932,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.73,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.66,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.229821351172216,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21566638527432538,
      "eval_anthropic_toxic_prompts_runtime": 10.3769,
      "eval_anthropic_toxic_prompts_samples_per_second": 48.184,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.096,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35942395714194436,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00662595840290432,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.438351593603664,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3316988295536793,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 190625
    },
    {
      "epoch": 36.6,
      "eval_arxiv_accuracy": 0.3519375,
      "eval_arxiv_bleu_score": 4.529135521041015,
      "eval_arxiv_bleu_score_sem": 0.13090129068801212,
      "eval_arxiv_emb_cos_sim": 0.7814404368400574,
      "eval_arxiv_emb_cos_sim_sem": 0.006839354158869517,
      "eval_arxiv_emb_top1_equal": 0.2890625,
      "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3420345783233643,
      "eval_arxiv_n_ngrams_match_1": 15.472,
      "eval_arxiv_n_ngrams_match_2": 3.046,
      "eval_arxiv_n_ngrams_match_3": 0.722,
      "eval_arxiv_num_pred_words": 40.864,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.276599175202673,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3688866299810071,
      "eval_arxiv_runtime": 10.2496,
      "eval_arxiv_samples_per_second": 48.783,
      "eval_arxiv_steps_per_second": 0.098,
      "eval_arxiv_token_set_f1": 0.36288703756801005,
      "eval_arxiv_token_set_f1_sem": 0.004146852334698222,
      "eval_arxiv_token_set_precision": 0.3150340745764005,
      "eval_arxiv_token_set_recall": 0.44637937930033084,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 190625
    },
    {
      "epoch": 36.6,
      "eval_python_code_alpaca_accuracy": 0.1643125,
      "eval_python_code_alpaca_bleu_score": 4.845545085774176,
      "eval_python_code_alpaca_bleu_score_sem": 0.15143531997666987,
      "eval_python_code_alpaca_emb_cos_sim": 0.7690310478210449,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007414402467229616,
      "eval_python_code_alpaca_emb_top1_equal": 0.171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03347745514062371,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8400282859802246,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.096,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.102,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.118,
      "eval_python_code_alpaca_num_pred_words": 44.636,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.11624968019859,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33805627720845466,
      "eval_python_code_alpaca_runtime": 10.1755,
      "eval_python_code_alpaca_samples_per_second": 49.138,
      "eval_python_code_alpaca_steps_per_second": 0.098,
      "eval_python_code_alpaca_token_set_f1": 0.48657765601849395,
      "eval_python_code_alpaca_token_set_f1_sem": 0.00544803390017607,
      "eval_python_code_alpaca_token_set_precision": 0.5525007636804988,
      "eval_python_code_alpaca_token_set_recall": 0.45726584468655856,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 190625
    },
    {
      "epoch": 36.6,
      "eval_wikibio_accuracy": 0.33078125,
      "eval_wikibio_bleu_score": 6.05024393516826,
      "eval_wikibio_bleu_score_sem": 0.22924731291614986,
      "eval_wikibio_emb_cos_sim": 0.7359403967857361,
      "eval_wikibio_emb_cos_sim_sem": 0.010322207717860837,
      "eval_wikibio_emb_top1_equal": 0.2109375,
      "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6080996990203857,
      "eval_wikibio_n_ngrams_match_1": 9.936,
      "eval_wikibio_n_ngrams_match_2": 3.42,
      "eval_wikibio_n_ngrams_match_3": 1.242,
      "eval_wikibio_num_pred_words": 35.516,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 36.89587289105412,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3531432564053888,
      "eval_wikibio_runtime": 10.6937,
      "eval_wikibio_samples_per_second": 46.756,
      "eval_wikibio_steps_per_second": 0.094,
      "eval_wikibio_token_set_f1": 0.31618956456395764,
      "eval_wikibio_token_set_f1_sem": 0.0057675730544714515,
      "eval_wikibio_token_set_precision": 0.3214490436287887,
      "eval_wikibio_token_set_recall": 0.331749318634022,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 190625
    },
    {
      "epoch": 36.6,
      "eval_nq_accuracy": 0.53615625,
      "eval_nq_bleu_score": 12.209789525909452,
      "eval_nq_bleu_score_sem": 0.49577106116458447,
      "eval_nq_emb_cos_sim": 0.8389476537704468,
      "eval_nq_emb_cos_sim_sem": 0.006790953113166311,
      "eval_nq_emb_top1_equal": 0.328125,
      "eval_nq_emb_top1_equal_sem": 0.041664103776406315,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.127750873565674,
      "eval_nq_n_ngrams_match_1": 23.538,
      "eval_nq_n_ngrams_match_2": 8.668,
      "eval_nq_n_ngrams_match_3": 4.106,
      "eval_nq_num_pred_words": 49.03,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.39596197970087,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4577853482780898,
      "eval_nq_runtime": 10.7903,
      "eval_nq_samples_per_second": 46.338,
      "eval_nq_steps_per_second": 0.093,
      "eval_nq_token_set_f1": 0.4689916771684365,
      "eval_nq_token_set_f1_sem": 0.0048284255827953145,
      "eval_nq_token_set_precision": 0.4277415814122987,
      "eval_nq_token_set_recall": 0.5269846898422346,
      "eval_nq_true_num_tokens": 64.0,
      "step": 190625
    },
    {
      "epoch": 36.6,
      "learning_rate": 0.001,
      "loss": 2.5011,
      "step": 190632
    },
    {
      "epoch": 36.61,
      "learning_rate": 0.001,
      "loss": 2.505,
      "step": 190644
    },
    {
      "epoch": 36.61,
      "learning_rate": 0.001,
      "loss": 2.5075,
      "step": 190656
    },
    {
      "epoch": 36.61,
      "learning_rate": 0.001,
      "loss": 2.5063,
      "step": 190668
    },
    {
      "epoch": 36.61,
      "learning_rate": 0.001,
      "loss": 2.5009,
      "step": 190680
    },
    {
      "epoch": 36.62,
      "learning_rate": 0.001,
      "loss": 2.5102,
      "step": 190692
    },
    {
      "epoch": 36.62,
      "learning_rate": 0.001,
      "loss": 2.5031,
      "step": 190704
    },
    {
      "epoch": 36.62,
      "learning_rate": 0.001,
      "loss": 2.5028,
      "step": 190716
    },
    {
      "epoch": 36.62,
      "learning_rate": 0.001,
      "loss": 2.5047,
      "step": 190728
    },
    {
      "epoch": 36.62,
      "learning_rate": 0.001,
      "loss": 2.515,
      "step": 190740
    },
    {
      "epoch": 36.63,
      "learning_rate": 0.001,
      "loss": 2.506,
      "step": 190752
    },
    {
      "epoch": 36.63,
      "learning_rate": 0.001,
      "loss": 2.5084,
      "step": 190764
    },
    {
      "epoch": 36.63,
      "learning_rate": 0.001,
      "loss": 2.5072,
      "step": 190776
    },
    {
      "epoch": 36.63,
      "learning_rate": 0.001,
      "loss": 2.5179,
      "step": 190788
    },
    {
      "epoch": 36.64,
      "learning_rate": 0.001,
      "loss": 2.5095,
      "step": 190800
    },
    {
      "epoch": 36.64,
      "learning_rate": 0.001,
      "loss": 2.5059,
      "step": 190812
    },
    {
      "epoch": 36.64,
      "learning_rate": 0.001,
      "loss": 2.5039,
      "step": 190824
    },
    {
      "epoch": 36.64,
      "learning_rate": 0.001,
      "loss": 2.5009,
      "step": 190836
    },
    {
      "epoch": 36.65,
      "learning_rate": 0.001,
      "loss": 2.5004,
      "step": 190848
    },
    {
      "epoch": 36.65,
      "learning_rate": 0.001,
      "loss": 2.4976,
      "step": 190860
    },
    {
      "epoch": 36.65,
      "learning_rate": 0.001,
      "loss": 2.5066,
      "step": 190872
    },
    {
      "epoch": 36.65,
      "learning_rate": 0.001,
      "loss": 2.5023,
      "step": 190884
    },
    {
      "epoch": 36.65,
      "learning_rate": 0.001,
      "loss": 2.5059,
      "step": 190896
    },
    {
      "epoch": 36.66,
      "learning_rate": 0.001,
      "loss": 2.5148,
      "step": 190908
    },
    {
      "epoch": 36.66,
      "learning_rate": 0.001,
      "loss": 2.51,
      "step": 190920
    },
    {
      "epoch": 36.66,
      "learning_rate": 0.001,
      "loss": 2.5055,
      "step": 190932
    },
    {
      "epoch": 36.66,
      "learning_rate": 0.001,
      "loss": 2.4991,
      "step": 190944
    },
    {
      "epoch": 36.67,
      "learning_rate": 0.001,
      "loss": 2.5109,
      "step": 190956
    },
    {
      "epoch": 36.67,
      "learning_rate": 0.001,
      "loss": 2.5136,
      "step": 190968
    },
    {
      "epoch": 36.67,
      "learning_rate": 0.001,
      "loss": 2.5048,
      "step": 190980
    },
    {
      "epoch": 36.67,
      "learning_rate": 0.001,
      "loss": 2.5029,
      "step": 190992
    },
    {
      "epoch": 36.68,
      "learning_rate": 0.001,
      "loss": 2.5135,
      "step": 191004
    },
    {
      "epoch": 36.68,
      "learning_rate": 0.001,
      "loss": 2.5013,
      "step": 191016
    },
    {
      "epoch": 36.68,
      "learning_rate": 0.001,
      "loss": 2.498,
      "step": 191028
    },
    {
      "epoch": 36.68,
      "learning_rate": 0.001,
      "loss": 2.505,
      "step": 191040
    },
    {
      "epoch": 36.68,
      "learning_rate": 0.001,
      "loss": 2.4917,
      "step": 191052
    },
    {
      "epoch": 36.69,
      "learning_rate": 0.001,
      "loss": 2.5092,
      "step": 191064
    },
    {
      "epoch": 36.69,
      "learning_rate": 0.001,
      "loss": 2.5012,
      "step": 191076
    },
    {
      "epoch": 36.69,
      "learning_rate": 0.001,
      "loss": 2.5096,
      "step": 191088
    },
    {
      "epoch": 36.69,
      "learning_rate": 0.001,
      "loss": 2.5197,
      "step": 191100
    },
    {
      "epoch": 36.7,
      "learning_rate": 0.001,
      "loss": 2.5072,
      "step": 191112
    },
    {
      "epoch": 36.7,
      "learning_rate": 0.001,
      "loss": 2.5024,
      "step": 191124
    },
    {
      "epoch": 36.7,
      "learning_rate": 0.001,
      "loss": 2.4972,
      "step": 191136
    },
    {
      "epoch": 36.7,
      "learning_rate": 0.001,
      "loss": 2.5127,
      "step": 191148
    },
    {
      "epoch": 36.71,
      "learning_rate": 0.001,
      "loss": 2.5033,
      "step": 191160
    },
    {
      "epoch": 36.71,
      "learning_rate": 0.001,
      "loss": 2.5078,
      "step": 191172
    },
    {
      "epoch": 36.71,
      "learning_rate": 0.001,
      "loss": 2.5068,
      "step": 191184
    },
    {
      "epoch": 36.71,
      "learning_rate": 0.001,
      "loss": 2.5002,
      "step": 191196
    },
    {
      "epoch": 36.71,
      "learning_rate": 0.001,
      "loss": 2.5118,
      "step": 191208
    },
    {
      "epoch": 36.72,
      "learning_rate": 0.001,
      "loss": 2.5038,
      "step": 191220
    },
    {
      "epoch": 36.72,
      "learning_rate": 0.001,
      "loss": 2.5005,
      "step": 191232
    },
    {
      "epoch": 36.72,
      "learning_rate": 0.001,
      "loss": 2.509,
      "step": 191244
    },
    {
      "epoch": 36.72,
      "eval_ag_news_accuracy": 0.32865625,
      "eval_ag_news_bleu_score": 4.874845444214836,
      "eval_ag_news_bleu_score_sem": 0.15551308839001637,
      "eval_ag_news_emb_cos_sim": 0.821132481098175,
      "eval_ag_news_emb_cos_sim_sem": 0.006658413128400693,
      "eval_ag_news_emb_top1_equal": 0.21875,
      "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4878122806549072,
      "eval_ag_news_n_ngrams_match_1": 14.42,
      "eval_ag_news_n_ngrams_match_2": 3.15,
      "eval_ag_news_n_ngrams_match_3": 0.88,
      "eval_ag_news_num_pred_words": 46.766,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.7142996556772,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3569458635557363,
      "eval_ag_news_runtime": 10.5645,
      "eval_ag_news_samples_per_second": 47.328,
      "eval_ag_news_steps_per_second": 0.095,
      "eval_ag_news_token_set_f1": 0.357107765394553,
      "eval_ag_news_token_set_f1_sem": 0.0044922367423741024,
      "eval_ag_news_token_set_precision": 0.34201986581189936,
      "eval_ag_news_token_set_recall": 0.38759391638912366,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 191250
    },
    {
      "epoch": 36.72,
      "eval_anthropic_toxic_prompts_accuracy": 0.1169375,
      "eval_anthropic_toxic_prompts_bleu_score": 3.3454322364822757,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12618890293920984,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6851115822792053,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008640836246226153,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2210209369659424,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.26,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.028,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.794,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.476,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.053685362417614,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21500217716272796,
      "eval_anthropic_toxic_prompts_runtime": 9.7647,
      "eval_anthropic_toxic_prompts_samples_per_second": 51.205,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.102,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3569594211281399,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00653108879760226,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4398992791319905,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32468329980141386,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 191250
    },
    {
      "epoch": 36.72,
      "eval_arxiv_accuracy": 0.35284375,
      "eval_arxiv_bleu_score": 4.456663465691136,
      "eval_arxiv_bleu_score_sem": 0.12454161831872101,
      "eval_arxiv_emb_cos_sim": 0.7780709266662598,
      "eval_arxiv_emb_cos_sim_sem": 0.006981997533476606,
      "eval_arxiv_emb_top1_equal": 0.2890625,
      "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3266918659210205,
      "eval_arxiv_n_ngrams_match_1": 15.616,
      "eval_arxiv_n_ngrams_match_2": 3.09,
      "eval_arxiv_n_ngrams_match_3": 0.654,
      "eval_arxiv_num_pred_words": 40.876,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.8460706296287,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3723113187218906,
      "eval_arxiv_runtime": 10.5588,
      "eval_arxiv_samples_per_second": 47.354,
      "eval_arxiv_steps_per_second": 0.095,
      "eval_arxiv_token_set_f1": 0.36602190801024914,
      "eval_arxiv_token_set_f1_sem": 0.004233314161744006,
      "eval_arxiv_token_set_precision": 0.31895854846175137,
      "eval_arxiv_token_set_recall": 0.4479607899479252,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 191250
    },
    {
      "epoch": 36.72,
      "eval_python_code_alpaca_accuracy": 0.162,
      "eval_python_code_alpaca_bleu_score": 4.869967117063928,
      "eval_python_code_alpaca_bleu_score_sem": 0.14995947754495137,
      "eval_python_code_alpaca_emb_cos_sim": 0.7709345817565918,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007327043904067676,
      "eval_python_code_alpaca_emb_top1_equal": 0.140625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8595130443573,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.962,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.022,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.068,
      "eval_python_code_alpaca_num_pred_words": 43.068,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.45302601746131,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.34106871123170546,
      "eval_python_code_alpaca_runtime": 9.8012,
      "eval_python_code_alpaca_samples_per_second": 51.014,
      "eval_python_code_alpaca_steps_per_second": 0.102,
      "eval_python_code_alpaca_token_set_f1": 0.48344841239500697,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005146199673156121,
      "eval_python_code_alpaca_token_set_precision": 0.545758760293622,
      "eval_python_code_alpaca_token_set_recall": 0.45612336885109833,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 191250
    },
    {
      "epoch": 36.72,
      "eval_wikibio_accuracy": 0.330875,
      "eval_wikibio_bleu_score": 5.8798543960203595,
      "eval_wikibio_bleu_score_sem": 0.22478571728584212,
      "eval_wikibio_emb_cos_sim": 0.7380439043045044,
      "eval_wikibio_emb_cos_sim_sem": 0.009970870271585008,
      "eval_wikibio_emb_top1_equal": 0.2109375,
      "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6549746990203857,
      "eval_wikibio_n_ngrams_match_1": 9.676,
      "eval_wikibio_n_ngrams_match_2": 3.294,
      "eval_wikibio_n_ngrams_match_3": 1.254,
      "eval_wikibio_num_pred_words": 35.27,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 38.66654280137412,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3423513361375374,
      "eval_wikibio_runtime": 10.6715,
      "eval_wikibio_samples_per_second": 46.854,
      "eval_wikibio_steps_per_second": 0.094,
      "eval_wikibio_token_set_f1": 0.3084683876421554,
      "eval_wikibio_token_set_f1_sem": 0.006050335069347697,
      "eval_wikibio_token_set_precision": 0.31470856913338124,
      "eval_wikibio_token_set_recall": 0.3248336981986287,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 191250
    },
    {
      "epoch": 36.72,
      "eval_nq_accuracy": 0.5354375,
      "eval_nq_bleu_score": 12.210826306975214,
      "eval_nq_bleu_score_sem": 0.48720291355776335,
      "eval_nq_emb_cos_sim": 0.8385281562805176,
      "eval_nq_emb_cos_sim_sem": 0.006453378588562813,
      "eval_nq_emb_top1_equal": 0.2890625,
      "eval_nq_emb_top1_equal_sem": 0.04022626667363519,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.132600784301758,
      "eval_nq_n_ngrams_match_1": 23.548,
      "eval_nq_n_ngrams_match_2": 8.74,
      "eval_nq_n_ngrams_match_3": 4.092,
      "eval_nq_num_pred_words": 49.204,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.436780549044897,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45753519315122376,
      "eval_nq_runtime": 10.3536,
      "eval_nq_samples_per_second": 48.292,
      "eval_nq_steps_per_second": 0.097,
      "eval_nq_token_set_f1": 0.4719651167065373,
      "eval_nq_token_set_f1_sem": 0.004741203260339542,
      "eval_nq_token_set_precision": 0.4287682934356082,
      "eval_nq_token_set_recall": 0.5328991235947066,
      "eval_nq_true_num_tokens": 64.0,
      "step": 191250
    },
    {
      "epoch": 36.72,
      "learning_rate": 0.001,
      "loss": 2.505,
      "step": 191256
    },
    {
      "epoch": 36.73,
      "learning_rate": 0.001,
      "loss": 2.5077,
      "step": 191268
    },
    {
      "epoch": 36.73,
      "learning_rate": 0.001,
      "loss": 2.5083,
      "step": 191280
    },
    {
      "epoch": 36.73,
      "learning_rate": 0.001,
      "loss": 2.5048,
      "step": 191292
    },
    {
      "epoch": 36.73,
      "learning_rate": 0.001,
      "loss": 2.5143,
      "step": 191304
    },
    {
      "epoch": 36.74,
      "learning_rate": 0.001,
      "loss": 2.5018,
      "step": 191316
    },
    {
      "epoch": 36.74,
      "learning_rate": 0.001,
      "loss": 2.5047,
      "step": 191328
    },
    {
      "epoch": 36.74,
      "learning_rate": 0.001,
      "loss": 2.5131,
      "step": 191340
    },
    {
      "epoch": 36.74,
      "learning_rate": 0.001,
      "loss": 2.5055,
      "step": 191352
    },
    {
      "epoch": 36.74,
      "learning_rate": 0.001,
      "loss": 2.5002,
      "step": 191364
    },
    {
      "epoch": 36.75,
      "learning_rate": 0.001,
      "loss": 2.5028,
      "step": 191376
    },
    {
      "epoch": 36.75,
      "learning_rate": 0.001,
      "loss": 2.5138,
      "step": 191388
    },
    {
      "epoch": 36.75,
      "learning_rate": 0.001,
      "loss": 2.5111,
      "step": 191400
    },
    {
      "epoch": 36.75,
      "learning_rate": 0.001,
      "loss": 2.5118,
      "step": 191412
    },
    {
      "epoch": 36.76,
      "learning_rate": 0.001,
      "loss": 2.5062,
      "step": 191424
    },
    {
      "epoch": 36.76,
      "learning_rate": 0.001,
      "loss": 2.5085,
      "step": 191436
    },
    {
      "epoch": 36.76,
      "learning_rate": 0.001,
      "loss": 2.5141,
      "step": 191448
    },
    {
      "epoch": 36.76,
      "learning_rate": 0.001,
      "loss": 2.5111,
      "step": 191460
    },
    {
      "epoch": 36.76,
      "learning_rate": 0.001,
      "loss": 2.5034,
      "step": 191472
    },
    {
      "epoch": 36.77,
      "learning_rate": 0.001,
      "loss": 2.5162,
      "step": 191484
    },
    {
      "epoch": 36.77,
      "learning_rate": 0.001,
      "loss": 2.5075,
      "step": 191496
    },
    {
      "epoch": 36.77,
      "learning_rate": 0.001,
      "loss": 2.5053,
      "step": 191508
    },
    {
      "epoch": 36.77,
      "learning_rate": 0.001,
      "loss": 2.5001,
      "step": 191520
    },
    {
      "epoch": 36.78,
      "learning_rate": 0.001,
      "loss": 2.5056,
      "step": 191532
    },
    {
      "epoch": 36.78,
      "learning_rate": 0.001,
      "loss": 2.5048,
      "step": 191544
    },
    {
      "epoch": 36.78,
      "learning_rate": 0.001,
      "loss": 2.5063,
      "step": 191556
    },
    {
      "epoch": 36.78,
      "learning_rate": 0.001,
      "loss": 2.5027,
      "step": 191568
    },
    {
      "epoch": 36.79,
      "learning_rate": 0.001,
      "loss": 2.5108,
      "step": 191580
    },
    {
      "epoch": 36.79,
      "learning_rate": 0.001,
      "loss": 2.5013,
      "step": 191592
    },
    {
      "epoch": 36.79,
      "learning_rate": 0.001,
      "loss": 2.5128,
      "step": 191604
    },
    {
      "epoch": 36.79,
      "learning_rate": 0.001,
      "loss": 2.5106,
      "step": 191616
    },
    {
      "epoch": 36.79,
      "learning_rate": 0.001,
      "loss": 2.504,
      "step": 191628
    },
    {
      "epoch": 36.8,
      "learning_rate": 0.001,
      "loss": 2.5169,
      "step": 191640
    },
    {
      "epoch": 36.8,
      "learning_rate": 0.001,
      "loss": 2.5079,
      "step": 191652
    },
    {
      "epoch": 36.8,
      "learning_rate": 0.001,
      "loss": 2.5151,
      "step": 191664
    },
    {
      "epoch": 36.8,
      "learning_rate": 0.001,
      "loss": 2.498,
      "step": 191676
    },
    {
      "epoch": 36.81,
      "learning_rate": 0.001,
      "loss": 2.5005,
      "step": 191688
    },
    {
      "epoch": 36.81,
      "learning_rate": 0.001,
      "loss": 2.5118,
      "step": 191700
    },
    {
      "epoch": 36.81,
      "learning_rate": 0.001,
      "loss": 2.5143,
      "step": 191712
    },
    {
      "epoch": 36.81,
      "learning_rate": 0.001,
      "loss": 2.5119,
      "step": 191724
    },
    {
      "epoch": 36.82,
      "learning_rate": 0.001,
      "loss": 2.5034,
      "step": 191736
    },
    {
      "epoch": 36.82,
      "learning_rate": 0.001,
      "loss": 2.5022,
      "step": 191748
    },
    {
      "epoch": 36.82,
      "learning_rate": 0.001,
      "loss": 2.5034,
      "step": 191760
    },
    {
      "epoch": 36.82,
      "learning_rate": 0.001,
      "loss": 2.5021,
      "step": 191772
    },
    {
      "epoch": 36.82,
      "learning_rate": 0.001,
      "loss": 2.5109,
      "step": 191784
    },
    {
      "epoch": 36.83,
      "learning_rate": 0.001,
      "loss": 2.52,
      "step": 191796
    },
    {
      "epoch": 36.83,
      "learning_rate": 0.001,
      "loss": 2.5066,
      "step": 191808
    },
    {
      "epoch": 36.83,
      "learning_rate": 0.001,
      "loss": 2.5049,
      "step": 191820
    },
    {
      "epoch": 36.83,
      "learning_rate": 0.001,
      "loss": 2.4984,
      "step": 191832
    },
    {
      "epoch": 36.84,
      "learning_rate": 0.001,
      "loss": 2.5128,
      "step": 191844
    },
    {
      "epoch": 36.84,
      "learning_rate": 0.001,
      "loss": 2.5007,
      "step": 191856
    },
    {
      "epoch": 36.84,
      "learning_rate": 0.001,
      "loss": 2.5065,
      "step": 191868
    },
    {
      "epoch": 36.84,
      "eval_ag_news_accuracy": 0.32890625,
      "eval_ag_news_bleu_score": 4.961828104843212,
      "eval_ag_news_bleu_score_sem": 0.16032314557285152,
      "eval_ag_news_emb_cos_sim": 0.8242726922035217,
      "eval_ag_news_emb_cos_sim_sem": 0.006349109260980386,
      "eval_ag_news_emb_top1_equal": 0.234375,
      "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4847490787506104,
      "eval_ag_news_n_ngrams_match_1": 14.4,
      "eval_ag_news_n_ngrams_match_2": 3.188,
      "eval_ag_news_n_ngrams_match_3": 0.904,
      "eval_ag_news_num_pred_words": 46.562,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.614242476583044,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3599661587819324,
      "eval_ag_news_runtime": 11.0218,
      "eval_ag_news_samples_per_second": 45.365,
      "eval_ag_news_steps_per_second": 0.091,
      "eval_ag_news_token_set_f1": 0.3626752166450062,
      "eval_ag_news_token_set_f1_sem": 0.004522312777982414,
      "eval_ag_news_token_set_precision": 0.34749341850621096,
      "eval_ag_news_token_set_recall": 0.3957483171577414,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 191875
    },
    {
      "epoch": 36.84,
      "eval_anthropic_toxic_prompts_accuracy": 0.11709375,
      "eval_anthropic_toxic_prompts_bleu_score": 3.281347898377915,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12580742555756105,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6784017086029053,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00871333211838743,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.197878360748291,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.29,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.006,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.778,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.95,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.48053619386211,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2166390113462608,
      "eval_anthropic_toxic_prompts_runtime": 9.9694,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.154,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.1,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35545920993497343,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006452949749060365,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4444899045629664,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32280364009316653,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 191875
    },
    {
      "epoch": 36.84,
      "eval_arxiv_accuracy": 0.35175,
      "eval_arxiv_bleu_score": 4.676512130556084,
      "eval_arxiv_bleu_score_sem": 0.14350609871824133,
      "eval_arxiv_emb_cos_sim": 0.7798517942428589,
      "eval_arxiv_emb_cos_sim_sem": 0.007022511191607703,
      "eval_arxiv_emb_top1_equal": 0.296875,
      "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.338822603225708,
      "eval_arxiv_n_ngrams_match_1": 15.576,
      "eval_arxiv_n_ngrams_match_2": 3.118,
      "eval_arxiv_n_ngrams_match_3": 0.77,
      "eval_arxiv_num_pred_words": 40.468,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.18592114854607,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.372831543507237,
      "eval_arxiv_runtime": 10.2924,
      "eval_arxiv_samples_per_second": 48.579,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.3666216839205324,
      "eval_arxiv_token_set_f1_sem": 0.004222766437259845,
      "eval_arxiv_token_set_precision": 0.31763263029430083,
      "eval_arxiv_token_set_recall": 0.4488276679301581,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 191875
    },
    {
      "epoch": 36.84,
      "eval_python_code_alpaca_accuracy": 0.16040625,
      "eval_python_code_alpaca_bleu_score": 4.555213771220985,
      "eval_python_code_alpaca_bleu_score_sem": 0.144761155222261,
      "eval_python_code_alpaca_emb_cos_sim": 0.7632350921630859,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008651484252375725,
      "eval_python_code_alpaca_emb_top1_equal": 0.125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8670387268066406,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.852,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.806,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.902,
      "eval_python_code_alpaca_num_pred_words": 42.264,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.58486742508677,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.34191790035151476,
      "eval_python_code_alpaca_runtime": 9.9994,
      "eval_python_code_alpaca_samples_per_second": 50.003,
      "eval_python_code_alpaca_steps_per_second": 0.1,
      "eval_python_code_alpaca_token_set_f1": 0.4770770858748362,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005352187216895897,
      "eval_python_code_alpaca_token_set_precision": 0.5363579753651608,
      "eval_python_code_alpaca_token_set_recall": 0.45543513079432774,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 191875
    },
    {
      "epoch": 36.84,
      "eval_wikibio_accuracy": 0.3295625,
      "eval_wikibio_bleu_score": 6.005917623987676,
      "eval_wikibio_bleu_score_sem": 0.2255834478810922,
      "eval_wikibio_emb_cos_sim": 0.7490946054458618,
      "eval_wikibio_emb_cos_sim_sem": 0.008131074064753937,
      "eval_wikibio_emb_top1_equal": 0.171875,
      "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.64663028717041,
      "eval_wikibio_n_ngrams_match_1": 10.054,
      "eval_wikibio_n_ngrams_match_2": 3.356,
      "eval_wikibio_n_ngrams_match_3": 1.23,
      "eval_wikibio_num_pred_words": 35.918,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 38.34523566731452,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3558271770968173,
      "eval_wikibio_runtime": 9.9332,
      "eval_wikibio_samples_per_second": 50.336,
      "eval_wikibio_steps_per_second": 0.101,
      "eval_wikibio_token_set_f1": 0.321376023951464,
      "eval_wikibio_token_set_f1_sem": 0.005481143993179589,
      "eval_wikibio_token_set_precision": 0.327372122150575,
      "eval_wikibio_token_set_recall": 0.33075503118179234,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 191875
    },
    {
      "epoch": 36.84,
      "eval_nq_accuracy": 0.53565625,
      "eval_nq_bleu_score": 12.075373742611097,
      "eval_nq_bleu_score_sem": 0.48848954483686985,
      "eval_nq_emb_cos_sim": 0.8354136347770691,
      "eval_nq_emb_cos_sim_sem": 0.00677859091566246,
      "eval_nq_emb_top1_equal": 0.296875,
      "eval_nq_emb_top1_equal_sem": 0.04054163310179599,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.132789373397827,
      "eval_nq_n_ngrams_match_1": 23.636,
      "eval_nq_n_ngrams_match_2": 8.752,
      "eval_nq_n_ngrams_match_3": 4.016,
      "eval_nq_num_pred_words": 49.436,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.438371783902433,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4573583033802422,
      "eval_nq_runtime": 10.7189,
      "eval_nq_samples_per_second": 46.647,
      "eval_nq_steps_per_second": 0.093,
      "eval_nq_token_set_f1": 0.47062080889447266,
      "eval_nq_token_set_f1_sem": 0.004858305314723143,
      "eval_nq_token_set_precision": 0.4281424166269732,
      "eval_nq_token_set_recall": 0.5292878656494459,
      "eval_nq_true_num_tokens": 64.0,
      "step": 191875
    },
    {
      "epoch": 36.84,
      "learning_rate": 0.001,
      "loss": 2.508,
      "step": 191880
    },
    {
      "epoch": 36.85,
      "learning_rate": 0.001,
      "loss": 2.5093,
      "step": 191892
    },
    {
      "epoch": 36.85,
      "learning_rate": 0.001,
      "loss": 2.5079,
      "step": 191904
    },
    {
      "epoch": 36.85,
      "learning_rate": 0.001,
      "loss": 2.505,
      "step": 191916
    },
    {
      "epoch": 36.85,
      "learning_rate": 0.001,
      "loss": 2.5022,
      "step": 191928
    },
    {
      "epoch": 36.85,
      "learning_rate": 0.001,
      "loss": 2.5181,
      "step": 191940
    },
    {
      "epoch": 36.86,
      "learning_rate": 0.001,
      "loss": 2.511,
      "step": 191952
    },
    {
      "epoch": 36.86,
      "learning_rate": 0.001,
      "loss": 2.5123,
      "step": 191964
    },
    {
      "epoch": 36.86,
      "learning_rate": 0.001,
      "loss": 2.5161,
      "step": 191976
    },
    {
      "epoch": 36.86,
      "learning_rate": 0.001,
      "loss": 2.5117,
      "step": 191988
    },
    {
      "epoch": 36.87,
      "learning_rate": 0.001,
      "loss": 2.5234,
      "step": 192000
    },
    {
      "epoch": 36.87,
      "learning_rate": 0.001,
      "loss": 2.5095,
      "step": 192012
    },
    {
      "epoch": 36.87,
      "learning_rate": 0.001,
      "loss": 2.5126,
      "step": 192024
    },
    {
      "epoch": 36.87,
      "learning_rate": 0.001,
      "loss": 2.5045,
      "step": 192036
    },
    {
      "epoch": 36.88,
      "learning_rate": 0.001,
      "loss": 2.5025,
      "step": 192048
    },
    {
      "epoch": 36.88,
      "learning_rate": 0.001,
      "loss": 2.4989,
      "step": 192060
    },
    {
      "epoch": 36.88,
      "learning_rate": 0.001,
      "loss": 2.5082,
      "step": 192072
    },
    {
      "epoch": 36.88,
      "learning_rate": 0.001,
      "loss": 2.511,
      "step": 192084
    },
    {
      "epoch": 36.88,
      "learning_rate": 0.001,
      "loss": 2.5068,
      "step": 192096
    },
    {
      "epoch": 36.89,
      "learning_rate": 0.001,
      "loss": 2.5091,
      "step": 192108
    },
    {
      "epoch": 36.89,
      "learning_rate": 0.001,
      "loss": 2.5031,
      "step": 192120
    },
    {
      "epoch": 36.89,
      "learning_rate": 0.001,
      "loss": 2.5153,
      "step": 192132
    },
    {
      "epoch": 36.89,
      "learning_rate": 0.001,
      "loss": 2.5099,
      "step": 192144
    },
    {
      "epoch": 36.9,
      "learning_rate": 0.001,
      "loss": 2.5003,
      "step": 192156
    },
    {
      "epoch": 36.9,
      "learning_rate": 0.001,
      "loss": 2.5003,
      "step": 192168
    },
    {
      "epoch": 36.9,
      "learning_rate": 0.001,
      "loss": 2.5132,
      "step": 192180
    },
    {
      "epoch": 36.9,
      "learning_rate": 0.001,
      "loss": 2.5047,
      "step": 192192
    },
    {
      "epoch": 36.91,
      "learning_rate": 0.001,
      "loss": 2.5117,
      "step": 192204
    },
    {
      "epoch": 36.91,
      "learning_rate": 0.001,
      "loss": 2.507,
      "step": 192216
    },
    {
      "epoch": 36.91,
      "learning_rate": 0.001,
      "loss": 2.5137,
      "step": 192228
    },
    {
      "epoch": 36.91,
      "learning_rate": 0.001,
      "loss": 2.5131,
      "step": 192240
    },
    {
      "epoch": 36.91,
      "learning_rate": 0.001,
      "loss": 2.5155,
      "step": 192252
    },
    {
      "epoch": 36.92,
      "learning_rate": 0.001,
      "loss": 2.5074,
      "step": 192264
    },
    {
      "epoch": 36.92,
      "learning_rate": 0.001,
      "loss": 2.514,
      "step": 192276
    },
    {
      "epoch": 36.92,
      "learning_rate": 0.001,
      "loss": 2.5034,
      "step": 192288
    },
    {
      "epoch": 36.92,
      "learning_rate": 0.001,
      "loss": 2.5162,
      "step": 192300
    },
    {
      "epoch": 36.93,
      "learning_rate": 0.001,
      "loss": 2.5128,
      "step": 192312
    },
    {
      "epoch": 36.93,
      "learning_rate": 0.001,
      "loss": 2.5069,
      "step": 192324
    },
    {
      "epoch": 36.93,
      "learning_rate": 0.001,
      "loss": 2.51,
      "step": 192336
    },
    {
      "epoch": 36.93,
      "learning_rate": 0.001,
      "loss": 2.5122,
      "step": 192348
    },
    {
      "epoch": 36.94,
      "learning_rate": 0.001,
      "loss": 2.506,
      "step": 192360
    },
    {
      "epoch": 36.94,
      "learning_rate": 0.001,
      "loss": 2.5094,
      "step": 192372
    },
    {
      "epoch": 36.94,
      "learning_rate": 0.001,
      "loss": 2.512,
      "step": 192384
    },
    {
      "epoch": 36.94,
      "learning_rate": 0.001,
      "loss": 2.5145,
      "step": 192396
    },
    {
      "epoch": 36.94,
      "learning_rate": 0.001,
      "loss": 2.5103,
      "step": 192408
    },
    {
      "epoch": 36.95,
      "learning_rate": 0.001,
      "loss": 2.5004,
      "step": 192420
    },
    {
      "epoch": 36.95,
      "learning_rate": 0.001,
      "loss": 2.5048,
      "step": 192432
    },
    {
      "epoch": 36.95,
      "learning_rate": 0.001,
      "loss": 2.5177,
      "step": 192444
    },
    {
      "epoch": 36.95,
      "learning_rate": 0.001,
      "loss": 2.5056,
      "step": 192456
    },
    {
      "epoch": 36.96,
      "learning_rate": 0.001,
      "loss": 2.5038,
      "step": 192468
    },
    {
      "epoch": 36.96,
      "learning_rate": 0.001,
      "loss": 2.5011,
      "step": 192480
    },
    {
      "epoch": 36.96,
      "learning_rate": 0.001,
      "loss": 2.4996,
      "step": 192492
    },
    {
      "epoch": 36.96,
      "eval_ag_news_accuracy": 0.32921875,
      "eval_ag_news_bleu_score": 4.94957534953809,
      "eval_ag_news_bleu_score_sem": 0.14853514945693413,
      "eval_ag_news_emb_cos_sim": 0.820341944694519,
      "eval_ag_news_emb_cos_sim_sem": 0.006971182257022006,
      "eval_ag_news_emb_top1_equal": 0.2734375,
      "eval_ag_news_emb_top1_equal_sem": 0.03955156411760461,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.48870587348938,
      "eval_ag_news_n_ngrams_match_1": 14.304,
      "eval_ag_news_n_ngrams_match_2": 3.224,
      "eval_ag_news_n_ngrams_match_3": 0.91,
      "eval_ag_news_num_pred_words": 46.654,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.743545984643184,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.358820998943969,
      "eval_ag_news_runtime": 14.5588,
      "eval_ag_news_samples_per_second": 34.343,
      "eval_ag_news_steps_per_second": 0.069,
      "eval_ag_news_token_set_f1": 0.3587045411243008,
      "eval_ag_news_token_set_f1_sem": 0.004347115915121873,
      "eval_ag_news_token_set_precision": 0.3433912714335185,
      "eval_ag_news_token_set_recall": 0.39036720805435543,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 192500
    },
    {
      "epoch": 36.96,
      "eval_anthropic_toxic_prompts_accuracy": 0.1160625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.183696577411922,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12454425373907795,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6816329956054688,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008621962360627369,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2554514408111572,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.162,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.92,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.734,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.512,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.931318347399888,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21238038842071585,
      "eval_anthropic_toxic_prompts_runtime": 9.9481,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.261,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.101,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35923065859600295,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006756317873980588,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4337587112924653,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3327786501825931,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 192500
    },
    {
      "epoch": 36.96,
      "eval_arxiv_accuracy": 0.34921875,
      "eval_arxiv_bleu_score": 4.583263205881023,
      "eval_arxiv_bleu_score_sem": 0.13138340016539993,
      "eval_arxiv_emb_cos_sim": 0.7842831611633301,
      "eval_arxiv_emb_cos_sim_sem": 0.006627923176225601,
      "eval_arxiv_emb_top1_equal": 0.28125,
      "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3556504249572754,
      "eval_arxiv_n_ngrams_match_1": 15.618,
      "eval_arxiv_n_ngrams_match_2": 3.182,
      "eval_arxiv_n_ngrams_match_3": 0.728,
      "eval_arxiv_num_pred_words": 40.422,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.664242067201428,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.37410331590847146,
      "eval_arxiv_runtime": 10.473,
      "eval_arxiv_samples_per_second": 47.742,
      "eval_arxiv_steps_per_second": 0.095,
      "eval_arxiv_token_set_f1": 0.3652666730821766,
      "eval_arxiv_token_set_f1_sem": 0.004156405458533003,
      "eval_arxiv_token_set_precision": 0.31611131200351805,
      "eval_arxiv_token_set_recall": 0.44949166081900477,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 192500
    },
    {
      "epoch": 36.96,
      "eval_python_code_alpaca_accuracy": 0.1601875,
      "eval_python_code_alpaca_bleu_score": 4.558052190681162,
      "eval_python_code_alpaca_bleu_score_sem": 0.14585363179511066,
      "eval_python_code_alpaca_emb_cos_sim": 0.7560789585113525,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008701819443298876,
      "eval_python_code_alpaca_emb_top1_equal": 0.1796875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.034068008879424266,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8719370365142822,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.85,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.92,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.002,
      "eval_python_code_alpaca_num_pred_words": 44.196,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.671214857488266,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3326628578833356,
      "eval_python_code_alpaca_runtime": 10.3863,
      "eval_python_code_alpaca_samples_per_second": 48.14,
      "eval_python_code_alpaca_steps_per_second": 0.096,
      "eval_python_code_alpaca_token_set_f1": 0.48301183568944633,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005631118753790701,
      "eval_python_code_alpaca_token_set_precision": 0.540086277860109,
      "eval_python_code_alpaca_token_set_recall": 0.4636515897902906,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 192500
    },
    {
      "epoch": 36.96,
      "eval_wikibio_accuracy": 0.3309375,
      "eval_wikibio_bleu_score": 5.91978715623965,
      "eval_wikibio_bleu_score_sem": 0.2084940686323091,
      "eval_wikibio_emb_cos_sim": 0.7401781678199768,
      "eval_wikibio_emb_cos_sim_sem": 0.00995853309877246,
      "eval_wikibio_emb_top1_equal": 0.1875,
      "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6312167644500732,
      "eval_wikibio_n_ngrams_match_1": 9.746,
      "eval_wikibio_n_ngrams_match_2": 3.302,
      "eval_wikibio_n_ngrams_match_3": 1.168,
      "eval_wikibio_num_pred_words": 35.224,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.75873216028906,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.35089385192024114,
      "eval_wikibio_runtime": 9.9671,
      "eval_wikibio_samples_per_second": 50.165,
      "eval_wikibio_steps_per_second": 0.1,
      "eval_wikibio_token_set_f1": 0.3145718034888637,
      "eval_wikibio_token_set_f1_sem": 0.0055858330471781565,
      "eval_wikibio_token_set_precision": 0.31818262148560844,
      "eval_wikibio_token_set_recall": 0.32801109531710293,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 192500
    },
    {
      "epoch": 36.96,
      "eval_nq_accuracy": 0.5361875,
      "eval_nq_bleu_score": 12.269416750995518,
      "eval_nq_bleu_score_sem": 0.4863028889827014,
      "eval_nq_emb_cos_sim": 0.8320356607437134,
      "eval_nq_emb_cos_sim_sem": 0.00702092541643248,
      "eval_nq_emb_top1_equal": 0.3203125,
      "eval_nq_emb_top1_equal_sem": 0.041403754790620424,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1297690868377686,
      "eval_nq_n_ngrams_match_1": 23.412,
      "eval_nq_n_ngrams_match_2": 8.748,
      "eval_nq_n_ngrams_match_3": 4.16,
      "eval_nq_num_pred_words": 49.11,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.412923932261764,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45498159980778147,
      "eval_nq_runtime": 10.1997,
      "eval_nq_samples_per_second": 49.021,
      "eval_nq_steps_per_second": 0.098,
      "eval_nq_token_set_f1": 0.468365486828502,
      "eval_nq_token_set_f1_sem": 0.0048862762311763,
      "eval_nq_token_set_precision": 0.4252104214006085,
      "eval_nq_token_set_recall": 0.5301342461670362,
      "eval_nq_true_num_tokens": 64.0,
      "step": 192500
    },
    {
      "epoch": 36.96,
      "learning_rate": 0.001,
      "loss": 2.504,
      "step": 192504
    },
    {
      "epoch": 36.97,
      "learning_rate": 0.001,
      "loss": 2.5135,
      "step": 192516
    },
    {
      "epoch": 36.97,
      "learning_rate": 0.001,
      "loss": 2.4947,
      "step": 192528
    },
    {
      "epoch": 36.97,
      "learning_rate": 0.001,
      "loss": 2.5163,
      "step": 192540
    },
    {
      "epoch": 36.97,
      "learning_rate": 0.001,
      "loss": 2.5147,
      "step": 192552
    },
    {
      "epoch": 36.97,
      "learning_rate": 0.001,
      "loss": 2.5024,
      "step": 192564
    },
    {
      "epoch": 36.98,
      "learning_rate": 0.001,
      "loss": 2.5028,
      "step": 192576
    },
    {
      "epoch": 36.98,
      "learning_rate": 0.001,
      "loss": 2.4993,
      "step": 192588
    },
    {
      "epoch": 36.98,
      "learning_rate": 0.001,
      "loss": 2.5014,
      "step": 192600
    },
    {
      "epoch": 36.98,
      "learning_rate": 0.001,
      "loss": 2.5027,
      "step": 192612
    },
    {
      "epoch": 36.99,
      "learning_rate": 0.001,
      "loss": 2.5029,
      "step": 192624
    },
    {
      "epoch": 36.99,
      "learning_rate": 0.001,
      "loss": 2.5065,
      "step": 192636
    },
    {
      "epoch": 36.99,
      "learning_rate": 0.001,
      "loss": 2.4974,
      "step": 192648
    },
    {
      "epoch": 36.99,
      "learning_rate": 0.001,
      "loss": 2.5083,
      "step": 192660
    },
    {
      "epoch": 37.0,
      "learning_rate": 0.001,
      "loss": 2.5048,
      "step": 192672
    },
    {
      "epoch": 37.0,
      "learning_rate": 0.001,
      "loss": 2.5144,
      "step": 192684
    },
    {
      "epoch": 37.0,
      "learning_rate": 0.001,
      "loss": 2.5012,
      "step": 192696
    },
    {
      "epoch": 37.0,
      "learning_rate": 0.001,
      "loss": 2.4909,
      "step": 192708
    },
    {
      "epoch": 37.0,
      "learning_rate": 0.001,
      "loss": 2.5,
      "step": 192720
    },
    {
      "epoch": 37.01,
      "learning_rate": 0.001,
      "loss": 2.4901,
      "step": 192732
    },
    {
      "epoch": 37.01,
      "learning_rate": 0.001,
      "loss": 2.4916,
      "step": 192744
    },
    {
      "epoch": 37.01,
      "learning_rate": 0.001,
      "loss": 2.4899,
      "step": 192756
    },
    {
      "epoch": 37.01,
      "learning_rate": 0.001,
      "loss": 2.4933,
      "step": 192768
    },
    {
      "epoch": 37.02,
      "learning_rate": 0.001,
      "loss": 2.4994,
      "step": 192780
    },
    {
      "epoch": 37.02,
      "learning_rate": 0.001,
      "loss": 2.4842,
      "step": 192792
    },
    {
      "epoch": 37.02,
      "learning_rate": 0.001,
      "loss": 2.4923,
      "step": 192804
    },
    {
      "epoch": 37.02,
      "learning_rate": 0.001,
      "loss": 2.4962,
      "step": 192816
    },
    {
      "epoch": 37.03,
      "learning_rate": 0.001,
      "loss": 2.5029,
      "step": 192828
    },
    {
      "epoch": 37.03,
      "learning_rate": 0.001,
      "loss": 2.4979,
      "step": 192840
    },
    {
      "epoch": 37.03,
      "learning_rate": 0.001,
      "loss": 2.4978,
      "step": 192852
    },
    {
      "epoch": 37.03,
      "learning_rate": 0.001,
      "loss": 2.4921,
      "step": 192864
    },
    {
      "epoch": 37.03,
      "learning_rate": 0.001,
      "loss": 2.508,
      "step": 192876
    },
    {
      "epoch": 37.04,
      "learning_rate": 0.001,
      "loss": 2.5015,
      "step": 192888
    },
    {
      "epoch": 37.04,
      "learning_rate": 0.001,
      "loss": 2.5003,
      "step": 192900
    },
    {
      "epoch": 37.04,
      "learning_rate": 0.001,
      "loss": 2.4948,
      "step": 192912
    },
    {
      "epoch": 37.04,
      "learning_rate": 0.001,
      "loss": 2.4893,
      "step": 192924
    },
    {
      "epoch": 37.05,
      "learning_rate": 0.001,
      "loss": 2.501,
      "step": 192936
    },
    {
      "epoch": 37.05,
      "learning_rate": 0.001,
      "loss": 2.4901,
      "step": 192948
    },
    {
      "epoch": 37.05,
      "learning_rate": 0.001,
      "loss": 2.4907,
      "step": 192960
    },
    {
      "epoch": 37.05,
      "learning_rate": 0.001,
      "loss": 2.5089,
      "step": 192972
    },
    {
      "epoch": 37.06,
      "learning_rate": 0.001,
      "loss": 2.4941,
      "step": 192984
    },
    {
      "epoch": 37.06,
      "learning_rate": 0.001,
      "loss": 2.5054,
      "step": 192996
    },
    {
      "epoch": 37.06,
      "learning_rate": 0.001,
      "loss": 2.4938,
      "step": 193008
    },
    {
      "epoch": 37.06,
      "learning_rate": 0.001,
      "loss": 2.4981,
      "step": 193020
    },
    {
      "epoch": 37.06,
      "learning_rate": 0.001,
      "loss": 2.4941,
      "step": 193032
    },
    {
      "epoch": 37.07,
      "learning_rate": 0.001,
      "loss": 2.5013,
      "step": 193044
    },
    {
      "epoch": 37.07,
      "learning_rate": 0.001,
      "loss": 2.4967,
      "step": 193056
    },
    {
      "epoch": 37.07,
      "learning_rate": 0.001,
      "loss": 2.4952,
      "step": 193068
    },
    {
      "epoch": 37.07,
      "learning_rate": 0.001,
      "loss": 2.4983,
      "step": 193080
    },
    {
      "epoch": 37.08,
      "learning_rate": 0.001,
      "loss": 2.5001,
      "step": 193092
    },
    {
      "epoch": 37.08,
      "learning_rate": 0.001,
      "loss": 2.4846,
      "step": 193104
    },
    {
      "epoch": 37.08,
      "learning_rate": 0.001,
      "loss": 2.4908,
      "step": 193116
    },
    {
      "epoch": 37.08,
      "eval_ag_news_accuracy": 0.3293125,
      "eval_ag_news_bleu_score": 5.0472875870568314,
      "eval_ag_news_bleu_score_sem": 0.16974254125289642,
      "eval_ag_news_emb_cos_sim": 0.8192209005355835,
      "eval_ag_news_emb_cos_sim_sem": 0.006446453334516867,
      "eval_ag_news_emb_top1_equal": 0.3203125,
      "eval_ag_news_emb_top1_equal_sem": 0.041403754790620424,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4924087524414062,
      "eval_ag_news_n_ngrams_match_1": 14.496,
      "eval_ag_news_n_ngrams_match_2": 3.232,
      "eval_ag_news_n_ngrams_match_3": 0.908,
      "eval_ag_news_num_pred_words": 46.624,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.865016127709566,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.36331507324856993,
      "eval_ag_news_runtime": 10.2781,
      "eval_ag_news_samples_per_second": 48.647,
      "eval_ag_news_steps_per_second": 0.097,
      "eval_ag_news_token_set_f1": 0.36136002826625707,
      "eval_ag_news_token_set_f1_sem": 0.004416313646991193,
      "eval_ag_news_token_set_precision": 0.3492121929628191,
      "eval_ag_news_token_set_recall": 0.38782790738673356,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 193125
    },
    {
      "epoch": 37.08,
      "eval_anthropic_toxic_prompts_accuracy": 0.11675,
      "eval_anthropic_toxic_prompts_bleu_score": 3.183582336003102,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12301005787103086,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6879515647888184,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008261988761673854,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.223449230194092,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.4,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.98,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.722,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.276,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.114596982629802,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21872745927530302,
      "eval_anthropic_toxic_prompts_runtime": 10.2158,
      "eval_anthropic_toxic_prompts_samples_per_second": 48.944,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.098,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3579716251366683,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00650853606887439,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.45274270629613617,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3200028890383885,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 193125
    },
    {
      "epoch": 37.08,
      "eval_arxiv_accuracy": 0.34984375,
      "eval_arxiv_bleu_score": 4.399790539622733,
      "eval_arxiv_bleu_score_sem": 0.12277834051173142,
      "eval_arxiv_emb_cos_sim": 0.7755804061889648,
      "eval_arxiv_emb_cos_sim_sem": 0.006803201382470282,
      "eval_arxiv_emb_top1_equal": 0.2734375,
      "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.36305570602417,
      "eval_arxiv_n_ngrams_match_1": 15.482,
      "eval_arxiv_n_ngrams_match_2": 3.068,
      "eval_arxiv_n_ngrams_match_3": 0.656,
      "eval_arxiv_num_pred_words": 40.658,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.877296727472785,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3717343898829476,
      "eval_arxiv_runtime": 10.1065,
      "eval_arxiv_samples_per_second": 49.473,
      "eval_arxiv_steps_per_second": 0.099,
      "eval_arxiv_token_set_f1": 0.362411897609555,
      "eval_arxiv_token_set_f1_sem": 0.004161236094802209,
      "eval_arxiv_token_set_precision": 0.31616375213373865,
      "eval_arxiv_token_set_recall": 0.44116641020039854,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 193125
    },
    {
      "epoch": 37.08,
      "eval_python_code_alpaca_accuracy": 0.16078125,
      "eval_python_code_alpaca_bleu_score": 4.781514928853036,
      "eval_python_code_alpaca_bleu_score_sem": 0.14685525073175398,
      "eval_python_code_alpaca_emb_cos_sim": 0.7665096521377563,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007397083405214178,
      "eval_python_code_alpaca_emb_top1_equal": 0.1171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8710124492645264,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.064,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.07,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.054,
      "eval_python_code_alpaca_num_pred_words": 43.626,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.65488382843735,
      "eval_python_code_alpaca_pred_num_tokens": 62.9609375,
      "eval_python_code_alpaca_rouge_score": 0.3452564879086022,
      "eval_python_code_alpaca_runtime": 9.8738,
      "eval_python_code_alpaca_samples_per_second": 50.639,
      "eval_python_code_alpaca_steps_per_second": 0.101,
      "eval_python_code_alpaca_token_set_f1": 0.48209317633085697,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005186671950039917,
      "eval_python_code_alpaca_token_set_precision": 0.5547722503496798,
      "eval_python_code_alpaca_token_set_recall": 0.4487437550030472,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 193125
    },
    {
      "epoch": 37.08,
      "eval_wikibio_accuracy": 0.3285625,
      "eval_wikibio_bleu_score": 5.926579864580395,
      "eval_wikibio_bleu_score_sem": 0.21905318042124408,
      "eval_wikibio_emb_cos_sim": 0.7521117925643921,
      "eval_wikibio_emb_cos_sim_sem": 0.010075538677139274,
      "eval_wikibio_emb_top1_equal": 0.1875,
      "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6762471199035645,
      "eval_wikibio_n_ngrams_match_1": 9.878,
      "eval_wikibio_n_ngrams_match_2": 3.298,
      "eval_wikibio_n_ngrams_match_3": 1.24,
      "eval_wikibio_num_pred_words": 35.422,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 39.497884752499665,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3503315835033644,
      "eval_wikibio_runtime": 9.8661,
      "eval_wikibio_samples_per_second": 50.679,
      "eval_wikibio_steps_per_second": 0.101,
      "eval_wikibio_token_set_f1": 0.31365354782013044,
      "eval_wikibio_token_set_f1_sem": 0.005838103597768974,
      "eval_wikibio_token_set_precision": 0.321877752264818,
      "eval_wikibio_token_set_recall": 0.32082498590336117,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 193125
    },
    {
      "epoch": 37.08,
      "eval_nq_accuracy": 0.5366875,
      "eval_nq_bleu_score": 12.370374831293935,
      "eval_nq_bleu_score_sem": 0.49273457524295083,
      "eval_nq_emb_cos_sim": 0.8358243107795715,
      "eval_nq_emb_cos_sim_sem": 0.00722880355145583,
      "eval_nq_emb_top1_equal": 0.3046875,
      "eval_nq_emb_top1_equal_sem": 0.04084279867618665,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.131504535675049,
      "eval_nq_n_ngrams_match_1": 23.678,
      "eval_nq_n_ngrams_match_2": 8.858,
      "eval_nq_n_ngrams_match_3": 4.172,
      "eval_nq_num_pred_words": 49.272,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.427536807599324,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4585536076556796,
      "eval_nq_runtime": 11.841,
      "eval_nq_samples_per_second": 42.226,
      "eval_nq_steps_per_second": 0.084,
      "eval_nq_token_set_f1": 0.4708879282164866,
      "eval_nq_token_set_f1_sem": 0.004901324214237776,
      "eval_nq_token_set_precision": 0.4308746724594358,
      "eval_nq_token_set_recall": 0.5272144650390836,
      "eval_nq_true_num_tokens": 64.0,
      "step": 193125
    },
    {
      "epoch": 37.08,
      "learning_rate": 0.001,
      "loss": 2.4946,
      "step": 193128
    },
    {
      "epoch": 37.09,
      "learning_rate": 0.001,
      "loss": 2.4935,
      "step": 193140
    },
    {
      "epoch": 37.09,
      "learning_rate": 0.001,
      "loss": 2.4927,
      "step": 193152
    },
    {
      "epoch": 37.09,
      "learning_rate": 0.001,
      "loss": 2.5052,
      "step": 193164
    },
    {
      "epoch": 37.09,
      "learning_rate": 0.001,
      "loss": 2.4944,
      "step": 193176
    },
    {
      "epoch": 37.09,
      "learning_rate": 0.001,
      "loss": 2.4997,
      "step": 193188
    },
    {
      "epoch": 37.1,
      "learning_rate": 0.001,
      "loss": 2.4931,
      "step": 193200
    },
    {
      "epoch": 37.1,
      "learning_rate": 0.001,
      "loss": 2.5031,
      "step": 193212
    },
    {
      "epoch": 37.1,
      "learning_rate": 0.001,
      "loss": 2.5077,
      "step": 193224
    },
    {
      "epoch": 37.1,
      "learning_rate": 0.001,
      "loss": 2.5074,
      "step": 193236
    },
    {
      "epoch": 37.11,
      "learning_rate": 0.001,
      "loss": 2.4973,
      "step": 193248
    },
    {
      "epoch": 37.11,
      "learning_rate": 0.001,
      "loss": 2.4989,
      "step": 193260
    },
    {
      "epoch": 37.11,
      "learning_rate": 0.001,
      "loss": 2.5029,
      "step": 193272
    },
    {
      "epoch": 37.11,
      "learning_rate": 0.001,
      "loss": 2.4981,
      "step": 193284
    },
    {
      "epoch": 37.12,
      "learning_rate": 0.001,
      "loss": 2.5016,
      "step": 193296
    },
    {
      "epoch": 37.12,
      "learning_rate": 0.001,
      "loss": 2.4937,
      "step": 193308
    },
    {
      "epoch": 37.12,
      "learning_rate": 0.001,
      "loss": 2.495,
      "step": 193320
    },
    {
      "epoch": 37.12,
      "learning_rate": 0.001,
      "loss": 2.4956,
      "step": 193332
    },
    {
      "epoch": 37.12,
      "learning_rate": 0.001,
      "loss": 2.5048,
      "step": 193344
    },
    {
      "epoch": 37.13,
      "learning_rate": 0.001,
      "loss": 2.4972,
      "step": 193356
    },
    {
      "epoch": 37.13,
      "learning_rate": 0.001,
      "loss": 2.493,
      "step": 193368
    },
    {
      "epoch": 37.13,
      "learning_rate": 0.001,
      "loss": 2.4918,
      "step": 193380
    },
    {
      "epoch": 37.13,
      "learning_rate": 0.001,
      "loss": 2.4914,
      "step": 193392
    },
    {
      "epoch": 37.14,
      "learning_rate": 0.001,
      "loss": 2.4937,
      "step": 193404
    },
    {
      "epoch": 37.14,
      "learning_rate": 0.001,
      "loss": 2.5009,
      "step": 193416
    },
    {
      "epoch": 37.14,
      "learning_rate": 0.001,
      "loss": 2.4953,
      "step": 193428
    },
    {
      "epoch": 37.14,
      "learning_rate": 0.001,
      "loss": 2.5101,
      "step": 193440
    },
    {
      "epoch": 37.15,
      "learning_rate": 0.001,
      "loss": 2.5015,
      "step": 193452
    },
    {
      "epoch": 37.15,
      "learning_rate": 0.001,
      "loss": 2.5075,
      "step": 193464
    },
    {
      "epoch": 37.15,
      "learning_rate": 0.001,
      "loss": 2.4948,
      "step": 193476
    },
    {
      "epoch": 37.15,
      "learning_rate": 0.001,
      "loss": 2.4897,
      "step": 193488
    },
    {
      "epoch": 37.15,
      "learning_rate": 0.001,
      "loss": 2.5031,
      "step": 193500
    },
    {
      "epoch": 37.16,
      "learning_rate": 0.001,
      "loss": 2.4956,
      "step": 193512
    },
    {
      "epoch": 37.16,
      "learning_rate": 0.001,
      "loss": 2.4931,
      "step": 193524
    },
    {
      "epoch": 37.16,
      "learning_rate": 0.001,
      "loss": 2.49,
      "step": 193536
    },
    {
      "epoch": 37.16,
      "learning_rate": 0.001,
      "loss": 2.4892,
      "step": 193548
    },
    {
      "epoch": 37.17,
      "learning_rate": 0.001,
      "loss": 2.5037,
      "step": 193560
    },
    {
      "epoch": 37.17,
      "learning_rate": 0.001,
      "loss": 2.495,
      "step": 193572
    },
    {
      "epoch": 37.17,
      "learning_rate": 0.001,
      "loss": 2.5015,
      "step": 193584
    },
    {
      "epoch": 37.17,
      "learning_rate": 0.001,
      "loss": 2.4922,
      "step": 193596
    },
    {
      "epoch": 37.18,
      "learning_rate": 0.001,
      "loss": 2.4919,
      "step": 193608
    },
    {
      "epoch": 37.18,
      "learning_rate": 0.001,
      "loss": 2.4932,
      "step": 193620
    },
    {
      "epoch": 37.18,
      "learning_rate": 0.001,
      "loss": 2.4889,
      "step": 193632
    },
    {
      "epoch": 37.18,
      "learning_rate": 0.001,
      "loss": 2.4997,
      "step": 193644
    },
    {
      "epoch": 37.18,
      "learning_rate": 0.001,
      "loss": 2.5007,
      "step": 193656
    },
    {
      "epoch": 37.19,
      "learning_rate": 0.001,
      "loss": 2.5014,
      "step": 193668
    },
    {
      "epoch": 37.19,
      "learning_rate": 0.001,
      "loss": 2.4886,
      "step": 193680
    },
    {
      "epoch": 37.19,
      "learning_rate": 0.001,
      "loss": 2.5022,
      "step": 193692
    },
    {
      "epoch": 37.19,
      "learning_rate": 0.001,
      "loss": 2.5053,
      "step": 193704
    },
    {
      "epoch": 37.2,
      "learning_rate": 0.001,
      "loss": 2.4933,
      "step": 193716
    },
    {
      "epoch": 37.2,
      "learning_rate": 0.001,
      "loss": 2.5031,
      "step": 193728
    },
    {
      "epoch": 37.2,
      "learning_rate": 0.001,
      "loss": 2.4995,
      "step": 193740
    },
    {
      "epoch": 37.2,
      "eval_ag_news_accuracy": 0.32765625,
      "eval_ag_news_bleu_score": 4.987352437371996,
      "eval_ag_news_bleu_score_sem": 0.16506155774105166,
      "eval_ag_news_emb_cos_sim": 0.8157989978790283,
      "eval_ag_news_emb_cos_sim_sem": 0.007362029081054258,
      "eval_ag_news_emb_top1_equal": 0.265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.49606990814209,
      "eval_ag_news_n_ngrams_match_1": 14.412,
      "eval_ag_news_n_ngrams_match_2": 3.232,
      "eval_ag_news_n_ngrams_match_3": 0.918,
      "eval_ag_news_num_pred_words": 46.842,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.98556060025125,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3590879086206805,
      "eval_ag_news_runtime": 11.0347,
      "eval_ag_news_samples_per_second": 45.312,
      "eval_ag_news_steps_per_second": 0.091,
      "eval_ag_news_token_set_f1": 0.35810359564648675,
      "eval_ag_news_token_set_f1_sem": 0.0045469070027615195,
      "eval_ag_news_token_set_precision": 0.34649717777803274,
      "eval_ag_news_token_set_recall": 0.38418050892403593,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 193750
    },
    {
      "epoch": 37.2,
      "eval_anthropic_toxic_prompts_accuracy": 0.11609375,
      "eval_anthropic_toxic_prompts_bleu_score": 3.415877945184848,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1287005073311264,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6898690462112427,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.0080730760499359,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.209953546524048,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.53,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.102,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.804,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.88,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.77793517663694,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.22442653942444352,
      "eval_anthropic_toxic_prompts_runtime": 10.7562,
      "eval_anthropic_toxic_prompts_samples_per_second": 46.485,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.093,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3655782024453944,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006377973840075936,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.46001993108976014,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32589759195678614,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 193750
    },
    {
      "epoch": 37.2,
      "eval_arxiv_accuracy": 0.3536875,
      "eval_arxiv_bleu_score": 4.618313153989449,
      "eval_arxiv_bleu_score_sem": 0.1350069263448915,
      "eval_arxiv_emb_cos_sim": 0.7814196944236755,
      "eval_arxiv_emb_cos_sim_sem": 0.006535607090007568,
      "eval_arxiv_emb_top1_equal": 0.265625,
      "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.335975408554077,
      "eval_arxiv_n_ngrams_match_1": 15.636,
      "eval_arxiv_n_ngrams_match_2": 3.11,
      "eval_arxiv_n_ngrams_match_3": 0.732,
      "eval_arxiv_num_pred_words": 40.66,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.10578448051979,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.37455004000824543,
      "eval_arxiv_runtime": 10.4242,
      "eval_arxiv_samples_per_second": 47.965,
      "eval_arxiv_steps_per_second": 0.096,
      "eval_arxiv_token_set_f1": 0.3672948883127612,
      "eval_arxiv_token_set_f1_sem": 0.004040077812151361,
      "eval_arxiv_token_set_precision": 0.31889205301294843,
      "eval_arxiv_token_set_recall": 0.4494963085270952,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 193750
    },
    {
      "epoch": 37.2,
      "eval_python_code_alpaca_accuracy": 0.16096875,
      "eval_python_code_alpaca_bleu_score": 4.892885050657554,
      "eval_python_code_alpaca_bleu_score_sem": 0.15617172133774,
      "eval_python_code_alpaca_emb_cos_sim": 0.7601250410079956,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008629491499987135,
      "eval_python_code_alpaca_emb_top1_equal": 0.109375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.856093645095825,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.082,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.146,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.078,
      "eval_python_code_alpaca_num_pred_words": 43.088,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.393449069921157,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.34200803190834106,
      "eval_python_code_alpaca_runtime": 10.1633,
      "eval_python_code_alpaca_samples_per_second": 49.197,
      "eval_python_code_alpaca_steps_per_second": 0.098,
      "eval_python_code_alpaca_token_set_f1": 0.4862262850886598,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005619742519728222,
      "eval_python_code_alpaca_token_set_precision": 0.5489696423712079,
      "eval_python_code_alpaca_token_set_recall": 0.4603094019709726,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 193750
    },
    {
      "epoch": 37.2,
      "eval_wikibio_accuracy": 0.3301875,
      "eval_wikibio_bleu_score": 5.926853942417159,
      "eval_wikibio_bleu_score_sem": 0.21383495753295081,
      "eval_wikibio_emb_cos_sim": 0.7365812063217163,
      "eval_wikibio_emb_cos_sim_sem": 0.010531684798949567,
      "eval_wikibio_emb_top1_equal": 0.171875,
      "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6643404960632324,
      "eval_wikibio_n_ngrams_match_1": 9.988,
      "eval_wikibio_n_ngrams_match_2": 3.378,
      "eval_wikibio_n_ngrams_match_3": 1.256,
      "eval_wikibio_num_pred_words": 36.072,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 39.03038697932219,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3587143477871787,
      "eval_wikibio_runtime": 10.4952,
      "eval_wikibio_samples_per_second": 47.641,
      "eval_wikibio_steps_per_second": 0.095,
      "eval_wikibio_token_set_f1": 0.31864203588405426,
      "eval_wikibio_token_set_f1_sem": 0.005533096524289964,
      "eval_wikibio_token_set_precision": 0.326705964008385,
      "eval_wikibio_token_set_recall": 0.33015486000763916,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 193750
    },
    {
      "epoch": 37.2,
      "eval_nq_accuracy": 0.53721875,
      "eval_nq_bleu_score": 12.245648338199913,
      "eval_nq_bleu_score_sem": 0.48728367368860276,
      "eval_nq_emb_cos_sim": 0.8392508029937744,
      "eval_nq_emb_cos_sim_sem": 0.007142248412593629,
      "eval_nq_emb_top1_equal": 0.3359375,
      "eval_nq_emb_top1_equal_sem": 0.04191137143408563,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.130819320678711,
      "eval_nq_n_ngrams_match_1": 23.536,
      "eval_nq_n_ngrams_match_2": 8.806,
      "eval_nq_n_ngrams_match_3": 4.084,
      "eval_nq_num_pred_words": 49.102,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.421764110991576,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4564676868639352,
      "eval_nq_runtime": 10.7337,
      "eval_nq_samples_per_second": 46.582,
      "eval_nq_steps_per_second": 0.093,
      "eval_nq_token_set_f1": 0.47014775712288226,
      "eval_nq_token_set_f1_sem": 0.0049832179699802765,
      "eval_nq_token_set_precision": 0.42928421059191796,
      "eval_nq_token_set_recall": 0.5279550463868034,
      "eval_nq_true_num_tokens": 64.0,
      "step": 193750
    },
    {
      "epoch": 37.2,
      "learning_rate": 0.001,
      "loss": 2.504,
      "step": 193752
    },
    {
      "epoch": 37.21,
      "learning_rate": 0.001,
      "loss": 2.4988,
      "step": 193764
    },
    {
      "epoch": 37.21,
      "learning_rate": 0.001,
      "loss": 2.4853,
      "step": 193776
    },
    {
      "epoch": 37.21,
      "learning_rate": 0.001,
      "loss": 2.4926,
      "step": 193788
    },
    {
      "epoch": 37.21,
      "learning_rate": 0.001,
      "loss": 2.4993,
      "step": 193800
    },
    {
      "epoch": 37.21,
      "learning_rate": 0.001,
      "loss": 2.5047,
      "step": 193812
    },
    {
      "epoch": 37.22,
      "learning_rate": 0.001,
      "loss": 2.497,
      "step": 193824
    },
    {
      "epoch": 37.22,
      "learning_rate": 0.001,
      "loss": 2.4949,
      "step": 193836
    },
    {
      "epoch": 37.22,
      "learning_rate": 0.001,
      "loss": 2.4997,
      "step": 193848
    },
    {
      "epoch": 37.22,
      "learning_rate": 0.001,
      "loss": 2.5074,
      "step": 193860
    },
    {
      "epoch": 37.23,
      "learning_rate": 0.001,
      "loss": 2.5011,
      "step": 193872
    },
    {
      "epoch": 37.23,
      "learning_rate": 0.001,
      "loss": 2.4944,
      "step": 193884
    },
    {
      "epoch": 37.23,
      "learning_rate": 0.001,
      "loss": 2.5037,
      "step": 193896
    },
    {
      "epoch": 37.23,
      "learning_rate": 0.001,
      "loss": 2.5067,
      "step": 193908
    },
    {
      "epoch": 37.24,
      "learning_rate": 0.001,
      "loss": 2.49,
      "step": 193920
    },
    {
      "epoch": 37.24,
      "learning_rate": 0.001,
      "loss": 2.49,
      "step": 193932
    },
    {
      "epoch": 37.24,
      "learning_rate": 0.001,
      "loss": 2.4951,
      "step": 193944
    },
    {
      "epoch": 37.24,
      "learning_rate": 0.001,
      "loss": 2.5022,
      "step": 193956
    },
    {
      "epoch": 37.24,
      "learning_rate": 0.001,
      "loss": 2.495,
      "step": 193968
    },
    {
      "epoch": 37.25,
      "learning_rate": 0.001,
      "loss": 2.5077,
      "step": 193980
    },
    {
      "epoch": 37.25,
      "learning_rate": 0.001,
      "loss": 2.4982,
      "step": 193992
    },
    {
      "epoch": 37.25,
      "learning_rate": 0.001,
      "loss": 2.5044,
      "step": 194004
    },
    {
      "epoch": 37.25,
      "learning_rate": 0.001,
      "loss": 2.495,
      "step": 194016
    },
    {
      "epoch": 37.26,
      "learning_rate": 0.001,
      "loss": 2.4979,
      "step": 194028
    },
    {
      "epoch": 37.26,
      "learning_rate": 0.001,
      "loss": 2.4923,
      "step": 194040
    },
    {
      "epoch": 37.26,
      "learning_rate": 0.001,
      "loss": 2.5038,
      "step": 194052
    },
    {
      "epoch": 37.26,
      "learning_rate": 0.001,
      "loss": 2.5023,
      "step": 194064
    },
    {
      "epoch": 37.26,
      "learning_rate": 0.001,
      "loss": 2.4932,
      "step": 194076
    },
    {
      "epoch": 37.27,
      "learning_rate": 0.001,
      "loss": 2.4958,
      "step": 194088
    },
    {
      "epoch": 37.27,
      "learning_rate": 0.001,
      "loss": 2.4954,
      "step": 194100
    },
    {
      "epoch": 37.27,
      "learning_rate": 0.001,
      "loss": 2.4869,
      "step": 194112
    },
    {
      "epoch": 37.27,
      "learning_rate": 0.001,
      "loss": 2.5045,
      "step": 194124
    },
    {
      "epoch": 37.28,
      "learning_rate": 0.001,
      "loss": 2.4939,
      "step": 194136
    },
    {
      "epoch": 37.28,
      "learning_rate": 0.001,
      "loss": 2.498,
      "step": 194148
    },
    {
      "epoch": 37.28,
      "learning_rate": 0.001,
      "loss": 2.4957,
      "step": 194160
    },
    {
      "epoch": 37.28,
      "learning_rate": 0.001,
      "loss": 2.5027,
      "step": 194172
    },
    {
      "epoch": 37.29,
      "learning_rate": 0.001,
      "loss": 2.4953,
      "step": 194184
    },
    {
      "epoch": 37.29,
      "learning_rate": 0.001,
      "loss": 2.4926,
      "step": 194196
    },
    {
      "epoch": 37.29,
      "learning_rate": 0.001,
      "loss": 2.5073,
      "step": 194208
    },
    {
      "epoch": 37.29,
      "learning_rate": 0.001,
      "loss": 2.4954,
      "step": 194220
    },
    {
      "epoch": 37.29,
      "learning_rate": 0.001,
      "loss": 2.5072,
      "step": 194232
    },
    {
      "epoch": 37.3,
      "learning_rate": 0.001,
      "loss": 2.4972,
      "step": 194244
    },
    {
      "epoch": 37.3,
      "learning_rate": 0.001,
      "loss": 2.4991,
      "step": 194256
    },
    {
      "epoch": 37.3,
      "learning_rate": 0.001,
      "loss": 2.4991,
      "step": 194268
    },
    {
      "epoch": 37.3,
      "learning_rate": 0.001,
      "loss": 2.498,
      "step": 194280
    },
    {
      "epoch": 37.31,
      "learning_rate": 0.001,
      "loss": 2.5065,
      "step": 194292
    },
    {
      "epoch": 37.31,
      "learning_rate": 0.001,
      "loss": 2.4951,
      "step": 194304
    },
    {
      "epoch": 37.31,
      "learning_rate": 0.001,
      "loss": 2.4935,
      "step": 194316
    },
    {
      "epoch": 37.31,
      "learning_rate": 0.001,
      "loss": 2.4935,
      "step": 194328
    },
    {
      "epoch": 37.32,
      "learning_rate": 0.001,
      "loss": 2.4883,
      "step": 194340
    },
    {
      "epoch": 37.32,
      "learning_rate": 0.001,
      "loss": 2.5012,
      "step": 194352
    },
    {
      "epoch": 37.32,
      "learning_rate": 0.001,
      "loss": 2.5056,
      "step": 194364
    },
    {
      "epoch": 37.32,
      "eval_ag_news_accuracy": 0.3278125,
      "eval_ag_news_bleu_score": 4.930670692561991,
      "eval_ag_news_bleu_score_sem": 0.15178758369773337,
      "eval_ag_news_emb_cos_sim": 0.8194109201431274,
      "eval_ag_news_emb_cos_sim_sem": 0.006857397619843736,
      "eval_ag_news_emb_top1_equal": 0.2734375,
      "eval_ag_news_emb_top1_equal_sem": 0.03955156411760461,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4890034198760986,
      "eval_ag_news_n_ngrams_match_1": 14.396,
      "eval_ag_news_n_ngrams_match_2": 3.212,
      "eval_ag_news_n_ngrams_match_3": 0.9,
      "eval_ag_news_num_pred_words": 46.952,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.75329015803918,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3582680219683052,
      "eval_ag_news_runtime": 10.3699,
      "eval_ag_news_samples_per_second": 48.216,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.3568136618038535,
      "eval_ag_news_token_set_f1_sem": 0.0044437308384324145,
      "eval_ag_news_token_set_precision": 0.34709898326299293,
      "eval_ag_news_token_set_recall": 0.38049060297805015,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 194375
    },
    {
      "epoch": 37.32,
      "eval_anthropic_toxic_prompts_accuracy": 0.116875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2592883582400756,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12186789282290272,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6781432628631592,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008447857680695912,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1484375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.031548465007086954,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.21248197555542,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.308,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.0,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.748,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.324,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.840663696161567,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2171679287748752,
      "eval_anthropic_toxic_prompts_runtime": 10.3736,
      "eval_anthropic_toxic_prompts_samples_per_second": 48.199,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.096,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3529143401339154,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006555732752202401,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4391528107955721,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.320556486725126,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 194375
    },
    {
      "epoch": 37.32,
      "eval_arxiv_accuracy": 0.35265625,
      "eval_arxiv_bleu_score": 4.3999055260186575,
      "eval_arxiv_bleu_score_sem": 0.12258063090345232,
      "eval_arxiv_emb_cos_sim": 0.7799035310745239,
      "eval_arxiv_emb_cos_sim_sem": 0.006211274578370587,
      "eval_arxiv_emb_top1_equal": 0.234375,
      "eval_arxiv_emb_top1_equal_sem": 0.03758909358128201,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3566486835479736,
      "eval_arxiv_n_ngrams_match_1": 15.36,
      "eval_arxiv_n_ngrams_match_2": 3.028,
      "eval_arxiv_n_ngrams_match_3": 0.66,
      "eval_arxiv_num_pred_words": 40.45,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.692870680092827,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3701688705609501,
      "eval_arxiv_runtime": 11.8409,
      "eval_arxiv_samples_per_second": 42.227,
      "eval_arxiv_steps_per_second": 0.084,
      "eval_arxiv_token_set_f1": 0.3582110091680112,
      "eval_arxiv_token_set_f1_sem": 0.004059473478740534,
      "eval_arxiv_token_set_precision": 0.3113617155700013,
      "eval_arxiv_token_set_recall": 0.4401518079863058,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 194375
    },
    {
      "epoch": 37.32,
      "eval_python_code_alpaca_accuracy": 0.16371875,
      "eval_python_code_alpaca_bleu_score": 4.775014853561377,
      "eval_python_code_alpaca_bleu_score_sem": 0.15479763267848184,
      "eval_python_code_alpaca_emb_cos_sim": 0.755107581615448,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008915453257321994,
      "eval_python_code_alpaca_emb_top1_equal": 0.09375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.025864720141013958,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.833731174468994,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.934,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.962,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.018,
      "eval_python_code_alpaca_num_pred_words": 43.312,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.008805396671562,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3401846253313796,
      "eval_python_code_alpaca_runtime": 9.8021,
      "eval_python_code_alpaca_samples_per_second": 51.009,
      "eval_python_code_alpaca_steps_per_second": 0.102,
      "eval_python_code_alpaca_token_set_f1": 0.47675677416632645,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005513643105916052,
      "eval_python_code_alpaca_token_set_precision": 0.5411193627439754,
      "eval_python_code_alpaca_token_set_recall": 0.4497073035938302,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 194375
    },
    {
      "epoch": 37.32,
      "eval_wikibio_accuracy": 0.3295625,
      "eval_wikibio_bleu_score": 6.203771319594105,
      "eval_wikibio_bleu_score_sem": 0.22153006970172062,
      "eval_wikibio_emb_cos_sim": 0.7438701391220093,
      "eval_wikibio_emb_cos_sim_sem": 0.009452194690197245,
      "eval_wikibio_emb_top1_equal": 0.1875,
      "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6615147590637207,
      "eval_wikibio_n_ngrams_match_1": 10.324,
      "eval_wikibio_n_ngrams_match_2": 3.494,
      "eval_wikibio_n_ngrams_match_3": 1.322,
      "eval_wikibio_num_pred_words": 36.582,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 38.920253048773695,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36229334941439284,
      "eval_wikibio_runtime": 9.4919,
      "eval_wikibio_samples_per_second": 52.677,
      "eval_wikibio_steps_per_second": 0.105,
      "eval_wikibio_token_set_f1": 0.32509779845142867,
      "eval_wikibio_token_set_f1_sem": 0.005426929573211413,
      "eval_wikibio_token_set_precision": 0.33686437038097244,
      "eval_wikibio_token_set_recall": 0.32875227375568794,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 194375
    },
    {
      "epoch": 37.32,
      "eval_nq_accuracy": 0.53740625,
      "eval_nq_bleu_score": 12.00340341710867,
      "eval_nq_bleu_score_sem": 0.48171148468188396,
      "eval_nq_emb_cos_sim": 0.8391791582107544,
      "eval_nq_emb_cos_sim_sem": 0.006667585336307877,
      "eval_nq_emb_top1_equal": 0.3203125,
      "eval_nq_emb_top1_equal_sem": 0.041403754790620424,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1303999423980713,
      "eval_nq_n_ngrams_match_1": 23.3,
      "eval_nq_n_ngrams_match_2": 8.578,
      "eval_nq_n_ngrams_match_3": 3.948,
      "eval_nq_num_pred_words": 49.0,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.41823294653735,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45372407114469504,
      "eval_nq_runtime": 10.4044,
      "eval_nq_samples_per_second": 48.057,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.46251134527244336,
      "eval_nq_token_set_f1_sem": 0.0049133807000650565,
      "eval_nq_token_set_precision": 0.4233934234837151,
      "eval_nq_token_set_recall": 0.5171948329638827,
      "eval_nq_true_num_tokens": 64.0,
      "step": 194375
    },
    {
      "epoch": 37.32,
      "learning_rate": 0.001,
      "loss": 2.497,
      "step": 194376
    },
    {
      "epoch": 37.32,
      "learning_rate": 0.001,
      "loss": 2.5044,
      "step": 194388
    },
    {
      "epoch": 37.33,
      "learning_rate": 0.001,
      "loss": 2.5042,
      "step": 194400
    },
    {
      "epoch": 37.33,
      "learning_rate": 0.001,
      "loss": 2.4949,
      "step": 194412
    },
    {
      "epoch": 37.33,
      "learning_rate": 0.001,
      "loss": 2.5047,
      "step": 194424
    },
    {
      "epoch": 37.33,
      "learning_rate": 0.001,
      "loss": 2.501,
      "step": 194436
    },
    {
      "epoch": 37.34,
      "learning_rate": 0.001,
      "loss": 2.502,
      "step": 194448
    },
    {
      "epoch": 37.34,
      "learning_rate": 0.001,
      "loss": 2.5058,
      "step": 194460
    },
    {
      "epoch": 37.34,
      "learning_rate": 0.001,
      "loss": 2.5073,
      "step": 194472
    },
    {
      "epoch": 37.34,
      "learning_rate": 0.001,
      "loss": 2.4941,
      "step": 194484
    },
    {
      "epoch": 37.35,
      "learning_rate": 0.001,
      "loss": 2.5084,
      "step": 194496
    },
    {
      "epoch": 37.35,
      "learning_rate": 0.001,
      "loss": 2.5015,
      "step": 194508
    },
    {
      "epoch": 37.35,
      "learning_rate": 0.001,
      "loss": 2.5032,
      "step": 194520
    },
    {
      "epoch": 37.35,
      "learning_rate": 0.001,
      "loss": 2.5043,
      "step": 194532
    },
    {
      "epoch": 37.35,
      "learning_rate": 0.001,
      "loss": 2.4949,
      "step": 194544
    },
    {
      "epoch": 37.36,
      "learning_rate": 0.001,
      "loss": 2.5094,
      "step": 194556
    },
    {
      "epoch": 37.36,
      "learning_rate": 0.001,
      "loss": 2.5018,
      "step": 194568
    },
    {
      "epoch": 37.36,
      "learning_rate": 0.001,
      "loss": 2.5087,
      "step": 194580
    },
    {
      "epoch": 37.36,
      "learning_rate": 0.001,
      "loss": 2.4969,
      "step": 194592
    },
    {
      "epoch": 37.37,
      "learning_rate": 0.001,
      "loss": 2.4911,
      "step": 194604
    },
    {
      "epoch": 37.37,
      "learning_rate": 0.001,
      "loss": 2.5102,
      "step": 194616
    },
    {
      "epoch": 37.37,
      "learning_rate": 0.001,
      "loss": 2.4975,
      "step": 194628
    },
    {
      "epoch": 37.37,
      "learning_rate": 0.001,
      "loss": 2.498,
      "step": 194640
    },
    {
      "epoch": 37.38,
      "learning_rate": 0.001,
      "loss": 2.4909,
      "step": 194652
    },
    {
      "epoch": 37.38,
      "learning_rate": 0.001,
      "loss": 2.5023,
      "step": 194664
    },
    {
      "epoch": 37.38,
      "learning_rate": 0.001,
      "loss": 2.5023,
      "step": 194676
    },
    {
      "epoch": 37.38,
      "learning_rate": 0.001,
      "loss": 2.4991,
      "step": 194688
    },
    {
      "epoch": 37.38,
      "learning_rate": 0.001,
      "loss": 2.5055,
      "step": 194700
    },
    {
      "epoch": 37.39,
      "learning_rate": 0.001,
      "loss": 2.4987,
      "step": 194712
    },
    {
      "epoch": 37.39,
      "learning_rate": 0.001,
      "loss": 2.5023,
      "step": 194724
    },
    {
      "epoch": 37.39,
      "learning_rate": 0.001,
      "loss": 2.4964,
      "step": 194736
    },
    {
      "epoch": 37.39,
      "learning_rate": 0.001,
      "loss": 2.5012,
      "step": 194748
    },
    {
      "epoch": 37.4,
      "learning_rate": 0.001,
      "loss": 2.5084,
      "step": 194760
    },
    {
      "epoch": 37.4,
      "learning_rate": 0.001,
      "loss": 2.5061,
      "step": 194772
    },
    {
      "epoch": 37.4,
      "learning_rate": 0.001,
      "loss": 2.5008,
      "step": 194784
    },
    {
      "epoch": 37.4,
      "learning_rate": 0.001,
      "loss": 2.5058,
      "step": 194796
    },
    {
      "epoch": 37.41,
      "learning_rate": 0.001,
      "loss": 2.5098,
      "step": 194808
    },
    {
      "epoch": 37.41,
      "learning_rate": 0.001,
      "loss": 2.5043,
      "step": 194820
    },
    {
      "epoch": 37.41,
      "learning_rate": 0.001,
      "loss": 2.4973,
      "step": 194832
    },
    {
      "epoch": 37.41,
      "learning_rate": 0.001,
      "loss": 2.4984,
      "step": 194844
    },
    {
      "epoch": 37.41,
      "learning_rate": 0.001,
      "loss": 2.4957,
      "step": 194856
    },
    {
      "epoch": 37.42,
      "learning_rate": 0.001,
      "loss": 2.4987,
      "step": 194868
    },
    {
      "epoch": 37.42,
      "learning_rate": 0.001,
      "loss": 2.4997,
      "step": 194880
    },
    {
      "epoch": 37.42,
      "learning_rate": 0.001,
      "loss": 2.504,
      "step": 194892
    },
    {
      "epoch": 37.42,
      "learning_rate": 0.001,
      "loss": 2.5045,
      "step": 194904
    },
    {
      "epoch": 37.43,
      "learning_rate": 0.001,
      "loss": 2.4958,
      "step": 194916
    },
    {
      "epoch": 37.43,
      "learning_rate": 0.001,
      "loss": 2.4978,
      "step": 194928
    },
    {
      "epoch": 37.43,
      "learning_rate": 0.001,
      "loss": 2.5051,
      "step": 194940
    },
    {
      "epoch": 37.43,
      "learning_rate": 0.001,
      "loss": 2.4967,
      "step": 194952
    },
    {
      "epoch": 37.44,
      "learning_rate": 0.001,
      "loss": 2.5037,
      "step": 194964
    },
    {
      "epoch": 37.44,
      "learning_rate": 0.001,
      "loss": 2.5047,
      "step": 194976
    },
    {
      "epoch": 37.44,
      "learning_rate": 0.001,
      "loss": 2.4989,
      "step": 194988
    },
    {
      "epoch": 37.44,
      "learning_rate": 0.001,
      "loss": 2.4962,
      "step": 195000
    },
    {
      "epoch": 37.44,
      "eval_ag_news_accuracy": 0.32796875,
      "eval_ag_news_bleu_score": 5.244067875540459,
      "eval_ag_news_bleu_score_sem": 0.17487987941260041,
      "eval_ag_news_emb_cos_sim": 0.8198561072349548,
      "eval_ag_news_emb_cos_sim_sem": 0.007308784440494839,
      "eval_ag_news_emb_top1_equal": 0.1953125,
      "eval_ag_news_emb_top1_equal_sem": 0.035178457165496856,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.478668451309204,
      "eval_ag_news_n_ngrams_match_1": 14.404,
      "eval_ag_news_n_ngrams_match_2": 3.358,
      "eval_ag_news_n_ngrams_match_3": 1.02,
      "eval_ag_news_num_pred_words": 46.516,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.41652913856147,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35791499620070455,
      "eval_ag_news_runtime": 10.0458,
      "eval_ag_news_samples_per_second": 49.772,
      "eval_ag_news_steps_per_second": 0.1,
      "eval_ag_news_token_set_f1": 0.3576864714670146,
      "eval_ag_news_token_set_f1_sem": 0.004596480762188387,
      "eval_ag_news_token_set_precision": 0.3448366794518283,
      "eval_ag_news_token_set_recall": 0.38447758016454087,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 195000
    },
    {
      "epoch": 37.44,
      "eval_anthropic_toxic_prompts_accuracy": 0.116,
      "eval_anthropic_toxic_prompts_bleu_score": 3.230960722920636,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12652715997134178,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6770283579826355,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008723208968918155,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.187702178955078,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.326,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.974,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.768,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.392,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.232681055507367,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 62.9921875,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21719043400690022,
      "eval_anthropic_toxic_prompts_runtime": 9.9488,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.257,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.101,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3599878178488781,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006559571815956049,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4446544910892496,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32877459401478754,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 195000
    },
    {
      "epoch": 37.44,
      "eval_arxiv_accuracy": 0.35075,
      "eval_arxiv_bleu_score": 4.421723876239833,
      "eval_arxiv_bleu_score_sem": 0.13081013770764,
      "eval_arxiv_emb_cos_sim": 0.7836287021636963,
      "eval_arxiv_emb_cos_sim_sem": 0.007277140059797254,
      "eval_arxiv_emb_top1_equal": 0.2734375,
      "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3493075370788574,
      "eval_arxiv_n_ngrams_match_1": 15.398,
      "eval_arxiv_n_ngrams_match_2": 3.06,
      "eval_arxiv_n_ngrams_match_3": 0.684,
      "eval_arxiv_num_pred_words": 40.316,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.483003389590884,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3701232713221733,
      "eval_arxiv_runtime": 10.1425,
      "eval_arxiv_samples_per_second": 49.297,
      "eval_arxiv_steps_per_second": 0.099,
      "eval_arxiv_token_set_f1": 0.36066145867072974,
      "eval_arxiv_token_set_f1_sem": 0.004317172138607544,
      "eval_arxiv_token_set_precision": 0.31440047985832426,
      "eval_arxiv_token_set_recall": 0.44132088538840525,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 195000
    },
    {
      "epoch": 37.44,
      "eval_python_code_alpaca_accuracy": 0.1626875,
      "eval_python_code_alpaca_bleu_score": 4.944235461568967,
      "eval_python_code_alpaca_bleu_score_sem": 0.16625114340706515,
      "eval_python_code_alpaca_emb_cos_sim": 0.7681782841682434,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008460871837262667,
      "eval_python_code_alpaca_emb_top1_equal": 0.15625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8473594188690186,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.926,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.064,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.132,
      "eval_python_code_alpaca_num_pred_words": 43.318,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.24219226801552,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.34094041620934323,
      "eval_python_code_alpaca_runtime": 9.932,
      "eval_python_code_alpaca_samples_per_second": 50.342,
      "eval_python_code_alpaca_steps_per_second": 0.101,
      "eval_python_code_alpaca_token_set_f1": 0.48029914494784204,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0058647336376740225,
      "eval_python_code_alpaca_token_set_precision": 0.5420172127474987,
      "eval_python_code_alpaca_token_set_recall": 0.4579217159509164,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 195000
    },
    {
      "epoch": 37.44,
      "eval_wikibio_accuracy": 0.32665625,
      "eval_wikibio_bleu_score": 5.905575571952946,
      "eval_wikibio_bleu_score_sem": 0.21851932046204833,
      "eval_wikibio_emb_cos_sim": 0.7433996796607971,
      "eval_wikibio_emb_cos_sim_sem": 0.009556348113946268,
      "eval_wikibio_emb_top1_equal": 0.1484375,
      "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.665127992630005,
      "eval_wikibio_n_ngrams_match_1": 10.19,
      "eval_wikibio_n_ngrams_match_2": 3.406,
      "eval_wikibio_n_ngrams_match_3": 1.252,
      "eval_wikibio_num_pred_words": 36.964,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 39.06113538060937,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.35906320474372844,
      "eval_wikibio_runtime": 10.362,
      "eval_wikibio_samples_per_second": 48.253,
      "eval_wikibio_steps_per_second": 0.097,
      "eval_wikibio_token_set_f1": 0.31800409151553855,
      "eval_wikibio_token_set_f1_sem": 0.005476764499547402,
      "eval_wikibio_token_set_precision": 0.3291303454959236,
      "eval_wikibio_token_set_recall": 0.3238962928597888,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 195000
    },
    {
      "epoch": 37.44,
      "eval_nq_accuracy": 0.536125,
      "eval_nq_bleu_score": 12.102546142598007,
      "eval_nq_bleu_score_sem": 0.48849432140077825,
      "eval_nq_emb_cos_sim": 0.8412253856658936,
      "eval_nq_emb_cos_sim_sem": 0.006508606765180276,
      "eval_nq_emb_top1_equal": 0.25,
      "eval_nq_emb_top1_equal_sem": 0.03842366440207048,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1275899410247803,
      "eval_nq_n_ngrams_match_1": 23.464,
      "eval_nq_n_ngrams_match_2": 8.688,
      "eval_nq_n_ngrams_match_3": 4.038,
      "eval_nq_num_pred_words": 48.968,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.394610904925095,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4571428648214083,
      "eval_nq_runtime": 10.2017,
      "eval_nq_samples_per_second": 49.011,
      "eval_nq_steps_per_second": 0.098,
      "eval_nq_token_set_f1": 0.471117674143217,
      "eval_nq_token_set_f1_sem": 0.0048392622866591265,
      "eval_nq_token_set_precision": 0.428317159728763,
      "eval_nq_token_set_recall": 0.5313456789986281,
      "eval_nq_true_num_tokens": 64.0,
      "step": 195000
    },
    {
      "epoch": 37.44,
      "learning_rate": 0.001,
      "loss": 2.4993,
      "step": 195012
    },
    {
      "epoch": 37.45,
      "learning_rate": 0.001,
      "loss": 2.5137,
      "step": 195024
    },
    {
      "epoch": 37.45,
      "learning_rate": 0.001,
      "loss": 2.5003,
      "step": 195036
    },
    {
      "epoch": 37.45,
      "learning_rate": 0.001,
      "loss": 2.5028,
      "step": 195048
    },
    {
      "epoch": 37.45,
      "learning_rate": 0.001,
      "loss": 2.5027,
      "step": 195060
    },
    {
      "epoch": 37.46,
      "learning_rate": 0.001,
      "loss": 2.4997,
      "step": 195072
    },
    {
      "epoch": 37.46,
      "learning_rate": 0.001,
      "loss": 2.5054,
      "step": 195084
    },
    {
      "epoch": 37.46,
      "learning_rate": 0.001,
      "loss": 2.488,
      "step": 195096
    },
    {
      "epoch": 37.46,
      "learning_rate": 0.001,
      "loss": 2.5032,
      "step": 195108
    },
    {
      "epoch": 37.47,
      "learning_rate": 0.001,
      "loss": 2.505,
      "step": 195120
    },
    {
      "epoch": 37.47,
      "learning_rate": 0.001,
      "loss": 2.5021,
      "step": 195132
    },
    {
      "epoch": 37.47,
      "learning_rate": 0.001,
      "loss": 2.5079,
      "step": 195144
    },
    {
      "epoch": 37.47,
      "learning_rate": 0.001,
      "loss": 2.5024,
      "step": 195156
    },
    {
      "epoch": 37.47,
      "learning_rate": 0.001,
      "loss": 2.4956,
      "step": 195168
    },
    {
      "epoch": 37.48,
      "learning_rate": 0.001,
      "loss": 2.5047,
      "step": 195180
    },
    {
      "epoch": 37.48,
      "learning_rate": 0.001,
      "loss": 2.4877,
      "step": 195192
    },
    {
      "epoch": 37.48,
      "learning_rate": 0.001,
      "loss": 2.5075,
      "step": 195204
    },
    {
      "epoch": 37.48,
      "learning_rate": 0.001,
      "loss": 2.5042,
      "step": 195216
    },
    {
      "epoch": 37.49,
      "learning_rate": 0.001,
      "loss": 2.4984,
      "step": 195228
    },
    {
      "epoch": 37.49,
      "learning_rate": 0.001,
      "loss": 2.5033,
      "step": 195240
    },
    {
      "epoch": 37.49,
      "learning_rate": 0.001,
      "loss": 2.4938,
      "step": 195252
    },
    {
      "epoch": 37.49,
      "learning_rate": 0.001,
      "loss": 2.4914,
      "step": 195264
    },
    {
      "epoch": 37.5,
      "learning_rate": 0.001,
      "loss": 2.4996,
      "step": 195276
    },
    {
      "epoch": 37.5,
      "learning_rate": 0.001,
      "loss": 2.5058,
      "step": 195288
    },
    {
      "epoch": 37.5,
      "learning_rate": 0.001,
      "loss": 2.4942,
      "step": 195300
    },
    {
      "epoch": 37.5,
      "learning_rate": 0.001,
      "loss": 2.5038,
      "step": 195312
    },
    {
      "epoch": 37.5,
      "learning_rate": 0.001,
      "loss": 2.4996,
      "step": 195324
    },
    {
      "epoch": 37.51,
      "learning_rate": 0.001,
      "loss": 2.5061,
      "step": 195336
    },
    {
      "epoch": 37.51,
      "learning_rate": 0.001,
      "loss": 2.5062,
      "step": 195348
    },
    {
      "epoch": 37.51,
      "learning_rate": 0.001,
      "loss": 2.4995,
      "step": 195360
    },
    {
      "epoch": 37.51,
      "learning_rate": 0.001,
      "loss": 2.4906,
      "step": 195372
    },
    {
      "epoch": 37.52,
      "learning_rate": 0.001,
      "loss": 2.5109,
      "step": 195384
    },
    {
      "epoch": 37.52,
      "learning_rate": 0.001,
      "loss": 2.4973,
      "step": 195396
    },
    {
      "epoch": 37.52,
      "learning_rate": 0.001,
      "loss": 2.4986,
      "step": 195408
    },
    {
      "epoch": 37.52,
      "learning_rate": 0.001,
      "loss": 2.498,
      "step": 195420
    },
    {
      "epoch": 37.53,
      "learning_rate": 0.001,
      "loss": 2.5054,
      "step": 195432
    },
    {
      "epoch": 37.53,
      "learning_rate": 0.001,
      "loss": 2.4908,
      "step": 195444
    },
    {
      "epoch": 37.53,
      "learning_rate": 0.001,
      "loss": 2.5037,
      "step": 195456
    },
    {
      "epoch": 37.53,
      "learning_rate": 0.001,
      "loss": 2.4987,
      "step": 195468
    },
    {
      "epoch": 37.53,
      "learning_rate": 0.001,
      "loss": 2.4942,
      "step": 195480
    },
    {
      "epoch": 37.54,
      "learning_rate": 0.001,
      "loss": 2.5064,
      "step": 195492
    },
    {
      "epoch": 37.54,
      "learning_rate": 0.001,
      "loss": 2.4968,
      "step": 195504
    },
    {
      "epoch": 37.54,
      "learning_rate": 0.001,
      "loss": 2.4998,
      "step": 195516
    },
    {
      "epoch": 37.54,
      "learning_rate": 0.001,
      "loss": 2.5067,
      "step": 195528
    },
    {
      "epoch": 37.55,
      "learning_rate": 0.001,
      "loss": 2.5001,
      "step": 195540
    },
    {
      "epoch": 37.55,
      "learning_rate": 0.001,
      "loss": 2.5012,
      "step": 195552
    },
    {
      "epoch": 37.55,
      "learning_rate": 0.001,
      "loss": 2.5052,
      "step": 195564
    },
    {
      "epoch": 37.55,
      "learning_rate": 0.001,
      "loss": 2.5052,
      "step": 195576
    },
    {
      "epoch": 37.56,
      "learning_rate": 0.001,
      "loss": 2.5019,
      "step": 195588
    },
    {
      "epoch": 37.56,
      "learning_rate": 0.001,
      "loss": 2.4972,
      "step": 195600
    },
    {
      "epoch": 37.56,
      "learning_rate": 0.001,
      "loss": 2.4981,
      "step": 195612
    },
    {
      "epoch": 37.56,
      "learning_rate": 0.001,
      "loss": 2.4918,
      "step": 195624
    },
    {
      "epoch": 37.56,
      "eval_ag_news_accuracy": 0.32840625,
      "eval_ag_news_bleu_score": 5.160678237118373,
      "eval_ag_news_bleu_score_sem": 0.15644910179101512,
      "eval_ag_news_emb_cos_sim": 0.8232282400131226,
      "eval_ag_news_emb_cos_sim_sem": 0.00639512110720863,
      "eval_ag_news_emb_top1_equal": 0.25,
      "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.479858636856079,
      "eval_ag_news_n_ngrams_match_1": 14.61,
      "eval_ag_news_n_ngrams_match_2": 3.34,
      "eval_ag_news_n_ngrams_match_3": 1.0,
      "eval_ag_news_num_pred_words": 47.11,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.45513379181512,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3644997735690672,
      "eval_ag_news_runtime": 10.9843,
      "eval_ag_news_samples_per_second": 45.519,
      "eval_ag_news_steps_per_second": 0.091,
      "eval_ag_news_token_set_f1": 0.3636258052726045,
      "eval_ag_news_token_set_f1_sem": 0.0043789982790820685,
      "eval_ag_news_token_set_precision": 0.3518743547836162,
      "eval_ag_news_token_set_recall": 0.3894813692401069,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 195625
    },
    {
      "epoch": 37.56,
      "eval_anthropic_toxic_prompts_accuracy": 0.11653125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2536863914236536,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12414063209611298,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6823281645774841,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008623482281351793,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2047436237335205,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.47,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.046,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.788,
      "eval_anthropic_toxic_prompts_num_pred_words": 48.036,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.649179741632693,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 62.9296875,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21857256722718568,
      "eval_anthropic_toxic_prompts_runtime": 9.9715,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.143,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.1,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3652828021103452,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006487779074896755,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.45705235729397486,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3291836020040149,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 195625
    },
    {
      "epoch": 37.56,
      "eval_arxiv_accuracy": 0.34915625,
      "eval_arxiv_bleu_score": 4.5476246633263235,
      "eval_arxiv_bleu_score_sem": 0.13177119287371816,
      "eval_arxiv_emb_cos_sim": 0.7904843091964722,
      "eval_arxiv_emb_cos_sim_sem": 0.006650471081634421,
      "eval_arxiv_emb_top1_equal": 0.2890625,
      "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3637514114379883,
      "eval_arxiv_n_ngrams_match_1": 15.692,
      "eval_arxiv_n_ngrams_match_2": 3.126,
      "eval_arxiv_n_ngrams_match_3": 0.694,
      "eval_arxiv_num_pred_words": 41.608,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.8973938091562,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3737865356288159,
      "eval_arxiv_runtime": 10.6201,
      "eval_arxiv_samples_per_second": 47.081,
      "eval_arxiv_steps_per_second": 0.094,
      "eval_arxiv_token_set_f1": 0.36484636584780045,
      "eval_arxiv_token_set_f1_sem": 0.004090870558584512,
      "eval_arxiv_token_set_precision": 0.3208891362951231,
      "eval_arxiv_token_set_recall": 0.4374011321819683,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 195625
    },
    {
      "epoch": 37.56,
      "eval_python_code_alpaca_accuracy": 0.1638125,
      "eval_python_code_alpaca_bleu_score": 4.856616076429742,
      "eval_python_code_alpaca_bleu_score_sem": 0.1576413255673956,
      "eval_python_code_alpaca_emb_cos_sim": 0.7713407278060913,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007635985572008593,
      "eval_python_code_alpaca_emb_top1_equal": 0.1875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.034634623208270626,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.827100992202759,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.092,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.166,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.148,
      "eval_python_code_alpaca_num_pred_words": 45.384,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 16.896406939635312,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33791876503371887,
      "eval_python_code_alpaca_runtime": 10.1229,
      "eval_python_code_alpaca_samples_per_second": 49.393,
      "eval_python_code_alpaca_steps_per_second": 0.099,
      "eval_python_code_alpaca_token_set_f1": 0.4841025604174241,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005484594221514288,
      "eval_python_code_alpaca_token_set_precision": 0.5530509643743557,
      "eval_python_code_alpaca_token_set_recall": 0.45028786757907063,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 195625
    },
    {
      "epoch": 37.56,
      "eval_wikibio_accuracy": 0.3275,
      "eval_wikibio_bleu_score": 6.167448437456132,
      "eval_wikibio_bleu_score_sem": 0.2218471402877958,
      "eval_wikibio_emb_cos_sim": 0.7572469711303711,
      "eval_wikibio_emb_cos_sim_sem": 0.009437384682757222,
      "eval_wikibio_emb_top1_equal": 0.1953125,
      "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6589012145996094,
      "eval_wikibio_n_ngrams_match_1": 10.308,
      "eval_wikibio_n_ngrams_match_2": 3.49,
      "eval_wikibio_n_ngrams_match_3": 1.334,
      "eval_wikibio_num_pred_words": 36.57,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 38.81866604577605,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3622507798370945,
      "eval_wikibio_runtime": 9.9982,
      "eval_wikibio_samples_per_second": 50.009,
      "eval_wikibio_steps_per_second": 0.1,
      "eval_wikibio_token_set_f1": 0.3240266841869486,
      "eval_wikibio_token_set_f1_sem": 0.005392734000007475,
      "eval_wikibio_token_set_precision": 0.3332030008260182,
      "eval_wikibio_token_set_recall": 0.3291543991379892,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 195625
    },
    {
      "epoch": 37.56,
      "eval_nq_accuracy": 0.53659375,
      "eval_nq_bleu_score": 12.054164118492311,
      "eval_nq_bleu_score_sem": 0.49342112605169397,
      "eval_nq_emb_cos_sim": 0.8338325619697571,
      "eval_nq_emb_cos_sim_sem": 0.007536739621933057,
      "eval_nq_emb_top1_equal": 0.328125,
      "eval_nq_emb_top1_equal_sem": 0.041664103776406315,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1246421337127686,
      "eval_nq_n_ngrams_match_1": 23.52,
      "eval_nq_n_ngrams_match_2": 8.734,
      "eval_nq_n_ngrams_match_3": 4.018,
      "eval_nq_num_pred_words": 49.55,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.369901646477157,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4547006044907649,
      "eval_nq_runtime": 10.4494,
      "eval_nq_samples_per_second": 47.85,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.46850882076678196,
      "eval_nq_token_set_f1_sem": 0.004857155245198665,
      "eval_nq_token_set_precision": 0.42857432408107793,
      "eval_nq_token_set_recall": 0.525536764171176,
      "eval_nq_true_num_tokens": 64.0,
      "step": 195625
    },
    {
      "epoch": 37.56,
      "learning_rate": 0.001,
      "loss": 2.4905,
      "step": 195636
    },
    {
      "epoch": 37.57,
      "learning_rate": 0.001,
      "loss": 2.5061,
      "step": 195648
    },
    {
      "epoch": 37.57,
      "learning_rate": 0.001,
      "loss": 2.4976,
      "step": 195660
    },
    {
      "epoch": 37.57,
      "learning_rate": 0.001,
      "loss": 2.5041,
      "step": 195672
    },
    {
      "epoch": 37.57,
      "learning_rate": 0.001,
      "loss": 2.5031,
      "step": 195684
    },
    {
      "epoch": 37.58,
      "learning_rate": 0.001,
      "loss": 2.5066,
      "step": 195696
    },
    {
      "epoch": 37.58,
      "learning_rate": 0.001,
      "loss": 2.4899,
      "step": 195708
    },
    {
      "epoch": 37.58,
      "learning_rate": 0.001,
      "loss": 2.4911,
      "step": 195720
    },
    {
      "epoch": 37.58,
      "learning_rate": 0.001,
      "loss": 2.5028,
      "step": 195732
    },
    {
      "epoch": 37.59,
      "learning_rate": 0.001,
      "loss": 2.5088,
      "step": 195744
    },
    {
      "epoch": 37.59,
      "learning_rate": 0.001,
      "loss": 2.5104,
      "step": 195756
    },
    {
      "epoch": 37.59,
      "learning_rate": 0.001,
      "loss": 2.493,
      "step": 195768
    },
    {
      "epoch": 37.59,
      "learning_rate": 0.001,
      "loss": 2.4995,
      "step": 195780
    },
    {
      "epoch": 37.59,
      "learning_rate": 0.001,
      "loss": 2.5028,
      "step": 195792
    },
    {
      "epoch": 37.6,
      "learning_rate": 0.001,
      "loss": 2.5048,
      "step": 195804
    },
    {
      "epoch": 37.6,
      "learning_rate": 0.001,
      "loss": 2.5004,
      "step": 195816
    },
    {
      "epoch": 37.6,
      "learning_rate": 0.001,
      "loss": 2.5108,
      "step": 195828
    },
    {
      "epoch": 37.6,
      "learning_rate": 0.001,
      "loss": 2.5105,
      "step": 195840
    },
    {
      "epoch": 37.61,
      "learning_rate": 0.001,
      "loss": 2.4927,
      "step": 195852
    },
    {
      "epoch": 37.61,
      "learning_rate": 0.001,
      "loss": 2.5083,
      "step": 195864
    },
    {
      "epoch": 37.61,
      "learning_rate": 0.001,
      "loss": 2.5018,
      "step": 195876
    },
    {
      "epoch": 37.61,
      "learning_rate": 0.001,
      "loss": 2.5023,
      "step": 195888
    },
    {
      "epoch": 37.62,
      "learning_rate": 0.001,
      "loss": 2.5055,
      "step": 195900
    },
    {
      "epoch": 37.62,
      "learning_rate": 0.001,
      "loss": 2.502,
      "step": 195912
    },
    {
      "epoch": 37.62,
      "learning_rate": 0.001,
      "loss": 2.5072,
      "step": 195924
    },
    {
      "epoch": 37.62,
      "learning_rate": 0.001,
      "loss": 2.5084,
      "step": 195936
    },
    {
      "epoch": 37.62,
      "learning_rate": 0.001,
      "loss": 2.5109,
      "step": 195948
    },
    {
      "epoch": 37.63,
      "learning_rate": 0.001,
      "loss": 2.5087,
      "step": 195960
    },
    {
      "epoch": 37.63,
      "learning_rate": 0.001,
      "loss": 2.505,
      "step": 195972
    },
    {
      "epoch": 37.63,
      "learning_rate": 0.001,
      "loss": 2.5074,
      "step": 195984
    },
    {
      "epoch": 37.63,
      "learning_rate": 0.001,
      "loss": 2.5101,
      "step": 195996
    },
    {
      "epoch": 37.64,
      "learning_rate": 0.001,
      "loss": 2.5035,
      "step": 196008
    },
    {
      "epoch": 37.64,
      "learning_rate": 0.001,
      "loss": 2.5055,
      "step": 196020
    },
    {
      "epoch": 37.64,
      "learning_rate": 0.001,
      "loss": 2.5074,
      "step": 196032
    },
    {
      "epoch": 37.64,
      "learning_rate": 0.001,
      "loss": 2.5038,
      "step": 196044
    },
    {
      "epoch": 37.65,
      "learning_rate": 0.001,
      "loss": 2.4983,
      "step": 196056
    },
    {
      "epoch": 37.65,
      "learning_rate": 0.001,
      "loss": 2.5058,
      "step": 196068
    },
    {
      "epoch": 37.65,
      "learning_rate": 0.001,
      "loss": 2.51,
      "step": 196080
    },
    {
      "epoch": 37.65,
      "learning_rate": 0.001,
      "loss": 2.5117,
      "step": 196092
    },
    {
      "epoch": 37.65,
      "learning_rate": 0.001,
      "loss": 2.5021,
      "step": 196104
    },
    {
      "epoch": 37.66,
      "learning_rate": 0.001,
      "loss": 2.5222,
      "step": 196116
    },
    {
      "epoch": 37.66,
      "learning_rate": 0.001,
      "loss": 2.5106,
      "step": 196128
    },
    {
      "epoch": 37.66,
      "learning_rate": 0.001,
      "loss": 2.5134,
      "step": 196140
    },
    {
      "epoch": 37.66,
      "learning_rate": 0.001,
      "loss": 2.503,
      "step": 196152
    },
    {
      "epoch": 37.67,
      "learning_rate": 0.001,
      "loss": 2.5023,
      "step": 196164
    },
    {
      "epoch": 37.67,
      "learning_rate": 0.001,
      "loss": 2.5094,
      "step": 196176
    },
    {
      "epoch": 37.67,
      "learning_rate": 0.001,
      "loss": 2.5081,
      "step": 196188
    },
    {
      "epoch": 37.67,
      "learning_rate": 0.001,
      "loss": 2.5133,
      "step": 196200
    },
    {
      "epoch": 37.68,
      "learning_rate": 0.001,
      "loss": 2.502,
      "step": 196212
    },
    {
      "epoch": 37.68,
      "learning_rate": 0.001,
      "loss": 2.4978,
      "step": 196224
    },
    {
      "epoch": 37.68,
      "learning_rate": 0.001,
      "loss": 2.4956,
      "step": 196236
    },
    {
      "epoch": 37.68,
      "learning_rate": 0.001,
      "loss": 2.495,
      "step": 196248
    },
    {
      "epoch": 37.68,
      "eval_ag_news_accuracy": 0.328875,
      "eval_ag_news_bleu_score": 5.191943237557456,
      "eval_ag_news_bleu_score_sem": 0.1691118400321775,
      "eval_ag_news_emb_cos_sim": 0.8163424730300903,
      "eval_ag_news_emb_cos_sim_sem": 0.00759417128847423,
      "eval_ag_news_emb_top1_equal": 0.296875,
      "eval_ag_news_emb_top1_equal_sem": 0.04054163310179599,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.471841335296631,
      "eval_ag_news_n_ngrams_match_1": 14.508,
      "eval_ag_news_n_ngrams_match_2": 3.338,
      "eval_ag_news_n_ngrams_match_3": 1.0,
      "eval_ag_news_num_pred_words": 46.768,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.19597147645399,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.36278319533389847,
      "eval_ag_news_runtime": 10.5807,
      "eval_ag_news_samples_per_second": 47.256,
      "eval_ag_news_steps_per_second": 0.095,
      "eval_ag_news_token_set_f1": 0.3630105993233433,
      "eval_ag_news_token_set_f1_sem": 0.004483880190431542,
      "eval_ag_news_token_set_precision": 0.34978741208910036,
      "eval_ag_news_token_set_recall": 0.3899184770063757,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 196250
    },
    {
      "epoch": 37.68,
      "eval_anthropic_toxic_prompts_accuracy": 0.11528125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.3222841018869005,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12694519578173327,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6914851665496826,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008522632116854811,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2299375534057617,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.356,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.058,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.772,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.128,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.278078391770503,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2169343396150039,
      "eval_anthropic_toxic_prompts_runtime": 10.1541,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.241,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.098,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36212943352384325,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006544156448989002,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.45090677936596274,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3282954790726491,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 196250
    },
    {
      "epoch": 37.68,
      "eval_arxiv_accuracy": 0.350625,
      "eval_arxiv_bleu_score": 4.348194417630073,
      "eval_arxiv_bleu_score_sem": 0.12087874354688675,
      "eval_arxiv_emb_cos_sim": 0.7824056148529053,
      "eval_arxiv_emb_cos_sim_sem": 0.006829816458760404,
      "eval_arxiv_emb_top1_equal": 0.296875,
      "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3426730632781982,
      "eval_arxiv_n_ngrams_match_1": 15.338,
      "eval_arxiv_n_ngrams_match_2": 3.0,
      "eval_arxiv_n_ngrams_match_3": 0.636,
      "eval_arxiv_num_pred_words": 40.22,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.294659123238933,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3704195679433676,
      "eval_arxiv_runtime": 10.5147,
      "eval_arxiv_samples_per_second": 47.552,
      "eval_arxiv_steps_per_second": 0.095,
      "eval_arxiv_token_set_f1": 0.35889324267200223,
      "eval_arxiv_token_set_f1_sem": 0.004168044743930564,
      "eval_arxiv_token_set_precision": 0.3110964680727157,
      "eval_arxiv_token_set_recall": 0.44507439509782587,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 196250
    },
    {
      "epoch": 37.68,
      "eval_python_code_alpaca_accuracy": 0.16015625,
      "eval_python_code_alpaca_bleu_score": 4.470202827015937,
      "eval_python_code_alpaca_bleu_score_sem": 0.1369550803608607,
      "eval_python_code_alpaca_emb_cos_sim": 0.7453908920288086,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009904095175564575,
      "eval_python_code_alpaca_emb_top1_equal": 0.1953125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.035178457165496856,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.862358808517456,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.532,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.864,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.958,
      "eval_python_code_alpaca_num_pred_words": 43.6,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.5027639510361,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3252311283517712,
      "eval_python_code_alpaca_runtime": 10.0341,
      "eval_python_code_alpaca_samples_per_second": 49.83,
      "eval_python_code_alpaca_steps_per_second": 0.1,
      "eval_python_code_alpaca_token_set_f1": 0.47097496401311145,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005564428237760059,
      "eval_python_code_alpaca_token_set_precision": 0.5172152055187773,
      "eval_python_code_alpaca_token_set_recall": 0.4558242906613513,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 196250
    },
    {
      "epoch": 37.68,
      "eval_wikibio_accuracy": 0.32784375,
      "eval_wikibio_bleu_score": 6.044218406578306,
      "eval_wikibio_bleu_score_sem": 0.20489907065935145,
      "eval_wikibio_emb_cos_sim": 0.7580497860908508,
      "eval_wikibio_emb_cos_sim_sem": 0.00828936643250422,
      "eval_wikibio_emb_top1_equal": 0.2265625,
      "eval_wikibio_emb_top1_equal_sem": 0.03714537682851538,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6197497844696045,
      "eval_wikibio_n_ngrams_match_1": 10.322,
      "eval_wikibio_n_ngrams_match_2": 3.464,
      "eval_wikibio_n_ngrams_match_3": 1.268,
      "eval_wikibio_num_pred_words": 36.502,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.328226551431605,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3640825541645696,
      "eval_wikibio_runtime": 10.3425,
      "eval_wikibio_samples_per_second": 48.344,
      "eval_wikibio_steps_per_second": 0.097,
      "eval_wikibio_token_set_f1": 0.3264149641025879,
      "eval_wikibio_token_set_f1_sem": 0.005387873059022707,
      "eval_wikibio_token_set_precision": 0.33619901291779125,
      "eval_wikibio_token_set_recall": 0.3336449063810576,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 196250
    },
    {
      "epoch": 37.68,
      "eval_nq_accuracy": 0.534875,
      "eval_nq_bleu_score": 11.945385418077185,
      "eval_nq_bleu_score_sem": 0.48931319491620795,
      "eval_nq_emb_cos_sim": 0.8394231200218201,
      "eval_nq_emb_cos_sim_sem": 0.00646038879485029,
      "eval_nq_emb_top1_equal": 0.2265625,
      "eval_nq_emb_top1_equal_sem": 0.03714537682851538,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.129955530166626,
      "eval_nq_n_ngrams_match_1": 23.596,
      "eval_nq_n_ngrams_match_2": 8.612,
      "eval_nq_n_ngrams_match_3": 3.958,
      "eval_nq_num_pred_words": 49.094,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.414492612035515,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45782448353581756,
      "eval_nq_runtime": 10.419,
      "eval_nq_samples_per_second": 47.989,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.4699820335881326,
      "eval_nq_token_set_f1_sem": 0.004957978387288264,
      "eval_nq_token_set_precision": 0.4284136995609277,
      "eval_nq_token_set_recall": 0.5287816983017264,
      "eval_nq_true_num_tokens": 64.0,
      "step": 196250
    },
    {
      "epoch": 37.68,
      "learning_rate": 0.001,
      "loss": 2.5105,
      "step": 196260
    },
    {
      "epoch": 37.69,
      "learning_rate": 0.001,
      "loss": 2.5141,
      "step": 196272
    },
    {
      "epoch": 37.69,
      "learning_rate": 0.001,
      "loss": 2.4893,
      "step": 196284
    },
    {
      "epoch": 37.69,
      "learning_rate": 0.001,
      "loss": 2.5117,
      "step": 196296
    },
    {
      "epoch": 37.69,
      "learning_rate": 0.001,
      "loss": 2.4995,
      "step": 196308
    },
    {
      "epoch": 37.7,
      "learning_rate": 0.001,
      "loss": 2.5066,
      "step": 196320
    },
    {
      "epoch": 37.7,
      "learning_rate": 0.001,
      "loss": 2.508,
      "step": 196332
    },
    {
      "epoch": 37.7,
      "learning_rate": 0.001,
      "loss": 2.504,
      "step": 196344
    },
    {
      "epoch": 37.7,
      "learning_rate": 0.001,
      "loss": 2.5158,
      "step": 196356
    },
    {
      "epoch": 37.71,
      "learning_rate": 0.001,
      "loss": 2.5004,
      "step": 196368
    },
    {
      "epoch": 37.71,
      "learning_rate": 0.001,
      "loss": 2.5082,
      "step": 196380
    },
    {
      "epoch": 37.71,
      "learning_rate": 0.001,
      "loss": 2.5096,
      "step": 196392
    },
    {
      "epoch": 37.71,
      "learning_rate": 0.001,
      "loss": 2.5096,
      "step": 196404
    },
    {
      "epoch": 37.71,
      "learning_rate": 0.001,
      "loss": 2.5088,
      "step": 196416
    },
    {
      "epoch": 37.72,
      "learning_rate": 0.001,
      "loss": 2.5057,
      "step": 196428
    },
    {
      "epoch": 37.72,
      "learning_rate": 0.001,
      "loss": 2.4962,
      "step": 196440
    },
    {
      "epoch": 37.72,
      "learning_rate": 0.001,
      "loss": 2.5054,
      "step": 196452
    },
    {
      "epoch": 37.72,
      "learning_rate": 0.001,
      "loss": 2.5043,
      "step": 196464
    },
    {
      "epoch": 37.73,
      "learning_rate": 0.001,
      "loss": 2.5033,
      "step": 196476
    },
    {
      "epoch": 37.73,
      "learning_rate": 0.001,
      "loss": 2.5048,
      "step": 196488
    },
    {
      "epoch": 37.73,
      "learning_rate": 0.001,
      "loss": 2.5022,
      "step": 196500
    },
    {
      "epoch": 37.73,
      "learning_rate": 0.001,
      "loss": 2.5115,
      "step": 196512
    },
    {
      "epoch": 37.74,
      "learning_rate": 0.001,
      "loss": 2.5017,
      "step": 196524
    },
    {
      "epoch": 37.74,
      "learning_rate": 0.001,
      "loss": 2.4968,
      "step": 196536
    },
    {
      "epoch": 37.74,
      "learning_rate": 0.001,
      "loss": 2.503,
      "step": 196548
    },
    {
      "epoch": 37.74,
      "learning_rate": 0.001,
      "loss": 2.5054,
      "step": 196560
    },
    {
      "epoch": 37.74,
      "learning_rate": 0.001,
      "loss": 2.5055,
      "step": 196572
    },
    {
      "epoch": 37.75,
      "learning_rate": 0.001,
      "loss": 2.503,
      "step": 196584
    },
    {
      "epoch": 37.75,
      "learning_rate": 0.001,
      "loss": 2.4883,
      "step": 196596
    },
    {
      "epoch": 37.75,
      "learning_rate": 0.001,
      "loss": 2.5029,
      "step": 196608
    },
    {
      "epoch": 37.75,
      "learning_rate": 0.001,
      "loss": 2.507,
      "step": 196620
    },
    {
      "epoch": 37.76,
      "learning_rate": 0.001,
      "loss": 2.5024,
      "step": 196632
    },
    {
      "epoch": 37.76,
      "learning_rate": 0.001,
      "loss": 2.5048,
      "step": 196644
    },
    {
      "epoch": 37.76,
      "learning_rate": 0.001,
      "loss": 2.5113,
      "step": 196656
    },
    {
      "epoch": 37.76,
      "learning_rate": 0.001,
      "loss": 2.5095,
      "step": 196668
    },
    {
      "epoch": 37.76,
      "learning_rate": 0.001,
      "loss": 2.5036,
      "step": 196680
    },
    {
      "epoch": 37.77,
      "learning_rate": 0.001,
      "loss": 2.5055,
      "step": 196692
    },
    {
      "epoch": 37.77,
      "learning_rate": 0.001,
      "loss": 2.4991,
      "step": 196704
    },
    {
      "epoch": 37.77,
      "learning_rate": 0.001,
      "loss": 2.501,
      "step": 196716
    },
    {
      "epoch": 37.77,
      "learning_rate": 0.001,
      "loss": 2.5099,
      "step": 196728
    },
    {
      "epoch": 37.78,
      "learning_rate": 0.001,
      "loss": 2.4975,
      "step": 196740
    },
    {
      "epoch": 37.78,
      "learning_rate": 0.001,
      "loss": 2.4949,
      "step": 196752
    },
    {
      "epoch": 37.78,
      "learning_rate": 0.001,
      "loss": 2.5035,
      "step": 196764
    },
    {
      "epoch": 37.78,
      "learning_rate": 0.001,
      "loss": 2.5056,
      "step": 196776
    },
    {
      "epoch": 37.79,
      "learning_rate": 0.001,
      "loss": 2.5017,
      "step": 196788
    },
    {
      "epoch": 37.79,
      "learning_rate": 0.001,
      "loss": 2.498,
      "step": 196800
    },
    {
      "epoch": 37.79,
      "learning_rate": 0.001,
      "loss": 2.5117,
      "step": 196812
    },
    {
      "epoch": 37.79,
      "learning_rate": 0.001,
      "loss": 2.5073,
      "step": 196824
    },
    {
      "epoch": 37.79,
      "learning_rate": 0.001,
      "loss": 2.5101,
      "step": 196836
    },
    {
      "epoch": 37.8,
      "learning_rate": 0.001,
      "loss": 2.497,
      "step": 196848
    },
    {
      "epoch": 37.8,
      "learning_rate": 0.001,
      "loss": 2.5096,
      "step": 196860
    },
    {
      "epoch": 37.8,
      "learning_rate": 0.001,
      "loss": 2.4981,
      "step": 196872
    },
    {
      "epoch": 37.8,
      "eval_ag_news_accuracy": 0.32965625,
      "eval_ag_news_bleu_score": 5.034891070714903,
      "eval_ag_news_bleu_score_sem": 0.16478578625660043,
      "eval_ag_news_emb_cos_sim": 0.82633376121521,
      "eval_ag_news_emb_cos_sim_sem": 0.006461327879318673,
      "eval_ag_news_emb_top1_equal": 0.234375,
      "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4684040546417236,
      "eval_ag_news_n_ngrams_match_1": 14.482,
      "eval_ag_news_n_ngrams_match_2": 3.242,
      "eval_ag_news_n_ngrams_match_3": 0.932,
      "eval_ag_news_num_pred_words": 46.71,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.08549486486442,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3626753752710302,
      "eval_ag_news_runtime": 11.0597,
      "eval_ag_news_samples_per_second": 45.209,
      "eval_ag_news_steps_per_second": 0.09,
      "eval_ag_news_token_set_f1": 0.3601345675263891,
      "eval_ag_news_token_set_f1_sem": 0.004389001993053166,
      "eval_ag_news_token_set_precision": 0.3465120254155709,
      "eval_ag_news_token_set_recall": 0.3890541211196841,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 196875
    },
    {
      "epoch": 37.8,
      "eval_anthropic_toxic_prompts_accuracy": 0.11653125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.297903386605315,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1217828454410264,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6917194724082947,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008183295327147951,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2022769451141357,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.354,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.064,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.8,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.47,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.588453064367823,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21787486151933474,
      "eval_anthropic_toxic_prompts_runtime": 13.2722,
      "eval_anthropic_toxic_prompts_samples_per_second": 37.673,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.075,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3601464756738046,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006612663323356015,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.45008079394599904,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32665501895132754,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 196875
    },
    {
      "epoch": 37.8,
      "eval_arxiv_accuracy": 0.35278125,
      "eval_arxiv_bleu_score": 4.485032326418497,
      "eval_arxiv_bleu_score_sem": 0.13034045040757414,
      "eval_arxiv_emb_cos_sim": 0.782996416091919,
      "eval_arxiv_emb_cos_sim_sem": 0.006943852001396215,
      "eval_arxiv_emb_top1_equal": 0.296875,
      "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3341031074523926,
      "eval_arxiv_n_ngrams_match_1": 15.504,
      "eval_arxiv_n_ngrams_match_2": 3.122,
      "eval_arxiv_n_ngrams_match_3": 0.692,
      "eval_arxiv_num_pred_words": 40.666,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.053211221116968,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3725994041119809,
      "eval_arxiv_runtime": 9.9953,
      "eval_arxiv_samples_per_second": 50.024,
      "eval_arxiv_steps_per_second": 0.1,
      "eval_arxiv_token_set_f1": 0.3624400607030841,
      "eval_arxiv_token_set_f1_sem": 0.004251640996338694,
      "eval_arxiv_token_set_precision": 0.31597757531856513,
      "eval_arxiv_token_set_recall": 0.44393532148918696,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 196875
    },
    {
      "epoch": 37.8,
      "eval_python_code_alpaca_accuracy": 0.16140625,
      "eval_python_code_alpaca_bleu_score": 4.850083712381379,
      "eval_python_code_alpaca_bleu_score_sem": 0.14978757826362565,
      "eval_python_code_alpaca_emb_cos_sim": 0.7660810947418213,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007439076812854712,
      "eval_python_code_alpaca_emb_top1_equal": 0.1171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8679707050323486,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.138,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.116,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.092,
      "eval_python_code_alpaca_num_pred_words": 44.432,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.601263777964043,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33948634462601757,
      "eval_python_code_alpaca_runtime": 9.9249,
      "eval_python_code_alpaca_samples_per_second": 50.378,
      "eval_python_code_alpaca_steps_per_second": 0.101,
      "eval_python_code_alpaca_token_set_f1": 0.49157670046570423,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005254068065986003,
      "eval_python_code_alpaca_token_set_precision": 0.555358362906108,
      "eval_python_code_alpaca_token_set_recall": 0.46388114306897893,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 196875
    },
    {
      "epoch": 37.8,
      "eval_wikibio_accuracy": 0.33021875,
      "eval_wikibio_bleu_score": 6.139729124755908,
      "eval_wikibio_bleu_score_sem": 0.2158405026929467,
      "eval_wikibio_emb_cos_sim": 0.7544930577278137,
      "eval_wikibio_emb_cos_sim_sem": 0.00775497468643345,
      "eval_wikibio_emb_top1_equal": 0.1875,
      "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6849443912506104,
      "eval_wikibio_n_ngrams_match_1": 10.18,
      "eval_wikibio_n_ngrams_match_2": 3.426,
      "eval_wikibio_n_ngrams_match_3": 1.29,
      "eval_wikibio_num_pred_words": 36.176,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 39.84290677403874,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36006466597621756,
      "eval_wikibio_runtime": 9.935,
      "eval_wikibio_samples_per_second": 50.327,
      "eval_wikibio_steps_per_second": 0.101,
      "eval_wikibio_token_set_f1": 0.32254195008755326,
      "eval_wikibio_token_set_f1_sem": 0.005338669202045662,
      "eval_wikibio_token_set_precision": 0.3309832938014916,
      "eval_wikibio_token_set_recall": 0.32899516700243975,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 196875
    },
    {
      "epoch": 37.8,
      "eval_nq_accuracy": 0.535375,
      "eval_nq_bleu_score": 12.135138218921748,
      "eval_nq_bleu_score_sem": 0.48553437251501824,
      "eval_nq_emb_cos_sim": 0.8353713750839233,
      "eval_nq_emb_cos_sim_sem": 0.007257361334044501,
      "eval_nq_emb_top1_equal": 0.296875,
      "eval_nq_emb_top1_equal_sem": 0.04054163310179599,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1225414276123047,
      "eval_nq_n_ngrams_match_1": 23.664,
      "eval_nq_n_ngrams_match_2": 8.804,
      "eval_nq_n_ngrams_match_3": 4.082,
      "eval_nq_num_pred_words": 49.356,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.352337398149128,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4584016645809208,
      "eval_nq_runtime": 26.5183,
      "eval_nq_samples_per_second": 18.855,
      "eval_nq_steps_per_second": 0.038,
      "eval_nq_token_set_f1": 0.4715105559238299,
      "eval_nq_token_set_f1_sem": 0.00483828325260912,
      "eval_nq_token_set_precision": 0.43156210457097177,
      "eval_nq_token_set_recall": 0.5291180792544479,
      "eval_nq_true_num_tokens": 64.0,
      "step": 196875
    },
    {
      "epoch": 37.8,
      "learning_rate": 0.001,
      "loss": 2.5051,
      "step": 196884
    },
    {
      "epoch": 37.81,
      "learning_rate": 0.001,
      "loss": 2.5069,
      "step": 196896
    },
    {
      "epoch": 37.81,
      "learning_rate": 0.001,
      "loss": 2.5071,
      "step": 196908
    },
    {
      "epoch": 37.81,
      "learning_rate": 0.001,
      "loss": 2.5061,
      "step": 196920
    },
    {
      "epoch": 37.81,
      "learning_rate": 0.001,
      "loss": 2.5085,
      "step": 196932
    },
    {
      "epoch": 37.82,
      "learning_rate": 0.001,
      "loss": 2.5031,
      "step": 196944
    },
    {
      "epoch": 37.82,
      "learning_rate": 0.001,
      "loss": 2.5057,
      "step": 196956
    },
    {
      "epoch": 37.82,
      "learning_rate": 0.001,
      "loss": 2.4908,
      "step": 196968
    },
    {
      "epoch": 37.82,
      "learning_rate": 0.001,
      "loss": 2.5014,
      "step": 196980
    },
    {
      "epoch": 37.82,
      "learning_rate": 0.001,
      "loss": 2.5001,
      "step": 196992
    },
    {
      "epoch": 37.83,
      "learning_rate": 0.001,
      "loss": 2.5052,
      "step": 197004
    },
    {
      "epoch": 37.83,
      "learning_rate": 0.001,
      "loss": 2.5057,
      "step": 197016
    },
    {
      "epoch": 37.83,
      "learning_rate": 0.001,
      "loss": 2.5001,
      "step": 197028
    },
    {
      "epoch": 37.83,
      "learning_rate": 0.001,
      "loss": 2.5075,
      "step": 197040
    },
    {
      "epoch": 37.84,
      "learning_rate": 0.001,
      "loss": 2.5087,
      "step": 197052
    },
    {
      "epoch": 37.84,
      "learning_rate": 0.001,
      "loss": 2.5112,
      "step": 197064
    },
    {
      "epoch": 37.84,
      "learning_rate": 0.001,
      "loss": 2.5027,
      "step": 197076
    },
    {
      "epoch": 37.84,
      "learning_rate": 0.001,
      "loss": 2.5139,
      "step": 197088
    },
    {
      "epoch": 37.85,
      "learning_rate": 0.001,
      "loss": 2.5062,
      "step": 197100
    },
    {
      "epoch": 37.85,
      "learning_rate": 0.001,
      "loss": 2.5088,
      "step": 197112
    },
    {
      "epoch": 37.85,
      "learning_rate": 0.001,
      "loss": 2.4982,
      "step": 197124
    },
    {
      "epoch": 37.85,
      "learning_rate": 0.001,
      "loss": 2.5188,
      "step": 197136
    },
    {
      "epoch": 37.85,
      "learning_rate": 0.001,
      "loss": 2.5116,
      "step": 197148
    },
    {
      "epoch": 37.86,
      "learning_rate": 0.001,
      "loss": 2.5012,
      "step": 197160
    },
    {
      "epoch": 37.86,
      "learning_rate": 0.001,
      "loss": 2.5074,
      "step": 197172
    },
    {
      "epoch": 37.86,
      "learning_rate": 0.001,
      "loss": 2.5086,
      "step": 197184
    },
    {
      "epoch": 37.86,
      "learning_rate": 0.001,
      "loss": 2.5072,
      "step": 197196
    },
    {
      "epoch": 37.87,
      "learning_rate": 0.001,
      "loss": 2.5062,
      "step": 197208
    },
    {
      "epoch": 37.87,
      "learning_rate": 0.001,
      "loss": 2.5081,
      "step": 197220
    },
    {
      "epoch": 37.87,
      "learning_rate": 0.001,
      "loss": 2.5029,
      "step": 197232
    },
    {
      "epoch": 37.87,
      "learning_rate": 0.001,
      "loss": 2.5056,
      "step": 197244
    },
    {
      "epoch": 37.88,
      "learning_rate": 0.001,
      "loss": 2.5012,
      "step": 197256
    },
    {
      "epoch": 37.88,
      "learning_rate": 0.001,
      "loss": 2.5094,
      "step": 197268
    },
    {
      "epoch": 37.88,
      "learning_rate": 0.001,
      "loss": 2.5047,
      "step": 197280
    },
    {
      "epoch": 37.88,
      "learning_rate": 0.001,
      "loss": 2.4975,
      "step": 197292
    },
    {
      "epoch": 37.88,
      "learning_rate": 0.001,
      "loss": 2.5057,
      "step": 197304
    },
    {
      "epoch": 37.89,
      "learning_rate": 0.001,
      "loss": 2.5012,
      "step": 197316
    },
    {
      "epoch": 37.89,
      "learning_rate": 0.001,
      "loss": 2.5097,
      "step": 197328
    },
    {
      "epoch": 37.89,
      "learning_rate": 0.001,
      "loss": 2.5105,
      "step": 197340
    },
    {
      "epoch": 37.89,
      "learning_rate": 0.001,
      "loss": 2.5003,
      "step": 197352
    },
    {
      "epoch": 37.9,
      "learning_rate": 0.001,
      "loss": 2.5104,
      "step": 197364
    },
    {
      "epoch": 37.9,
      "learning_rate": 0.001,
      "loss": 2.5011,
      "step": 197376
    },
    {
      "epoch": 37.9,
      "learning_rate": 0.001,
      "loss": 2.5033,
      "step": 197388
    },
    {
      "epoch": 37.9,
      "learning_rate": 0.001,
      "loss": 2.5005,
      "step": 197400
    },
    {
      "epoch": 37.91,
      "learning_rate": 0.001,
      "loss": 2.4963,
      "step": 197412
    },
    {
      "epoch": 37.91,
      "learning_rate": 0.001,
      "loss": 2.5069,
      "step": 197424
    },
    {
      "epoch": 37.91,
      "learning_rate": 0.001,
      "loss": 2.5056,
      "step": 197436
    },
    {
      "epoch": 37.91,
      "learning_rate": 0.001,
      "loss": 2.5103,
      "step": 197448
    },
    {
      "epoch": 37.91,
      "learning_rate": 0.001,
      "loss": 2.5027,
      "step": 197460
    },
    {
      "epoch": 37.92,
      "learning_rate": 0.001,
      "loss": 2.5046,
      "step": 197472
    },
    {
      "epoch": 37.92,
      "learning_rate": 0.001,
      "loss": 2.5009,
      "step": 197484
    },
    {
      "epoch": 37.92,
      "learning_rate": 0.001,
      "loss": 2.5115,
      "step": 197496
    },
    {
      "epoch": 37.92,
      "eval_ag_news_accuracy": 0.32834375,
      "eval_ag_news_bleu_score": 4.861613269007893,
      "eval_ag_news_bleu_score_sem": 0.159717906672351,
      "eval_ag_news_emb_cos_sim": 0.8243966102600098,
      "eval_ag_news_emb_cos_sim_sem": 0.006059606509826103,
      "eval_ag_news_emb_top1_equal": 0.21875,
      "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4781150817871094,
      "eval_ag_news_n_ngrams_match_1": 14.382,
      "eval_ag_news_n_ngrams_match_2": 3.182,
      "eval_ag_news_n_ngrams_match_3": 0.932,
      "eval_ag_news_num_pred_words": 47.022,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.39859578166829,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35717536589227183,
      "eval_ag_news_runtime": 11.6284,
      "eval_ag_news_samples_per_second": 42.998,
      "eval_ag_news_steps_per_second": 0.086,
      "eval_ag_news_token_set_f1": 0.35674726323291067,
      "eval_ag_news_token_set_f1_sem": 0.004488013295578366,
      "eval_ag_news_token_set_precision": 0.3437195601210618,
      "eval_ag_news_token_set_recall": 0.38333595013475924,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 197500
    },
    {
      "epoch": 37.92,
      "eval_anthropic_toxic_prompts_accuracy": 0.11721875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2591161537375735,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12229344402211764,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6847412586212158,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008042622317708746,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2084994316101074,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.376,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.992,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.756,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.182,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.741931394722084,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.22045067726602688,
      "eval_anthropic_toxic_prompts_runtime": 9.7483,
      "eval_anthropic_toxic_prompts_samples_per_second": 51.291,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.103,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36311138438462426,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006420653005946579,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4520250464711051,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3285887161972419,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 197500
    },
    {
      "epoch": 37.92,
      "eval_arxiv_accuracy": 0.35121875,
      "eval_arxiv_bleu_score": 4.468312128326134,
      "eval_arxiv_bleu_score_sem": 0.13089383375985866,
      "eval_arxiv_emb_cos_sim": 0.7762400507926941,
      "eval_arxiv_emb_cos_sim_sem": 0.007188606653233073,
      "eval_arxiv_emb_top1_equal": 0.296875,
      "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3464467525482178,
      "eval_arxiv_n_ngrams_match_1": 15.598,
      "eval_arxiv_n_ngrams_match_2": 3.082,
      "eval_arxiv_n_ngrams_match_3": 0.69,
      "eval_arxiv_num_pred_words": 40.828,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.401636096747204,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.37067769859652583,
      "eval_arxiv_runtime": 27.5747,
      "eval_arxiv_samples_per_second": 18.133,
      "eval_arxiv_steps_per_second": 0.036,
      "eval_arxiv_token_set_f1": 0.36355016903606596,
      "eval_arxiv_token_set_f1_sem": 0.0042682182617868915,
      "eval_arxiv_token_set_precision": 0.31657228234954976,
      "eval_arxiv_token_set_recall": 0.4444480168321842,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 197500
    },
    {
      "epoch": 37.92,
      "eval_python_code_alpaca_accuracy": 0.162375,
      "eval_python_code_alpaca_bleu_score": 4.741918729269589,
      "eval_python_code_alpaca_bleu_score_sem": 0.14631391556279452,
      "eval_python_code_alpaca_emb_cos_sim": 0.7607436776161194,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008395980178535066,
      "eval_python_code_alpaca_emb_top1_equal": 0.1015625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.026804565886848545,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8623900413513184,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.914,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.994,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.008,
      "eval_python_code_alpaca_num_pred_words": 43.538,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.50331062049169,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3392678720857105,
      "eval_python_code_alpaca_runtime": 27.1786,
      "eval_python_code_alpaca_samples_per_second": 18.397,
      "eval_python_code_alpaca_steps_per_second": 0.037,
      "eval_python_code_alpaca_token_set_f1": 0.4887016008546284,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005246184540964671,
      "eval_python_code_alpaca_token_set_precision": 0.5423432729635894,
      "eval_python_code_alpaca_token_set_recall": 0.4675765806219998,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 197500
    },
    {
      "epoch": 37.92,
      "eval_wikibio_accuracy": 0.32940625,
      "eval_wikibio_bleu_score": 6.266780440032098,
      "eval_wikibio_bleu_score_sem": 0.2138327169571218,
      "eval_wikibio_emb_cos_sim": 0.7499379515647888,
      "eval_wikibio_emb_cos_sim_sem": 0.008426136907189118,
      "eval_wikibio_emb_top1_equal": 0.1953125,
      "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.647045135498047,
      "eval_wikibio_n_ngrams_match_1": 10.314,
      "eval_wikibio_n_ngrams_match_2": 3.534,
      "eval_wikibio_n_ngrams_match_3": 1.322,
      "eval_wikibio_num_pred_words": 36.368,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 38.3611464242512,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3688827438497856,
      "eval_wikibio_runtime": 10.1861,
      "eval_wikibio_samples_per_second": 49.086,
      "eval_wikibio_steps_per_second": 0.098,
      "eval_wikibio_token_set_f1": 0.32924503510272013,
      "eval_wikibio_token_set_f1_sem": 0.005265673392254785,
      "eval_wikibio_token_set_precision": 0.3362216882441393,
      "eval_wikibio_token_set_recall": 0.3372910414871243,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 197500
    },
    {
      "epoch": 37.92,
      "eval_nq_accuracy": 0.5363125,
      "eval_nq_bleu_score": 12.215521925519054,
      "eval_nq_bleu_score_sem": 0.47856955015150066,
      "eval_nq_emb_cos_sim": 0.8401731252670288,
      "eval_nq_emb_cos_sim_sem": 0.006614980142465709,
      "eval_nq_emb_top1_equal": 0.328125,
      "eval_nq_emb_top1_equal_sem": 0.041664103776406315,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.127146005630493,
      "eval_nq_n_ngrams_match_1": 23.614,
      "eval_nq_n_ngrams_match_2": 8.824,
      "eval_nq_n_ngrams_match_3": 4.06,
      "eval_nq_num_pred_words": 49.22,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.390885067099962,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45807987506689185,
      "eval_nq_runtime": 10.7838,
      "eval_nq_samples_per_second": 46.366,
      "eval_nq_steps_per_second": 0.093,
      "eval_nq_token_set_f1": 0.4720235415433743,
      "eval_nq_token_set_f1_sem": 0.004795410289763898,
      "eval_nq_token_set_precision": 0.4301537191288202,
      "eval_nq_token_set_recall": 0.5307444854441206,
      "eval_nq_true_num_tokens": 64.0,
      "step": 197500
    },
    {
      "epoch": 37.92,
      "learning_rate": 0.001,
      "loss": 2.5086,
      "step": 197508
    },
    {
      "epoch": 37.93,
      "learning_rate": 0.001,
      "loss": 2.5157,
      "step": 197520
    },
    {
      "epoch": 37.93,
      "learning_rate": 0.001,
      "loss": 2.4954,
      "step": 197532
    },
    {
      "epoch": 37.93,
      "learning_rate": 0.001,
      "loss": 2.5115,
      "step": 197544
    },
    {
      "epoch": 37.93,
      "learning_rate": 0.001,
      "loss": 2.5018,
      "step": 197556
    },
    {
      "epoch": 37.94,
      "learning_rate": 0.001,
      "loss": 2.5104,
      "step": 197568
    },
    {
      "epoch": 37.94,
      "learning_rate": 0.001,
      "loss": 2.5074,
      "step": 197580
    },
    {
      "epoch": 37.94,
      "learning_rate": 0.001,
      "loss": 2.5075,
      "step": 197592
    },
    {
      "epoch": 37.94,
      "learning_rate": 0.001,
      "loss": 2.5028,
      "step": 197604
    },
    {
      "epoch": 37.94,
      "learning_rate": 0.001,
      "loss": 2.5151,
      "step": 197616
    },
    {
      "epoch": 37.95,
      "learning_rate": 0.001,
      "loss": 2.505,
      "step": 197628
    },
    {
      "epoch": 37.95,
      "learning_rate": 0.001,
      "loss": 2.496,
      "step": 197640
    },
    {
      "epoch": 37.95,
      "learning_rate": 0.001,
      "loss": 2.5097,
      "step": 197652
    },
    {
      "epoch": 37.95,
      "learning_rate": 0.001,
      "loss": 2.5132,
      "step": 197664
    },
    {
      "epoch": 37.96,
      "learning_rate": 0.001,
      "loss": 2.5036,
      "step": 197676
    },
    {
      "epoch": 37.96,
      "learning_rate": 0.001,
      "loss": 2.5034,
      "step": 197688
    },
    {
      "epoch": 37.96,
      "learning_rate": 0.001,
      "loss": 2.499,
      "step": 197700
    },
    {
      "epoch": 37.96,
      "learning_rate": 0.001,
      "loss": 2.506,
      "step": 197712
    },
    {
      "epoch": 37.97,
      "learning_rate": 0.001,
      "loss": 2.504,
      "step": 197724
    },
    {
      "epoch": 37.97,
      "learning_rate": 0.001,
      "loss": 2.5077,
      "step": 197736
    },
    {
      "epoch": 37.97,
      "learning_rate": 0.001,
      "loss": 2.5041,
      "step": 197748
    },
    {
      "epoch": 37.97,
      "learning_rate": 0.001,
      "loss": 2.516,
      "step": 197760
    },
    {
      "epoch": 37.97,
      "learning_rate": 0.001,
      "loss": 2.5077,
      "step": 197772
    },
    {
      "epoch": 37.98,
      "learning_rate": 0.001,
      "loss": 2.4978,
      "step": 197784
    },
    {
      "epoch": 37.98,
      "learning_rate": 0.001,
      "loss": 2.5104,
      "step": 197796
    },
    {
      "epoch": 37.98,
      "learning_rate": 0.001,
      "loss": 2.5091,
      "step": 197808
    },
    {
      "epoch": 37.98,
      "learning_rate": 0.001,
      "loss": 2.498,
      "step": 197820
    },
    {
      "epoch": 37.99,
      "learning_rate": 0.001,
      "loss": 2.5056,
      "step": 197832
    },
    {
      "epoch": 37.99,
      "learning_rate": 0.001,
      "loss": 2.5123,
      "step": 197844
    },
    {
      "epoch": 37.99,
      "learning_rate": 0.001,
      "loss": 2.5084,
      "step": 197856
    },
    {
      "epoch": 37.99,
      "learning_rate": 0.001,
      "loss": 2.5019,
      "step": 197868
    },
    {
      "epoch": 38.0,
      "learning_rate": 0.001,
      "loss": 2.511,
      "step": 197880
    },
    {
      "epoch": 38.0,
      "learning_rate": 0.001,
      "loss": 2.5085,
      "step": 197892
    },
    {
      "epoch": 38.0,
      "learning_rate": 0.001,
      "loss": 2.4936,
      "step": 197904
    },
    {
      "epoch": 38.0,
      "learning_rate": 0.001,
      "loss": 2.4949,
      "step": 197916
    },
    {
      "epoch": 38.0,
      "learning_rate": 0.001,
      "loss": 2.4887,
      "step": 197928
    },
    {
      "epoch": 38.01,
      "learning_rate": 0.001,
      "loss": 2.4896,
      "step": 197940
    },
    {
      "epoch": 38.01,
      "learning_rate": 0.001,
      "loss": 2.4919,
      "step": 197952
    },
    {
      "epoch": 38.01,
      "learning_rate": 0.001,
      "loss": 2.4899,
      "step": 197964
    },
    {
      "epoch": 38.01,
      "learning_rate": 0.001,
      "loss": 2.495,
      "step": 197976
    },
    {
      "epoch": 38.02,
      "learning_rate": 0.001,
      "loss": 2.4998,
      "step": 197988
    },
    {
      "epoch": 38.02,
      "learning_rate": 0.001,
      "loss": 2.502,
      "step": 198000
    },
    {
      "epoch": 38.02,
      "learning_rate": 0.001,
      "loss": 2.5027,
      "step": 198012
    },
    {
      "epoch": 38.02,
      "learning_rate": 0.001,
      "loss": 2.498,
      "step": 198024
    },
    {
      "epoch": 38.03,
      "learning_rate": 0.001,
      "loss": 2.4971,
      "step": 198036
    },
    {
      "epoch": 38.03,
      "learning_rate": 0.001,
      "loss": 2.4879,
      "step": 198048
    },
    {
      "epoch": 38.03,
      "learning_rate": 0.001,
      "loss": 2.4933,
      "step": 198060
    },
    {
      "epoch": 38.03,
      "learning_rate": 0.001,
      "loss": 2.4915,
      "step": 198072
    },
    {
      "epoch": 38.03,
      "learning_rate": 0.001,
      "loss": 2.4941,
      "step": 198084
    },
    {
      "epoch": 38.04,
      "learning_rate": 0.001,
      "loss": 2.4966,
      "step": 198096
    },
    {
      "epoch": 38.04,
      "learning_rate": 0.001,
      "loss": 2.5091,
      "step": 198108
    },
    {
      "epoch": 38.04,
      "learning_rate": 0.001,
      "loss": 2.4987,
      "step": 198120
    },
    {
      "epoch": 38.04,
      "eval_ag_news_accuracy": 0.3280625,
      "eval_ag_news_bleu_score": 4.910747375853292,
      "eval_ag_news_bleu_score_sem": 0.16110935687850506,
      "eval_ag_news_emb_cos_sim": 0.8183833360671997,
      "eval_ag_news_emb_cos_sim_sem": 0.006989458184207542,
      "eval_ag_news_emb_top1_equal": 0.2578125,
      "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4828641414642334,
      "eval_ag_news_n_ngrams_match_1": 14.482,
      "eval_ag_news_n_ngrams_match_2": 3.31,
      "eval_ag_news_n_ngrams_match_3": 0.922,
      "eval_ag_news_num_pred_words": 47.004,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.552824577500765,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3583929650329284,
      "eval_ag_news_runtime": 10.2549,
      "eval_ag_news_samples_per_second": 48.757,
      "eval_ag_news_steps_per_second": 0.098,
      "eval_ag_news_token_set_f1": 0.3588915372672611,
      "eval_ag_news_token_set_f1_sem": 0.004475782302359824,
      "eval_ag_news_token_set_precision": 0.34898361643974435,
      "eval_ag_news_token_set_recall": 0.382470079100238,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 198125
    },
    {
      "epoch": 38.04,
      "eval_anthropic_toxic_prompts_accuracy": 0.1168125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.301207113242068,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12817840580299233,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6753901243209839,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009142267181127879,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.197187662124634,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.386,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.024,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.77,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.264,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.463633359260893,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.22114043342891399,
      "eval_anthropic_toxic_prompts_runtime": 9.6947,
      "eval_anthropic_toxic_prompts_samples_per_second": 51.575,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.103,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3624377696756126,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0065926288693393,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4477993879178638,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32928570147706593,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 198125
    },
    {
      "epoch": 38.04,
      "eval_arxiv_accuracy": 0.35209375,
      "eval_arxiv_bleu_score": 4.491844717531946,
      "eval_arxiv_bleu_score_sem": 0.13040605317656828,
      "eval_arxiv_emb_cos_sim": 0.7792878746986389,
      "eval_arxiv_emb_cos_sim_sem": 0.008038685538640025,
      "eval_arxiv_emb_top1_equal": 0.296875,
      "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3622093200683594,
      "eval_arxiv_n_ngrams_match_1": 15.868,
      "eval_arxiv_n_ngrams_match_2": 3.126,
      "eval_arxiv_n_ngrams_match_3": 0.662,
      "eval_arxiv_num_pred_words": 41.138,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.852865729566105,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.37843141426570354,
      "eval_arxiv_runtime": 12.4725,
      "eval_arxiv_samples_per_second": 40.088,
      "eval_arxiv_steps_per_second": 0.08,
      "eval_arxiv_token_set_f1": 0.3697806797510946,
      "eval_arxiv_token_set_f1_sem": 0.004175549794410236,
      "eval_arxiv_token_set_precision": 0.32548584849093193,
      "eval_arxiv_token_set_recall": 0.44412619507525863,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 198125
    },
    {
      "epoch": 38.04,
      "eval_python_code_alpaca_accuracy": 0.16153125,
      "eval_python_code_alpaca_bleu_score": 4.5971349748079025,
      "eval_python_code_alpaca_bleu_score_sem": 0.14031230680149287,
      "eval_python_code_alpaca_emb_cos_sim": 0.7603492736816406,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008283170845773118,
      "eval_python_code_alpaca_emb_top1_equal": 0.1328125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8606696128845215,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.908,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.896,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.972,
      "eval_python_code_alpaca_num_pred_words": 43.838,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.47322331558615,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3385536942912206,
      "eval_python_code_alpaca_runtime": 9.9127,
      "eval_python_code_alpaca_samples_per_second": 50.44,
      "eval_python_code_alpaca_steps_per_second": 0.101,
      "eval_python_code_alpaca_token_set_f1": 0.4770394134582058,
      "eval_python_code_alpaca_token_set_f1_sem": 0.00530785938213543,
      "eval_python_code_alpaca_token_set_precision": 0.5417601021033696,
      "eval_python_code_alpaca_token_set_recall": 0.4504018587054074,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 198125
    },
    {
      "epoch": 38.04,
      "eval_wikibio_accuracy": 0.33003125,
      "eval_wikibio_bleu_score": 6.547435990060557,
      "eval_wikibio_bleu_score_sem": 0.232108214409257,
      "eval_wikibio_emb_cos_sim": 0.758441686630249,
      "eval_wikibio_emb_cos_sim_sem": 0.0074278150432237,
      "eval_wikibio_emb_top1_equal": 0.1796875,
      "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6872265338897705,
      "eval_wikibio_n_ngrams_match_1": 10.488,
      "eval_wikibio_n_ngrams_match_2": 3.602,
      "eval_wikibio_n_ngrams_match_3": 1.388,
      "eval_wikibio_num_pred_words": 36.29,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 39.933937803844366,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36917191817795747,
      "eval_wikibio_runtime": 10.1784,
      "eval_wikibio_samples_per_second": 49.124,
      "eval_wikibio_steps_per_second": 0.098,
      "eval_wikibio_token_set_f1": 0.33310988103124406,
      "eval_wikibio_token_set_f1_sem": 0.005188332268974138,
      "eval_wikibio_token_set_precision": 0.3417388783542738,
      "eval_wikibio_token_set_recall": 0.3388453247062207,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 198125
    },
    {
      "epoch": 38.04,
      "eval_nq_accuracy": 0.5355625,
      "eval_nq_bleu_score": 12.033799415002097,
      "eval_nq_bleu_score_sem": 0.49216421229279367,
      "eval_nq_emb_cos_sim": 0.8342651128768921,
      "eval_nq_emb_cos_sim_sem": 0.007439761699143014,
      "eval_nq_emb_top1_equal": 0.28125,
      "eval_nq_emb_top1_equal_sem": 0.039896367485272234,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.127384662628174,
      "eval_nq_n_ngrams_match_1": 23.448,
      "eval_nq_n_ngrams_match_2": 8.676,
      "eval_nq_n_ngrams_match_3": 4.052,
      "eval_nq_num_pred_words": 49.188,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.392887849517473,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4548787648948454,
      "eval_nq_runtime": 12.62,
      "eval_nq_samples_per_second": 39.62,
      "eval_nq_steps_per_second": 0.079,
      "eval_nq_token_set_f1": 0.4669965182066276,
      "eval_nq_token_set_f1_sem": 0.00503342605619811,
      "eval_nq_token_set_precision": 0.42673897101559916,
      "eval_nq_token_set_recall": 0.5234327128185235,
      "eval_nq_true_num_tokens": 64.0,
      "step": 198125
    },
    {
      "epoch": 38.04,
      "learning_rate": 0.001,
      "loss": 2.5045,
      "step": 198132
    },
    {
      "epoch": 38.05,
      "learning_rate": 0.001,
      "loss": 2.5019,
      "step": 198144
    },
    {
      "epoch": 38.05,
      "learning_rate": 0.001,
      "loss": 2.4955,
      "step": 198156
    },
    {
      "epoch": 38.05,
      "learning_rate": 0.001,
      "loss": 2.505,
      "step": 198168
    },
    {
      "epoch": 38.05,
      "learning_rate": 0.001,
      "loss": 2.4914,
      "step": 198180
    },
    {
      "epoch": 38.06,
      "learning_rate": 0.001,
      "loss": 2.5056,
      "step": 198192
    },
    {
      "epoch": 38.06,
      "learning_rate": 0.001,
      "loss": 2.4971,
      "step": 198204
    },
    {
      "epoch": 38.06,
      "learning_rate": 0.001,
      "loss": 2.5082,
      "step": 198216
    },
    {
      "epoch": 38.06,
      "learning_rate": 0.001,
      "loss": 2.505,
      "step": 198228
    },
    {
      "epoch": 38.06,
      "learning_rate": 0.001,
      "loss": 2.4987,
      "step": 198240
    },
    {
      "epoch": 38.07,
      "learning_rate": 0.001,
      "loss": 2.5032,
      "step": 198252
    },
    {
      "epoch": 38.07,
      "learning_rate": 0.001,
      "loss": 2.5019,
      "step": 198264
    },
    {
      "epoch": 38.07,
      "learning_rate": 0.001,
      "loss": 2.4917,
      "step": 198276
    },
    {
      "epoch": 38.07,
      "learning_rate": 0.001,
      "loss": 2.497,
      "step": 198288
    },
    {
      "epoch": 38.08,
      "learning_rate": 0.001,
      "loss": 2.4973,
      "step": 198300
    },
    {
      "epoch": 38.08,
      "learning_rate": 0.001,
      "loss": 2.5001,
      "step": 198312
    },
    {
      "epoch": 38.08,
      "learning_rate": 0.001,
      "loss": 2.4906,
      "step": 198324
    },
    {
      "epoch": 38.08,
      "learning_rate": 0.001,
      "loss": 2.4857,
      "step": 198336
    },
    {
      "epoch": 38.09,
      "learning_rate": 0.001,
      "loss": 2.4923,
      "step": 198348
    },
    {
      "epoch": 38.09,
      "learning_rate": 0.001,
      "loss": 2.4921,
      "step": 198360
    },
    {
      "epoch": 38.09,
      "learning_rate": 0.001,
      "loss": 2.498,
      "step": 198372
    },
    {
      "epoch": 38.09,
      "learning_rate": 0.001,
      "loss": 2.4905,
      "step": 198384
    },
    {
      "epoch": 38.09,
      "learning_rate": 0.001,
      "loss": 2.5061,
      "step": 198396
    },
    {
      "epoch": 38.1,
      "learning_rate": 0.001,
      "loss": 2.4902,
      "step": 198408
    },
    {
      "epoch": 38.1,
      "learning_rate": 0.001,
      "loss": 2.5041,
      "step": 198420
    },
    {
      "epoch": 38.1,
      "learning_rate": 0.001,
      "loss": 2.5027,
      "step": 198432
    },
    {
      "epoch": 38.1,
      "learning_rate": 0.001,
      "loss": 2.504,
      "step": 198444
    },
    {
      "epoch": 38.11,
      "learning_rate": 0.001,
      "loss": 2.51,
      "step": 198456
    },
    {
      "epoch": 38.11,
      "learning_rate": 0.001,
      "loss": 2.5003,
      "step": 198468
    },
    {
      "epoch": 38.11,
      "learning_rate": 0.001,
      "loss": 2.5165,
      "step": 198480
    },
    {
      "epoch": 38.11,
      "learning_rate": 0.001,
      "loss": 2.5067,
      "step": 198492
    },
    {
      "epoch": 38.12,
      "learning_rate": 0.001,
      "loss": 2.5087,
      "step": 198504
    },
    {
      "epoch": 38.12,
      "learning_rate": 0.001,
      "loss": 2.4967,
      "step": 198516
    },
    {
      "epoch": 38.12,
      "learning_rate": 0.001,
      "loss": 2.5003,
      "step": 198528
    },
    {
      "epoch": 38.12,
      "learning_rate": 0.001,
      "loss": 2.5006,
      "step": 198540
    },
    {
      "epoch": 38.12,
      "learning_rate": 0.001,
      "loss": 2.5077,
      "step": 198552
    },
    {
      "epoch": 38.13,
      "learning_rate": 0.001,
      "loss": 2.5101,
      "step": 198564
    },
    {
      "epoch": 38.13,
      "learning_rate": 0.001,
      "loss": 2.5079,
      "step": 198576
    },
    {
      "epoch": 38.13,
      "learning_rate": 0.001,
      "loss": 2.5095,
      "step": 198588
    },
    {
      "epoch": 38.13,
      "learning_rate": 0.001,
      "loss": 2.5039,
      "step": 198600
    },
    {
      "epoch": 38.14,
      "learning_rate": 0.001,
      "loss": 2.5045,
      "step": 198612
    },
    {
      "epoch": 38.14,
      "learning_rate": 0.001,
      "loss": 2.5015,
      "step": 198624
    },
    {
      "epoch": 38.14,
      "learning_rate": 0.001,
      "loss": 2.5079,
      "step": 198636
    },
    {
      "epoch": 38.14,
      "learning_rate": 0.001,
      "loss": 2.5038,
      "step": 198648
    },
    {
      "epoch": 38.15,
      "learning_rate": 0.001,
      "loss": 2.4984,
      "step": 198660
    },
    {
      "epoch": 38.15,
      "learning_rate": 0.001,
      "loss": 2.5124,
      "step": 198672
    },
    {
      "epoch": 38.15,
      "learning_rate": 0.001,
      "loss": 2.4912,
      "step": 198684
    },
    {
      "epoch": 38.15,
      "learning_rate": 0.001,
      "loss": 2.5178,
      "step": 198696
    },
    {
      "epoch": 38.15,
      "learning_rate": 0.001,
      "loss": 2.5052,
      "step": 198708
    },
    {
      "epoch": 38.16,
      "learning_rate": 0.001,
      "loss": 2.5075,
      "step": 198720
    },
    {
      "epoch": 38.16,
      "learning_rate": 0.001,
      "loss": 2.5033,
      "step": 198732
    },
    {
      "epoch": 38.16,
      "learning_rate": 0.001,
      "loss": 2.5026,
      "step": 198744
    },
    {
      "epoch": 38.16,
      "eval_ag_news_accuracy": 0.32909375,
      "eval_ag_news_bleu_score": 4.862735768010554,
      "eval_ag_news_bleu_score_sem": 0.15753661699455201,
      "eval_ag_news_emb_cos_sim": 0.8131601214408875,
      "eval_ag_news_emb_cos_sim_sem": 0.007208148302962952,
      "eval_ag_news_emb_top1_equal": 0.2421875,
      "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.477194309234619,
      "eval_ag_news_n_ngrams_match_1": 14.306,
      "eval_ag_news_n_ngrams_match_2": 3.23,
      "eval_ag_news_n_ngrams_match_3": 0.892,
      "eval_ag_news_num_pred_words": 46.706,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.36877777384159,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35472643245210533,
      "eval_ag_news_runtime": 10.3812,
      "eval_ag_news_samples_per_second": 48.164,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.35454808679556604,
      "eval_ag_news_token_set_f1_sem": 0.004681066017851176,
      "eval_ag_news_token_set_precision": 0.3417000046428554,
      "eval_ag_news_token_set_recall": 0.38523533717129893,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 198750
    },
    {
      "epoch": 38.16,
      "eval_anthropic_toxic_prompts_accuracy": 0.1173125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.296478468232397,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12471177678908144,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6765120029449463,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008981230658412833,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.140625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030847557647994725,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2036640644073486,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.35,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.08,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.796,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.278,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.6225838482655,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21715898210835177,
      "eval_anthropic_toxic_prompts_runtime": 9.8897,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.558,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.101,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36368491999903624,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006597898824306445,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4442149308136982,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.33304464530685085,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 198750
    },
    {
      "epoch": 38.16,
      "eval_arxiv_accuracy": 0.35190625,
      "eval_arxiv_bleu_score": 4.451322987118088,
      "eval_arxiv_bleu_score_sem": 0.1275649117194889,
      "eval_arxiv_emb_cos_sim": 0.7822730541229248,
      "eval_arxiv_emb_cos_sim_sem": 0.007400052782400672,
      "eval_arxiv_emb_top1_equal": 0.328125,
      "eval_arxiv_emb_top1_equal_sem": 0.041664103776406315,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.34447979927063,
      "eval_arxiv_n_ngrams_match_1": 15.59,
      "eval_arxiv_n_ngrams_match_2": 3.084,
      "eval_arxiv_n_ngrams_match_3": 0.702,
      "eval_arxiv_num_pred_words": 41.076,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.34582631115176,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36946012229350955,
      "eval_arxiv_runtime": 10.0215,
      "eval_arxiv_samples_per_second": 49.893,
      "eval_arxiv_steps_per_second": 0.1,
      "eval_arxiv_token_set_f1": 0.3642653446787465,
      "eval_arxiv_token_set_f1_sem": 0.0040808309403072215,
      "eval_arxiv_token_set_precision": 0.317690930807214,
      "eval_arxiv_token_set_recall": 0.44532565597467866,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 198750
    },
    {
      "epoch": 38.16,
      "eval_python_code_alpaca_accuracy": 0.1618125,
      "eval_python_code_alpaca_bleu_score": 4.845829567155077,
      "eval_python_code_alpaca_bleu_score_sem": 0.14953287889336236,
      "eval_python_code_alpaca_emb_cos_sim": 0.7627565860748291,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007619878890432351,
      "eval_python_code_alpaca_emb_top1_equal": 0.1640625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.866325616836548,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.982,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.104,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.12,
      "eval_python_code_alpaca_num_pred_words": 43.956,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.5723319509202,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3374877262826108,
      "eval_python_code_alpaca_runtime": 11.1604,
      "eval_python_code_alpaca_samples_per_second": 44.801,
      "eval_python_code_alpaca_steps_per_second": 0.09,
      "eval_python_code_alpaca_token_set_f1": 0.48260256452000383,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005435004351010331,
      "eval_python_code_alpaca_token_set_precision": 0.544912521425928,
      "eval_python_code_alpaca_token_set_recall": 0.45590778148872574,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 198750
    },
    {
      "epoch": 38.16,
      "eval_wikibio_accuracy": 0.3309375,
      "eval_wikibio_bleu_score": 6.292063292076303,
      "eval_wikibio_bleu_score_sem": 0.22239109030590454,
      "eval_wikibio_emb_cos_sim": 0.7575108408927917,
      "eval_wikibio_emb_cos_sim_sem": 0.008050685536664487,
      "eval_wikibio_emb_top1_equal": 0.203125,
      "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6480793952941895,
      "eval_wikibio_n_ngrams_match_1": 10.45,
      "eval_wikibio_n_ngrams_match_2": 3.588,
      "eval_wikibio_n_ngrams_match_3": 1.386,
      "eval_wikibio_num_pred_words": 37.068,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 38.40084234013815,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36807835747819223,
      "eval_wikibio_runtime": 10.1277,
      "eval_wikibio_samples_per_second": 49.37,
      "eval_wikibio_steps_per_second": 0.099,
      "eval_wikibio_token_set_f1": 0.32839154985238966,
      "eval_wikibio_token_set_f1_sem": 0.005251312121649202,
      "eval_wikibio_token_set_precision": 0.33912558808325355,
      "eval_wikibio_token_set_recall": 0.33362293170263874,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 198750
    },
    {
      "epoch": 38.16,
      "eval_nq_accuracy": 0.5350625,
      "eval_nq_bleu_score": 12.04315443277687,
      "eval_nq_bleu_score_sem": 0.4933023759468999,
      "eval_nq_emb_cos_sim": 0.8419560790061951,
      "eval_nq_emb_cos_sim_sem": 0.006557228423293656,
      "eval_nq_emb_top1_equal": 0.328125,
      "eval_nq_emb_top1_equal_sem": 0.041664103776406315,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.133126974105835,
      "eval_nq_n_ngrams_match_1": 23.534,
      "eval_nq_n_ngrams_match_2": 8.64,
      "eval_nq_n_ngrams_match_3": 4.022,
      "eval_nq_num_pred_words": 49.282,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.441221065123727,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45750899903593667,
      "eval_nq_runtime": 10.3819,
      "eval_nq_samples_per_second": 48.161,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.46728131934242007,
      "eval_nq_token_set_f1_sem": 0.005001053555814796,
      "eval_nq_token_set_precision": 0.4269902145666502,
      "eval_nq_token_set_recall": 0.5235735900445585,
      "eval_nq_true_num_tokens": 64.0,
      "step": 198750
    },
    {
      "epoch": 38.16,
      "learning_rate": 0.001,
      "loss": 2.509,
      "step": 198756
    },
    {
      "epoch": 38.17,
      "learning_rate": 0.001,
      "loss": 2.5075,
      "step": 198768
    },
    {
      "epoch": 38.17,
      "learning_rate": 0.001,
      "loss": 2.5069,
      "step": 198780
    },
    {
      "epoch": 38.17,
      "learning_rate": 0.001,
      "loss": 2.5067,
      "step": 198792
    },
    {
      "epoch": 38.17,
      "learning_rate": 0.001,
      "loss": 2.5069,
      "step": 198804
    },
    {
      "epoch": 38.18,
      "learning_rate": 0.001,
      "loss": 2.5129,
      "step": 198816
    },
    {
      "epoch": 38.18,
      "learning_rate": 0.001,
      "loss": 2.5062,
      "step": 198828
    },
    {
      "epoch": 38.18,
      "learning_rate": 0.001,
      "loss": 2.5066,
      "step": 198840
    },
    {
      "epoch": 38.18,
      "learning_rate": 0.001,
      "loss": 2.5047,
      "step": 198852
    },
    {
      "epoch": 38.18,
      "learning_rate": 0.001,
      "loss": 2.5112,
      "step": 198864
    },
    {
      "epoch": 38.19,
      "learning_rate": 0.001,
      "loss": 2.5094,
      "step": 198876
    },
    {
      "epoch": 38.19,
      "learning_rate": 0.001,
      "loss": 2.503,
      "step": 198888
    },
    {
      "epoch": 38.19,
      "learning_rate": 0.001,
      "loss": 2.5014,
      "step": 198900
    },
    {
      "epoch": 38.19,
      "learning_rate": 0.001,
      "loss": 2.5023,
      "step": 198912
    },
    {
      "epoch": 38.2,
      "learning_rate": 0.001,
      "loss": 2.5043,
      "step": 198924
    },
    {
      "epoch": 38.2,
      "learning_rate": 0.001,
      "loss": 2.5033,
      "step": 198936
    },
    {
      "epoch": 38.2,
      "learning_rate": 0.001,
      "loss": 2.5087,
      "step": 198948
    },
    {
      "epoch": 38.2,
      "learning_rate": 0.001,
      "loss": 2.5137,
      "step": 198960
    },
    {
      "epoch": 38.21,
      "learning_rate": 0.001,
      "loss": 2.5046,
      "step": 198972
    },
    {
      "epoch": 38.21,
      "learning_rate": 0.001,
      "loss": 2.5006,
      "step": 198984
    },
    {
      "epoch": 38.21,
      "learning_rate": 0.001,
      "loss": 2.508,
      "step": 198996
    },
    {
      "epoch": 38.21,
      "learning_rate": 0.001,
      "loss": 2.5035,
      "step": 199008
    },
    {
      "epoch": 38.21,
      "learning_rate": 0.001,
      "loss": 2.4955,
      "step": 199020
    },
    {
      "epoch": 38.22,
      "learning_rate": 0.001,
      "loss": 2.4908,
      "step": 199032
    },
    {
      "epoch": 38.22,
      "learning_rate": 0.001,
      "loss": 2.4965,
      "step": 199044
    },
    {
      "epoch": 38.22,
      "learning_rate": 0.001,
      "loss": 2.5054,
      "step": 199056
    },
    {
      "epoch": 38.22,
      "learning_rate": 0.001,
      "loss": 2.5005,
      "step": 199068
    },
    {
      "epoch": 38.23,
      "learning_rate": 0.001,
      "loss": 2.5012,
      "step": 199080
    },
    {
      "epoch": 38.23,
      "learning_rate": 0.001,
      "loss": 2.5147,
      "step": 199092
    },
    {
      "epoch": 38.23,
      "learning_rate": 0.001,
      "loss": 2.5003,
      "step": 199104
    },
    {
      "epoch": 38.23,
      "learning_rate": 0.001,
      "loss": 2.4897,
      "step": 199116
    },
    {
      "epoch": 38.24,
      "learning_rate": 0.001,
      "loss": 2.4978,
      "step": 199128
    },
    {
      "epoch": 38.24,
      "learning_rate": 0.001,
      "loss": 2.4944,
      "step": 199140
    },
    {
      "epoch": 38.24,
      "learning_rate": 0.001,
      "loss": 2.5024,
      "step": 199152
    },
    {
      "epoch": 38.24,
      "learning_rate": 0.001,
      "loss": 2.5071,
      "step": 199164
    },
    {
      "epoch": 38.24,
      "learning_rate": 0.001,
      "loss": 2.499,
      "step": 199176
    },
    {
      "epoch": 38.25,
      "learning_rate": 0.001,
      "loss": 2.5029,
      "step": 199188
    },
    {
      "epoch": 38.25,
      "learning_rate": 0.001,
      "loss": 2.4907,
      "step": 199200
    },
    {
      "epoch": 38.25,
      "learning_rate": 0.001,
      "loss": 2.5056,
      "step": 199212
    },
    {
      "epoch": 38.25,
      "learning_rate": 0.001,
      "loss": 2.5071,
      "step": 199224
    },
    {
      "epoch": 38.26,
      "learning_rate": 0.001,
      "loss": 2.5024,
      "step": 199236
    },
    {
      "epoch": 38.26,
      "learning_rate": 0.001,
      "loss": 2.5029,
      "step": 199248
    },
    {
      "epoch": 38.26,
      "learning_rate": 0.001,
      "loss": 2.5123,
      "step": 199260
    },
    {
      "epoch": 38.26,
      "learning_rate": 0.001,
      "loss": 2.4996,
      "step": 199272
    },
    {
      "epoch": 38.26,
      "learning_rate": 0.001,
      "loss": 2.5049,
      "step": 199284
    },
    {
      "epoch": 38.27,
      "learning_rate": 0.001,
      "loss": 2.5044,
      "step": 199296
    },
    {
      "epoch": 38.27,
      "learning_rate": 0.001,
      "loss": 2.4983,
      "step": 199308
    },
    {
      "epoch": 38.27,
      "learning_rate": 0.001,
      "loss": 2.5085,
      "step": 199320
    },
    {
      "epoch": 38.27,
      "learning_rate": 0.001,
      "loss": 2.4997,
      "step": 199332
    },
    {
      "epoch": 38.28,
      "learning_rate": 0.001,
      "loss": 2.5046,
      "step": 199344
    },
    {
      "epoch": 38.28,
      "learning_rate": 0.001,
      "loss": 2.5051,
      "step": 199356
    },
    {
      "epoch": 38.28,
      "learning_rate": 0.001,
      "loss": 2.5061,
      "step": 199368
    },
    {
      "epoch": 38.28,
      "eval_ag_news_accuracy": 0.327625,
      "eval_ag_news_bleu_score": 4.9417311987063215,
      "eval_ag_news_bleu_score_sem": 0.15424605211228998,
      "eval_ag_news_emb_cos_sim": 0.8186274766921997,
      "eval_ag_news_emb_cos_sim_sem": 0.007027419323825698,
      "eval_ag_news_emb_top1_equal": 0.2421875,
      "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.487560510635376,
      "eval_ag_news_n_ngrams_match_1": 14.42,
      "eval_ag_news_n_ngrams_match_2": 3.174,
      "eval_ag_news_n_ngrams_match_3": 0.882,
      "eval_ag_news_num_pred_words": 46.32,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.70606421257628,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3590849174322741,
      "eval_ag_news_runtime": 11.8147,
      "eval_ag_news_samples_per_second": 42.32,
      "eval_ag_news_steps_per_second": 0.085,
      "eval_ag_news_token_set_f1": 0.3580719528062785,
      "eval_ag_news_token_set_f1_sem": 0.00460088936336261,
      "eval_ag_news_token_set_precision": 0.34515331744894023,
      "eval_ag_news_token_set_recall": 0.3874022150564543,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 199375
    },
    {
      "epoch": 38.28,
      "eval_anthropic_toxic_prompts_accuracy": 0.114625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.30288252904598,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12912518825207164,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6769576072692871,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008583475043947334,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2431907653808594,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.188,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.98,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.774,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.032,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.61532398030485,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 62.9765625,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21343969703196963,
      "eval_anthropic_toxic_prompts_runtime": 13.5743,
      "eval_anthropic_toxic_prompts_samples_per_second": 36.834,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.074,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36012047929585433,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006933007484110533,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4408752537883833,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32990273330221304,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 199375
    },
    {
      "epoch": 38.28,
      "eval_arxiv_accuracy": 0.3524375,
      "eval_arxiv_bleu_score": 4.500845138328313,
      "eval_arxiv_bleu_score_sem": 0.13359574834055554,
      "eval_arxiv_emb_cos_sim": 0.7729178071022034,
      "eval_arxiv_emb_cos_sim_sem": 0.008900314636172487,
      "eval_arxiv_emb_top1_equal": 0.296875,
      "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.327899932861328,
      "eval_arxiv_n_ngrams_match_1": 15.324,
      "eval_arxiv_n_ngrams_match_2": 3.052,
      "eval_arxiv_n_ngrams_match_3": 0.696,
      "eval_arxiv_num_pred_words": 40.018,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.8797308747948,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3696750277898393,
      "eval_arxiv_runtime": 13.2087,
      "eval_arxiv_samples_per_second": 37.854,
      "eval_arxiv_steps_per_second": 0.076,
      "eval_arxiv_token_set_f1": 0.35973240314146787,
      "eval_arxiv_token_set_f1_sem": 0.00427649242879882,
      "eval_arxiv_token_set_precision": 0.311809219710262,
      "eval_arxiv_token_set_recall": 0.4420829017756489,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 199375
    },
    {
      "epoch": 38.28,
      "eval_python_code_alpaca_accuracy": 0.1625,
      "eval_python_code_alpaca_bleu_score": 4.8226810975350025,
      "eval_python_code_alpaca_bleu_score_sem": 0.15699190851218892,
      "eval_python_code_alpaca_emb_cos_sim": 0.7646726369857788,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008021080668308124,
      "eval_python_code_alpaca_emb_top1_equal": 0.1484375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8512156009674072,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.894,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.056,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.056,
      "eval_python_code_alpaca_num_pred_words": 44.07,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.308809663028644,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33691055931155556,
      "eval_python_code_alpaca_runtime": 11.0349,
      "eval_python_code_alpaca_samples_per_second": 45.311,
      "eval_python_code_alpaca_steps_per_second": 0.091,
      "eval_python_code_alpaca_token_set_f1": 0.4823490759806321,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0051191226550350395,
      "eval_python_code_alpaca_token_set_precision": 0.5451297281940832,
      "eval_python_code_alpaca_token_set_recall": 0.45611962723311733,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 199375
    },
    {
      "epoch": 38.28,
      "eval_wikibio_accuracy": 0.32953125,
      "eval_wikibio_bleu_score": 6.042169552848864,
      "eval_wikibio_bleu_score_sem": 0.2188123058716265,
      "eval_wikibio_emb_cos_sim": 0.7414339184761047,
      "eval_wikibio_emb_cos_sim_sem": 0.008044945004188401,
      "eval_wikibio_emb_top1_equal": 0.1796875,
      "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6446495056152344,
      "eval_wikibio_n_ngrams_match_1": 9.962,
      "eval_wikibio_n_ngrams_match_2": 3.342,
      "eval_wikibio_n_ngrams_match_3": 1.214,
      "eval_wikibio_num_pred_words": 35.222,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 38.26935730581431,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.35563535234548793,
      "eval_wikibio_runtime": 12.9718,
      "eval_wikibio_samples_per_second": 38.545,
      "eval_wikibio_steps_per_second": 0.077,
      "eval_wikibio_token_set_f1": 0.3181026646514537,
      "eval_wikibio_token_set_f1_sem": 0.00552625825197284,
      "eval_wikibio_token_set_precision": 0.323409945130816,
      "eval_wikibio_token_set_recall": 0.3296236853527382,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 199375
    },
    {
      "epoch": 38.28,
      "eval_nq_accuracy": 0.5341875,
      "eval_nq_bleu_score": 11.837841350754823,
      "eval_nq_bleu_score_sem": 0.4807961040164993,
      "eval_nq_emb_cos_sim": 0.8370886445045471,
      "eval_nq_emb_cos_sim_sem": 0.006791194799000741,
      "eval_nq_emb_top1_equal": 0.2890625,
      "eval_nq_emb_top1_equal_sem": 0.04022626667363519,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1303882598876953,
      "eval_nq_n_ngrams_match_1": 23.45,
      "eval_nq_n_ngrams_match_2": 8.582,
      "eval_nq_n_ngrams_match_3": 3.936,
      "eval_nq_num_pred_words": 49.21,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.418134601018068,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4552353713490796,
      "eval_nq_runtime": 12.048,
      "eval_nq_samples_per_second": 41.501,
      "eval_nq_steps_per_second": 0.083,
      "eval_nq_token_set_f1": 0.47160475008168445,
      "eval_nq_token_set_f1_sem": 0.004894827177870064,
      "eval_nq_token_set_precision": 0.42939985785051693,
      "eval_nq_token_set_recall": 0.5324201956013755,
      "eval_nq_true_num_tokens": 64.0,
      "step": 199375
    },
    {
      "epoch": 38.28,
      "learning_rate": 0.001,
      "loss": 2.5042,
      "step": 199380
    },
    {
      "epoch": 38.29,
      "learning_rate": 0.001,
      "loss": 2.505,
      "step": 199392
    },
    {
      "epoch": 38.29,
      "learning_rate": 0.001,
      "loss": 2.5102,
      "step": 199404
    },
    {
      "epoch": 38.29,
      "learning_rate": 0.001,
      "loss": 2.5109,
      "step": 199416
    },
    {
      "epoch": 38.29,
      "learning_rate": 0.001,
      "loss": 2.5057,
      "step": 199428
    },
    {
      "epoch": 38.29,
      "learning_rate": 0.001,
      "loss": 2.516,
      "step": 199440
    },
    {
      "epoch": 38.3,
      "learning_rate": 0.001,
      "loss": 2.5044,
      "step": 199452
    },
    {
      "epoch": 38.3,
      "learning_rate": 0.001,
      "loss": 2.5078,
      "step": 199464
    },
    {
      "epoch": 38.3,
      "learning_rate": 0.001,
      "loss": 2.5136,
      "step": 199476
    },
    {
      "epoch": 38.3,
      "learning_rate": 0.001,
      "loss": 2.5159,
      "step": 199488
    },
    {
      "epoch": 38.31,
      "learning_rate": 0.001,
      "loss": 2.5041,
      "step": 199500
    },
    {
      "epoch": 38.31,
      "learning_rate": 0.001,
      "loss": 2.5081,
      "step": 199512
    },
    {
      "epoch": 38.31,
      "learning_rate": 0.001,
      "loss": 2.5065,
      "step": 199524
    },
    {
      "epoch": 38.31,
      "learning_rate": 0.001,
      "loss": 2.5117,
      "step": 199536
    },
    {
      "epoch": 38.32,
      "learning_rate": 0.001,
      "loss": 2.5067,
      "step": 199548
    },
    {
      "epoch": 38.32,
      "learning_rate": 0.001,
      "loss": 2.5045,
      "step": 199560
    },
    {
      "epoch": 38.32,
      "learning_rate": 0.001,
      "loss": 2.5049,
      "step": 199572
    },
    {
      "epoch": 38.32,
      "learning_rate": 0.001,
      "loss": 2.506,
      "step": 199584
    },
    {
      "epoch": 38.32,
      "learning_rate": 0.001,
      "loss": 2.5113,
      "step": 199596
    },
    {
      "epoch": 38.33,
      "learning_rate": 0.001,
      "loss": 2.5016,
      "step": 199608
    },
    {
      "epoch": 38.33,
      "learning_rate": 0.001,
      "loss": 2.513,
      "step": 199620
    },
    {
      "epoch": 38.33,
      "learning_rate": 0.001,
      "loss": 2.4948,
      "step": 199632
    },
    {
      "epoch": 38.33,
      "learning_rate": 0.001,
      "loss": 2.4988,
      "step": 199644
    },
    {
      "epoch": 38.34,
      "learning_rate": 0.001,
      "loss": 2.4992,
      "step": 199656
    },
    {
      "epoch": 38.34,
      "learning_rate": 0.001,
      "loss": 2.4999,
      "step": 199668
    },
    {
      "epoch": 38.34,
      "learning_rate": 0.001,
      "loss": 2.4948,
      "step": 199680
    },
    {
      "epoch": 38.34,
      "learning_rate": 0.001,
      "loss": 2.5074,
      "step": 199692
    },
    {
      "epoch": 38.35,
      "learning_rate": 0.001,
      "loss": 2.5032,
      "step": 199704
    },
    {
      "epoch": 38.35,
      "learning_rate": 0.001,
      "loss": 2.5101,
      "step": 199716
    },
    {
      "epoch": 38.35,
      "learning_rate": 0.001,
      "loss": 2.5037,
      "step": 199728
    },
    {
      "epoch": 38.35,
      "learning_rate": 0.001,
      "loss": 2.5042,
      "step": 199740
    },
    {
      "epoch": 38.35,
      "learning_rate": 0.001,
      "loss": 2.5058,
      "step": 199752
    },
    {
      "epoch": 38.36,
      "learning_rate": 0.001,
      "loss": 2.5092,
      "step": 199764
    },
    {
      "epoch": 38.36,
      "learning_rate": 0.001,
      "loss": 2.4986,
      "step": 199776
    },
    {
      "epoch": 38.36,
      "learning_rate": 0.001,
      "loss": 2.5059,
      "step": 199788
    },
    {
      "epoch": 38.36,
      "learning_rate": 0.001,
      "loss": 2.5134,
      "step": 199800
    },
    {
      "epoch": 38.37,
      "learning_rate": 0.001,
      "loss": 2.5077,
      "step": 199812
    },
    {
      "epoch": 38.37,
      "learning_rate": 0.001,
      "loss": 2.4985,
      "step": 199824
    },
    {
      "epoch": 38.37,
      "learning_rate": 0.001,
      "loss": 2.501,
      "step": 199836
    },
    {
      "epoch": 38.37,
      "learning_rate": 0.001,
      "loss": 2.505,
      "step": 199848
    },
    {
      "epoch": 38.38,
      "learning_rate": 0.001,
      "loss": 2.5074,
      "step": 199860
    },
    {
      "epoch": 38.38,
      "learning_rate": 0.001,
      "loss": 2.5045,
      "step": 199872
    },
    {
      "epoch": 38.38,
      "learning_rate": 0.001,
      "loss": 2.5086,
      "step": 199884
    },
    {
      "epoch": 38.38,
      "learning_rate": 0.001,
      "loss": 2.5046,
      "step": 199896
    },
    {
      "epoch": 38.38,
      "learning_rate": 0.001,
      "loss": 2.5125,
      "step": 199908
    },
    {
      "epoch": 38.39,
      "learning_rate": 0.001,
      "loss": 2.4982,
      "step": 199920
    },
    {
      "epoch": 38.39,
      "learning_rate": 0.001,
      "loss": 2.5028,
      "step": 199932
    },
    {
      "epoch": 38.39,
      "learning_rate": 0.001,
      "loss": 2.4973,
      "step": 199944
    },
    {
      "epoch": 38.39,
      "learning_rate": 0.001,
      "loss": 2.5103,
      "step": 199956
    },
    {
      "epoch": 38.4,
      "learning_rate": 0.001,
      "loss": 2.5031,
      "step": 199968
    },
    {
      "epoch": 38.4,
      "learning_rate": 0.001,
      "loss": 2.5133,
      "step": 199980
    },
    {
      "epoch": 38.4,
      "learning_rate": 0.001,
      "loss": 2.5095,
      "step": 199992
    },
    {
      "epoch": 38.4,
      "eval_ag_news_accuracy": 0.32825,
      "eval_ag_news_bleu_score": 5.172552201596351,
      "eval_ag_news_bleu_score_sem": 0.16181223343041456,
      "eval_ag_news_emb_cos_sim": 0.8215479850769043,
      "eval_ag_news_emb_cos_sim_sem": 0.007498894386148306,
      "eval_ag_news_emb_top1_equal": 0.2734375,
      "eval_ag_news_emb_top1_equal_sem": 0.03955156411760461,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.488349676132202,
      "eval_ag_news_n_ngrams_match_1": 14.652,
      "eval_ag_news_n_ngrams_match_2": 3.414,
      "eval_ag_news_n_ngrams_match_3": 1.0,
      "eval_ag_news_num_pred_words": 46.966,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.73188489704641,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3624800378888058,
      "eval_ag_news_runtime": 12.8154,
      "eval_ag_news_samples_per_second": 39.016,
      "eval_ag_news_steps_per_second": 0.078,
      "eval_ag_news_token_set_f1": 0.3634194636723417,
      "eval_ag_news_token_set_f1_sem": 0.004405233955182402,
      "eval_ag_news_token_set_precision": 0.3511867546689802,
      "eval_ag_news_token_set_recall": 0.38956414681062035,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 200000
    },
    {
      "epoch": 38.4,
      "eval_anthropic_toxic_prompts_accuracy": 0.1160625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.3744040639541364,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1311318100792003,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6873105764389038,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.007988750742777728,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1328125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030114394778901498,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.232280969619751,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.316,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.07,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.826,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.116,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.337384913323735,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21686406936866284,
      "eval_anthropic_toxic_prompts_runtime": 9.9469,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.267,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.101,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3590520345784447,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006762216294785205,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44224462054379854,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32948871745396646,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 200000
    },
    {
      "epoch": 38.4,
      "eval_arxiv_accuracy": 0.35278125,
      "eval_arxiv_bleu_score": 4.380235097489727,
      "eval_arxiv_bleu_score_sem": 0.12933162875966375,
      "eval_arxiv_emb_cos_sim": 0.7866820096969604,
      "eval_arxiv_emb_cos_sim_sem": 0.006539146108193467,
      "eval_arxiv_emb_top1_equal": 0.3203125,
      "eval_arxiv_emb_top1_equal_sem": 0.041403754790620424,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.340620756149292,
      "eval_arxiv_n_ngrams_match_1": 15.772,
      "eval_arxiv_n_ngrams_match_2": 2.996,
      "eval_arxiv_n_ngrams_match_3": 0.656,
      "eval_arxiv_num_pred_words": 40.778,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.236649339917527,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3748535188797494,
      "eval_arxiv_runtime": 10.1305,
      "eval_arxiv_samples_per_second": 49.356,
      "eval_arxiv_steps_per_second": 0.099,
      "eval_arxiv_token_set_f1": 0.36868663200142515,
      "eval_arxiv_token_set_f1_sem": 0.004493028079803148,
      "eval_arxiv_token_set_precision": 0.3241241758652874,
      "eval_arxiv_token_set_recall": 0.44456752717847087,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 200000
    },
    {
      "epoch": 38.4,
      "eval_python_code_alpaca_accuracy": 0.16425,
      "eval_python_code_alpaca_bleu_score": 4.915179625615341,
      "eval_python_code_alpaca_bleu_score_sem": 0.15284102400875269,
      "eval_python_code_alpaca_emb_cos_sim": 0.7691056132316589,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007020678462241987,
      "eval_python_code_alpaca_emb_top1_equal": 0.15625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.838472843170166,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.202,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.154,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.076,
      "eval_python_code_alpaca_num_pred_words": 43.7,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.08964702751432,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.34479702451640815,
      "eval_python_code_alpaca_runtime": 10.5125,
      "eval_python_code_alpaca_samples_per_second": 47.563,
      "eval_python_code_alpaca_steps_per_second": 0.095,
      "eval_python_code_alpaca_token_set_f1": 0.49356710271560095,
      "eval_python_code_alpaca_token_set_f1_sem": 0.00512612959845217,
      "eval_python_code_alpaca_token_set_precision": 0.5574205363902113,
      "eval_python_code_alpaca_token_set_recall": 0.465838193643123,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 200000
    },
    {
      "epoch": 38.4,
      "eval_wikibio_accuracy": 0.32740625,
      "eval_wikibio_bleu_score": 6.209100645734727,
      "eval_wikibio_bleu_score_sem": 0.22584343950398492,
      "eval_wikibio_emb_cos_sim": 0.7476376295089722,
      "eval_wikibio_emb_cos_sim_sem": 0.009153704123598014,
      "eval_wikibio_emb_top1_equal": 0.1484375,
      "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6737782955169678,
      "eval_wikibio_n_ngrams_match_1": 10.224,
      "eval_wikibio_n_ngrams_match_2": 3.502,
      "eval_wikibio_n_ngrams_match_3": 1.344,
      "eval_wikibio_num_pred_words": 36.268,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 39.40049168406325,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3583372177560415,
      "eval_wikibio_runtime": 9.9414,
      "eval_wikibio_samples_per_second": 50.294,
      "eval_wikibio_steps_per_second": 0.101,
      "eval_wikibio_token_set_f1": 0.32298246517111684,
      "eval_wikibio_token_set_f1_sem": 0.00558874538217453,
      "eval_wikibio_token_set_precision": 0.3335038329257763,
      "eval_wikibio_token_set_recall": 0.32766481741272097,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 200000
    },
    {
      "epoch": 38.4,
      "eval_nq_accuracy": 0.53621875,
      "eval_nq_bleu_score": 12.159658329574835,
      "eval_nq_bleu_score_sem": 0.48712114140477525,
      "eval_nq_emb_cos_sim": 0.8404883146286011,
      "eval_nq_emb_cos_sim_sem": 0.006631654489407832,
      "eval_nq_emb_top1_equal": 0.3125,
      "eval_nq_emb_top1_equal_sem": 0.041130074229814934,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.127622604370117,
      "eval_nq_n_ngrams_match_1": 23.456,
      "eval_nq_n_ngrams_match_2": 8.694,
      "eval_nq_n_ngrams_match_3": 4.064,
      "eval_nq_num_pred_words": 49.04,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.394885105478181,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45384860546852174,
      "eval_nq_runtime": 10.4328,
      "eval_nq_samples_per_second": 47.926,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.4700611381379461,
      "eval_nq_token_set_f1_sem": 0.0049640822515344156,
      "eval_nq_token_set_precision": 0.42943715348694084,
      "eval_nq_token_set_recall": 0.5279278469408664,
      "eval_nq_true_num_tokens": 64.0,
      "step": 200000
    },
    {
      "epoch": 38.4,
      "learning_rate": 0.001,
      "loss": 2.5025,
      "step": 200004
    },
    {
      "epoch": 38.41,
      "learning_rate": 0.001,
      "loss": 2.4939,
      "step": 200016
    },
    {
      "epoch": 38.41,
      "learning_rate": 0.001,
      "loss": 2.5075,
      "step": 200028
    },
    {
      "epoch": 38.41,
      "learning_rate": 0.001,
      "loss": 2.5059,
      "step": 200040
    },
    {
      "epoch": 38.41,
      "learning_rate": 0.001,
      "loss": 2.4987,
      "step": 200052
    },
    {
      "epoch": 38.41,
      "learning_rate": 0.001,
      "loss": 2.506,
      "step": 200064
    },
    {
      "epoch": 38.42,
      "learning_rate": 0.001,
      "loss": 2.4962,
      "step": 200076
    },
    {
      "epoch": 38.42,
      "learning_rate": 0.001,
      "loss": 2.4986,
      "step": 200088
    },
    {
      "epoch": 38.42,
      "learning_rate": 0.001,
      "loss": 2.5016,
      "step": 200100
    },
    {
      "epoch": 38.42,
      "learning_rate": 0.001,
      "loss": 2.5042,
      "step": 200112
    },
    {
      "epoch": 38.43,
      "learning_rate": 0.001,
      "loss": 2.508,
      "step": 200124
    },
    {
      "epoch": 38.43,
      "learning_rate": 0.001,
      "loss": 2.5056,
      "step": 200136
    },
    {
      "epoch": 38.43,
      "learning_rate": 0.001,
      "loss": 2.5063,
      "step": 200148
    },
    {
      "epoch": 38.43,
      "learning_rate": 0.001,
      "loss": 2.5123,
      "step": 200160
    },
    {
      "epoch": 38.44,
      "learning_rate": 0.001,
      "loss": 2.5059,
      "step": 200172
    },
    {
      "epoch": 38.44,
      "learning_rate": 0.001,
      "loss": 2.5142,
      "step": 200184
    },
    {
      "epoch": 38.44,
      "learning_rate": 0.001,
      "loss": 2.5072,
      "step": 200196
    },
    {
      "epoch": 38.44,
      "learning_rate": 0.001,
      "loss": 2.5013,
      "step": 200208
    },
    {
      "epoch": 38.44,
      "learning_rate": 0.001,
      "loss": 2.5075,
      "step": 200220
    },
    {
      "epoch": 38.45,
      "learning_rate": 0.001,
      "loss": 2.5065,
      "step": 200232
    },
    {
      "epoch": 38.45,
      "learning_rate": 0.001,
      "loss": 2.5085,
      "step": 200244
    },
    {
      "epoch": 38.45,
      "learning_rate": 0.001,
      "loss": 2.5048,
      "step": 200256
    },
    {
      "epoch": 38.45,
      "learning_rate": 0.001,
      "loss": 2.4989,
      "step": 200268
    },
    {
      "epoch": 38.46,
      "learning_rate": 0.001,
      "loss": 2.4999,
      "step": 200280
    },
    {
      "epoch": 38.46,
      "learning_rate": 0.001,
      "loss": 2.5048,
      "step": 200292
    },
    {
      "epoch": 38.46,
      "learning_rate": 0.001,
      "loss": 2.5187,
      "step": 200304
    },
    {
      "epoch": 38.46,
      "learning_rate": 0.001,
      "loss": 2.5133,
      "step": 200316
    },
    {
      "epoch": 38.47,
      "learning_rate": 0.001,
      "loss": 2.5046,
      "step": 200328
    },
    {
      "epoch": 38.47,
      "learning_rate": 0.001,
      "loss": 2.5046,
      "step": 200340
    },
    {
      "epoch": 38.47,
      "learning_rate": 0.001,
      "loss": 2.495,
      "step": 200352
    },
    {
      "epoch": 38.47,
      "learning_rate": 0.001,
      "loss": 2.4932,
      "step": 200364
    },
    {
      "epoch": 38.47,
      "learning_rate": 0.001,
      "loss": 2.504,
      "step": 200376
    },
    {
      "epoch": 38.48,
      "learning_rate": 0.001,
      "loss": 2.4958,
      "step": 200388
    },
    {
      "epoch": 38.48,
      "learning_rate": 0.001,
      "loss": 2.5039,
      "step": 200400
    },
    {
      "epoch": 38.48,
      "learning_rate": 0.001,
      "loss": 2.5018,
      "step": 200412
    },
    {
      "epoch": 38.48,
      "learning_rate": 0.001,
      "loss": 2.5062,
      "step": 200424
    },
    {
      "epoch": 38.49,
      "learning_rate": 0.001,
      "loss": 2.5044,
      "step": 200436
    },
    {
      "epoch": 38.49,
      "learning_rate": 0.001,
      "loss": 2.502,
      "step": 200448
    },
    {
      "epoch": 38.49,
      "learning_rate": 0.001,
      "loss": 2.4996,
      "step": 200460
    },
    {
      "epoch": 38.49,
      "learning_rate": 0.001,
      "loss": 2.4987,
      "step": 200472
    },
    {
      "epoch": 38.5,
      "learning_rate": 0.001,
      "loss": 2.5062,
      "step": 200484
    },
    {
      "epoch": 38.5,
      "learning_rate": 0.001,
      "loss": 2.5077,
      "step": 200496
    },
    {
      "epoch": 38.5,
      "learning_rate": 0.001,
      "loss": 2.5089,
      "step": 200508
    },
    {
      "epoch": 38.5,
      "learning_rate": 0.001,
      "loss": 2.5003,
      "step": 200520
    },
    {
      "epoch": 38.5,
      "learning_rate": 0.001,
      "loss": 2.5085,
      "step": 200532
    },
    {
      "epoch": 38.51,
      "learning_rate": 0.001,
      "loss": 2.5103,
      "step": 200544
    },
    {
      "epoch": 38.51,
      "learning_rate": 0.001,
      "loss": 2.5022,
      "step": 200556
    },
    {
      "epoch": 38.51,
      "learning_rate": 0.001,
      "loss": 2.5005,
      "step": 200568
    },
    {
      "epoch": 38.51,
      "learning_rate": 0.001,
      "loss": 2.5033,
      "step": 200580
    },
    {
      "epoch": 38.52,
      "learning_rate": 0.001,
      "loss": 2.5065,
      "step": 200592
    },
    {
      "epoch": 38.52,
      "learning_rate": 0.001,
      "loss": 2.5077,
      "step": 200604
    },
    {
      "epoch": 38.52,
      "learning_rate": 0.001,
      "loss": 2.5049,
      "step": 200616
    },
    {
      "epoch": 38.52,
      "eval_ag_news_accuracy": 0.32696875,
      "eval_ag_news_bleu_score": 5.124490805927564,
      "eval_ag_news_bleu_score_sem": 0.16806403085212412,
      "eval_ag_news_emb_cos_sim": 0.8210431337356567,
      "eval_ag_news_emb_cos_sim_sem": 0.006155401686679809,
      "eval_ag_news_emb_top1_equal": 0.2421875,
      "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.482522487640381,
      "eval_ag_news_n_ngrams_match_1": 14.35,
      "eval_ag_news_n_ngrams_match_2": 3.246,
      "eval_ag_news_n_ngrams_match_3": 0.95,
      "eval_ag_news_num_pred_words": 46.392,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.541704680192545,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3591422003217143,
      "eval_ag_news_runtime": 11.5848,
      "eval_ag_news_samples_per_second": 43.16,
      "eval_ag_news_steps_per_second": 0.086,
      "eval_ag_news_token_set_f1": 0.3602993094966992,
      "eval_ag_news_token_set_f1_sem": 0.004505170963543107,
      "eval_ag_news_token_set_precision": 0.34517211413055204,
      "eval_ag_news_token_set_recall": 0.39151240323973885,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 200625
    },
    {
      "epoch": 38.52,
      "eval_anthropic_toxic_prompts_accuracy": 0.1169375,
      "eval_anthropic_toxic_prompts_bleu_score": 3.3712601061706833,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.13318402728049128,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6785003542900085,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008789142445057385,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.224536657333374,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.314,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.082,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.828,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.176,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.141922131342337,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21586434959105966,
      "eval_anthropic_toxic_prompts_runtime": 10.0031,
      "eval_anthropic_toxic_prompts_samples_per_second": 49.985,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.1,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36242229949343746,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006938019150534992,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4416903305306638,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3343296823954955,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 200625
    },
    {
      "epoch": 38.52,
      "eval_arxiv_accuracy": 0.35140625,
      "eval_arxiv_bleu_score": 4.579195338607506,
      "eval_arxiv_bleu_score_sem": 0.1405080470622127,
      "eval_arxiv_emb_cos_sim": 0.7835668921470642,
      "eval_arxiv_emb_cos_sim_sem": 0.006613090119727798,
      "eval_arxiv_emb_top1_equal": 0.3359375,
      "eval_arxiv_emb_top1_equal_sem": 0.04191137143408563,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.343899965286255,
      "eval_arxiv_n_ngrams_match_1": 15.592,
      "eval_arxiv_n_ngrams_match_2": 3.15,
      "eval_arxiv_n_ngrams_match_3": 0.752,
      "eval_arxiv_num_pred_words": 40.6,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.3293952018595,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.37238043762067263,
      "eval_arxiv_runtime": 10.864,
      "eval_arxiv_samples_per_second": 46.023,
      "eval_arxiv_steps_per_second": 0.092,
      "eval_arxiv_token_set_f1": 0.365866023498149,
      "eval_arxiv_token_set_f1_sem": 0.004428000742937261,
      "eval_arxiv_token_set_precision": 0.31829054517088734,
      "eval_arxiv_token_set_recall": 0.4480372605531303,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 200625
    },
    {
      "epoch": 38.52,
      "eval_python_code_alpaca_accuracy": 0.16246875,
      "eval_python_code_alpaca_bleu_score": 4.902811574303316,
      "eval_python_code_alpaca_bleu_score_sem": 0.1572073012295978,
      "eval_python_code_alpaca_emb_cos_sim": 0.7700563669204712,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007293403474966395,
      "eval_python_code_alpaca_emb_top1_equal": 0.1328125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8625295162200928,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.88,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.012,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.086,
      "eval_python_code_alpaca_num_pred_words": 43.388,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.505752062699557,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3385448207373508,
      "eval_python_code_alpaca_runtime": 9.9187,
      "eval_python_code_alpaca_samples_per_second": 50.41,
      "eval_python_code_alpaca_steps_per_second": 0.101,
      "eval_python_code_alpaca_token_set_f1": 0.48832918345736936,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005340555258537011,
      "eval_python_code_alpaca_token_set_precision": 0.5415004190520915,
      "eval_python_code_alpaca_token_set_recall": 0.46614241816551255,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 200625
    },
    {
      "epoch": 38.52,
      "eval_wikibio_accuracy": 0.32828125,
      "eval_wikibio_bleu_score": 6.021161512863206,
      "eval_wikibio_bleu_score_sem": 0.22085702483213818,
      "eval_wikibio_emb_cos_sim": 0.744111955165863,
      "eval_wikibio_emb_cos_sim_sem": 0.009517643477578601,
      "eval_wikibio_emb_top1_equal": 0.1953125,
      "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.642890691757202,
      "eval_wikibio_n_ngrams_match_1": 9.96,
      "eval_wikibio_n_ngrams_match_2": 3.35,
      "eval_wikibio_n_ngrams_match_3": 1.242,
      "eval_wikibio_num_pred_words": 35.786,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 38.202107786875736,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3531122350233396,
      "eval_wikibio_runtime": 14.9241,
      "eval_wikibio_samples_per_second": 33.503,
      "eval_wikibio_steps_per_second": 0.067,
      "eval_wikibio_token_set_f1": 0.3167796918315341,
      "eval_wikibio_token_set_f1_sem": 0.005669717027321806,
      "eval_wikibio_token_set_precision": 0.3235410572755014,
      "eval_wikibio_token_set_recall": 0.32786112063324696,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 200625
    },
    {
      "epoch": 38.52,
      "eval_nq_accuracy": 0.53546875,
      "eval_nq_bleu_score": 11.67899940002113,
      "eval_nq_bleu_score_sem": 0.45548232175890974,
      "eval_nq_emb_cos_sim": 0.8368167877197266,
      "eval_nq_emb_cos_sim_sem": 0.0069787383967066,
      "eval_nq_emb_top1_equal": 0.28125,
      "eval_nq_emb_top1_equal_sem": 0.039896367485272234,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1302053928375244,
      "eval_nq_n_ngrams_match_1": 23.304,
      "eval_nq_n_ngrams_match_2": 8.47,
      "eval_nq_n_ngrams_match_3": 3.836,
      "eval_nq_num_pred_words": 48.926,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.416595342319775,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45345869442626835,
      "eval_nq_runtime": 10.2932,
      "eval_nq_samples_per_second": 48.576,
      "eval_nq_steps_per_second": 0.097,
      "eval_nq_token_set_f1": 0.4690944405401308,
      "eval_nq_token_set_f1_sem": 0.005093111331519668,
      "eval_nq_token_set_precision": 0.42536645564826464,
      "eval_nq_token_set_recall": 0.5313037685433415,
      "eval_nq_true_num_tokens": 64.0,
      "step": 200625
    },
    {
      "epoch": 38.52,
      "learning_rate": 0.001,
      "loss": 2.5001,
      "step": 200628
    },
    {
      "epoch": 38.53,
      "learning_rate": 0.001,
      "loss": 2.5057,
      "step": 200640
    },
    {
      "epoch": 38.53,
      "learning_rate": 0.001,
      "loss": 2.4998,
      "step": 200652
    },
    {
      "epoch": 38.53,
      "learning_rate": 0.001,
      "loss": 2.5063,
      "step": 200664
    },
    {
      "epoch": 38.53,
      "learning_rate": 0.001,
      "loss": 2.4956,
      "step": 200676
    },
    {
      "epoch": 38.53,
      "learning_rate": 0.001,
      "loss": 2.4931,
      "step": 200688
    },
    {
      "epoch": 38.54,
      "learning_rate": 0.001,
      "loss": 2.4999,
      "step": 200700
    },
    {
      "epoch": 38.54,
      "learning_rate": 0.001,
      "loss": 2.5019,
      "step": 200712
    },
    {
      "epoch": 38.54,
      "learning_rate": 0.001,
      "loss": 2.4955,
      "step": 200724
    },
    {
      "epoch": 38.54,
      "learning_rate": 0.001,
      "loss": 2.5033,
      "step": 200736
    },
    {
      "epoch": 38.55,
      "learning_rate": 0.001,
      "loss": 2.5017,
      "step": 200748
    },
    {
      "epoch": 38.55,
      "learning_rate": 0.001,
      "loss": 2.5108,
      "step": 200760
    },
    {
      "epoch": 38.55,
      "learning_rate": 0.001,
      "loss": 2.5146,
      "step": 200772
    },
    {
      "epoch": 38.55,
      "learning_rate": 0.001,
      "loss": 2.5142,
      "step": 200784
    },
    {
      "epoch": 38.56,
      "learning_rate": 0.001,
      "loss": 2.5035,
      "step": 200796
    },
    {
      "epoch": 38.56,
      "learning_rate": 0.001,
      "loss": 2.5006,
      "step": 200808
    },
    {
      "epoch": 38.56,
      "learning_rate": 0.001,
      "loss": 2.5109,
      "step": 200820
    },
    {
      "epoch": 38.56,
      "learning_rate": 0.001,
      "loss": 2.5032,
      "step": 200832
    },
    {
      "epoch": 38.56,
      "learning_rate": 0.001,
      "loss": 2.5027,
      "step": 200844
    },
    {
      "epoch": 38.57,
      "learning_rate": 0.001,
      "loss": 2.4963,
      "step": 200856
    },
    {
      "epoch": 38.57,
      "learning_rate": 0.001,
      "loss": 2.4946,
      "step": 200868
    },
    {
      "epoch": 38.57,
      "learning_rate": 0.001,
      "loss": 2.5009,
      "step": 200880
    },
    {
      "epoch": 38.57,
      "learning_rate": 0.001,
      "loss": 2.5096,
      "step": 200892
    },
    {
      "epoch": 38.58,
      "learning_rate": 0.001,
      "loss": 2.5075,
      "step": 200904
    },
    {
      "epoch": 38.58,
      "learning_rate": 0.001,
      "loss": 2.5071,
      "step": 200916
    },
    {
      "epoch": 38.58,
      "learning_rate": 0.001,
      "loss": 2.5054,
      "step": 200928
    },
    {
      "epoch": 38.58,
      "learning_rate": 0.001,
      "loss": 2.503,
      "step": 200940
    },
    {
      "epoch": 38.59,
      "learning_rate": 0.001,
      "loss": 2.5034,
      "step": 200952
    },
    {
      "epoch": 38.59,
      "learning_rate": 0.001,
      "loss": 2.5034,
      "step": 200964
    },
    {
      "epoch": 38.59,
      "learning_rate": 0.001,
      "loss": 2.5033,
      "step": 200976
    },
    {
      "epoch": 38.59,
      "learning_rate": 0.001,
      "loss": 2.4914,
      "step": 200988
    },
    {
      "epoch": 38.59,
      "learning_rate": 0.001,
      "loss": 2.5097,
      "step": 201000
    },
    {
      "epoch": 38.6,
      "learning_rate": 0.001,
      "loss": 2.5036,
      "step": 201012
    },
    {
      "epoch": 38.6,
      "learning_rate": 0.001,
      "loss": 2.4975,
      "step": 201024
    },
    {
      "epoch": 38.6,
      "learning_rate": 0.001,
      "loss": 2.5098,
      "step": 201036
    },
    {
      "epoch": 38.6,
      "learning_rate": 0.001,
      "loss": 2.5082,
      "step": 201048
    },
    {
      "epoch": 38.61,
      "learning_rate": 0.001,
      "loss": 2.5062,
      "step": 201060
    },
    {
      "epoch": 38.61,
      "learning_rate": 0.001,
      "loss": 2.5021,
      "step": 201072
    },
    {
      "epoch": 38.61,
      "learning_rate": 0.001,
      "loss": 2.4985,
      "step": 201084
    },
    {
      "epoch": 38.61,
      "learning_rate": 0.001,
      "loss": 2.5039,
      "step": 201096
    },
    {
      "epoch": 38.62,
      "learning_rate": 0.001,
      "loss": 2.5177,
      "step": 201108
    },
    {
      "epoch": 38.62,
      "learning_rate": 0.001,
      "loss": 2.4961,
      "step": 201120
    },
    {
      "epoch": 38.62,
      "learning_rate": 0.001,
      "loss": 2.5002,
      "step": 201132
    },
    {
      "epoch": 38.62,
      "learning_rate": 0.001,
      "loss": 2.5024,
      "step": 201144
    },
    {
      "epoch": 38.62,
      "learning_rate": 0.001,
      "loss": 2.5004,
      "step": 201156
    },
    {
      "epoch": 38.63,
      "learning_rate": 0.001,
      "loss": 2.5049,
      "step": 201168
    },
    {
      "epoch": 38.63,
      "learning_rate": 0.001,
      "loss": 2.5065,
      "step": 201180
    },
    {
      "epoch": 38.63,
      "learning_rate": 0.001,
      "loss": 2.5039,
      "step": 201192
    },
    {
      "epoch": 38.63,
      "learning_rate": 0.001,
      "loss": 2.5069,
      "step": 201204
    },
    {
      "epoch": 38.64,
      "learning_rate": 0.001,
      "loss": 2.508,
      "step": 201216
    },
    {
      "epoch": 38.64,
      "learning_rate": 0.001,
      "loss": 2.5021,
      "step": 201228
    },
    {
      "epoch": 38.64,
      "learning_rate": 0.001,
      "loss": 2.5073,
      "step": 201240
    },
    {
      "epoch": 38.64,
      "eval_ag_news_accuracy": 0.3296875,
      "eval_ag_news_bleu_score": 4.916234398109618,
      "eval_ag_news_bleu_score_sem": 0.16556309465932698,
      "eval_ag_news_emb_cos_sim": 0.8227593898773193,
      "eval_ag_news_emb_cos_sim_sem": 0.006438031208804276,
      "eval_ag_news_emb_top1_equal": 0.265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4632151126861572,
      "eval_ag_news_n_ngrams_match_1": 14.44,
      "eval_ag_news_n_ngrams_match_2": 3.234,
      "eval_ag_news_n_ngrams_match_3": 0.902,
      "eval_ag_news_num_pred_words": 46.448,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.919436301114658,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3606588435428477,
      "eval_ag_news_runtime": 10.4227,
      "eval_ag_news_samples_per_second": 47.972,
      "eval_ag_news_steps_per_second": 0.096,
      "eval_ag_news_token_set_f1": 0.36046218119862616,
      "eval_ag_news_token_set_f1_sem": 0.00453020238609333,
      "eval_ag_news_token_set_precision": 0.34579195831015946,
      "eval_ag_news_token_set_recall": 0.3906633539178233,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 201250
    },
    {
      "epoch": 38.64,
      "eval_anthropic_toxic_prompts_accuracy": 0.11621875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.3607888777637753,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.13015718057924883,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6823102235794067,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00962869252087071,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2163546085357666,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.37,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.044,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.778,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.596,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.937048981602853,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 62.953125,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21896378837396635,
      "eval_anthropic_toxic_prompts_runtime": 9.877,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.623,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.101,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36041119788731574,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00669103893115273,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4449990491056998,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3286289655604711,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 201250
    },
    {
      "epoch": 38.64,
      "eval_arxiv_accuracy": 0.3535,
      "eval_arxiv_bleu_score": 4.318002499163436,
      "eval_arxiv_bleu_score_sem": 0.13218365332714507,
      "eval_arxiv_emb_cos_sim": 0.7763446569442749,
      "eval_arxiv_emb_cos_sim_sem": 0.007534848940650639,
      "eval_arxiv_emb_top1_equal": 0.234375,
      "eval_arxiv_emb_top1_equal_sem": 0.03758909358128201,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3291878700256348,
      "eval_arxiv_n_ngrams_match_1": 15.296,
      "eval_arxiv_n_ngrams_match_2": 2.898,
      "eval_arxiv_n_ngrams_match_3": 0.67,
      "eval_arxiv_num_pred_words": 39.494,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.915661349449397,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36947079046896836,
      "eval_arxiv_runtime": 14.7458,
      "eval_arxiv_samples_per_second": 33.908,
      "eval_arxiv_steps_per_second": 0.068,
      "eval_arxiv_token_set_f1": 0.36258928552527303,
      "eval_arxiv_token_set_f1_sem": 0.0044621756079646765,
      "eval_arxiv_token_set_precision": 0.31356423140260836,
      "eval_arxiv_token_set_recall": 0.44947400262918213,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 201250
    },
    {
      "epoch": 38.64,
      "eval_python_code_alpaca_accuracy": 0.1628125,
      "eval_python_code_alpaca_bleu_score": 4.653238948438494,
      "eval_python_code_alpaca_bleu_score_sem": 0.1484399888376643,
      "eval_python_code_alpaca_emb_cos_sim": 0.7610523700714111,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008129999978661418,
      "eval_python_code_alpaca_emb_top1_equal": 0.1640625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8632025718688965,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.8,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.876,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.986,
      "eval_python_code_alpaca_num_pred_words": 43.418,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.51753837398867,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3318641293223034,
      "eval_python_code_alpaca_runtime": 9.6409,
      "eval_python_code_alpaca_samples_per_second": 51.862,
      "eval_python_code_alpaca_steps_per_second": 0.104,
      "eval_python_code_alpaca_token_set_f1": 0.47870735390552044,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005561339573159282,
      "eval_python_code_alpaca_token_set_precision": 0.533145417806957,
      "eval_python_code_alpaca_token_set_recall": 0.4581533491929219,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 201250
    },
    {
      "epoch": 38.64,
      "eval_wikibio_accuracy": 0.33134375,
      "eval_wikibio_bleu_score": 6.0463956689639975,
      "eval_wikibio_bleu_score_sem": 0.21884906055287173,
      "eval_wikibio_emb_cos_sim": 0.7410796880722046,
      "eval_wikibio_emb_cos_sim_sem": 0.009447447242839198,
      "eval_wikibio_emb_top1_equal": 0.171875,
      "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6174113750457764,
      "eval_wikibio_n_ngrams_match_1": 9.648,
      "eval_wikibio_n_ngrams_match_2": 3.286,
      "eval_wikibio_n_ngrams_match_3": 1.216,
      "eval_wikibio_num_pred_words": 34.904,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.24103985351619,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3474419480356657,
      "eval_wikibio_runtime": 14.4,
      "eval_wikibio_samples_per_second": 34.722,
      "eval_wikibio_steps_per_second": 0.069,
      "eval_wikibio_token_set_f1": 0.3133670602315977,
      "eval_wikibio_token_set_f1_sem": 0.005841852625337701,
      "eval_wikibio_token_set_precision": 0.31555529983383407,
      "eval_wikibio_token_set_recall": 0.3307085213568827,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 201250
    },
    {
      "epoch": 38.64,
      "eval_nq_accuracy": 0.53721875,
      "eval_nq_bleu_score": 12.095965169289103,
      "eval_nq_bleu_score_sem": 0.48117336932309007,
      "eval_nq_emb_cos_sim": 0.8377463221549988,
      "eval_nq_emb_cos_sim_sem": 0.00777562927638182,
      "eval_nq_emb_top1_equal": 0.3046875,
      "eval_nq_emb_top1_equal_sem": 0.04084279867618665,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.124088764190674,
      "eval_nq_n_ngrams_match_1": 23.472,
      "eval_nq_n_ngrams_match_2": 8.668,
      "eval_nq_n_ngrams_match_3": 3.992,
      "eval_nq_num_pred_words": 48.832,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.365271279273268,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4600786336278272,
      "eval_nq_runtime": 10.457,
      "eval_nq_samples_per_second": 47.815,
      "eval_nq_steps_per_second": 0.096,
      "eval_nq_token_set_f1": 0.4733094220281565,
      "eval_nq_token_set_f1_sem": 0.005083996611667977,
      "eval_nq_token_set_precision": 0.42903930774077786,
      "eval_nq_token_set_recall": 0.5364713139797934,
      "eval_nq_true_num_tokens": 64.0,
      "step": 201250
    },
    {
      "epoch": 38.64,
      "learning_rate": 0.001,
      "loss": 2.4995,
      "step": 201252
    },
    {
      "epoch": 38.65,
      "learning_rate": 0.001,
      "loss": 2.5015,
      "step": 201264
    },
    {
      "epoch": 38.65,
      "learning_rate": 0.001,
      "loss": 2.5153,
      "step": 201276
    },
    {
      "epoch": 38.65,
      "learning_rate": 0.001,
      "loss": 2.5047,
      "step": 201288
    },
    {
      "epoch": 38.65,
      "learning_rate": 0.001,
      "loss": 2.5092,
      "step": 201300
    },
    {
      "epoch": 38.65,
      "learning_rate": 0.001,
      "loss": 2.508,
      "step": 201312
    },
    {
      "epoch": 38.66,
      "learning_rate": 0.001,
      "loss": 2.5107,
      "step": 201324
    },
    {
      "epoch": 38.66,
      "learning_rate": 0.001,
      "loss": 2.4973,
      "step": 201336
    },
    {
      "epoch": 38.66,
      "learning_rate": 0.001,
      "loss": 2.5124,
      "step": 201348
    },
    {
      "epoch": 38.66,
      "learning_rate": 0.001,
      "loss": 2.5041,
      "step": 201360
    },
    {
      "epoch": 38.67,
      "learning_rate": 0.001,
      "loss": 2.5095,
      "step": 201372
    },
    {
      "epoch": 38.67,
      "learning_rate": 0.001,
      "loss": 2.5113,
      "step": 201384
    },
    {
      "epoch": 38.67,
      "learning_rate": 0.001,
      "loss": 2.5082,
      "step": 201396
    },
    {
      "epoch": 38.67,
      "learning_rate": 0.001,
      "loss": 2.5095,
      "step": 201408
    },
    {
      "epoch": 38.68,
      "learning_rate": 0.001,
      "loss": 2.5092,
      "step": 201420
    },
    {
      "epoch": 38.68,
      "learning_rate": 0.001,
      "loss": 2.5093,
      "step": 201432
    },
    {
      "epoch": 38.68,
      "learning_rate": 0.001,
      "loss": 2.5033,
      "step": 201444
    },
    {
      "epoch": 38.68,
      "learning_rate": 0.001,
      "loss": 2.5017,
      "step": 201456
    },
    {
      "epoch": 38.68,
      "learning_rate": 0.001,
      "loss": 2.4977,
      "step": 201468
    },
    {
      "epoch": 38.69,
      "learning_rate": 0.001,
      "loss": 2.513,
      "step": 201480
    },
    {
      "epoch": 38.69,
      "learning_rate": 0.001,
      "loss": 2.4955,
      "step": 201492
    },
    {
      "epoch": 38.69,
      "learning_rate": 0.001,
      "loss": 2.5184,
      "step": 201504
    },
    {
      "epoch": 38.69,
      "learning_rate": 0.001,
      "loss": 2.5074,
      "step": 201516
    },
    {
      "epoch": 38.7,
      "learning_rate": 0.001,
      "loss": 2.5032,
      "step": 201528
    },
    {
      "epoch": 38.7,
      "learning_rate": 0.001,
      "loss": 2.5104,
      "step": 201540
    },
    {
      "epoch": 38.7,
      "learning_rate": 0.001,
      "loss": 2.5155,
      "step": 201552
    },
    {
      "epoch": 38.7,
      "learning_rate": 0.001,
      "loss": 2.5045,
      "step": 201564
    },
    {
      "epoch": 38.71,
      "learning_rate": 0.001,
      "loss": 2.5077,
      "step": 201576
    },
    {
      "epoch": 38.71,
      "learning_rate": 0.001,
      "loss": 2.5001,
      "step": 201588
    },
    {
      "epoch": 38.71,
      "learning_rate": 0.001,
      "loss": 2.5092,
      "step": 201600
    },
    {
      "epoch": 38.71,
      "learning_rate": 0.001,
      "loss": 2.5143,
      "step": 201612
    },
    {
      "epoch": 38.71,
      "learning_rate": 0.001,
      "loss": 2.506,
      "step": 201624
    },
    {
      "epoch": 38.72,
      "learning_rate": 0.001,
      "loss": 2.5062,
      "step": 201636
    },
    {
      "epoch": 38.72,
      "learning_rate": 0.001,
      "loss": 2.507,
      "step": 201648
    },
    {
      "epoch": 38.72,
      "learning_rate": 0.001,
      "loss": 2.5105,
      "step": 201660
    },
    {
      "epoch": 38.72,
      "learning_rate": 0.001,
      "loss": 2.5066,
      "step": 201672
    },
    {
      "epoch": 38.73,
      "learning_rate": 0.001,
      "loss": 2.4964,
      "step": 201684
    },
    {
      "epoch": 38.73,
      "learning_rate": 0.001,
      "loss": 2.5115,
      "step": 201696
    },
    {
      "epoch": 38.73,
      "learning_rate": 0.001,
      "loss": 2.5186,
      "step": 201708
    },
    {
      "epoch": 38.73,
      "learning_rate": 0.001,
      "loss": 2.5066,
      "step": 201720
    },
    {
      "epoch": 38.74,
      "learning_rate": 0.001,
      "loss": 2.5118,
      "step": 201732
    },
    {
      "epoch": 38.74,
      "learning_rate": 0.001,
      "loss": 2.5188,
      "step": 201744
    },
    {
      "epoch": 38.74,
      "learning_rate": 0.001,
      "loss": 2.5094,
      "step": 201756
    },
    {
      "epoch": 38.74,
      "learning_rate": 0.001,
      "loss": 2.518,
      "step": 201768
    },
    {
      "epoch": 38.74,
      "learning_rate": 0.001,
      "loss": 2.5007,
      "step": 201780
    },
    {
      "epoch": 38.75,
      "learning_rate": 0.001,
      "loss": 2.5032,
      "step": 201792
    },
    {
      "epoch": 38.75,
      "learning_rate": 0.001,
      "loss": 2.5079,
      "step": 201804
    },
    {
      "epoch": 38.75,
      "learning_rate": 0.001,
      "loss": 2.5128,
      "step": 201816
    },
    {
      "epoch": 38.75,
      "learning_rate": 0.001,
      "loss": 2.512,
      "step": 201828
    },
    {
      "epoch": 38.76,
      "learning_rate": 0.001,
      "loss": 2.5104,
      "step": 201840
    },
    {
      "epoch": 38.76,
      "learning_rate": 0.001,
      "loss": 2.5061,
      "step": 201852
    },
    {
      "epoch": 38.76,
      "learning_rate": 0.001,
      "loss": 2.512,
      "step": 201864
    },
    {
      "epoch": 38.76,
      "eval_ag_news_accuracy": 0.3266875,
      "eval_ag_news_bleu_score": 4.917675719839939,
      "eval_ag_news_bleu_score_sem": 0.15777813278336045,
      "eval_ag_news_emb_cos_sim": 0.8131592273712158,
      "eval_ag_news_emb_cos_sim_sem": 0.0074618334768725625,
      "eval_ag_news_emb_top1_equal": 0.2109375,
      "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.488602876663208,
      "eval_ag_news_n_ngrams_match_1": 14.318,
      "eval_ag_news_n_ngrams_match_2": 3.134,
      "eval_ag_news_n_ngrams_match_3": 0.938,
      "eval_ag_news_num_pred_words": 46.674,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.74017367700063,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.356389705023823,
      "eval_ag_news_runtime": 10.3043,
      "eval_ag_news_samples_per_second": 48.524,
      "eval_ag_news_steps_per_second": 0.097,
      "eval_ag_news_token_set_f1": 0.3554133112421214,
      "eval_ag_news_token_set_f1_sem": 0.004590297937167434,
      "eval_ag_news_token_set_precision": 0.3431914770540108,
      "eval_ag_news_token_set_recall": 0.3843889410895245,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 201875
    },
    {
      "epoch": 38.76,
      "eval_anthropic_toxic_prompts_accuracy": 0.1173125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.242991435867587,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1304039427410768,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.679387629032135,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008760039387616105,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.187056064605713,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.306,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.978,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.766,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.026,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.217029029597594,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21501595188965897,
      "eval_anthropic_toxic_prompts_runtime": 10.9332,
      "eval_anthropic_toxic_prompts_samples_per_second": 45.732,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.091,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36261502689196223,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0066560905457200776,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4416186025811198,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.33389050103492146,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 201875
    },
    {
      "epoch": 38.76,
      "eval_arxiv_accuracy": 0.35465625,
      "eval_arxiv_bleu_score": 4.55229740831341,
      "eval_arxiv_bleu_score_sem": 0.13688962556401163,
      "eval_arxiv_emb_cos_sim": 0.783316969871521,
      "eval_arxiv_emb_cos_sim_sem": 0.0069025401873214435,
      "eval_arxiv_emb_top1_equal": 0.2421875,
      "eval_arxiv_emb_top1_equal_sem": 0.038014990119662626,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3419687747955322,
      "eval_arxiv_n_ngrams_match_1": 15.376,
      "eval_arxiv_n_ngrams_match_2": 3.126,
      "eval_arxiv_n_ngrams_match_3": 0.744,
      "eval_arxiv_num_pred_words": 40.752,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.27473853644082,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3692863261658166,
      "eval_arxiv_runtime": 10.1712,
      "eval_arxiv_samples_per_second": 49.158,
      "eval_arxiv_steps_per_second": 0.098,
      "eval_arxiv_token_set_f1": 0.36152102203325975,
      "eval_arxiv_token_set_f1_sem": 0.004236665988412645,
      "eval_arxiv_token_set_precision": 0.3137950745892967,
      "eval_arxiv_token_set_recall": 0.44747270361294245,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 201875
    },
    {
      "epoch": 38.76,
      "eval_python_code_alpaca_accuracy": 0.1630625,
      "eval_python_code_alpaca_bleu_score": 4.512972974426608,
      "eval_python_code_alpaca_bleu_score_sem": 0.1402479341852449,
      "eval_python_code_alpaca_emb_cos_sim": 0.7586859464645386,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.00823875992124378,
      "eval_python_code_alpaca_emb_top1_equal": 0.125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8622183799743652,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.832,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.836,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.96,
      "eval_python_code_alpaca_num_pred_words": 44.33,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.500306235965105,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.32899641063380053,
      "eval_python_code_alpaca_runtime": 10.0203,
      "eval_python_code_alpaca_samples_per_second": 49.899,
      "eval_python_code_alpaca_steps_per_second": 0.1,
      "eval_python_code_alpaca_token_set_f1": 0.48051080934504997,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005507289813331886,
      "eval_python_code_alpaca_token_set_precision": 0.536617700549984,
      "eval_python_code_alpaca_token_set_recall": 0.45787161406975196,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 201875
    },
    {
      "epoch": 38.76,
      "eval_wikibio_accuracy": 0.329,
      "eval_wikibio_bleu_score": 6.317726898131954,
      "eval_wikibio_bleu_score_sem": 0.23536335607271658,
      "eval_wikibio_emb_cos_sim": 0.7388870120048523,
      "eval_wikibio_emb_cos_sim_sem": 0.00989785810052947,
      "eval_wikibio_emb_top1_equal": 0.1953125,
      "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6539878845214844,
      "eval_wikibio_n_ngrams_match_1": 10.048,
      "eval_wikibio_n_ngrams_match_2": 3.446,
      "eval_wikibio_n_ngrams_match_3": 1.338,
      "eval_wikibio_num_pred_words": 35.838,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 38.6284049169189,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.35968329711628844,
      "eval_wikibio_runtime": 11.4744,
      "eval_wikibio_samples_per_second": 43.575,
      "eval_wikibio_steps_per_second": 0.087,
      "eval_wikibio_token_set_f1": 0.3213910517015467,
      "eval_wikibio_token_set_f1_sem": 0.005781536706777318,
      "eval_wikibio_token_set_precision": 0.32744484232953247,
      "eval_wikibio_token_set_recall": 0.33254251410522806,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 201875
    },
    {
      "epoch": 38.76,
      "eval_nq_accuracy": 0.5334375,
      "eval_nq_bleu_score": 12.264501630654925,
      "eval_nq_bleu_score_sem": 0.48704479745097623,
      "eval_nq_emb_cos_sim": 0.8376739025115967,
      "eval_nq_emb_cos_sim_sem": 0.006610134571975972,
      "eval_nq_emb_top1_equal": 0.2734375,
      "eval_nq_emb_top1_equal_sem": 0.03955156411760461,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1322991847991943,
      "eval_nq_n_ngrams_match_1": 23.564,
      "eval_nq_n_ngrams_match_2": 8.8,
      "eval_nq_n_ngrams_match_3": 4.11,
      "eval_nq_num_pred_words": 49.27,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.434236403903807,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45702857873677716,
      "eval_nq_runtime": 10.7031,
      "eval_nq_samples_per_second": 46.715,
      "eval_nq_steps_per_second": 0.093,
      "eval_nq_token_set_f1": 0.47155928446618667,
      "eval_nq_token_set_f1_sem": 0.004943600773138105,
      "eval_nq_token_set_precision": 0.4303732457619844,
      "eval_nq_token_set_recall": 0.5303012318140418,
      "eval_nq_true_num_tokens": 64.0,
      "step": 201875
    },
    {
      "epoch": 38.76,
      "learning_rate": 0.001,
      "loss": 2.5037,
      "step": 201876
    },
    {
      "epoch": 38.76,
      "learning_rate": 0.001,
      "loss": 2.5069,
      "step": 201888
    },
    {
      "epoch": 38.77,
      "learning_rate": 0.001,
      "loss": 2.5177,
      "step": 201900
    },
    {
      "epoch": 38.77,
      "learning_rate": 0.001,
      "loss": 2.5022,
      "step": 201912
    },
    {
      "epoch": 38.77,
      "learning_rate": 0.001,
      "loss": 2.5204,
      "step": 201924
    },
    {
      "epoch": 38.77,
      "learning_rate": 0.001,
      "loss": 2.5016,
      "step": 201936
    },
    {
      "epoch": 38.78,
      "learning_rate": 0.001,
      "loss": 2.4997,
      "step": 201948
    },
    {
      "epoch": 38.78,
      "learning_rate": 0.001,
      "loss": 2.5071,
      "step": 201960
    },
    {
      "epoch": 38.78,
      "learning_rate": 0.001,
      "loss": 2.5138,
      "step": 201972
    },
    {
      "epoch": 38.78,
      "learning_rate": 0.001,
      "loss": 2.5097,
      "step": 201984
    },
    {
      "epoch": 38.79,
      "learning_rate": 0.001,
      "loss": 2.5154,
      "step": 201996
    },
    {
      "epoch": 38.79,
      "learning_rate": 0.001,
      "loss": 2.5126,
      "step": 202008
    },
    {
      "epoch": 38.79,
      "learning_rate": 0.001,
      "loss": 2.501,
      "step": 202020
    },
    {
      "epoch": 38.79,
      "learning_rate": 0.001,
      "loss": 2.5098,
      "step": 202032
    },
    {
      "epoch": 38.79,
      "learning_rate": 0.001,
      "loss": 2.5159,
      "step": 202044
    },
    {
      "epoch": 38.8,
      "learning_rate": 0.001,
      "loss": 2.5162,
      "step": 202056
    },
    {
      "epoch": 38.8,
      "learning_rate": 0.001,
      "loss": 2.505,
      "step": 202068
    },
    {
      "epoch": 38.8,
      "learning_rate": 0.001,
      "loss": 2.5042,
      "step": 202080
    },
    {
      "epoch": 38.8,
      "learning_rate": 0.001,
      "loss": 2.5087,
      "step": 202092
    },
    {
      "epoch": 38.81,
      "learning_rate": 0.001,
      "loss": 2.5136,
      "step": 202104
    },
    {
      "epoch": 38.81,
      "learning_rate": 0.001,
      "loss": 2.5176,
      "step": 202116
    },
    {
      "epoch": 38.81,
      "learning_rate": 0.001,
      "loss": 2.5119,
      "step": 202128
    },
    {
      "epoch": 38.81,
      "learning_rate": 0.001,
      "loss": 2.5103,
      "step": 202140
    },
    {
      "epoch": 38.82,
      "learning_rate": 0.001,
      "loss": 2.5156,
      "step": 202152
    },
    {
      "epoch": 38.82,
      "learning_rate": 0.001,
      "loss": 2.5034,
      "step": 202164
    },
    {
      "epoch": 38.82,
      "learning_rate": 0.001,
      "loss": 2.513,
      "step": 202176
    },
    {
      "epoch": 38.82,
      "learning_rate": 0.001,
      "loss": 2.5153,
      "step": 202188
    },
    {
      "epoch": 38.82,
      "learning_rate": 0.001,
      "loss": 2.518,
      "step": 202200
    },
    {
      "epoch": 38.83,
      "learning_rate": 0.001,
      "loss": 2.5091,
      "step": 202212
    },
    {
      "epoch": 38.83,
      "learning_rate": 0.001,
      "loss": 2.507,
      "step": 202224
    },
    {
      "epoch": 38.83,
      "learning_rate": 0.001,
      "loss": 2.5116,
      "step": 202236
    },
    {
      "epoch": 38.83,
      "learning_rate": 0.001,
      "loss": 2.5153,
      "step": 202248
    },
    {
      "epoch": 38.84,
      "learning_rate": 0.001,
      "loss": 2.5192,
      "step": 202260
    },
    {
      "epoch": 38.84,
      "learning_rate": 0.001,
      "loss": 2.518,
      "step": 202272
    },
    {
      "epoch": 38.84,
      "learning_rate": 0.001,
      "loss": 2.5165,
      "step": 202284
    },
    {
      "epoch": 38.84,
      "learning_rate": 0.001,
      "loss": 2.529,
      "step": 202296
    },
    {
      "epoch": 38.85,
      "learning_rate": 0.001,
      "loss": 2.5186,
      "step": 202308
    },
    {
      "epoch": 38.85,
      "learning_rate": 0.001,
      "loss": 2.52,
      "step": 202320
    },
    {
      "epoch": 38.85,
      "learning_rate": 0.001,
      "loss": 2.5129,
      "step": 202332
    },
    {
      "epoch": 38.85,
      "learning_rate": 0.001,
      "loss": 2.5163,
      "step": 202344
    },
    {
      "epoch": 38.85,
      "learning_rate": 0.001,
      "loss": 2.5174,
      "step": 202356
    },
    {
      "epoch": 38.86,
      "learning_rate": 0.001,
      "loss": 2.5114,
      "step": 202368
    },
    {
      "epoch": 38.86,
      "learning_rate": 0.001,
      "loss": 2.5263,
      "step": 202380
    },
    {
      "epoch": 38.86,
      "learning_rate": 0.001,
      "loss": 2.5064,
      "step": 202392
    },
    {
      "epoch": 38.86,
      "learning_rate": 0.001,
      "loss": 2.514,
      "step": 202404
    },
    {
      "epoch": 38.87,
      "learning_rate": 0.001,
      "loss": 2.5191,
      "step": 202416
    },
    {
      "epoch": 38.87,
      "learning_rate": 0.001,
      "loss": 2.5118,
      "step": 202428
    },
    {
      "epoch": 38.87,
      "learning_rate": 0.001,
      "loss": 2.5103,
      "step": 202440
    },
    {
      "epoch": 38.87,
      "learning_rate": 0.001,
      "loss": 2.5148,
      "step": 202452
    },
    {
      "epoch": 38.88,
      "learning_rate": 0.001,
      "loss": 2.5132,
      "step": 202464
    },
    {
      "epoch": 38.88,
      "learning_rate": 0.001,
      "loss": 2.5192,
      "step": 202476
    },
    {
      "epoch": 38.88,
      "learning_rate": 0.001,
      "loss": 2.5131,
      "step": 202488
    },
    {
      "epoch": 38.88,
      "learning_rate": 0.001,
      "loss": 2.5145,
      "step": 202500
    },
    {
      "epoch": 38.88,
      "eval_ag_news_accuracy": 0.328375,
      "eval_ag_news_bleu_score": 5.1527155688806054,
      "eval_ag_news_bleu_score_sem": 0.16033182908607604,
      "eval_ag_news_emb_cos_sim": 0.8261315822601318,
      "eval_ag_news_emb_cos_sim_sem": 0.006198742476383676,
      "eval_ag_news_emb_top1_equal": 0.1875,
      "eval_ag_news_emb_top1_equal_sem": 0.034634623208270626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4841575622558594,
      "eval_ag_news_n_ngrams_match_1": 14.65,
      "eval_ag_news_n_ngrams_match_2": 3.39,
      "eval_ag_news_n_ngrams_match_3": 0.992,
      "eval_ag_news_num_pred_words": 47.036,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.5949563187969,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3638596673897808,
      "eval_ag_news_runtime": 29.9712,
      "eval_ag_news_samples_per_second": 16.683,
      "eval_ag_news_steps_per_second": 0.033,
      "eval_ag_news_token_set_f1": 0.3621769282116598,
      "eval_ag_news_token_set_f1_sem": 0.0044052607054207,
      "eval_ag_news_token_set_precision": 0.3486148355316827,
      "eval_ag_news_token_set_recall": 0.3895147200352479,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 202500
    },
    {
      "epoch": 38.88,
      "eval_anthropic_toxic_prompts_accuracy": 0.115375,
      "eval_anthropic_toxic_prompts_bleu_score": 3.3330506736959085,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12584082822832918,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6926984786987305,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008459507991586636,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2154390811920166,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.43,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.066,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.796,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.64,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.914228879195598,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.22039217915760645,
      "eval_anthropic_toxic_prompts_runtime": 11.1155,
      "eval_anthropic_toxic_prompts_samples_per_second": 44.982,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.09,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3622944960966976,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00657948807338437,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4565711999608672,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32324141954753727,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 202500
    },
    {
      "epoch": 38.88,
      "eval_arxiv_accuracy": 0.35040625,
      "eval_arxiv_bleu_score": 4.576550455069013,
      "eval_arxiv_bleu_score_sem": 0.13265521502907604,
      "eval_arxiv_emb_cos_sim": 0.7901520133018494,
      "eval_arxiv_emb_cos_sim_sem": 0.006274881416763113,
      "eval_arxiv_emb_top1_equal": 0.2890625,
      "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.353154182434082,
      "eval_arxiv_n_ngrams_match_1": 15.728,
      "eval_arxiv_n_ngrams_match_2": 3.108,
      "eval_arxiv_n_ngrams_match_3": 0.726,
      "eval_arxiv_num_pred_words": 40.996,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.59277839968988,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3747648255053755,
      "eval_arxiv_runtime": 10.8355,
      "eval_arxiv_samples_per_second": 46.145,
      "eval_arxiv_steps_per_second": 0.092,
      "eval_arxiv_token_set_f1": 0.3652804947432187,
      "eval_arxiv_token_set_f1_sem": 0.004229053235562321,
      "eval_arxiv_token_set_precision": 0.31876671598538453,
      "eval_arxiv_token_set_recall": 0.44378250447759915,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 202500
    },
    {
      "epoch": 38.88,
      "eval_python_code_alpaca_accuracy": 0.16284375,
      "eval_python_code_alpaca_bleu_score": 4.501096018473689,
      "eval_python_code_alpaca_bleu_score_sem": 0.13955968690839765,
      "eval_python_code_alpaca_emb_cos_sim": 0.7713341116905212,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007082156884784714,
      "eval_python_code_alpaca_emb_top1_equal": 0.171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03347745514062371,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.865617275238037,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.062,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.938,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.0,
      "eval_python_code_alpaca_num_pred_words": 45.6,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.559889144615443,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33501372485379877,
      "eval_python_code_alpaca_runtime": 10.1982,
      "eval_python_code_alpaca_samples_per_second": 49.028,
      "eval_python_code_alpaca_steps_per_second": 0.098,
      "eval_python_code_alpaca_token_set_f1": 0.4856092523507604,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005396022704446391,
      "eval_python_code_alpaca_token_set_precision": 0.5490716266038246,
      "eval_python_code_alpaca_token_set_recall": 0.45765075648728376,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 202500
    },
    {
      "epoch": 38.88,
      "eval_wikibio_accuracy": 0.32696875,
      "eval_wikibio_bleu_score": 6.145897708698071,
      "eval_wikibio_bleu_score_sem": 0.2171454817931754,
      "eval_wikibio_emb_cos_sim": 0.7525253295898438,
      "eval_wikibio_emb_cos_sim_sem": 0.008551091776311791,
      "eval_wikibio_emb_top1_equal": 0.2421875,
      "eval_wikibio_emb_top1_equal_sem": 0.038014990119662626,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6499736309051514,
      "eval_wikibio_n_ngrams_match_1": 10.256,
      "eval_wikibio_n_ngrams_match_2": 3.462,
      "eval_wikibio_n_ngrams_match_3": 1.272,
      "eval_wikibio_num_pred_words": 36.164,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 38.47365152028997,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.361788703810096,
      "eval_wikibio_runtime": 9.9363,
      "eval_wikibio_samples_per_second": 50.32,
      "eval_wikibio_steps_per_second": 0.101,
      "eval_wikibio_token_set_f1": 0.3209350589139348,
      "eval_wikibio_token_set_f1_sem": 0.005467771706450652,
      "eval_wikibio_token_set_precision": 0.3316562099364624,
      "eval_wikibio_token_set_recall": 0.3265698305645069,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 202500
    },
    {
      "epoch": 38.88,
      "eval_nq_accuracy": 0.53553125,
      "eval_nq_bleu_score": 12.218923377094882,
      "eval_nq_bleu_score_sem": 0.4981483004349057,
      "eval_nq_emb_cos_sim": 0.8463374376296997,
      "eval_nq_emb_cos_sim_sem": 0.0064939376862649615,
      "eval_nq_emb_top1_equal": 0.3359375,
      "eval_nq_emb_top1_equal_sem": 0.04191137143408563,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.129981517791748,
      "eval_nq_n_ngrams_match_1": 23.772,
      "eval_nq_n_ngrams_match_2": 8.866,
      "eval_nq_n_ngrams_match_3": 4.08,
      "eval_nq_num_pred_words": 49.104,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.414711287556525,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.46200736068215403,
      "eval_nq_runtime": 10.6016,
      "eval_nq_samples_per_second": 47.163,
      "eval_nq_steps_per_second": 0.094,
      "eval_nq_token_set_f1": 0.475367129495662,
      "eval_nq_token_set_f1_sem": 0.0048558998034506995,
      "eval_nq_token_set_precision": 0.4335902694750858,
      "eval_nq_token_set_recall": 0.5337332485971581,
      "eval_nq_true_num_tokens": 64.0,
      "step": 202500
    },
    {
      "epoch": 38.88,
      "learning_rate": 0.001,
      "loss": 2.5191,
      "step": 202512
    },
    {
      "epoch": 38.89,
      "learning_rate": 0.001,
      "loss": 2.5076,
      "step": 202524
    },
    {
      "epoch": 38.89,
      "learning_rate": 0.001,
      "loss": 2.52,
      "step": 202536
    },
    {
      "epoch": 38.89,
      "learning_rate": 0.001,
      "loss": 2.5101,
      "step": 202548
    },
    {
      "epoch": 38.89,
      "learning_rate": 0.001,
      "loss": 2.5146,
      "step": 202560
    },
    {
      "epoch": 38.9,
      "learning_rate": 0.001,
      "loss": 2.5122,
      "step": 202572
    },
    {
      "epoch": 38.9,
      "learning_rate": 0.001,
      "loss": 2.5176,
      "step": 202584
    },
    {
      "epoch": 38.9,
      "learning_rate": 0.001,
      "loss": 2.5065,
      "step": 202596
    },
    {
      "epoch": 38.9,
      "learning_rate": 0.001,
      "loss": 2.5172,
      "step": 202608
    },
    {
      "epoch": 38.91,
      "learning_rate": 0.001,
      "loss": 2.5115,
      "step": 202620
    },
    {
      "epoch": 38.91,
      "learning_rate": 0.001,
      "loss": 2.5062,
      "step": 202632
    },
    {
      "epoch": 38.91,
      "learning_rate": 0.001,
      "loss": 2.5095,
      "step": 202644
    },
    {
      "epoch": 38.91,
      "learning_rate": 0.001,
      "loss": 2.5126,
      "step": 202656
    },
    {
      "epoch": 38.91,
      "learning_rate": 0.001,
      "loss": 2.5152,
      "step": 202668
    },
    {
      "epoch": 38.92,
      "learning_rate": 0.001,
      "loss": 2.5165,
      "step": 202680
    },
    {
      "epoch": 38.92,
      "learning_rate": 0.001,
      "loss": 2.5106,
      "step": 202692
    },
    {
      "epoch": 38.92,
      "learning_rate": 0.001,
      "loss": 2.5098,
      "step": 202704
    },
    {
      "epoch": 38.92,
      "learning_rate": 0.001,
      "loss": 2.5117,
      "step": 202716
    },
    {
      "epoch": 38.93,
      "learning_rate": 0.001,
      "loss": 2.5146,
      "step": 202728
    },
    {
      "epoch": 38.93,
      "learning_rate": 0.001,
      "loss": 2.5004,
      "step": 202740
    },
    {
      "epoch": 38.93,
      "learning_rate": 0.001,
      "loss": 2.515,
      "step": 202752
    },
    {
      "epoch": 38.93,
      "learning_rate": 0.001,
      "loss": 2.502,
      "step": 202764
    },
    {
      "epoch": 38.94,
      "learning_rate": 0.001,
      "loss": 2.5187,
      "step": 202776
    },
    {
      "epoch": 38.94,
      "learning_rate": 0.001,
      "loss": 2.5104,
      "step": 202788
    },
    {
      "epoch": 38.94,
      "learning_rate": 0.001,
      "loss": 2.5074,
      "step": 202800
    },
    {
      "epoch": 38.94,
      "learning_rate": 0.001,
      "loss": 2.5021,
      "step": 202812
    },
    {
      "epoch": 38.94,
      "learning_rate": 0.001,
      "loss": 2.5143,
      "step": 202824
    },
    {
      "epoch": 38.95,
      "learning_rate": 0.001,
      "loss": 2.5199,
      "step": 202836
    },
    {
      "epoch": 38.95,
      "learning_rate": 0.001,
      "loss": 2.5087,
      "step": 202848
    },
    {
      "epoch": 38.95,
      "learning_rate": 0.001,
      "loss": 2.5112,
      "step": 202860
    },
    {
      "epoch": 38.95,
      "learning_rate": 0.001,
      "loss": 2.5031,
      "step": 202872
    },
    {
      "epoch": 38.96,
      "learning_rate": 0.001,
      "loss": 2.5183,
      "step": 202884
    },
    {
      "epoch": 38.96,
      "learning_rate": 0.001,
      "loss": 2.5072,
      "step": 202896
    },
    {
      "epoch": 38.96,
      "learning_rate": 0.001,
      "loss": 2.5059,
      "step": 202908
    },
    {
      "epoch": 38.96,
      "learning_rate": 0.001,
      "loss": 2.5115,
      "step": 202920
    },
    {
      "epoch": 38.97,
      "learning_rate": 0.001,
      "loss": 2.5133,
      "step": 202932
    },
    {
      "epoch": 38.97,
      "learning_rate": 0.001,
      "loss": 2.5035,
      "step": 202944
    },
    {
      "epoch": 38.97,
      "learning_rate": 0.001,
      "loss": 2.5205,
      "step": 202956
    },
    {
      "epoch": 38.97,
      "learning_rate": 0.001,
      "loss": 2.5173,
      "step": 202968
    },
    {
      "epoch": 38.97,
      "learning_rate": 0.001,
      "loss": 2.5073,
      "step": 202980
    },
    {
      "epoch": 38.98,
      "learning_rate": 0.001,
      "loss": 2.5107,
      "step": 202992
    },
    {
      "epoch": 38.98,
      "learning_rate": 0.001,
      "loss": 2.5146,
      "step": 203004
    },
    {
      "epoch": 38.98,
      "learning_rate": 0.001,
      "loss": 2.5094,
      "step": 203016
    },
    {
      "epoch": 38.98,
      "learning_rate": 0.001,
      "loss": 2.4934,
      "step": 203028
    },
    {
      "epoch": 38.99,
      "learning_rate": 0.001,
      "loss": 2.5081,
      "step": 203040
    },
    {
      "epoch": 38.99,
      "learning_rate": 0.001,
      "loss": 2.5076,
      "step": 203052
    },
    {
      "epoch": 38.99,
      "learning_rate": 0.001,
      "loss": 2.5168,
      "step": 203064
    },
    {
      "epoch": 38.99,
      "learning_rate": 0.001,
      "loss": 2.5112,
      "step": 203076
    },
    {
      "epoch": 39.0,
      "learning_rate": 0.001,
      "loss": 2.5042,
      "step": 203088
    },
    {
      "epoch": 39.0,
      "learning_rate": 0.001,
      "loss": 2.5118,
      "step": 203100
    },
    {
      "epoch": 39.0,
      "learning_rate": 0.001,
      "loss": 2.5241,
      "step": 203112
    },
    {
      "epoch": 39.0,
      "learning_rate": 0.001,
      "loss": 2.4926,
      "step": 203124
    },
    {
      "epoch": 39.0,
      "eval_ag_news_accuracy": 0.32684375,
      "eval_ag_news_bleu_score": 4.930483665400879,
      "eval_ag_news_bleu_score_sem": 0.1525254372087238,
      "eval_ag_news_emb_cos_sim": 0.8217231035232544,
      "eval_ag_news_emb_cos_sim_sem": 0.006145553812107435,
      "eval_ag_news_emb_top1_equal": 0.2109375,
      "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.483769416809082,
      "eval_ag_news_n_ngrams_match_1": 14.442,
      "eval_ag_news_n_ngrams_match_2": 3.224,
      "eval_ag_news_n_ngrams_match_3": 0.912,
      "eval_ag_news_num_pred_words": 46.874,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.58230718992355,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3589650751193681,
      "eval_ag_news_runtime": 10.4713,
      "eval_ag_news_samples_per_second": 47.75,
      "eval_ag_news_steps_per_second": 0.095,
      "eval_ag_news_token_set_f1": 0.35973880612370873,
      "eval_ag_news_token_set_f1_sem": 0.004445421841233903,
      "eval_ag_news_token_set_precision": 0.3482240008313258,
      "eval_ag_news_token_set_recall": 0.38569646833944277,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 203125
    },
    {
      "epoch": 39.0,
      "eval_anthropic_toxic_prompts_accuracy": 0.11621875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2150605163469677,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1231294136668318,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6852802038192749,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008253135947853043,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2176363468170166,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.406,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.992,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.746,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.69,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.969032244611608,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21995527486306599,
      "eval_anthropic_toxic_prompts_runtime": 9.8172,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.931,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.102,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3630440112872896,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006359723865418526,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.45075556344855516,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.33287990041219423,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 203125
    },
    {
      "epoch": 39.0,
      "eval_arxiv_accuracy": 0.351125,
      "eval_arxiv_bleu_score": 4.744306671947361,
      "eval_arxiv_bleu_score_sem": 0.13858429074770123,
      "eval_arxiv_emb_cos_sim": 0.788700520992279,
      "eval_arxiv_emb_cos_sim_sem": 0.00591725487043605,
      "eval_arxiv_emb_top1_equal": 0.2578125,
      "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3417587280273438,
      "eval_arxiv_n_ngrams_match_1": 15.732,
      "eval_arxiv_n_ngrams_match_2": 3.208,
      "eval_arxiv_n_ngrams_match_3": 0.774,
      "eval_arxiv_num_pred_words": 41.684,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.26880014268191,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3719481559322388,
      "eval_arxiv_runtime": 10.6133,
      "eval_arxiv_samples_per_second": 47.111,
      "eval_arxiv_steps_per_second": 0.094,
      "eval_arxiv_token_set_f1": 0.3664804712129069,
      "eval_arxiv_token_set_f1_sem": 0.00401123671147056,
      "eval_arxiv_token_set_precision": 0.3214731191284198,
      "eval_arxiv_token_set_recall": 0.4385559628883223,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 203125
    },
    {
      "epoch": 39.0,
      "eval_python_code_alpaca_accuracy": 0.1634375,
      "eval_python_code_alpaca_bleu_score": 4.646376557469093,
      "eval_python_code_alpaca_bleu_score_sem": 0.14656820083468533,
      "eval_python_code_alpaca_emb_cos_sim": 0.7661327123641968,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007154286606199553,
      "eval_python_code_alpaca_emb_top1_equal": 0.1875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.034634623208270626,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8423428535461426,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.934,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.978,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.008,
      "eval_python_code_alpaca_num_pred_words": 44.414,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.15591227973498,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33536381990607,
      "eval_python_code_alpaca_runtime": 18.6613,
      "eval_python_code_alpaca_samples_per_second": 26.793,
      "eval_python_code_alpaca_steps_per_second": 0.054,
      "eval_python_code_alpaca_token_set_f1": 0.48069464667014317,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005202922550663726,
      "eval_python_code_alpaca_token_set_precision": 0.5388164144056561,
      "eval_python_code_alpaca_token_set_recall": 0.4559315599631794,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 203125
    },
    {
      "epoch": 39.0,
      "eval_wikibio_accuracy": 0.32928125,
      "eval_wikibio_bleu_score": 6.274163216293377,
      "eval_wikibio_bleu_score_sem": 0.22766949167906855,
      "eval_wikibio_emb_cos_sim": 0.7473706007003784,
      "eval_wikibio_emb_cos_sim_sem": 0.008597789825917355,
      "eval_wikibio_emb_top1_equal": 0.1640625,
      "eval_wikibio_emb_top1_equal_sem": 0.03286167651298939,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.662121057510376,
      "eval_wikibio_n_ngrams_match_1": 10.224,
      "eval_wikibio_n_ngrams_match_2": 3.522,
      "eval_wikibio_n_ngrams_match_3": 1.31,
      "eval_wikibio_num_pred_words": 36.074,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 38.943857492686355,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3627067170521525,
      "eval_wikibio_runtime": 29.8847,
      "eval_wikibio_samples_per_second": 16.731,
      "eval_wikibio_steps_per_second": 0.033,
      "eval_wikibio_token_set_f1": 0.32544113241180084,
      "eval_wikibio_token_set_f1_sem": 0.0053832983976861174,
      "eval_wikibio_token_set_precision": 0.3316719645727253,
      "eval_wikibio_token_set_recall": 0.3361491030281298,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 203125
    },
    {
      "epoch": 39.0,
      "eval_nq_accuracy": 0.536,
      "eval_nq_bleu_score": 12.234332571274141,
      "eval_nq_bleu_score_sem": 0.49121778967894003,
      "eval_nq_emb_cos_sim": 0.8396992683410645,
      "eval_nq_emb_cos_sim_sem": 0.006906599455668649,
      "eval_nq_emb_top1_equal": 0.296875,
      "eval_nq_emb_top1_equal_sem": 0.04054163310179599,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.128368616104126,
      "eval_nq_n_ngrams_match_1": 23.756,
      "eval_nq_n_ngrams_match_2": 8.774,
      "eval_nq_n_ngrams_match_3": 4.074,
      "eval_nq_num_pred_words": 49.208,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.401150124870956,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4587641294054045,
      "eval_nq_runtime": 30.3867,
      "eval_nq_samples_per_second": 16.455,
      "eval_nq_steps_per_second": 0.033,
      "eval_nq_token_set_f1": 0.47121116363681237,
      "eval_nq_token_set_f1_sem": 0.0046953435380699795,
      "eval_nq_token_set_precision": 0.43083229617691476,
      "eval_nq_token_set_recall": 0.5270876495552392,
      "eval_nq_true_num_tokens": 64.0,
      "step": 203125
    },
    {
      "epoch": 39.0,
      "learning_rate": 0.001,
      "loss": 2.5027,
      "step": 203136
    },
    {
      "epoch": 39.01,
      "learning_rate": 0.001,
      "loss": 2.484,
      "step": 203148
    },
    {
      "epoch": 39.01,
      "learning_rate": 0.001,
      "loss": 2.4969,
      "step": 203160
    },
    {
      "epoch": 39.01,
      "learning_rate": 0.001,
      "loss": 2.4952,
      "step": 203172
    },
    {
      "epoch": 39.01,
      "learning_rate": 0.001,
      "loss": 2.5022,
      "step": 203184
    },
    {
      "epoch": 39.02,
      "learning_rate": 0.001,
      "loss": 2.4886,
      "step": 203196
    },
    {
      "epoch": 39.02,
      "learning_rate": 0.001,
      "loss": 2.4855,
      "step": 203208
    },
    {
      "epoch": 39.02,
      "learning_rate": 0.001,
      "loss": 2.4983,
      "step": 203220
    },
    {
      "epoch": 39.02,
      "learning_rate": 0.001,
      "loss": 2.5006,
      "step": 203232
    },
    {
      "epoch": 39.03,
      "learning_rate": 0.001,
      "loss": 2.4899,
      "step": 203244
    },
    {
      "epoch": 39.03,
      "learning_rate": 0.001,
      "loss": 2.4945,
      "step": 203256
    },
    {
      "epoch": 39.03,
      "learning_rate": 0.001,
      "loss": 2.4842,
      "step": 203268
    },
    {
      "epoch": 39.03,
      "learning_rate": 0.001,
      "loss": 2.4999,
      "step": 203280
    },
    {
      "epoch": 39.03,
      "learning_rate": 0.001,
      "loss": 2.5004,
      "step": 203292
    },
    {
      "epoch": 39.04,
      "learning_rate": 0.001,
      "loss": 2.4989,
      "step": 203304
    },
    {
      "epoch": 39.04,
      "learning_rate": 0.001,
      "loss": 2.5004,
      "step": 203316
    },
    {
      "epoch": 39.04,
      "learning_rate": 0.001,
      "loss": 2.4967,
      "step": 203328
    },
    {
      "epoch": 39.04,
      "learning_rate": 0.001,
      "loss": 2.4882,
      "step": 203340
    },
    {
      "epoch": 39.05,
      "learning_rate": 0.001,
      "loss": 2.5003,
      "step": 203352
    },
    {
      "epoch": 39.05,
      "learning_rate": 0.001,
      "loss": 2.4892,
      "step": 203364
    },
    {
      "epoch": 39.05,
      "learning_rate": 0.001,
      "loss": 2.4933,
      "step": 203376
    },
    {
      "epoch": 39.05,
      "learning_rate": 0.001,
      "loss": 2.4942,
      "step": 203388
    },
    {
      "epoch": 39.06,
      "learning_rate": 0.001,
      "loss": 2.495,
      "step": 203400
    },
    {
      "epoch": 39.06,
      "learning_rate": 0.001,
      "loss": 2.4855,
      "step": 203412
    },
    {
      "epoch": 39.06,
      "learning_rate": 0.001,
      "loss": 2.4938,
      "step": 203424
    },
    {
      "epoch": 39.06,
      "learning_rate": 0.001,
      "loss": 2.4877,
      "step": 203436
    },
    {
      "epoch": 39.06,
      "learning_rate": 0.001,
      "loss": 2.4968,
      "step": 203448
    },
    {
      "epoch": 39.07,
      "learning_rate": 0.001,
      "loss": 2.4891,
      "step": 203460
    },
    {
      "epoch": 39.07,
      "learning_rate": 0.001,
      "loss": 2.4992,
      "step": 203472
    },
    {
      "epoch": 39.07,
      "learning_rate": 0.001,
      "loss": 2.4955,
      "step": 203484
    },
    {
      "epoch": 39.07,
      "learning_rate": 0.001,
      "loss": 2.4977,
      "step": 203496
    },
    {
      "epoch": 39.08,
      "learning_rate": 0.001,
      "loss": 2.4842,
      "step": 203508
    },
    {
      "epoch": 39.08,
      "learning_rate": 0.001,
      "loss": 2.4958,
      "step": 203520
    },
    {
      "epoch": 39.08,
      "learning_rate": 0.001,
      "loss": 2.4998,
      "step": 203532
    },
    {
      "epoch": 39.08,
      "learning_rate": 0.001,
      "loss": 2.5019,
      "step": 203544
    },
    {
      "epoch": 39.09,
      "learning_rate": 0.001,
      "loss": 2.4997,
      "step": 203556
    },
    {
      "epoch": 39.09,
      "learning_rate": 0.001,
      "loss": 2.5044,
      "step": 203568
    },
    {
      "epoch": 39.09,
      "learning_rate": 0.001,
      "loss": 2.5021,
      "step": 203580
    },
    {
      "epoch": 39.09,
      "learning_rate": 0.001,
      "loss": 2.4936,
      "step": 203592
    },
    {
      "epoch": 39.09,
      "learning_rate": 0.001,
      "loss": 2.511,
      "step": 203604
    },
    {
      "epoch": 39.1,
      "learning_rate": 0.001,
      "loss": 2.5019,
      "step": 203616
    },
    {
      "epoch": 39.1,
      "learning_rate": 0.001,
      "loss": 2.5036,
      "step": 203628
    },
    {
      "epoch": 39.1,
      "learning_rate": 0.001,
      "loss": 2.5105,
      "step": 203640
    },
    {
      "epoch": 39.1,
      "learning_rate": 0.001,
      "loss": 2.5141,
      "step": 203652
    },
    {
      "epoch": 39.11,
      "learning_rate": 0.001,
      "loss": 2.4925,
      "step": 203664
    },
    {
      "epoch": 39.11,
      "learning_rate": 0.001,
      "loss": 2.4912,
      "step": 203676
    },
    {
      "epoch": 39.11,
      "learning_rate": 0.001,
      "loss": 2.493,
      "step": 203688
    },
    {
      "epoch": 39.11,
      "learning_rate": 0.001,
      "loss": 2.4884,
      "step": 203700
    },
    {
      "epoch": 39.12,
      "learning_rate": 0.001,
      "loss": 2.5084,
      "step": 203712
    },
    {
      "epoch": 39.12,
      "learning_rate": 0.001,
      "loss": 2.492,
      "step": 203724
    },
    {
      "epoch": 39.12,
      "learning_rate": 0.001,
      "loss": 2.4973,
      "step": 203736
    },
    {
      "epoch": 39.12,
      "learning_rate": 0.001,
      "loss": 2.4996,
      "step": 203748
    },
    {
      "epoch": 39.12,
      "eval_ag_news_accuracy": 0.32875,
      "eval_ag_news_bleu_score": 4.9473473248805,
      "eval_ag_news_bleu_score_sem": 0.16153919793596638,
      "eval_ag_news_emb_cos_sim": 0.8240107893943787,
      "eval_ag_news_emb_cos_sim_sem": 0.0062118824149514544,
      "eval_ag_news_emb_top1_equal": 0.2265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03714537682851538,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.472358465194702,
      "eval_ag_news_n_ngrams_match_1": 14.482,
      "eval_ag_news_n_ngrams_match_2": 3.24,
      "eval_ag_news_n_ngrams_match_3": 0.9,
      "eval_ag_news_num_pred_words": 46.866,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.212625281621065,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3573148274794353,
      "eval_ag_news_runtime": 31.1177,
      "eval_ag_news_samples_per_second": 16.068,
      "eval_ag_news_steps_per_second": 0.032,
      "eval_ag_news_token_set_f1": 0.358938561080545,
      "eval_ag_news_token_set_f1_sem": 0.004421656460898802,
      "eval_ag_news_token_set_precision": 0.3436742059885483,
      "eval_ag_news_token_set_recall": 0.3899739805137839,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 203750
    },
    {
      "epoch": 39.12,
      "eval_anthropic_toxic_prompts_accuracy": 0.1155625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.335496479194916,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.13568852905974288,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6861573457717896,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008483738478213352,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2237801551818848,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.326,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.012,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.792,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.106,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.12290940564809,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21685699248327955,
      "eval_anthropic_toxic_prompts_runtime": 29.42,
      "eval_anthropic_toxic_prompts_samples_per_second": 16.995,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.034,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3638831479368107,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006794694963805851,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44146976634021673,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.33590747740380744,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 203750
    },
    {
      "epoch": 39.12,
      "eval_arxiv_accuracy": 0.35075,
      "eval_arxiv_bleu_score": 4.260086412890832,
      "eval_arxiv_bleu_score_sem": 0.12473198770508127,
      "eval_arxiv_emb_cos_sim": 0.7841321229934692,
      "eval_arxiv_emb_cos_sim_sem": 0.008339556743635615,
      "eval_arxiv_emb_top1_equal": 0.2265625,
      "eval_arxiv_emb_top1_equal_sem": 0.03714537682851538,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3372983932495117,
      "eval_arxiv_n_ngrams_match_1": 15.346,
      "eval_arxiv_n_ngrams_match_2": 2.978,
      "eval_arxiv_n_ngrams_match_3": 0.614,
      "eval_arxiv_num_pred_words": 40.292,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.14299261070701,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3702762396043655,
      "eval_arxiv_runtime": 31.5376,
      "eval_arxiv_samples_per_second": 15.854,
      "eval_arxiv_steps_per_second": 0.032,
      "eval_arxiv_token_set_f1": 0.35932383291701797,
      "eval_arxiv_token_set_f1_sem": 0.004228270528906881,
      "eval_arxiv_token_set_precision": 0.3108394176820865,
      "eval_arxiv_token_set_recall": 0.44266503946504865,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 203750
    },
    {
      "epoch": 39.12,
      "eval_python_code_alpaca_accuracy": 0.16259375,
      "eval_python_code_alpaca_bleu_score": 4.964613270381475,
      "eval_python_code_alpaca_bleu_score_sem": 0.16002868777585713,
      "eval_python_code_alpaca_emb_cos_sim": 0.7728931903839111,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007140004751454932,
      "eval_python_code_alpaca_emb_top1_equal": 0.171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03347745514062371,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8552279472351074,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.98,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.12,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.13,
      "eval_python_code_alpaca_num_pred_words": 43.832,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.378398114000966,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3398192028790994,
      "eval_python_code_alpaca_runtime": 30.4794,
      "eval_python_code_alpaca_samples_per_second": 16.405,
      "eval_python_code_alpaca_steps_per_second": 0.033,
      "eval_python_code_alpaca_token_set_f1": 0.48745195903516697,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005331404605602125,
      "eval_python_code_alpaca_token_set_precision": 0.5450476333221513,
      "eval_python_code_alpaca_token_set_recall": 0.460519920061498,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 203750
    },
    {
      "epoch": 39.12,
      "eval_wikibio_accuracy": 0.32909375,
      "eval_wikibio_bleu_score": 5.943904832216061,
      "eval_wikibio_bleu_score_sem": 0.21462632240466614,
      "eval_wikibio_emb_cos_sim": 0.7392363548278809,
      "eval_wikibio_emb_cos_sim_sem": 0.010813693972147514,
      "eval_wikibio_emb_top1_equal": 0.1640625,
      "eval_wikibio_emb_top1_equal_sem": 0.03286167651298939,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.675292730331421,
      "eval_wikibio_n_ngrams_match_1": 10.082,
      "eval_wikibio_n_ngrams_match_2": 3.346,
      "eval_wikibio_n_ngrams_match_3": 1.25,
      "eval_wikibio_num_pred_words": 36.616,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 39.46020636595973,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3569254595229421,
      "eval_wikibio_runtime": 30.7658,
      "eval_wikibio_samples_per_second": 16.252,
      "eval_wikibio_steps_per_second": 0.033,
      "eval_wikibio_token_set_f1": 0.31921761263118037,
      "eval_wikibio_token_set_f1_sem": 0.005456603898546164,
      "eval_wikibio_token_set_precision": 0.3264479250203626,
      "eval_wikibio_token_set_recall": 0.32812690487486335,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 203750
    },
    {
      "epoch": 39.12,
      "eval_nq_accuracy": 0.534875,
      "eval_nq_bleu_score": 11.846666092724572,
      "eval_nq_bleu_score_sem": 0.47097923772056954,
      "eval_nq_emb_cos_sim": 0.8380686044692993,
      "eval_nq_emb_cos_sim_sem": 0.007021667596092977,
      "eval_nq_emb_top1_equal": 0.3046875,
      "eval_nq_emb_top1_equal_sem": 0.04084279867618665,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.127319574356079,
      "eval_nq_n_ngrams_match_1": 23.394,
      "eval_nq_n_ngrams_match_2": 8.606,
      "eval_nq_n_ngrams_match_3": 3.964,
      "eval_nq_num_pred_words": 49.172,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.392341588727241,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4520691614068224,
      "eval_nq_runtime": 29.1737,
      "eval_nq_samples_per_second": 17.139,
      "eval_nq_steps_per_second": 0.034,
      "eval_nq_token_set_f1": 0.466333941026941,
      "eval_nq_token_set_f1_sem": 0.004832038928368858,
      "eval_nq_token_set_precision": 0.4244480202142972,
      "eval_nq_token_set_recall": 0.5258094064124522,
      "eval_nq_true_num_tokens": 64.0,
      "step": 203750
    },
    {
      "epoch": 39.12,
      "learning_rate": 0.001,
      "loss": 2.4968,
      "step": 203760
    },
    {
      "epoch": 39.13,
      "learning_rate": 0.001,
      "loss": 2.4894,
      "step": 203772
    },
    {
      "epoch": 39.13,
      "learning_rate": 0.001,
      "loss": 2.4904,
      "step": 203784
    },
    {
      "epoch": 39.13,
      "learning_rate": 0.001,
      "loss": 2.4917,
      "step": 203796
    },
    {
      "epoch": 39.13,
      "learning_rate": 0.001,
      "loss": 2.5055,
      "step": 203808
    },
    {
      "epoch": 39.14,
      "learning_rate": 0.001,
      "loss": 2.4984,
      "step": 203820
    },
    {
      "epoch": 39.14,
      "learning_rate": 0.001,
      "loss": 2.4934,
      "step": 203832
    },
    {
      "epoch": 39.14,
      "learning_rate": 0.001,
      "loss": 2.4994,
      "step": 203844
    },
    {
      "epoch": 39.14,
      "learning_rate": 0.001,
      "loss": 2.4935,
      "step": 203856
    },
    {
      "epoch": 39.15,
      "learning_rate": 0.001,
      "loss": 2.5123,
      "step": 203868
    },
    {
      "epoch": 39.15,
      "learning_rate": 0.001,
      "loss": 2.4898,
      "step": 203880
    },
    {
      "epoch": 39.15,
      "learning_rate": 0.001,
      "loss": 2.5025,
      "step": 203892
    },
    {
      "epoch": 39.15,
      "learning_rate": 0.001,
      "loss": 2.5069,
      "step": 203904
    },
    {
      "epoch": 39.15,
      "learning_rate": 0.001,
      "loss": 2.495,
      "step": 203916
    },
    {
      "epoch": 39.16,
      "learning_rate": 0.001,
      "loss": 2.4915,
      "step": 203928
    },
    {
      "epoch": 39.16,
      "learning_rate": 0.001,
      "loss": 2.4976,
      "step": 203940
    },
    {
      "epoch": 39.16,
      "learning_rate": 0.001,
      "loss": 2.5005,
      "step": 203952
    },
    {
      "epoch": 39.16,
      "learning_rate": 0.001,
      "loss": 2.4911,
      "step": 203964
    },
    {
      "epoch": 39.17,
      "learning_rate": 0.001,
      "loss": 2.4942,
      "step": 203976
    },
    {
      "epoch": 39.17,
      "learning_rate": 0.001,
      "loss": 2.4993,
      "step": 203988
    },
    {
      "epoch": 39.17,
      "learning_rate": 0.001,
      "loss": 2.4962,
      "step": 204000
    },
    {
      "epoch": 39.17,
      "learning_rate": 0.001,
      "loss": 2.5043,
      "step": 204012
    },
    {
      "epoch": 39.18,
      "learning_rate": 0.001,
      "loss": 2.4936,
      "step": 204024
    },
    {
      "epoch": 39.18,
      "learning_rate": 0.001,
      "loss": 2.5006,
      "step": 204036
    },
    {
      "epoch": 39.18,
      "learning_rate": 0.001,
      "loss": 2.4998,
      "step": 204048
    },
    {
      "epoch": 39.18,
      "learning_rate": 0.001,
      "loss": 2.4979,
      "step": 204060
    },
    {
      "epoch": 39.18,
      "learning_rate": 0.001,
      "loss": 2.5006,
      "step": 204072
    },
    {
      "epoch": 39.19,
      "learning_rate": 0.001,
      "loss": 2.501,
      "step": 204084
    },
    {
      "epoch": 39.19,
      "learning_rate": 0.001,
      "loss": 2.4919,
      "step": 204096
    },
    {
      "epoch": 39.19,
      "learning_rate": 0.001,
      "loss": 2.504,
      "step": 204108
    },
    {
      "epoch": 39.19,
      "learning_rate": 0.001,
      "loss": 2.508,
      "step": 204120
    },
    {
      "epoch": 39.2,
      "learning_rate": 0.001,
      "loss": 2.4901,
      "step": 204132
    },
    {
      "epoch": 39.2,
      "learning_rate": 0.001,
      "loss": 2.4971,
      "step": 204144
    },
    {
      "epoch": 39.2,
      "learning_rate": 0.001,
      "loss": 2.5054,
      "step": 204156
    },
    {
      "epoch": 39.2,
      "learning_rate": 0.001,
      "loss": 2.4941,
      "step": 204168
    },
    {
      "epoch": 39.21,
      "learning_rate": 0.001,
      "loss": 2.4984,
      "step": 204180
    },
    {
      "epoch": 39.21,
      "learning_rate": 0.001,
      "loss": 2.4974,
      "step": 204192
    },
    {
      "epoch": 39.21,
      "learning_rate": 0.001,
      "loss": 2.4904,
      "step": 204204
    },
    {
      "epoch": 39.21,
      "learning_rate": 0.001,
      "loss": 2.5072,
      "step": 204216
    },
    {
      "epoch": 39.21,
      "learning_rate": 0.001,
      "loss": 2.4894,
      "step": 204228
    },
    {
      "epoch": 39.22,
      "learning_rate": 0.001,
      "loss": 2.505,
      "step": 204240
    },
    {
      "epoch": 39.22,
      "learning_rate": 0.001,
      "loss": 2.504,
      "step": 204252
    },
    {
      "epoch": 39.22,
      "learning_rate": 0.001,
      "loss": 2.5045,
      "step": 204264
    },
    {
      "epoch": 39.22,
      "learning_rate": 0.001,
      "loss": 2.4913,
      "step": 204276
    },
    {
      "epoch": 39.23,
      "learning_rate": 0.001,
      "loss": 2.4944,
      "step": 204288
    },
    {
      "epoch": 39.23,
      "learning_rate": 0.001,
      "loss": 2.5013,
      "step": 204300
    },
    {
      "epoch": 39.23,
      "learning_rate": 0.001,
      "loss": 2.5086,
      "step": 204312
    },
    {
      "epoch": 39.23,
      "learning_rate": 0.001,
      "loss": 2.5024,
      "step": 204324
    },
    {
      "epoch": 39.24,
      "learning_rate": 0.001,
      "loss": 2.4878,
      "step": 204336
    },
    {
      "epoch": 39.24,
      "learning_rate": 0.001,
      "loss": 2.5007,
      "step": 204348
    },
    {
      "epoch": 39.24,
      "learning_rate": 0.001,
      "loss": 2.4936,
      "step": 204360
    },
    {
      "epoch": 39.24,
      "learning_rate": 0.001,
      "loss": 2.4986,
      "step": 204372
    },
    {
      "epoch": 39.24,
      "eval_ag_news_accuracy": 0.32759375,
      "eval_ag_news_bleu_score": 4.812495436231566,
      "eval_ag_news_bleu_score_sem": 0.15096311663611467,
      "eval_ag_news_emb_cos_sim": 0.8207637667655945,
      "eval_ag_news_emb_cos_sim_sem": 0.00670883129593185,
      "eval_ag_news_emb_top1_equal": 0.3046875,
      "eval_ag_news_emb_top1_equal_sem": 0.04084279867618665,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4924023151397705,
      "eval_ag_news_n_ngrams_match_1": 14.48,
      "eval_ag_news_n_ngrams_match_2": 3.228,
      "eval_ag_news_n_ngrams_match_3": 0.884,
      "eval_ag_news_num_pred_words": 47.206,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.864804566368434,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35893640554544903,
      "eval_ag_news_runtime": 31.7315,
      "eval_ag_news_samples_per_second": 15.757,
      "eval_ag_news_steps_per_second": 0.032,
      "eval_ag_news_token_set_f1": 0.3602045285771965,
      "eval_ag_news_token_set_f1_sem": 0.004520542981490912,
      "eval_ag_news_token_set_precision": 0.3464958004265387,
      "eval_ag_news_token_set_recall": 0.3897373353445049,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 204375
    },
    {
      "epoch": 39.24,
      "eval_anthropic_toxic_prompts_accuracy": 0.11640625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.275336911045023,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12975173034238963,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.682098925113678,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008458190244026161,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2052226066589355,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.336,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.034,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.784,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.042,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.660989105870556,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21746538669019225,
      "eval_anthropic_toxic_prompts_runtime": 11.5974,
      "eval_anthropic_toxic_prompts_samples_per_second": 43.113,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.086,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36053459678352867,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0068794499294335745,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4444711280808593,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3300423361431506,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 204375
    },
    {
      "epoch": 39.24,
      "eval_arxiv_accuracy": 0.35346875,
      "eval_arxiv_bleu_score": 4.539169202684817,
      "eval_arxiv_bleu_score_sem": 0.13164209352990341,
      "eval_arxiv_emb_cos_sim": 0.7800302505493164,
      "eval_arxiv_emb_cos_sim_sem": 0.006690054216375739,
      "eval_arxiv_emb_top1_equal": 0.2578125,
      "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.339123010635376,
      "eval_arxiv_n_ngrams_match_1": 15.482,
      "eval_arxiv_n_ngrams_match_2": 3.072,
      "eval_arxiv_n_ngrams_match_3": 0.734,
      "eval_arxiv_num_pred_words": 41.21,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.194389680048527,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3691642799597068,
      "eval_arxiv_runtime": 10.7989,
      "eval_arxiv_samples_per_second": 46.301,
      "eval_arxiv_steps_per_second": 0.093,
      "eval_arxiv_token_set_f1": 0.362099512088158,
      "eval_arxiv_token_set_f1_sem": 0.0040664446910102505,
      "eval_arxiv_token_set_precision": 0.31570788709091974,
      "eval_arxiv_token_set_recall": 0.44104465378559937,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 204375
    },
    {
      "epoch": 39.24,
      "eval_python_code_alpaca_accuracy": 0.16290625,
      "eval_python_code_alpaca_bleu_score": 4.549148174301849,
      "eval_python_code_alpaca_bleu_score_sem": 0.14668840249181564,
      "eval_python_code_alpaca_emb_cos_sim": 0.7676301002502441,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.006873681449892624,
      "eval_python_code_alpaca_emb_top1_equal": 0.1328125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8761374950408936,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.74,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.838,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.958,
      "eval_python_code_alpaca_num_pred_words": 43.51,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.745598175265357,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33312843667493164,
      "eval_python_code_alpaca_runtime": 9.9423,
      "eval_python_code_alpaca_samples_per_second": 50.29,
      "eval_python_code_alpaca_steps_per_second": 0.101,
      "eval_python_code_alpaca_token_set_f1": 0.4818592495707684,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005614890313518356,
      "eval_python_code_alpaca_token_set_precision": 0.5306696535406978,
      "eval_python_code_alpaca_token_set_recall": 0.46558071392198613,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 204375
    },
    {
      "epoch": 39.24,
      "eval_wikibio_accuracy": 0.32940625,
      "eval_wikibio_bleu_score": 6.3679576527504285,
      "eval_wikibio_bleu_score_sem": 0.2383635099425456,
      "eval_wikibio_emb_cos_sim": 0.745114803314209,
      "eval_wikibio_emb_cos_sim_sem": 0.009390952026588377,
      "eval_wikibio_emb_top1_equal": 0.1796875,
      "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6570096015930176,
      "eval_wikibio_n_ngrams_match_1": 10.156,
      "eval_wikibio_n_ngrams_match_2": 3.47,
      "eval_wikibio_n_ngrams_match_3": 1.348,
      "eval_wikibio_num_pred_words": 36.118,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 38.745305558885775,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3614692441132036,
      "eval_wikibio_runtime": 10.1342,
      "eval_wikibio_samples_per_second": 49.338,
      "eval_wikibio_steps_per_second": 0.099,
      "eval_wikibio_token_set_f1": 0.32359679767768096,
      "eval_wikibio_token_set_f1_sem": 0.0056194700104635,
      "eval_wikibio_token_set_precision": 0.3303050107824481,
      "eval_wikibio_token_set_recall": 0.33378729511068844,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 204375
    },
    {
      "epoch": 39.24,
      "eval_nq_accuracy": 0.53715625,
      "eval_nq_bleu_score": 12.009216620232321,
      "eval_nq_bleu_score_sem": 0.4765669954716235,
      "eval_nq_emb_cos_sim": 0.8332507610321045,
      "eval_nq_emb_cos_sim_sem": 0.007234609279838205,
      "eval_nq_emb_top1_equal": 0.3125,
      "eval_nq_emb_top1_equal_sem": 0.041130074229814934,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.128572463989258,
      "eval_nq_n_ngrams_match_1": 23.572,
      "eval_nq_n_ngrams_match_2": 8.698,
      "eval_nq_n_ngrams_match_3": 4.028,
      "eval_nq_num_pred_words": 49.216,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.402862856118976,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45629042965050415,
      "eval_nq_runtime": 11.1603,
      "eval_nq_samples_per_second": 44.802,
      "eval_nq_steps_per_second": 0.09,
      "eval_nq_token_set_f1": 0.472371337056887,
      "eval_nq_token_set_f1_sem": 0.004851472801572849,
      "eval_nq_token_set_precision": 0.43052903678363896,
      "eval_nq_token_set_recall": 0.5312989999453681,
      "eval_nq_true_num_tokens": 64.0,
      "step": 204375
    },
    {
      "epoch": 39.24,
      "learning_rate": 0.001,
      "loss": 2.5063,
      "step": 204384
    },
    {
      "epoch": 39.25,
      "learning_rate": 0.001,
      "loss": 2.4995,
      "step": 204396
    },
    {
      "epoch": 39.25,
      "learning_rate": 0.001,
      "loss": 2.5021,
      "step": 204408
    },
    {
      "epoch": 39.25,
      "learning_rate": 0.001,
      "loss": 2.5004,
      "step": 204420
    },
    {
      "epoch": 39.25,
      "learning_rate": 0.001,
      "loss": 2.4947,
      "step": 204432
    },
    {
      "epoch": 39.26,
      "learning_rate": 0.001,
      "loss": 2.5092,
      "step": 204444
    },
    {
      "epoch": 39.26,
      "learning_rate": 0.001,
      "loss": 2.5051,
      "step": 204456
    },
    {
      "epoch": 39.26,
      "learning_rate": 0.001,
      "loss": 2.5066,
      "step": 204468
    },
    {
      "epoch": 39.26,
      "learning_rate": 0.001,
      "loss": 2.4857,
      "step": 204480
    },
    {
      "epoch": 39.26,
      "learning_rate": 0.001,
      "loss": 2.4905,
      "step": 204492
    },
    {
      "epoch": 39.27,
      "learning_rate": 0.001,
      "loss": 2.4974,
      "step": 204504
    },
    {
      "epoch": 39.27,
      "learning_rate": 0.001,
      "loss": 2.498,
      "step": 204516
    },
    {
      "epoch": 39.27,
      "learning_rate": 0.001,
      "loss": 2.4956,
      "step": 204528
    },
    {
      "epoch": 39.27,
      "learning_rate": 0.001,
      "loss": 2.498,
      "step": 204540
    },
    {
      "epoch": 39.28,
      "learning_rate": 0.001,
      "loss": 2.4949,
      "step": 204552
    },
    {
      "epoch": 39.28,
      "learning_rate": 0.001,
      "loss": 2.4957,
      "step": 204564
    },
    {
      "epoch": 39.28,
      "learning_rate": 0.001,
      "loss": 2.5067,
      "step": 204576
    },
    {
      "epoch": 39.28,
      "learning_rate": 0.001,
      "loss": 2.5018,
      "step": 204588
    },
    {
      "epoch": 39.29,
      "learning_rate": 0.001,
      "loss": 2.5094,
      "step": 204600
    },
    {
      "epoch": 39.29,
      "learning_rate": 0.001,
      "loss": 2.4971,
      "step": 204612
    },
    {
      "epoch": 39.29,
      "learning_rate": 0.001,
      "loss": 2.5062,
      "step": 204624
    },
    {
      "epoch": 39.29,
      "learning_rate": 0.001,
      "loss": 2.4946,
      "step": 204636
    },
    {
      "epoch": 39.29,
      "learning_rate": 0.001,
      "loss": 2.4868,
      "step": 204648
    },
    {
      "epoch": 39.3,
      "learning_rate": 0.001,
      "loss": 2.4985,
      "step": 204660
    },
    {
      "epoch": 39.3,
      "learning_rate": 0.001,
      "loss": 2.4981,
      "step": 204672
    },
    {
      "epoch": 39.3,
      "learning_rate": 0.001,
      "loss": 2.4952,
      "step": 204684
    },
    {
      "epoch": 39.3,
      "learning_rate": 0.001,
      "loss": 2.4956,
      "step": 204696
    },
    {
      "epoch": 39.31,
      "learning_rate": 0.001,
      "loss": 2.5049,
      "step": 204708
    },
    {
      "epoch": 39.31,
      "learning_rate": 0.001,
      "loss": 2.5029,
      "step": 204720
    },
    {
      "epoch": 39.31,
      "learning_rate": 0.001,
      "loss": 2.5103,
      "step": 204732
    },
    {
      "epoch": 39.31,
      "learning_rate": 0.001,
      "loss": 2.5033,
      "step": 204744
    },
    {
      "epoch": 39.32,
      "learning_rate": 0.001,
      "loss": 2.499,
      "step": 204756
    },
    {
      "epoch": 39.32,
      "learning_rate": 0.001,
      "loss": 2.4949,
      "step": 204768
    },
    {
      "epoch": 39.32,
      "learning_rate": 0.001,
      "loss": 2.5106,
      "step": 204780
    },
    {
      "epoch": 39.32,
      "learning_rate": 0.001,
      "loss": 2.5117,
      "step": 204792
    },
    {
      "epoch": 39.32,
      "learning_rate": 0.001,
      "loss": 2.4972,
      "step": 204804
    },
    {
      "epoch": 39.33,
      "learning_rate": 0.001,
      "loss": 2.5085,
      "step": 204816
    },
    {
      "epoch": 39.33,
      "learning_rate": 0.001,
      "loss": 2.5053,
      "step": 204828
    },
    {
      "epoch": 39.33,
      "learning_rate": 0.001,
      "loss": 2.4931,
      "step": 204840
    },
    {
      "epoch": 39.33,
      "learning_rate": 0.001,
      "loss": 2.4947,
      "step": 204852
    },
    {
      "epoch": 39.34,
      "learning_rate": 0.001,
      "loss": 2.5002,
      "step": 204864
    },
    {
      "epoch": 39.34,
      "learning_rate": 0.001,
      "loss": 2.4936,
      "step": 204876
    },
    {
      "epoch": 39.34,
      "learning_rate": 0.001,
      "loss": 2.5096,
      "step": 204888
    },
    {
      "epoch": 39.34,
      "learning_rate": 0.001,
      "loss": 2.5068,
      "step": 204900
    },
    {
      "epoch": 39.35,
      "learning_rate": 0.001,
      "loss": 2.5079,
      "step": 204912
    },
    {
      "epoch": 39.35,
      "learning_rate": 0.001,
      "loss": 2.4988,
      "step": 204924
    },
    {
      "epoch": 39.35,
      "learning_rate": 0.001,
      "loss": 2.4999,
      "step": 204936
    },
    {
      "epoch": 39.35,
      "learning_rate": 0.001,
      "loss": 2.4973,
      "step": 204948
    },
    {
      "epoch": 39.35,
      "learning_rate": 0.001,
      "loss": 2.4935,
      "step": 204960
    },
    {
      "epoch": 39.36,
      "learning_rate": 0.001,
      "loss": 2.5,
      "step": 204972
    },
    {
      "epoch": 39.36,
      "learning_rate": 0.001,
      "loss": 2.4952,
      "step": 204984
    },
    {
      "epoch": 39.36,
      "learning_rate": 0.001,
      "loss": 2.4909,
      "step": 204996
    },
    {
      "epoch": 39.36,
      "eval_ag_news_accuracy": 0.32890625,
      "eval_ag_news_bleu_score": 4.972089043523076,
      "eval_ag_news_bleu_score_sem": 0.15269235607015177,
      "eval_ag_news_emb_cos_sim": 0.8183099031448364,
      "eval_ag_news_emb_cos_sim_sem": 0.00684426294963202,
      "eval_ag_news_emb_top1_equal": 0.265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.465911626815796,
      "eval_ag_news_n_ngrams_match_1": 14.298,
      "eval_ag_news_n_ngrams_match_2": 3.22,
      "eval_ag_news_n_ngrams_match_3": 0.918,
      "eval_ag_news_num_pred_words": 46.454,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.005623662606034,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3569282486716935,
      "eval_ag_news_runtime": 11.0802,
      "eval_ag_news_samples_per_second": 45.126,
      "eval_ag_news_steps_per_second": 0.09,
      "eval_ag_news_token_set_f1": 0.35690292047766675,
      "eval_ag_news_token_set_f1_sem": 0.00436639409262066,
      "eval_ag_news_token_set_precision": 0.34312029375680186,
      "eval_ag_news_token_set_recall": 0.386596390311987,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 205000
    },
    {
      "epoch": 39.36,
      "eval_anthropic_toxic_prompts_accuracy": 0.11609375,
      "eval_anthropic_toxic_prompts_bleu_score": 3.195115955667645,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12260503525819286,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6771164536476135,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009827696768648992,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.20070743560791,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.192,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.99,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.732,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.492,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.549891522808853,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21549260545630755,
      "eval_anthropic_toxic_prompts_runtime": 27.6955,
      "eval_anthropic_toxic_prompts_samples_per_second": 18.053,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.036,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35354395396152655,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006709371489207184,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.43306553432635153,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3236904524492781,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 205000
    },
    {
      "epoch": 39.36,
      "eval_arxiv_accuracy": 0.3525,
      "eval_arxiv_bleu_score": 4.426560125726239,
      "eval_arxiv_bleu_score_sem": 0.13234200413543656,
      "eval_arxiv_emb_cos_sim": 0.7778306603431702,
      "eval_arxiv_emb_cos_sim_sem": 0.007646142962499719,
      "eval_arxiv_emb_top1_equal": 0.2265625,
      "eval_arxiv_emb_top1_equal_sem": 0.03714537682851538,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.323012113571167,
      "eval_arxiv_n_ngrams_match_1": 15.5,
      "eval_arxiv_n_ngrams_match_2": 3.05,
      "eval_arxiv_n_ngrams_match_3": 0.666,
      "eval_arxiv_num_pred_words": 39.892,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.743792280701502,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3725569742263649,
      "eval_arxiv_runtime": 29.1768,
      "eval_arxiv_samples_per_second": 17.137,
      "eval_arxiv_steps_per_second": 0.034,
      "eval_arxiv_token_set_f1": 0.3633950572510826,
      "eval_arxiv_token_set_f1_sem": 0.004459922468865658,
      "eval_arxiv_token_set_precision": 0.31620526554235473,
      "eval_arxiv_token_set_recall": 0.4464147904051689,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 205000
    },
    {
      "epoch": 39.36,
      "eval_python_code_alpaca_accuracy": 0.16215625,
      "eval_python_code_alpaca_bleu_score": 4.8797278329786495,
      "eval_python_code_alpaca_bleu_score_sem": 0.1540356088352136,
      "eval_python_code_alpaca_emb_cos_sim": 0.7656465172767639,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007427849946082622,
      "eval_python_code_alpaca_emb_top1_equal": 0.140625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.865713357925415,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.878,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.07,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.102,
      "eval_python_code_alpaca_num_pred_words": 43.934,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.56157642701253,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33499245697364344,
      "eval_python_code_alpaca_runtime": 30.4813,
      "eval_python_code_alpaca_samples_per_second": 16.403,
      "eval_python_code_alpaca_steps_per_second": 0.033,
      "eval_python_code_alpaca_token_set_f1": 0.48794981355749495,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005702809367852026,
      "eval_python_code_alpaca_token_set_precision": 0.5412429496627671,
      "eval_python_code_alpaca_token_set_recall": 0.46541709498234735,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 205000
    },
    {
      "epoch": 39.36,
      "eval_wikibio_accuracy": 0.33003125,
      "eval_wikibio_bleu_score": 6.252296456260381,
      "eval_wikibio_bleu_score_sem": 0.23739503021767203,
      "eval_wikibio_emb_cos_sim": 0.7440297603607178,
      "eval_wikibio_emb_cos_sim_sem": 0.009114521383917551,
      "eval_wikibio_emb_top1_equal": 0.171875,
      "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.63110089302063,
      "eval_wikibio_n_ngrams_match_1": 9.842,
      "eval_wikibio_n_ngrams_match_2": 3.398,
      "eval_wikibio_n_ngrams_match_3": 1.314,
      "eval_wikibio_num_pred_words": 34.428,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.754357255487804,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.34894261351182987,
      "eval_wikibio_runtime": 11.1381,
      "eval_wikibio_samples_per_second": 44.891,
      "eval_wikibio_steps_per_second": 0.09,
      "eval_wikibio_token_set_f1": 0.31831396800720774,
      "eval_wikibio_token_set_f1_sem": 0.005887976119773076,
      "eval_wikibio_token_set_precision": 0.31992828361884634,
      "eval_wikibio_token_set_recall": 0.33741098380256,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 205000
    },
    {
      "epoch": 39.36,
      "eval_nq_accuracy": 0.534375,
      "eval_nq_bleu_score": 12.085231910151327,
      "eval_nq_bleu_score_sem": 0.4852618752677989,
      "eval_nq_emb_cos_sim": 0.8346148133277893,
      "eval_nq_emb_cos_sim_sem": 0.006960163490314439,
      "eval_nq_emb_top1_equal": 0.296875,
      "eval_nq_emb_top1_equal_sem": 0.04054163310179599,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.127124547958374,
      "eval_nq_n_ngrams_match_1": 23.294,
      "eval_nq_n_ngrams_match_2": 8.686,
      "eval_nq_n_ngrams_match_3": 4.052,
      "eval_nq_num_pred_words": 48.678,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.390705020171104,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45202098172641186,
      "eval_nq_runtime": 10.937,
      "eval_nq_samples_per_second": 45.716,
      "eval_nq_steps_per_second": 0.091,
      "eval_nq_token_set_f1": 0.46591854253911574,
      "eval_nq_token_set_f1_sem": 0.004851088053524866,
      "eval_nq_token_set_precision": 0.42455476004033643,
      "eval_nq_token_set_recall": 0.5256758889383211,
      "eval_nq_true_num_tokens": 64.0,
      "step": 205000
    },
    {
      "epoch": 39.36,
      "learning_rate": 0.001,
      "loss": 2.4967,
      "step": 205008
    },
    {
      "epoch": 39.37,
      "learning_rate": 0.001,
      "loss": 2.5013,
      "step": 205020
    },
    {
      "epoch": 39.37,
      "learning_rate": 0.001,
      "loss": 2.5044,
      "step": 205032
    },
    {
      "epoch": 39.37,
      "learning_rate": 0.001,
      "loss": 2.4934,
      "step": 205044
    },
    {
      "epoch": 39.37,
      "learning_rate": 0.001,
      "loss": 2.5084,
      "step": 205056
    },
    {
      "epoch": 39.38,
      "learning_rate": 0.001,
      "loss": 2.4945,
      "step": 205068
    },
    {
      "epoch": 39.38,
      "learning_rate": 0.001,
      "loss": 2.4971,
      "step": 205080
    },
    {
      "epoch": 39.38,
      "learning_rate": 0.001,
      "loss": 2.5032,
      "step": 205092
    },
    {
      "epoch": 39.38,
      "learning_rate": 0.001,
      "loss": 2.5005,
      "step": 205104
    },
    {
      "epoch": 39.38,
      "learning_rate": 0.001,
      "loss": 2.4945,
      "step": 205116
    },
    {
      "epoch": 39.39,
      "learning_rate": 0.001,
      "loss": 2.506,
      "step": 205128
    },
    {
      "epoch": 39.39,
      "learning_rate": 0.001,
      "loss": 2.5022,
      "step": 205140
    },
    {
      "epoch": 39.39,
      "learning_rate": 0.001,
      "loss": 2.4984,
      "step": 205152
    },
    {
      "epoch": 39.39,
      "learning_rate": 0.001,
      "loss": 2.5015,
      "step": 205164
    },
    {
      "epoch": 39.4,
      "learning_rate": 0.001,
      "loss": 2.5006,
      "step": 205176
    },
    {
      "epoch": 39.4,
      "learning_rate": 0.001,
      "loss": 2.5022,
      "step": 205188
    },
    {
      "epoch": 39.4,
      "learning_rate": 0.001,
      "loss": 2.4949,
      "step": 205200
    },
    {
      "epoch": 39.4,
      "learning_rate": 0.001,
      "loss": 2.504,
      "step": 205212
    },
    {
      "epoch": 39.41,
      "learning_rate": 0.001,
      "loss": 2.5034,
      "step": 205224
    },
    {
      "epoch": 39.41,
      "learning_rate": 0.001,
      "loss": 2.5016,
      "step": 205236
    },
    {
      "epoch": 39.41,
      "learning_rate": 0.001,
      "loss": 2.4946,
      "step": 205248
    },
    {
      "epoch": 39.41,
      "learning_rate": 0.001,
      "loss": 2.5013,
      "step": 205260
    },
    {
      "epoch": 39.41,
      "learning_rate": 0.001,
      "loss": 2.4957,
      "step": 205272
    },
    {
      "epoch": 39.42,
      "learning_rate": 0.001,
      "loss": 2.4946,
      "step": 205284
    },
    {
      "epoch": 39.42,
      "learning_rate": 0.001,
      "loss": 2.4995,
      "step": 205296
    },
    {
      "epoch": 39.42,
      "learning_rate": 0.001,
      "loss": 2.4933,
      "step": 205308
    },
    {
      "epoch": 39.42,
      "learning_rate": 0.001,
      "loss": 2.5081,
      "step": 205320
    },
    {
      "epoch": 39.43,
      "learning_rate": 0.001,
      "loss": 2.5092,
      "step": 205332
    },
    {
      "epoch": 39.43,
      "learning_rate": 0.001,
      "loss": 2.497,
      "step": 205344
    },
    {
      "epoch": 39.43,
      "learning_rate": 0.001,
      "loss": 2.4944,
      "step": 205356
    },
    {
      "epoch": 39.43,
      "learning_rate": 0.001,
      "loss": 2.5077,
      "step": 205368
    },
    {
      "epoch": 39.44,
      "learning_rate": 0.001,
      "loss": 2.4906,
      "step": 205380
    },
    {
      "epoch": 39.44,
      "learning_rate": 0.001,
      "loss": 2.5063,
      "step": 205392
    },
    {
      "epoch": 39.44,
      "learning_rate": 0.001,
      "loss": 2.4959,
      "step": 205404
    },
    {
      "epoch": 39.44,
      "learning_rate": 0.001,
      "loss": 2.5001,
      "step": 205416
    },
    {
      "epoch": 39.44,
      "learning_rate": 0.001,
      "loss": 2.4963,
      "step": 205428
    },
    {
      "epoch": 39.45,
      "learning_rate": 0.001,
      "loss": 2.5043,
      "step": 205440
    },
    {
      "epoch": 39.45,
      "learning_rate": 0.001,
      "loss": 2.4984,
      "step": 205452
    },
    {
      "epoch": 39.45,
      "learning_rate": 0.001,
      "loss": 2.5096,
      "step": 205464
    },
    {
      "epoch": 39.45,
      "learning_rate": 0.001,
      "loss": 2.5054,
      "step": 205476
    },
    {
      "epoch": 39.46,
      "learning_rate": 0.001,
      "loss": 2.5018,
      "step": 205488
    },
    {
      "epoch": 39.46,
      "learning_rate": 0.001,
      "loss": 2.5052,
      "step": 205500
    },
    {
      "epoch": 39.46,
      "learning_rate": 0.001,
      "loss": 2.5021,
      "step": 205512
    },
    {
      "epoch": 39.46,
      "learning_rate": 0.001,
      "loss": 2.5012,
      "step": 205524
    },
    {
      "epoch": 39.47,
      "learning_rate": 0.001,
      "loss": 2.5129,
      "step": 205536
    },
    {
      "epoch": 39.47,
      "learning_rate": 0.001,
      "loss": 2.5116,
      "step": 205548
    },
    {
      "epoch": 39.47,
      "learning_rate": 0.001,
      "loss": 2.5019,
      "step": 205560
    },
    {
      "epoch": 39.47,
      "learning_rate": 0.001,
      "loss": 2.4961,
      "step": 205572
    },
    {
      "epoch": 39.47,
      "learning_rate": 0.001,
      "loss": 2.4991,
      "step": 205584
    },
    {
      "epoch": 39.48,
      "learning_rate": 0.001,
      "loss": 2.5008,
      "step": 205596
    },
    {
      "epoch": 39.48,
      "learning_rate": 0.001,
      "loss": 2.4948,
      "step": 205608
    },
    {
      "epoch": 39.48,
      "learning_rate": 0.001,
      "loss": 2.4981,
      "step": 205620
    },
    {
      "epoch": 39.48,
      "eval_ag_news_accuracy": 0.32765625,
      "eval_ag_news_bleu_score": 4.758995001411007,
      "eval_ag_news_bleu_score_sem": 0.1511947463405855,
      "eval_ag_news_emb_cos_sim": 0.8204512596130371,
      "eval_ag_news_emb_cos_sim_sem": 0.007165907282587416,
      "eval_ag_news_emb_top1_equal": 0.2578125,
      "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4871253967285156,
      "eval_ag_news_n_ngrams_match_1": 14.36,
      "eval_ag_news_n_ngrams_match_2": 3.142,
      "eval_ag_news_n_ngrams_match_3": 0.87,
      "eval_ag_news_num_pred_words": 46.626,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.69183644477301,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3563840613557255,
      "eval_ag_news_runtime": 22.9417,
      "eval_ag_news_samples_per_second": 21.794,
      "eval_ag_news_steps_per_second": 0.044,
      "eval_ag_news_token_set_f1": 0.35745356870307254,
      "eval_ag_news_token_set_f1_sem": 0.004486212538546395,
      "eval_ag_news_token_set_precision": 0.3436420648906715,
      "eval_ag_news_token_set_recall": 0.3863338606938275,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 205625
    },
    {
      "epoch": 39.48,
      "eval_anthropic_toxic_prompts_accuracy": 0.11615625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.293664304154164,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12637628066087048,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6872742772102356,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008042833710495809,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.1951358318328857,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.27,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.014,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.8,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.214,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.41348959612635,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2154265671085718,
      "eval_anthropic_toxic_prompts_runtime": 9.837,
      "eval_anthropic_toxic_prompts_samples_per_second": 50.828,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.102,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3579592530906661,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006544063633919236,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44162066025403757,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3285182505153322,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 205625
    },
    {
      "epoch": 39.48,
      "eval_arxiv_accuracy": 0.35128125,
      "eval_arxiv_bleu_score": 4.420155284020418,
      "eval_arxiv_bleu_score_sem": 0.13189450532811015,
      "eval_arxiv_emb_cos_sim": 0.7840715050697327,
      "eval_arxiv_emb_cos_sim_sem": 0.006795955417248948,
      "eval_arxiv_emb_top1_equal": 0.21875,
      "eval_arxiv_emb_top1_equal_sem": 0.03668319712192295,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.35141921043396,
      "eval_arxiv_n_ngrams_match_1": 15.516,
      "eval_arxiv_n_ngrams_match_2": 3.014,
      "eval_arxiv_n_ngrams_match_3": 0.656,
      "eval_arxiv_num_pred_words": 40.762,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.5432137388431,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36983822139778655,
      "eval_arxiv_runtime": 10.2965,
      "eval_arxiv_samples_per_second": 48.56,
      "eval_arxiv_steps_per_second": 0.097,
      "eval_arxiv_token_set_f1": 0.3616314861062852,
      "eval_arxiv_token_set_f1_sem": 0.0042738420918988045,
      "eval_arxiv_token_set_precision": 0.31616064222926643,
      "eval_arxiv_token_set_recall": 0.43917531572056145,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 205625
    },
    {
      "epoch": 39.48,
      "eval_python_code_alpaca_accuracy": 0.161875,
      "eval_python_code_alpaca_bleu_score": 4.653499177932582,
      "eval_python_code_alpaca_bleu_score_sem": 0.1508105252176043,
      "eval_python_code_alpaca_emb_cos_sim": 0.7711987495422363,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007043589884219214,
      "eval_python_code_alpaca_emb_top1_equal": 0.1171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8417410850524902,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.842,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.87,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.956,
      "eval_python_code_alpaca_num_pred_words": 43.16,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.14559149791729,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33916703918604696,
      "eval_python_code_alpaca_runtime": 10.1591,
      "eval_python_code_alpaca_samples_per_second": 49.217,
      "eval_python_code_alpaca_steps_per_second": 0.098,
      "eval_python_code_alpaca_token_set_f1": 0.4796166415804258,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005395598108368435,
      "eval_python_code_alpaca_token_set_precision": 0.5369490874312501,
      "eval_python_code_alpaca_token_set_recall": 0.4539022940867618,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 205625
    },
    {
      "epoch": 39.48,
      "eval_wikibio_accuracy": 0.32878125,
      "eval_wikibio_bleu_score": 5.947674251706092,
      "eval_wikibio_bleu_score_sem": 0.21861251412515462,
      "eval_wikibio_emb_cos_sim": 0.7443200349807739,
      "eval_wikibio_emb_cos_sim_sem": 0.0097666088629995,
      "eval_wikibio_emb_top1_equal": 0.234375,
      "eval_wikibio_emb_top1_equal_sem": 0.03758909358128201,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.673942804336548,
      "eval_wikibio_n_ngrams_match_1": 10.05,
      "eval_wikibio_n_ngrams_match_2": 3.418,
      "eval_wikibio_n_ngrams_match_3": 1.244,
      "eval_wikibio_num_pred_words": 35.824,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 39.40697394562105,
      "eval_wikibio_pred_num_tokens": 62.875,
      "eval_wikibio_rouge_score": 0.35283360241892536,
      "eval_wikibio_runtime": 10.3324,
      "eval_wikibio_samples_per_second": 48.392,
      "eval_wikibio_steps_per_second": 0.097,
      "eval_wikibio_token_set_f1": 0.3192528381879088,
      "eval_wikibio_token_set_f1_sem": 0.005905905546061624,
      "eval_wikibio_token_set_precision": 0.3262740030259134,
      "eval_wikibio_token_set_recall": 0.3307886234579299,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 205625
    },
    {
      "epoch": 39.48,
      "eval_nq_accuracy": 0.53646875,
      "eval_nq_bleu_score": 11.96204997457386,
      "eval_nq_bleu_score_sem": 0.47779306403289684,
      "eval_nq_emb_cos_sim": 0.8318569660186768,
      "eval_nq_emb_cos_sim_sem": 0.007318271432676838,
      "eval_nq_emb_top1_equal": 0.28125,
      "eval_nq_emb_top1_equal_sem": 0.039896367485272234,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1247787475585938,
      "eval_nq_n_ngrams_match_1": 23.398,
      "eval_nq_n_ngrams_match_2": 8.666,
      "eval_nq_n_ngrams_match_3": 3.998,
      "eval_nq_num_pred_words": 49.308,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.37104516903899,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45392492208944635,
      "eval_nq_runtime": 10.6998,
      "eval_nq_samples_per_second": 46.73,
      "eval_nq_steps_per_second": 0.093,
      "eval_nq_token_set_f1": 0.46797845611138295,
      "eval_nq_token_set_f1_sem": 0.0049779903721139055,
      "eval_nq_token_set_precision": 0.42704323674352884,
      "eval_nq_token_set_recall": 0.5263336533959055,
      "eval_nq_true_num_tokens": 64.0,
      "step": 205625
    },
    {
      "epoch": 39.48,
      "learning_rate": 0.001,
      "loss": 2.4903,
      "step": 205632
    },
    {
      "epoch": 39.49,
      "learning_rate": 0.001,
      "loss": 2.5015,
      "step": 205644
    },
    {
      "epoch": 39.49,
      "learning_rate": 0.001,
      "loss": 2.5084,
      "step": 205656
    },
    {
      "epoch": 39.49,
      "learning_rate": 0.001,
      "loss": 2.5009,
      "step": 205668
    },
    {
      "epoch": 39.49,
      "learning_rate": 0.001,
      "loss": 2.5023,
      "step": 205680
    },
    {
      "epoch": 39.5,
      "learning_rate": 0.001,
      "loss": 2.503,
      "step": 205692
    },
    {
      "epoch": 39.5,
      "learning_rate": 0.001,
      "loss": 2.5,
      "step": 205704
    },
    {
      "epoch": 39.5,
      "learning_rate": 0.001,
      "loss": 2.496,
      "step": 205716
    },
    {
      "epoch": 39.5,
      "learning_rate": 0.001,
      "loss": 2.4933,
      "step": 205728
    },
    {
      "epoch": 39.5,
      "learning_rate": 0.001,
      "loss": 2.505,
      "step": 205740
    },
    {
      "epoch": 39.51,
      "learning_rate": 0.001,
      "loss": 2.4971,
      "step": 205752
    },
    {
      "epoch": 39.51,
      "learning_rate": 0.001,
      "loss": 2.5072,
      "step": 205764
    },
    {
      "epoch": 39.51,
      "learning_rate": 0.001,
      "loss": 2.5012,
      "step": 205776
    },
    {
      "epoch": 39.51,
      "learning_rate": 0.001,
      "loss": 2.4991,
      "step": 205788
    },
    {
      "epoch": 39.52,
      "learning_rate": 0.001,
      "loss": 2.5005,
      "step": 205800
    },
    {
      "epoch": 39.52,
      "learning_rate": 0.001,
      "loss": 2.4954,
      "step": 205812
    },
    {
      "epoch": 39.52,
      "learning_rate": 0.001,
      "loss": 2.4958,
      "step": 205824
    },
    {
      "epoch": 39.52,
      "learning_rate": 0.001,
      "loss": 2.4922,
      "step": 205836
    },
    {
      "epoch": 39.53,
      "learning_rate": 0.001,
      "loss": 2.5041,
      "step": 205848
    },
    {
      "epoch": 39.53,
      "learning_rate": 0.001,
      "loss": 2.5157,
      "step": 205860
    },
    {
      "epoch": 39.53,
      "learning_rate": 0.001,
      "loss": 2.5029,
      "step": 205872
    },
    {
      "epoch": 39.53,
      "learning_rate": 0.001,
      "loss": 2.4971,
      "step": 205884
    },
    {
      "epoch": 39.53,
      "learning_rate": 0.001,
      "loss": 2.4989,
      "step": 205896
    },
    {
      "epoch": 39.54,
      "learning_rate": 0.001,
      "loss": 2.4992,
      "step": 205908
    },
    {
      "epoch": 39.54,
      "learning_rate": 0.001,
      "loss": 2.5048,
      "step": 205920
    },
    {
      "epoch": 39.54,
      "learning_rate": 0.001,
      "loss": 2.5092,
      "step": 205932
    },
    {
      "epoch": 39.54,
      "learning_rate": 0.001,
      "loss": 2.4954,
      "step": 205944
    },
    {
      "epoch": 39.55,
      "learning_rate": 0.001,
      "loss": 2.5045,
      "step": 205956
    },
    {
      "epoch": 39.55,
      "learning_rate": 0.001,
      "loss": 2.5002,
      "step": 205968
    },
    {
      "epoch": 39.55,
      "learning_rate": 0.001,
      "loss": 2.4896,
      "step": 205980
    },
    {
      "epoch": 39.55,
      "learning_rate": 0.001,
      "loss": 2.5122,
      "step": 205992
    },
    {
      "epoch": 39.56,
      "learning_rate": 0.001,
      "loss": 2.5043,
      "step": 206004
    },
    {
      "epoch": 39.56,
      "learning_rate": 0.001,
      "loss": 2.498,
      "step": 206016
    },
    {
      "epoch": 39.56,
      "learning_rate": 0.001,
      "loss": 2.5086,
      "step": 206028
    },
    {
      "epoch": 39.56,
      "learning_rate": 0.001,
      "loss": 2.5051,
      "step": 206040
    },
    {
      "epoch": 39.56,
      "learning_rate": 0.001,
      "loss": 2.5078,
      "step": 206052
    },
    {
      "epoch": 39.57,
      "learning_rate": 0.001,
      "loss": 2.4995,
      "step": 206064
    },
    {
      "epoch": 39.57,
      "learning_rate": 0.001,
      "loss": 2.4985,
      "step": 206076
    },
    {
      "epoch": 39.57,
      "learning_rate": 0.001,
      "loss": 2.4993,
      "step": 206088
    },
    {
      "epoch": 39.57,
      "learning_rate": 0.001,
      "loss": 2.4893,
      "step": 206100
    },
    {
      "epoch": 39.58,
      "learning_rate": 0.001,
      "loss": 2.5048,
      "step": 206112
    },
    {
      "epoch": 39.58,
      "learning_rate": 0.001,
      "loss": 2.4951,
      "step": 206124
    },
    {
      "epoch": 39.58,
      "learning_rate": 0.001,
      "loss": 2.5083,
      "step": 206136
    },
    {
      "epoch": 39.58,
      "learning_rate": 0.001,
      "loss": 2.5053,
      "step": 206148
    },
    {
      "epoch": 39.59,
      "learning_rate": 0.001,
      "loss": 2.5015,
      "step": 206160
    },
    {
      "epoch": 39.59,
      "learning_rate": 0.001,
      "loss": 2.5077,
      "step": 206172
    },
    {
      "epoch": 39.59,
      "learning_rate": 0.001,
      "loss": 2.4862,
      "step": 206184
    },
    {
      "epoch": 39.59,
      "learning_rate": 0.001,
      "loss": 2.5049,
      "step": 206196
    },
    {
      "epoch": 39.59,
      "learning_rate": 0.001,
      "loss": 2.5034,
      "step": 206208
    },
    {
      "epoch": 39.6,
      "learning_rate": 0.001,
      "loss": 2.5002,
      "step": 206220
    },
    {
      "epoch": 39.6,
      "learning_rate": 0.001,
      "loss": 2.503,
      "step": 206232
    },
    {
      "epoch": 39.6,
      "learning_rate": 0.001,
      "loss": 2.496,
      "step": 206244
    },
    {
      "epoch": 39.6,
      "eval_ag_news_accuracy": 0.32921875,
      "eval_ag_news_bleu_score": 4.884203174248183,
      "eval_ag_news_bleu_score_sem": 0.15401255351595175,
      "eval_ag_news_emb_cos_sim": 0.822045624256134,
      "eval_ag_news_emb_cos_sim_sem": 0.006645820440319048,
      "eval_ag_news_emb_top1_equal": 0.2265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03714537682851538,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4755210876464844,
      "eval_ag_news_n_ngrams_match_1": 14.374,
      "eval_ag_news_n_ngrams_match_2": 3.19,
      "eval_ag_news_n_ngrams_match_3": 0.896,
      "eval_ag_news_num_pred_words": 46.452,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.31466292178353,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3585548501456335,
      "eval_ag_news_runtime": 28.1772,
      "eval_ag_news_samples_per_second": 17.745,
      "eval_ag_news_steps_per_second": 0.035,
      "eval_ag_news_token_set_f1": 0.35929547386418587,
      "eval_ag_news_token_set_f1_sem": 0.004474480874861666,
      "eval_ag_news_token_set_precision": 0.3449567106477639,
      "eval_ag_news_token_set_recall": 0.39057899466923424,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 206250
    },
    {
      "epoch": 39.6,
      "eval_anthropic_toxic_prompts_accuracy": 0.116,
      "eval_anthropic_toxic_prompts_bleu_score": 3.219723676354917,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12922401070142656,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6761558055877686,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008352271262451237,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.188995122909546,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.338,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.974,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.746,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.342,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.26403281764672,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2167797885681345,
      "eval_anthropic_toxic_prompts_runtime": 19.4513,
      "eval_anthropic_toxic_prompts_samples_per_second": 25.705,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.051,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.356023255124861,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006553045984357989,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44371696485369744,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3210130147500456,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 206250
    },
    {
      "epoch": 39.6,
      "eval_arxiv_accuracy": 0.351625,
      "eval_arxiv_bleu_score": 4.416606303699746,
      "eval_arxiv_bleu_score_sem": 0.13074291373240818,
      "eval_arxiv_emb_cos_sim": 0.7801247239112854,
      "eval_arxiv_emb_cos_sim_sem": 0.006129065174716564,
      "eval_arxiv_emb_top1_equal": 0.328125,
      "eval_arxiv_emb_top1_equal_sem": 0.041664103776406315,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.350700616836548,
      "eval_arxiv_n_ngrams_match_1": 15.32,
      "eval_arxiv_n_ngrams_match_2": 2.974,
      "eval_arxiv_n_ngrams_match_3": 0.692,
      "eval_arxiv_num_pred_words": 39.632,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.52271013596198,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.37201024135752503,
      "eval_arxiv_runtime": 12.3612,
      "eval_arxiv_samples_per_second": 40.449,
      "eval_arxiv_steps_per_second": 0.081,
      "eval_arxiv_token_set_f1": 0.3590328214546753,
      "eval_arxiv_token_set_f1_sem": 0.004241096979003804,
      "eval_arxiv_token_set_precision": 0.3130754958349433,
      "eval_arxiv_token_set_recall": 0.4406278496156961,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 206250
    },
    {
      "epoch": 39.6,
      "eval_python_code_alpaca_accuracy": 0.1638125,
      "eval_python_code_alpaca_bleu_score": 4.768408780132859,
      "eval_python_code_alpaca_bleu_score_sem": 0.15065037302790762,
      "eval_python_code_alpaca_emb_cos_sim": 0.7646748423576355,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007684574302896574,
      "eval_python_code_alpaca_emb_top1_equal": 0.125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.829019069671631,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.888,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.978,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.012,
      "eval_python_code_alpaca_num_pred_words": 43.214,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 16.92884665809212,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.34163067713443895,
      "eval_python_code_alpaca_runtime": 14.698,
      "eval_python_code_alpaca_samples_per_second": 34.018,
      "eval_python_code_alpaca_steps_per_second": 0.068,
      "eval_python_code_alpaca_token_set_f1": 0.4790692803649071,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005652008607884245,
      "eval_python_code_alpaca_token_set_precision": 0.5426187165129907,
      "eval_python_code_alpaca_token_set_recall": 0.45155630006571473,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 206250
    },
    {
      "epoch": 39.6,
      "eval_wikibio_accuracy": 0.33040625,
      "eval_wikibio_bleu_score": 6.024209986316364,
      "eval_wikibio_bleu_score_sem": 0.21936321620038762,
      "eval_wikibio_emb_cos_sim": 0.714040219783783,
      "eval_wikibio_emb_cos_sim_sem": 0.011807102435538876,
      "eval_wikibio_emb_top1_equal": 0.203125,
      "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6262500286102295,
      "eval_wikibio_n_ngrams_match_1": 9.524,
      "eval_wikibio_n_ngrams_match_2": 3.28,
      "eval_wikibio_n_ngrams_match_3": 1.252,
      "eval_wikibio_num_pred_words": 34.202,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.57165946689315,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.34425747474653673,
      "eval_wikibio_runtime": 9.8873,
      "eval_wikibio_samples_per_second": 50.57,
      "eval_wikibio_steps_per_second": 0.101,
      "eval_wikibio_token_set_f1": 0.30739173183559565,
      "eval_wikibio_token_set_f1_sem": 0.005977212403921819,
      "eval_wikibio_token_set_precision": 0.30895612659226857,
      "eval_wikibio_token_set_recall": 0.32452173528551265,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 206250
    },
    {
      "epoch": 39.6,
      "eval_nq_accuracy": 0.5366875,
      "eval_nq_bleu_score": 12.075448880867258,
      "eval_nq_bleu_score_sem": 0.4823829974590332,
      "eval_nq_emb_cos_sim": 0.8319193124771118,
      "eval_nq_emb_cos_sim_sem": 0.007117690629346445,
      "eval_nq_emb_top1_equal": 0.234375,
      "eval_nq_emb_top1_equal_sem": 0.03758909358128201,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.127696990966797,
      "eval_nq_n_ngrams_match_1": 23.326,
      "eval_nq_n_ngrams_match_2": 8.69,
      "eval_nq_n_ngrams_match_3": 4.026,
      "eval_nq_num_pred_words": 48.624,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.395509595637256,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.455331581858606,
      "eval_nq_runtime": 10.3617,
      "eval_nq_samples_per_second": 48.255,
      "eval_nq_steps_per_second": 0.097,
      "eval_nq_token_set_f1": 0.4683670883532629,
      "eval_nq_token_set_f1_sem": 0.004948844677081082,
      "eval_nq_token_set_precision": 0.4257490155971734,
      "eval_nq_token_set_recall": 0.5319635142423168,
      "eval_nq_true_num_tokens": 64.0,
      "step": 206250
    },
    {
      "epoch": 39.6,
      "learning_rate": 0.001,
      "loss": 2.5062,
      "step": 206256
    },
    {
      "epoch": 39.61,
      "learning_rate": 0.001,
      "loss": 2.4938,
      "step": 206268
    },
    {
      "epoch": 39.61,
      "learning_rate": 0.001,
      "loss": 2.5,
      "step": 206280
    },
    {
      "epoch": 39.61,
      "learning_rate": 0.001,
      "loss": 2.5026,
      "step": 206292
    },
    {
      "epoch": 39.61,
      "learning_rate": 0.001,
      "loss": 2.4946,
      "step": 206304
    },
    {
      "epoch": 39.62,
      "learning_rate": 0.001,
      "loss": 2.5001,
      "step": 206316
    },
    {
      "epoch": 39.62,
      "learning_rate": 0.001,
      "loss": 2.5049,
      "step": 206328
    },
    {
      "epoch": 39.62,
      "learning_rate": 0.001,
      "loss": 2.4948,
      "step": 206340
    },
    {
      "epoch": 39.62,
      "learning_rate": 0.001,
      "loss": 2.4984,
      "step": 206352
    },
    {
      "epoch": 39.62,
      "learning_rate": 0.001,
      "loss": 2.4934,
      "step": 206364
    },
    {
      "epoch": 39.63,
      "learning_rate": 0.001,
      "loss": 2.5015,
      "step": 206376
    },
    {
      "epoch": 39.63,
      "learning_rate": 0.001,
      "loss": 2.5024,
      "step": 206388
    },
    {
      "epoch": 39.63,
      "learning_rate": 0.001,
      "loss": 2.5005,
      "step": 206400
    },
    {
      "epoch": 39.63,
      "learning_rate": 0.001,
      "loss": 2.4967,
      "step": 206412
    },
    {
      "epoch": 39.64,
      "learning_rate": 0.001,
      "loss": 2.5052,
      "step": 206424
    },
    {
      "epoch": 39.64,
      "learning_rate": 0.001,
      "loss": 2.5008,
      "step": 206436
    },
    {
      "epoch": 39.64,
      "learning_rate": 0.001,
      "loss": 2.5102,
      "step": 206448
    },
    {
      "epoch": 39.64,
      "learning_rate": 0.001,
      "loss": 2.4943,
      "step": 206460
    },
    {
      "epoch": 39.65,
      "learning_rate": 0.001,
      "loss": 2.4998,
      "step": 206472
    },
    {
      "epoch": 39.65,
      "learning_rate": 0.001,
      "loss": 2.5028,
      "step": 206484
    },
    {
      "epoch": 39.65,
      "learning_rate": 0.001,
      "loss": 2.5008,
      "step": 206496
    },
    {
      "epoch": 39.65,
      "learning_rate": 0.001,
      "loss": 2.5079,
      "step": 206508
    },
    {
      "epoch": 39.65,
      "learning_rate": 0.001,
      "loss": 2.4989,
      "step": 206520
    },
    {
      "epoch": 39.66,
      "learning_rate": 0.001,
      "loss": 2.5035,
      "step": 206532
    },
    {
      "epoch": 39.66,
      "learning_rate": 0.001,
      "loss": 2.5076,
      "step": 206544
    },
    {
      "epoch": 39.66,
      "learning_rate": 0.001,
      "loss": 2.496,
      "step": 206556
    },
    {
      "epoch": 39.66,
      "learning_rate": 0.001,
      "loss": 2.4981,
      "step": 206568
    },
    {
      "epoch": 39.67,
      "learning_rate": 0.001,
      "loss": 2.5064,
      "step": 206580
    },
    {
      "epoch": 39.67,
      "learning_rate": 0.001,
      "loss": 2.5084,
      "step": 206592
    },
    {
      "epoch": 39.67,
      "learning_rate": 0.001,
      "loss": 2.4939,
      "step": 206604
    },
    {
      "epoch": 39.67,
      "learning_rate": 0.001,
      "loss": 2.5035,
      "step": 206616
    },
    {
      "epoch": 39.68,
      "learning_rate": 0.001,
      "loss": 2.5004,
      "step": 206628
    },
    {
      "epoch": 39.68,
      "learning_rate": 0.001,
      "loss": 2.5087,
      "step": 206640
    },
    {
      "epoch": 39.68,
      "learning_rate": 0.001,
      "loss": 2.5028,
      "step": 206652
    },
    {
      "epoch": 39.68,
      "learning_rate": 0.001,
      "loss": 2.492,
      "step": 206664
    },
    {
      "epoch": 39.68,
      "learning_rate": 0.001,
      "loss": 2.5117,
      "step": 206676
    },
    {
      "epoch": 39.69,
      "learning_rate": 0.001,
      "loss": 2.4969,
      "step": 206688
    },
    {
      "epoch": 39.69,
      "learning_rate": 0.001,
      "loss": 2.5026,
      "step": 206700
    },
    {
      "epoch": 39.69,
      "learning_rate": 0.001,
      "loss": 2.4985,
      "step": 206712
    },
    {
      "epoch": 39.69,
      "learning_rate": 0.001,
      "loss": 2.4958,
      "step": 206724
    },
    {
      "epoch": 39.7,
      "learning_rate": 0.001,
      "loss": 2.5047,
      "step": 206736
    },
    {
      "epoch": 39.7,
      "learning_rate": 0.001,
      "loss": 2.5041,
      "step": 206748
    },
    {
      "epoch": 39.7,
      "learning_rate": 0.001,
      "loss": 2.4969,
      "step": 206760
    },
    {
      "epoch": 39.7,
      "learning_rate": 0.001,
      "loss": 2.5077,
      "step": 206772
    },
    {
      "epoch": 39.71,
      "learning_rate": 0.001,
      "loss": 2.5085,
      "step": 206784
    },
    {
      "epoch": 39.71,
      "learning_rate": 0.001,
      "loss": 2.4917,
      "step": 206796
    },
    {
      "epoch": 39.71,
      "learning_rate": 0.001,
      "loss": 2.5005,
      "step": 206808
    },
    {
      "epoch": 39.71,
      "learning_rate": 0.001,
      "loss": 2.5011,
      "step": 206820
    },
    {
      "epoch": 39.71,
      "learning_rate": 0.001,
      "loss": 2.5037,
      "step": 206832
    },
    {
      "epoch": 39.72,
      "learning_rate": 0.001,
      "loss": 2.5054,
      "step": 206844
    },
    {
      "epoch": 39.72,
      "learning_rate": 0.001,
      "loss": 2.4979,
      "step": 206856
    },
    {
      "epoch": 39.72,
      "learning_rate": 0.001,
      "loss": 2.4997,
      "step": 206868
    },
    {
      "epoch": 39.72,
      "eval_ag_news_accuracy": 0.33003125,
      "eval_ag_news_bleu_score": 5.0389521107507935,
      "eval_ag_news_bleu_score_sem": 0.15799023083732622,
      "eval_ag_news_emb_cos_sim": 0.815719723701477,
      "eval_ag_news_emb_cos_sim_sem": 0.006692581710197377,
      "eval_ag_news_emb_top1_equal": 0.2578125,
      "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.480170965194702,
      "eval_ag_news_n_ngrams_match_1": 14.488,
      "eval_ag_news_n_ngrams_match_2": 3.346,
      "eval_ag_news_n_ngrams_match_3": 0.984,
      "eval_ag_news_num_pred_words": 47.4,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.465272032979705,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3583619457881534,
      "eval_ag_news_runtime": 30.5734,
      "eval_ag_news_samples_per_second": 16.354,
      "eval_ag_news_steps_per_second": 0.033,
      "eval_ag_news_token_set_f1": 0.35789815677468456,
      "eval_ag_news_token_set_f1_sem": 0.004543490543371862,
      "eval_ag_news_token_set_precision": 0.34557749570836926,
      "eval_ag_news_token_set_recall": 0.3856556188351927,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 206875
    },
    {
      "epoch": 39.72,
      "eval_anthropic_toxic_prompts_accuracy": 0.11675,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2455844921988,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12387271822905203,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6814239025115967,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00827429695852805,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.1947433948516846,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.29,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.998,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.768,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.258,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.40391071964428,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2164375772817482,
      "eval_anthropic_toxic_prompts_runtime": 22.4068,
      "eval_anthropic_toxic_prompts_samples_per_second": 22.315,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.045,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36258321846646896,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006652217700374055,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4451837827278284,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3351589472883476,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 206875
    },
    {
      "epoch": 39.72,
      "eval_arxiv_accuracy": 0.35040625,
      "eval_arxiv_bleu_score": 4.585184052532934,
      "eval_arxiv_bleu_score_sem": 0.14132065716788414,
      "eval_arxiv_emb_cos_sim": 0.7889276146888733,
      "eval_arxiv_emb_cos_sim_sem": 0.006292711509316745,
      "eval_arxiv_emb_top1_equal": 0.3203125,
      "eval_arxiv_emb_top1_equal_sem": 0.041403754790620424,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3383877277374268,
      "eval_arxiv_n_ngrams_match_1": 15.538,
      "eval_arxiv_n_ngrams_match_2": 3.134,
      "eval_arxiv_n_ngrams_match_3": 0.754,
      "eval_arxiv_num_pred_words": 40.716,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.173666447151692,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.37131630759312895,
      "eval_arxiv_runtime": 11.4428,
      "eval_arxiv_samples_per_second": 43.695,
      "eval_arxiv_steps_per_second": 0.087,
      "eval_arxiv_token_set_f1": 0.3652076707006868,
      "eval_arxiv_token_set_f1_sem": 0.004213045804341446,
      "eval_arxiv_token_set_precision": 0.3178682300620975,
      "eval_arxiv_token_set_recall": 0.44796678101824844,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 206875
    },
    {
      "epoch": 39.72,
      "eval_python_code_alpaca_accuracy": 0.16375,
      "eval_python_code_alpaca_bleu_score": 5.053320707424584,
      "eval_python_code_alpaca_bleu_score_sem": 0.16023282589836868,
      "eval_python_code_alpaca_emb_cos_sim": 0.763613224029541,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.00747103070947105,
      "eval_python_code_alpaca_emb_top1_equal": 0.1171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8547465801239014,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.928,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.088,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.086,
      "eval_python_code_alpaca_num_pred_words": 42.504,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.370034737792057,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3442229310993092,
      "eval_python_code_alpaca_runtime": 11.0653,
      "eval_python_code_alpaca_samples_per_second": 45.186,
      "eval_python_code_alpaca_steps_per_second": 0.09,
      "eval_python_code_alpaca_token_set_f1": 0.48335399384702316,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005360806054945681,
      "eval_python_code_alpaca_token_set_precision": 0.5444294017872537,
      "eval_python_code_alpaca_token_set_recall": 0.4543502961454699,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 206875
    },
    {
      "epoch": 39.72,
      "eval_wikibio_accuracy": 0.33128125,
      "eval_wikibio_bleu_score": 6.448642743853439,
      "eval_wikibio_bleu_score_sem": 0.22362736590252225,
      "eval_wikibio_emb_cos_sim": 0.7393869161605835,
      "eval_wikibio_emb_cos_sim_sem": 0.010121785623756867,
      "eval_wikibio_emb_top1_equal": 0.1953125,
      "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6404178142547607,
      "eval_wikibio_n_ngrams_match_1": 10.3,
      "eval_wikibio_n_ngrams_match_2": 3.6,
      "eval_wikibio_n_ngrams_match_3": 1.374,
      "eval_wikibio_num_pred_words": 35.588,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 38.107755363058196,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3654155712334787,
      "eval_wikibio_runtime": 10.9774,
      "eval_wikibio_samples_per_second": 45.548,
      "eval_wikibio_steps_per_second": 0.091,
      "eval_wikibio_token_set_f1": 0.3291532832861972,
      "eval_wikibio_token_set_f1_sem": 0.005412232160063852,
      "eval_wikibio_token_set_precision": 0.3356559131664926,
      "eval_wikibio_token_set_recall": 0.3401851146789689,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 206875
    },
    {
      "epoch": 39.72,
      "eval_nq_accuracy": 0.537125,
      "eval_nq_bleu_score": 12.091661840283177,
      "eval_nq_bleu_score_sem": 0.48048349052468936,
      "eval_nq_emb_cos_sim": 0.8348127007484436,
      "eval_nq_emb_cos_sim_sem": 0.007264742959434478,
      "eval_nq_emb_top1_equal": 0.296875,
      "eval_nq_emb_top1_equal_sem": 0.04054163310179599,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.122670888900757,
      "eval_nq_n_ngrams_match_1": 23.528,
      "eval_nq_n_ngrams_match_2": 8.684,
      "eval_nq_n_ngrams_match_3": 4.042,
      "eval_nq_num_pred_words": 48.902,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.353418772506828,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4552505289813184,
      "eval_nq_runtime": 11.6148,
      "eval_nq_samples_per_second": 43.049,
      "eval_nq_steps_per_second": 0.086,
      "eval_nq_token_set_f1": 0.4709065843451083,
      "eval_nq_token_set_f1_sem": 0.004988196633602711,
      "eval_nq_token_set_precision": 0.4293234465148486,
      "eval_nq_token_set_recall": 0.5294117325746042,
      "eval_nq_true_num_tokens": 64.0,
      "step": 206875
    },
    {
      "epoch": 39.72,
      "learning_rate": 0.001,
      "loss": 2.5048,
      "step": 206880
    },
    {
      "epoch": 39.73,
      "learning_rate": 0.001,
      "loss": 2.4898,
      "step": 206892
    },
    {
      "epoch": 39.73,
      "learning_rate": 0.001,
      "loss": 2.4991,
      "step": 206904
    },
    {
      "epoch": 39.73,
      "learning_rate": 0.001,
      "loss": 2.499,
      "step": 206916
    },
    {
      "epoch": 39.73,
      "learning_rate": 0.001,
      "loss": 2.5029,
      "step": 206928
    },
    {
      "epoch": 39.74,
      "learning_rate": 0.001,
      "loss": 2.5007,
      "step": 206940
    },
    {
      "epoch": 39.74,
      "learning_rate": 0.001,
      "loss": 2.503,
      "step": 206952
    },
    {
      "epoch": 39.74,
      "learning_rate": 0.001,
      "loss": 2.5001,
      "step": 206964
    },
    {
      "epoch": 39.74,
      "learning_rate": 0.001,
      "loss": 2.4924,
      "step": 206976
    },
    {
      "epoch": 39.74,
      "learning_rate": 0.001,
      "loss": 2.4926,
      "step": 206988
    },
    {
      "epoch": 39.75,
      "learning_rate": 0.001,
      "loss": 2.4964,
      "step": 207000
    },
    {
      "epoch": 39.75,
      "learning_rate": 0.001,
      "loss": 2.4925,
      "step": 207012
    },
    {
      "epoch": 39.75,
      "learning_rate": 0.001,
      "loss": 2.4977,
      "step": 207024
    },
    {
      "epoch": 39.75,
      "learning_rate": 0.001,
      "loss": 2.4973,
      "step": 207036
    },
    {
      "epoch": 39.76,
      "learning_rate": 0.001,
      "loss": 2.5039,
      "step": 207048
    },
    {
      "epoch": 39.76,
      "learning_rate": 0.001,
      "loss": 2.4997,
      "step": 207060
    },
    {
      "epoch": 39.76,
      "learning_rate": 0.001,
      "loss": 2.5032,
      "step": 207072
    },
    {
      "epoch": 39.76,
      "learning_rate": 0.001,
      "loss": 2.5021,
      "step": 207084
    },
    {
      "epoch": 39.76,
      "learning_rate": 0.001,
      "loss": 2.495,
      "step": 207096
    },
    {
      "epoch": 39.77,
      "learning_rate": 0.001,
      "loss": 2.5043,
      "step": 207108
    },
    {
      "epoch": 39.77,
      "learning_rate": 0.001,
      "loss": 2.502,
      "step": 207120
    },
    {
      "epoch": 39.77,
      "learning_rate": 0.001,
      "loss": 2.5042,
      "step": 207132
    },
    {
      "epoch": 39.77,
      "learning_rate": 0.001,
      "loss": 2.4992,
      "step": 207144
    },
    {
      "epoch": 39.78,
      "learning_rate": 0.001,
      "loss": 2.5049,
      "step": 207156
    },
    {
      "epoch": 39.78,
      "learning_rate": 0.001,
      "loss": 2.4958,
      "step": 207168
    },
    {
      "epoch": 39.78,
      "learning_rate": 0.001,
      "loss": 2.5004,
      "step": 207180
    },
    {
      "epoch": 39.78,
      "learning_rate": 0.001,
      "loss": 2.5109,
      "step": 207192
    },
    {
      "epoch": 39.79,
      "learning_rate": 0.001,
      "loss": 2.5003,
      "step": 207204
    },
    {
      "epoch": 39.79,
      "learning_rate": 0.001,
      "loss": 2.5068,
      "step": 207216
    },
    {
      "epoch": 39.79,
      "learning_rate": 0.001,
      "loss": 2.5014,
      "step": 207228
    },
    {
      "epoch": 39.79,
      "learning_rate": 0.001,
      "loss": 2.5022,
      "step": 207240
    },
    {
      "epoch": 39.79,
      "learning_rate": 0.001,
      "loss": 2.4948,
      "step": 207252
    },
    {
      "epoch": 39.8,
      "learning_rate": 0.001,
      "loss": 2.5,
      "step": 207264
    },
    {
      "epoch": 39.8,
      "learning_rate": 0.001,
      "loss": 2.5018,
      "step": 207276
    },
    {
      "epoch": 39.8,
      "learning_rate": 0.001,
      "loss": 2.5037,
      "step": 207288
    },
    {
      "epoch": 39.8,
      "learning_rate": 0.001,
      "loss": 2.5039,
      "step": 207300
    },
    {
      "epoch": 39.81,
      "learning_rate": 0.001,
      "loss": 2.5069,
      "step": 207312
    },
    {
      "epoch": 39.81,
      "learning_rate": 0.001,
      "loss": 2.4991,
      "step": 207324
    },
    {
      "epoch": 39.81,
      "learning_rate": 0.001,
      "loss": 2.504,
      "step": 207336
    },
    {
      "epoch": 39.81,
      "learning_rate": 0.001,
      "loss": 2.5095,
      "step": 207348
    },
    {
      "epoch": 39.82,
      "learning_rate": 0.001,
      "loss": 2.499,
      "step": 207360
    },
    {
      "epoch": 39.82,
      "learning_rate": 0.001,
      "loss": 2.5018,
      "step": 207372
    },
    {
      "epoch": 39.82,
      "learning_rate": 0.001,
      "loss": 2.5138,
      "step": 207384
    },
    {
      "epoch": 39.82,
      "learning_rate": 0.001,
      "loss": 2.4976,
      "step": 207396
    },
    {
      "epoch": 39.82,
      "learning_rate": 0.001,
      "loss": 2.4985,
      "step": 207408
    },
    {
      "epoch": 39.83,
      "learning_rate": 0.001,
      "loss": 2.5028,
      "step": 207420
    },
    {
      "epoch": 39.83,
      "learning_rate": 0.001,
      "loss": 2.5023,
      "step": 207432
    },
    {
      "epoch": 39.83,
      "learning_rate": 0.001,
      "loss": 2.4992,
      "step": 207444
    },
    {
      "epoch": 39.83,
      "learning_rate": 0.001,
      "loss": 2.5168,
      "step": 207456
    },
    {
      "epoch": 39.84,
      "learning_rate": 0.001,
      "loss": 2.4968,
      "step": 207468
    },
    {
      "epoch": 39.84,
      "learning_rate": 0.001,
      "loss": 2.5063,
      "step": 207480
    },
    {
      "epoch": 39.84,
      "learning_rate": 0.001,
      "loss": 2.5039,
      "step": 207492
    },
    {
      "epoch": 39.84,
      "eval_ag_news_accuracy": 0.32965625,
      "eval_ag_news_bleu_score": 4.974430581480158,
      "eval_ag_news_bleu_score_sem": 0.15357801433268847,
      "eval_ag_news_emb_cos_sim": 0.8197583556175232,
      "eval_ag_news_emb_cos_sim_sem": 0.007557454139431654,
      "eval_ag_news_emb_top1_equal": 0.25,
      "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4721226692199707,
      "eval_ag_news_n_ngrams_match_1": 14.406,
      "eval_ag_news_n_ngrams_match_2": 3.306,
      "eval_ag_news_n_ngrams_match_3": 0.956,
      "eval_ag_news_num_pred_words": 46.86,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.205030569680574,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35729248455953744,
      "eval_ag_news_runtime": 29.8942,
      "eval_ag_news_samples_per_second": 16.726,
      "eval_ag_news_steps_per_second": 0.033,
      "eval_ag_news_token_set_f1": 0.3589743507365347,
      "eval_ag_news_token_set_f1_sem": 0.0045018404347702,
      "eval_ag_news_token_set_precision": 0.34413415343274584,
      "eval_ag_news_token_set_recall": 0.3897680680992472,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 207500
    },
    {
      "epoch": 39.84,
      "eval_anthropic_toxic_prompts_accuracy": 0.11621875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.180429764852126,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12068592920934888,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6749402284622192,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008165359867473042,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.202605962753296,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.262,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.926,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.748,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.024,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.596544430173704,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21634137899409395,
      "eval_anthropic_toxic_prompts_runtime": 19.5564,
      "eval_anthropic_toxic_prompts_samples_per_second": 25.567,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.051,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.359952372641166,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006684297985333396,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4437842390260149,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3302393170791224,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 207500
    },
    {
      "epoch": 39.84,
      "eval_arxiv_accuracy": 0.353625,
      "eval_arxiv_bleu_score": 4.4837706668470565,
      "eval_arxiv_bleu_score_sem": 0.13277773425486106,
      "eval_arxiv_emb_cos_sim": 0.785962700843811,
      "eval_arxiv_emb_cos_sim_sem": 0.006809564239506411,
      "eval_arxiv_emb_top1_equal": 0.2265625,
      "eval_arxiv_emb_top1_equal_sem": 0.03714537682851538,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.327991247177124,
      "eval_arxiv_n_ngrams_match_1": 15.388,
      "eval_arxiv_n_ngrams_match_2": 3.092,
      "eval_arxiv_n_ngrams_match_3": 0.736,
      "eval_arxiv_num_pred_words": 40.114,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.88227680958258,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3718989531713206,
      "eval_arxiv_runtime": 12.3234,
      "eval_arxiv_samples_per_second": 40.573,
      "eval_arxiv_steps_per_second": 0.081,
      "eval_arxiv_token_set_f1": 0.36151538580594356,
      "eval_arxiv_token_set_f1_sem": 0.004413554332080352,
      "eval_arxiv_token_set_precision": 0.31321404093939337,
      "eval_arxiv_token_set_recall": 0.44813877961170784,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 207500
    },
    {
      "epoch": 39.84,
      "eval_python_code_alpaca_accuracy": 0.16325,
      "eval_python_code_alpaca_bleu_score": 4.81153566685242,
      "eval_python_code_alpaca_bleu_score_sem": 0.14915521931042197,
      "eval_python_code_alpaca_emb_cos_sim": 0.7589878439903259,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007668823893899151,
      "eval_python_code_alpaca_emb_top1_equal": 0.125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.857896566390991,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.894,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.998,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.03,
      "eval_python_code_alpaca_num_pred_words": 42.856,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.42483637556447,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3362432062250609,
      "eval_python_code_alpaca_runtime": 18.2458,
      "eval_python_code_alpaca_samples_per_second": 27.404,
      "eval_python_code_alpaca_steps_per_second": 0.055,
      "eval_python_code_alpaca_token_set_f1": 0.4818597306786629,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005618661827092928,
      "eval_python_code_alpaca_token_set_precision": 0.5397537359191535,
      "eval_python_code_alpaca_token_set_recall": 0.4600229760708083,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 207500
    },
    {
      "epoch": 39.84,
      "eval_wikibio_accuracy": 0.332875,
      "eval_wikibio_bleu_score": 6.378910879757371,
      "eval_wikibio_bleu_score_sem": 0.2347065515807592,
      "eval_wikibio_emb_cos_sim": 0.7445087432861328,
      "eval_wikibio_emb_cos_sim_sem": 0.008973689665251923,
      "eval_wikibio_emb_top1_equal": 0.1875,
      "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.60418963432312,
      "eval_wikibio_n_ngrams_match_1": 10.088,
      "eval_wikibio_n_ngrams_match_2": 3.48,
      "eval_wikibio_n_ngrams_match_3": 1.308,
      "eval_wikibio_num_pred_words": 35.004,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 36.7518893169753,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36013711711533297,
      "eval_wikibio_runtime": 12.503,
      "eval_wikibio_samples_per_second": 39.99,
      "eval_wikibio_steps_per_second": 0.08,
      "eval_wikibio_token_set_f1": 0.3248800530088759,
      "eval_wikibio_token_set_f1_sem": 0.005520006533132245,
      "eval_wikibio_token_set_precision": 0.3289126554818462,
      "eval_wikibio_token_set_recall": 0.3396739876203575,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 207500
    },
    {
      "epoch": 39.84,
      "eval_nq_accuracy": 0.5355,
      "eval_nq_bleu_score": 12.053846841680388,
      "eval_nq_bleu_score_sem": 0.48184386502529686,
      "eval_nq_emb_cos_sim": 0.8340513706207275,
      "eval_nq_emb_cos_sim_sem": 0.0069508845981969625,
      "eval_nq_emb_top1_equal": 0.2421875,
      "eval_nq_emb_top1_equal_sem": 0.038014990119662626,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.122384786605835,
      "eval_nq_n_ngrams_match_1": 23.456,
      "eval_nq_n_ngrams_match_2": 8.716,
      "eval_nq_n_ngrams_match_3": 4.034,
      "eval_nq_num_pred_words": 49.132,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.351029182075523,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4557837840848542,
      "eval_nq_runtime": 31.412,
      "eval_nq_samples_per_second": 15.917,
      "eval_nq_steps_per_second": 0.032,
      "eval_nq_token_set_f1": 0.46939636316425354,
      "eval_nq_token_set_f1_sem": 0.004888744949717722,
      "eval_nq_token_set_precision": 0.4279788650858496,
      "eval_nq_token_set_recall": 0.5278032065376848,
      "eval_nq_true_num_tokens": 64.0,
      "step": 207500
    },
    {
      "epoch": 39.84,
      "learning_rate": 0.001,
      "loss": 2.4973,
      "step": 207504
    },
    {
      "epoch": 39.85,
      "learning_rate": 0.001,
      "loss": 2.4949,
      "step": 207516
    },
    {
      "epoch": 39.85,
      "learning_rate": 0.001,
      "loss": 2.5172,
      "step": 207528
    },
    {
      "epoch": 39.85,
      "learning_rate": 0.001,
      "loss": 2.494,
      "step": 207540
    },
    {
      "epoch": 39.85,
      "learning_rate": 0.001,
      "loss": 2.5018,
      "step": 207552
    },
    {
      "epoch": 39.85,
      "learning_rate": 0.001,
      "loss": 2.4915,
      "step": 207564
    },
    {
      "epoch": 39.86,
      "learning_rate": 0.001,
      "loss": 2.5157,
      "step": 207576
    },
    {
      "epoch": 39.86,
      "learning_rate": 0.001,
      "loss": 2.4959,
      "step": 207588
    },
    {
      "epoch": 39.86,
      "learning_rate": 0.001,
      "loss": 2.5001,
      "step": 207600
    },
    {
      "epoch": 39.86,
      "learning_rate": 0.001,
      "loss": 2.5006,
      "step": 207612
    },
    {
      "epoch": 39.87,
      "learning_rate": 0.001,
      "loss": 2.4946,
      "step": 207624
    },
    {
      "epoch": 39.87,
      "learning_rate": 0.001,
      "loss": 2.503,
      "step": 207636
    },
    {
      "epoch": 39.87,
      "learning_rate": 0.001,
      "loss": 2.5118,
      "step": 207648
    },
    {
      "epoch": 39.87,
      "learning_rate": 0.001,
      "loss": 2.4951,
      "step": 207660
    },
    {
      "epoch": 39.88,
      "learning_rate": 0.001,
      "loss": 2.4916,
      "step": 207672
    },
    {
      "epoch": 39.88,
      "learning_rate": 0.001,
      "loss": 2.497,
      "step": 207684
    },
    {
      "epoch": 39.88,
      "learning_rate": 0.001,
      "loss": 2.5058,
      "step": 207696
    },
    {
      "epoch": 39.88,
      "learning_rate": 0.001,
      "loss": 2.4901,
      "step": 207708
    },
    {
      "epoch": 39.88,
      "learning_rate": 0.001,
      "loss": 2.4976,
      "step": 207720
    },
    {
      "epoch": 39.89,
      "learning_rate": 0.001,
      "loss": 2.4941,
      "step": 207732
    },
    {
      "epoch": 39.89,
      "learning_rate": 0.001,
      "loss": 2.4911,
      "step": 207744
    },
    {
      "epoch": 39.89,
      "learning_rate": 0.001,
      "loss": 2.4976,
      "step": 207756
    },
    {
      "epoch": 39.89,
      "learning_rate": 0.001,
      "loss": 2.5133,
      "step": 207768
    },
    {
      "epoch": 39.9,
      "learning_rate": 0.001,
      "loss": 2.4997,
      "step": 207780
    },
    {
      "epoch": 39.9,
      "learning_rate": 0.001,
      "loss": 2.4981,
      "step": 207792
    },
    {
      "epoch": 39.9,
      "learning_rate": 0.001,
      "loss": 2.5085,
      "step": 207804
    },
    {
      "epoch": 39.9,
      "learning_rate": 0.001,
      "loss": 2.4968,
      "step": 207816
    },
    {
      "epoch": 39.91,
      "learning_rate": 0.001,
      "loss": 2.512,
      "step": 207828
    },
    {
      "epoch": 39.91,
      "learning_rate": 0.001,
      "loss": 2.5072,
      "step": 207840
    },
    {
      "epoch": 39.91,
      "learning_rate": 0.001,
      "loss": 2.5055,
      "step": 207852
    },
    {
      "epoch": 39.91,
      "learning_rate": 0.001,
      "loss": 2.4934,
      "step": 207864
    },
    {
      "epoch": 39.91,
      "learning_rate": 0.001,
      "loss": 2.4991,
      "step": 207876
    },
    {
      "epoch": 39.92,
      "learning_rate": 0.001,
      "loss": 2.5069,
      "step": 207888
    },
    {
      "epoch": 39.92,
      "learning_rate": 0.001,
      "loss": 2.5074,
      "step": 207900
    },
    {
      "epoch": 39.92,
      "learning_rate": 0.001,
      "loss": 2.4977,
      "step": 207912
    },
    {
      "epoch": 39.92,
      "learning_rate": 0.001,
      "loss": 2.504,
      "step": 207924
    },
    {
      "epoch": 39.93,
      "learning_rate": 0.001,
      "loss": 2.488,
      "step": 207936
    },
    {
      "epoch": 39.93,
      "learning_rate": 0.001,
      "loss": 2.4991,
      "step": 207948
    },
    {
      "epoch": 39.93,
      "learning_rate": 0.001,
      "loss": 2.4975,
      "step": 207960
    },
    {
      "epoch": 39.93,
      "learning_rate": 0.001,
      "loss": 2.5077,
      "step": 207972
    },
    {
      "epoch": 39.94,
      "learning_rate": 0.001,
      "loss": 2.5031,
      "step": 207984
    },
    {
      "epoch": 39.94,
      "learning_rate": 0.001,
      "loss": 2.5033,
      "step": 207996
    },
    {
      "epoch": 39.94,
      "learning_rate": 0.001,
      "loss": 2.4879,
      "step": 208008
    },
    {
      "epoch": 39.94,
      "learning_rate": 0.001,
      "loss": 2.5076,
      "step": 208020
    },
    {
      "epoch": 39.94,
      "learning_rate": 0.001,
      "loss": 2.498,
      "step": 208032
    },
    {
      "epoch": 39.95,
      "learning_rate": 0.001,
      "loss": 2.5131,
      "step": 208044
    },
    {
      "epoch": 39.95,
      "learning_rate": 0.001,
      "loss": 2.5099,
      "step": 208056
    },
    {
      "epoch": 39.95,
      "learning_rate": 0.001,
      "loss": 2.5012,
      "step": 208068
    },
    {
      "epoch": 39.95,
      "learning_rate": 0.001,
      "loss": 2.5042,
      "step": 208080
    },
    {
      "epoch": 39.96,
      "learning_rate": 0.001,
      "loss": 2.5033,
      "step": 208092
    },
    {
      "epoch": 39.96,
      "learning_rate": 0.001,
      "loss": 2.5018,
      "step": 208104
    },
    {
      "epoch": 39.96,
      "learning_rate": 0.001,
      "loss": 2.507,
      "step": 208116
    },
    {
      "epoch": 39.96,
      "eval_ag_news_accuracy": 0.32915625,
      "eval_ag_news_bleu_score": 4.957941693897078,
      "eval_ag_news_bleu_score_sem": 0.15837617001652107,
      "eval_ag_news_emb_cos_sim": 0.82401442527771,
      "eval_ag_news_emb_cos_sim_sem": 0.006690185925277335,
      "eval_ag_news_emb_top1_equal": 0.2421875,
      "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.481919527053833,
      "eval_ag_news_n_ngrams_match_1": 14.496,
      "eval_ag_news_n_ngrams_match_2": 3.234,
      "eval_ag_news_n_ngrams_match_3": 0.91,
      "eval_ag_news_num_pred_words": 46.938,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.52208922911754,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35757241730470135,
      "eval_ag_news_runtime": 11.8374,
      "eval_ag_news_samples_per_second": 42.239,
      "eval_ag_news_steps_per_second": 0.084,
      "eval_ag_news_token_set_f1": 0.3621840493357217,
      "eval_ag_news_token_set_f1_sem": 0.004443586697038994,
      "eval_ag_news_token_set_precision": 0.34816432493038174,
      "eval_ag_news_token_set_recall": 0.3933903747258254,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 208125
    },
    {
      "epoch": 39.96,
      "eval_anthropic_toxic_prompts_accuracy": 0.1175,
      "eval_anthropic_toxic_prompts_bleu_score": 3.204275397146809,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12370260761442398,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.681277871131897,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00874392414496126,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2054481506347656,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.32,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.958,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.742,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.162,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.666551870701863,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2163057434799895,
      "eval_anthropic_toxic_prompts_runtime": 16.3233,
      "eval_anthropic_toxic_prompts_samples_per_second": 30.631,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.061,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36520158966222227,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0065147292234425174,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44653479021577364,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3343949383960443,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 208125
    },
    {
      "epoch": 39.96,
      "eval_arxiv_accuracy": 0.351625,
      "eval_arxiv_bleu_score": 4.428123532189055,
      "eval_arxiv_bleu_score_sem": 0.12908601435282482,
      "eval_arxiv_emb_cos_sim": 0.7822293639183044,
      "eval_arxiv_emb_cos_sim_sem": 0.006974802934726895,
      "eval_arxiv_emb_top1_equal": 0.3046875,
      "eval_arxiv_emb_top1_equal_sem": 0.04084279867618665,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3303310871124268,
      "eval_arxiv_n_ngrams_match_1": 15.374,
      "eval_arxiv_n_ngrams_match_2": 3.042,
      "eval_arxiv_n_ngrams_match_3": 0.698,
      "eval_arxiv_num_pred_words": 40.46,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.947593259568045,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36891843774496025,
      "eval_arxiv_runtime": 13.0162,
      "eval_arxiv_samples_per_second": 38.414,
      "eval_arxiv_steps_per_second": 0.077,
      "eval_arxiv_token_set_f1": 0.362821681666496,
      "eval_arxiv_token_set_f1_sem": 0.004411087348191074,
      "eval_arxiv_token_set_precision": 0.31337042526699593,
      "eval_arxiv_token_set_recall": 0.4524449360911324,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 208125
    },
    {
      "epoch": 39.96,
      "eval_python_code_alpaca_accuracy": 0.1635625,
      "eval_python_code_alpaca_bleu_score": 4.82214657008156,
      "eval_python_code_alpaca_bleu_score_sem": 0.15081900208830815,
      "eval_python_code_alpaca_emb_cos_sim": 0.7682631015777588,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007369671490069397,
      "eval_python_code_alpaca_emb_top1_equal": 0.1484375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8760225772857666,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.924,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.968,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.02,
      "eval_python_code_alpaca_num_pred_words": 42.676,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.74355900813017,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33868127528350833,
      "eval_python_code_alpaca_runtime": 10.896,
      "eval_python_code_alpaca_samples_per_second": 45.888,
      "eval_python_code_alpaca_steps_per_second": 0.092,
      "eval_python_code_alpaca_token_set_f1": 0.48473469986698553,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005543439296479784,
      "eval_python_code_alpaca_token_set_precision": 0.5427007918187946,
      "eval_python_code_alpaca_token_set_recall": 0.46240800856523556,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 208125
    },
    {
      "epoch": 39.96,
      "eval_wikibio_accuracy": 0.32965625,
      "eval_wikibio_bleu_score": 6.263466919901522,
      "eval_wikibio_bleu_score_sem": 0.22157059711157456,
      "eval_wikibio_emb_cos_sim": 0.7542101144790649,
      "eval_wikibio_emb_cos_sim_sem": 0.008448111878875994,
      "eval_wikibio_emb_top1_equal": 0.2578125,
      "eval_wikibio_emb_top1_equal_sem": 0.038815656435002115,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.624647855758667,
      "eval_wikibio_n_ngrams_match_1": 10.376,
      "eval_wikibio_n_ngrams_match_2": 3.536,
      "eval_wikibio_n_ngrams_match_3": 1.336,
      "eval_wikibio_num_pred_words": 36.55,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.51151137079693,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36463240139282665,
      "eval_wikibio_runtime": 10.7165,
      "eval_wikibio_samples_per_second": 46.657,
      "eval_wikibio_steps_per_second": 0.093,
      "eval_wikibio_token_set_f1": 0.3266759581809525,
      "eval_wikibio_token_set_f1_sem": 0.005431868781735417,
      "eval_wikibio_token_set_precision": 0.3381562381859853,
      "eval_wikibio_token_set_recall": 0.3308893163261011,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 208125
    },
    {
      "epoch": 39.96,
      "eval_nq_accuracy": 0.536375,
      "eval_nq_bleu_score": 12.16683357808994,
      "eval_nq_bleu_score_sem": 0.46993456627543373,
      "eval_nq_emb_cos_sim": 0.8329232931137085,
      "eval_nq_emb_cos_sim_sem": 0.007862013193671946,
      "eval_nq_emb_top1_equal": 0.328125,
      "eval_nq_emb_top1_equal_sem": 0.041664103776406315,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.12465238571167,
      "eval_nq_n_ngrams_match_1": 23.45,
      "eval_nq_n_ngrams_match_2": 8.714,
      "eval_nq_n_ngrams_match_3": 4.078,
      "eval_nq_num_pred_words": 49.314,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.369987455139494,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45513151392729906,
      "eval_nq_runtime": 15.7634,
      "eval_nq_samples_per_second": 31.719,
      "eval_nq_steps_per_second": 0.063,
      "eval_nq_token_set_f1": 0.4673853917081633,
      "eval_nq_token_set_f1_sem": 0.0050488571731012065,
      "eval_nq_token_set_precision": 0.4244986155749227,
      "eval_nq_token_set_recall": 0.5287762809407234,
      "eval_nq_true_num_tokens": 64.0,
      "step": 208125
    },
    {
      "epoch": 39.96,
      "learning_rate": 0.001,
      "loss": 2.5111,
      "step": 208128
    },
    {
      "epoch": 39.97,
      "learning_rate": 0.001,
      "loss": 2.4984,
      "step": 208140
    },
    {
      "epoch": 39.97,
      "learning_rate": 0.001,
      "loss": 2.5121,
      "step": 208152
    },
    {
      "epoch": 39.97,
      "learning_rate": 0.001,
      "loss": 2.4984,
      "step": 208164
    },
    {
      "epoch": 39.97,
      "learning_rate": 0.001,
      "loss": 2.5047,
      "step": 208176
    },
    {
      "epoch": 39.97,
      "learning_rate": 0.001,
      "loss": 2.5061,
      "step": 208188
    },
    {
      "epoch": 39.98,
      "learning_rate": 0.001,
      "loss": 2.509,
      "step": 208200
    },
    {
      "epoch": 39.98,
      "learning_rate": 0.001,
      "loss": 2.489,
      "step": 208212
    },
    {
      "epoch": 39.98,
      "learning_rate": 0.001,
      "loss": 2.4996,
      "step": 208224
    },
    {
      "epoch": 39.98,
      "learning_rate": 0.001,
      "loss": 2.501,
      "step": 208236
    },
    {
      "epoch": 39.99,
      "learning_rate": 0.001,
      "loss": 2.5011,
      "step": 208248
    },
    {
      "epoch": 39.99,
      "learning_rate": 0.001,
      "loss": 2.4998,
      "step": 208260
    },
    {
      "epoch": 39.99,
      "learning_rate": 0.001,
      "loss": 2.5021,
      "step": 208272
    },
    {
      "epoch": 39.99,
      "learning_rate": 0.001,
      "loss": 2.5105,
      "step": 208284
    },
    {
      "epoch": 40.0,
      "learning_rate": 0.001,
      "loss": 2.5113,
      "step": 208296
    },
    {
      "epoch": 40.0,
      "learning_rate": 0.001,
      "loss": 2.5038,
      "step": 208308
    },
    {
      "epoch": 40.0,
      "learning_rate": 0.001,
      "loss": 2.5078,
      "step": 208320
    },
    {
      "epoch": 40.0,
      "learning_rate": 0.001,
      "loss": 2.4925,
      "step": 208332
    },
    {
      "epoch": 40.0,
      "learning_rate": 0.001,
      "loss": 2.4829,
      "step": 208344
    },
    {
      "epoch": 40.01,
      "learning_rate": 0.001,
      "loss": 2.4909,
      "step": 208356
    },
    {
      "epoch": 40.01,
      "learning_rate": 0.001,
      "loss": 2.4872,
      "step": 208368
    },
    {
      "epoch": 40.01,
      "learning_rate": 0.001,
      "loss": 2.4833,
      "step": 208380
    },
    {
      "epoch": 40.01,
      "learning_rate": 0.001,
      "loss": 2.4857,
      "step": 208392
    },
    {
      "epoch": 40.02,
      "learning_rate": 0.001,
      "loss": 2.4834,
      "step": 208404
    },
    {
      "epoch": 40.02,
      "learning_rate": 0.001,
      "loss": 2.4881,
      "step": 208416
    },
    {
      "epoch": 40.02,
      "learning_rate": 0.001,
      "loss": 2.4887,
      "step": 208428
    },
    {
      "epoch": 40.02,
      "learning_rate": 0.001,
      "loss": 2.48,
      "step": 208440
    },
    {
      "epoch": 40.03,
      "learning_rate": 0.001,
      "loss": 2.4926,
      "step": 208452
    },
    {
      "epoch": 40.03,
      "learning_rate": 0.001,
      "loss": 2.4912,
      "step": 208464
    },
    {
      "epoch": 40.03,
      "learning_rate": 0.001,
      "loss": 2.484,
      "step": 208476
    },
    {
      "epoch": 40.03,
      "learning_rate": 0.001,
      "loss": 2.4929,
      "step": 208488
    },
    {
      "epoch": 40.03,
      "learning_rate": 0.001,
      "loss": 2.4869,
      "step": 208500
    },
    {
      "epoch": 40.04,
      "learning_rate": 0.001,
      "loss": 2.4808,
      "step": 208512
    },
    {
      "epoch": 40.04,
      "learning_rate": 0.001,
      "loss": 2.4849,
      "step": 208524
    },
    {
      "epoch": 40.04,
      "learning_rate": 0.001,
      "loss": 2.494,
      "step": 208536
    },
    {
      "epoch": 40.04,
      "learning_rate": 0.001,
      "loss": 2.493,
      "step": 208548
    },
    {
      "epoch": 40.05,
      "learning_rate": 0.001,
      "loss": 2.4947,
      "step": 208560
    },
    {
      "epoch": 40.05,
      "learning_rate": 0.001,
      "loss": 2.4966,
      "step": 208572
    },
    {
      "epoch": 40.05,
      "learning_rate": 0.001,
      "loss": 2.4982,
      "step": 208584
    },
    {
      "epoch": 40.05,
      "learning_rate": 0.001,
      "loss": 2.4981,
      "step": 208596
    },
    {
      "epoch": 40.06,
      "learning_rate": 0.001,
      "loss": 2.4866,
      "step": 208608
    },
    {
      "epoch": 40.06,
      "learning_rate": 0.001,
      "loss": 2.4872,
      "step": 208620
    },
    {
      "epoch": 40.06,
      "learning_rate": 0.001,
      "loss": 2.4903,
      "step": 208632
    },
    {
      "epoch": 40.06,
      "learning_rate": 0.001,
      "loss": 2.4858,
      "step": 208644
    },
    {
      "epoch": 40.06,
      "learning_rate": 0.001,
      "loss": 2.4941,
      "step": 208656
    },
    {
      "epoch": 40.07,
      "learning_rate": 0.001,
      "loss": 2.4916,
      "step": 208668
    },
    {
      "epoch": 40.07,
      "learning_rate": 0.001,
      "loss": 2.4971,
      "step": 208680
    },
    {
      "epoch": 40.07,
      "learning_rate": 0.001,
      "loss": 2.491,
      "step": 208692
    },
    {
      "epoch": 40.07,
      "learning_rate": 0.001,
      "loss": 2.4889,
      "step": 208704
    },
    {
      "epoch": 40.08,
      "learning_rate": 0.001,
      "loss": 2.4882,
      "step": 208716
    },
    {
      "epoch": 40.08,
      "learning_rate": 0.001,
      "loss": 2.4907,
      "step": 208728
    },
    {
      "epoch": 40.08,
      "learning_rate": 0.001,
      "loss": 2.4951,
      "step": 208740
    },
    {
      "epoch": 40.08,
      "eval_ag_news_accuracy": 0.33096875,
      "eval_ag_news_bleu_score": 4.935941367708634,
      "eval_ag_news_bleu_score_sem": 0.1538459294985076,
      "eval_ag_news_emb_cos_sim": 0.8213269114494324,
      "eval_ag_news_emb_cos_sim_sem": 0.0065055465088516805,
      "eval_ag_news_emb_top1_equal": 0.2578125,
      "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.483672857284546,
      "eval_ag_news_n_ngrams_match_1": 14.498,
      "eval_ag_news_n_ngrams_match_2": 3.258,
      "eval_ag_news_n_ngrams_match_3": 0.894,
      "eval_ag_news_num_pred_words": 46.594,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.57916120972262,
      "eval_ag_news_pred_num_tokens": 62.828125,
      "eval_ag_news_rouge_score": 0.3610071342974589,
      "eval_ag_news_runtime": 11.5752,
      "eval_ag_news_samples_per_second": 43.196,
      "eval_ag_news_steps_per_second": 0.086,
      "eval_ag_news_token_set_f1": 0.3621832274607168,
      "eval_ag_news_token_set_f1_sem": 0.0045705840055422714,
      "eval_ag_news_token_set_precision": 0.34763943650042556,
      "eval_ag_news_token_set_recall": 0.3930541701857484,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 208750
    },
    {
      "epoch": 40.08,
      "eval_anthropic_toxic_prompts_accuracy": 0.11525,
      "eval_anthropic_toxic_prompts_bleu_score": 3.097525101761242,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11663056039354772,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6789708137512207,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00862111020403404,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2116432189941406,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.234,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.892,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.7,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.18,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.81983716191698,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21516375380994998,
      "eval_anthropic_toxic_prompts_runtime": 17.3061,
      "eval_anthropic_toxic_prompts_samples_per_second": 28.892,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.058,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.354691382890654,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006530206880100251,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44011877541032673,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3234596627295621,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 208750
    },
    {
      "epoch": 40.08,
      "eval_arxiv_accuracy": 0.3536875,
      "eval_arxiv_bleu_score": 4.448229909673317,
      "eval_arxiv_bleu_score_sem": 0.130967657186042,
      "eval_arxiv_emb_cos_sim": 0.7863626480102539,
      "eval_arxiv_emb_cos_sim_sem": 0.006658712107607318,
      "eval_arxiv_emb_top1_equal": 0.3203125,
      "eval_arxiv_emb_top1_equal_sem": 0.041403754790620424,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.340484857559204,
      "eval_arxiv_n_ngrams_match_1": 15.592,
      "eval_arxiv_n_ngrams_match_2": 3.068,
      "eval_arxiv_n_ngrams_match_3": 0.672,
      "eval_arxiv_num_pred_words": 39.946,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.232812279814862,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.37596768707277856,
      "eval_arxiv_runtime": 11.5887,
      "eval_arxiv_samples_per_second": 43.145,
      "eval_arxiv_steps_per_second": 0.086,
      "eval_arxiv_token_set_f1": 0.36347388815320153,
      "eval_arxiv_token_set_f1_sem": 0.004275748206608021,
      "eval_arxiv_token_set_precision": 0.3164097050357384,
      "eval_arxiv_token_set_recall": 0.44445140774480296,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 208750
    },
    {
      "epoch": 40.08,
      "eval_python_code_alpaca_accuracy": 0.16396875,
      "eval_python_code_alpaca_bleu_score": 4.885110576187877,
      "eval_python_code_alpaca_bleu_score_sem": 0.15907377143308932,
      "eval_python_code_alpaca_emb_cos_sim": 0.7705926895141602,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008294998305136489,
      "eval_python_code_alpaca_emb_top1_equal": 0.15625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.860717535018921,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.988,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.104,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.092,
      "eval_python_code_alpaca_num_pred_words": 43.784,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.47406068980649,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33739495106213235,
      "eval_python_code_alpaca_runtime": 10.9683,
      "eval_python_code_alpaca_samples_per_second": 45.586,
      "eval_python_code_alpaca_steps_per_second": 0.091,
      "eval_python_code_alpaca_token_set_f1": 0.48477394076078506,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005571838075289238,
      "eval_python_code_alpaca_token_set_precision": 0.5453718842198467,
      "eval_python_code_alpaca_token_set_recall": 0.45541301161504594,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 208750
    },
    {
      "epoch": 40.08,
      "eval_wikibio_accuracy": 0.33278125,
      "eval_wikibio_bleu_score": 6.3016171814692585,
      "eval_wikibio_bleu_score_sem": 0.21304977920731905,
      "eval_wikibio_emb_cos_sim": 0.7512096166610718,
      "eval_wikibio_emb_cos_sim_sem": 0.009017273457879236,
      "eval_wikibio_emb_top1_equal": 0.1796875,
      "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6376986503601074,
      "eval_wikibio_n_ngrams_match_1": 10.122,
      "eval_wikibio_n_ngrams_match_2": 3.532,
      "eval_wikibio_n_ngrams_match_3": 1.308,
      "eval_wikibio_num_pred_words": 35.194,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 38.00427488451915,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36452884895941184,
      "eval_wikibio_runtime": 10.8683,
      "eval_wikibio_samples_per_second": 46.005,
      "eval_wikibio_steps_per_second": 0.092,
      "eval_wikibio_token_set_f1": 0.32566865170184894,
      "eval_wikibio_token_set_f1_sem": 0.005296164996467761,
      "eval_wikibio_token_set_precision": 0.33206117543770086,
      "eval_wikibio_token_set_recall": 0.3365955105087792,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 208750
    },
    {
      "epoch": 40.08,
      "eval_nq_accuracy": 0.53803125,
      "eval_nq_bleu_score": 12.16726096911051,
      "eval_nq_bleu_score_sem": 0.4802400977228546,
      "eval_nq_emb_cos_sim": 0.837855339050293,
      "eval_nq_emb_cos_sim_sem": 0.007137091350551616,
      "eval_nq_emb_top1_equal": 0.3203125,
      "eval_nq_emb_top1_equal_sem": 0.041403754790620424,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1221439838409424,
      "eval_nq_n_ngrams_match_1": 23.38,
      "eval_nq_n_ngrams_match_2": 8.702,
      "eval_nq_n_ngrams_match_3": 4.084,
      "eval_nq_num_pred_words": 48.842,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.349018473260617,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.455708917268529,
      "eval_nq_runtime": 14.7363,
      "eval_nq_samples_per_second": 33.93,
      "eval_nq_steps_per_second": 0.068,
      "eval_nq_token_set_f1": 0.46982223235919346,
      "eval_nq_token_set_f1_sem": 0.005105655711209726,
      "eval_nq_token_set_precision": 0.4263708186276958,
      "eval_nq_token_set_recall": 0.5329242638413485,
      "eval_nq_true_num_tokens": 64.0,
      "step": 208750
    },
    {
      "epoch": 40.08,
      "learning_rate": 0.001,
      "loss": 2.4833,
      "step": 208752
    },
    {
      "epoch": 40.09,
      "learning_rate": 0.001,
      "loss": 2.4887,
      "step": 208764
    },
    {
      "epoch": 40.09,
      "learning_rate": 0.001,
      "loss": 2.4933,
      "step": 208776
    },
    {
      "epoch": 40.09,
      "learning_rate": 0.001,
      "loss": 2.5015,
      "step": 208788
    },
    {
      "epoch": 40.09,
      "learning_rate": 0.001,
      "loss": 2.489,
      "step": 208800
    },
    {
      "epoch": 40.09,
      "learning_rate": 0.001,
      "loss": 2.4893,
      "step": 208812
    },
    {
      "epoch": 40.1,
      "learning_rate": 0.001,
      "loss": 2.4956,
      "step": 208824
    },
    {
      "epoch": 40.1,
      "learning_rate": 0.001,
      "loss": 2.4961,
      "step": 208836
    },
    {
      "epoch": 40.1,
      "learning_rate": 0.001,
      "loss": 2.4892,
      "step": 208848
    },
    {
      "epoch": 40.1,
      "learning_rate": 0.001,
      "loss": 2.4886,
      "step": 208860
    },
    {
      "epoch": 40.11,
      "learning_rate": 0.001,
      "loss": 2.4931,
      "step": 208872
    },
    {
      "epoch": 40.11,
      "learning_rate": 0.001,
      "loss": 2.4817,
      "step": 208884
    },
    {
      "epoch": 40.11,
      "learning_rate": 0.001,
      "loss": 2.5012,
      "step": 208896
    },
    {
      "epoch": 40.11,
      "learning_rate": 0.001,
      "loss": 2.4917,
      "step": 208908
    },
    {
      "epoch": 40.12,
      "learning_rate": 0.001,
      "loss": 2.4963,
      "step": 208920
    },
    {
      "epoch": 40.12,
      "learning_rate": 0.001,
      "loss": 2.4891,
      "step": 208932
    },
    {
      "epoch": 40.12,
      "learning_rate": 0.001,
      "loss": 2.4914,
      "step": 208944
    },
    {
      "epoch": 40.12,
      "learning_rate": 0.001,
      "loss": 2.4903,
      "step": 208956
    },
    {
      "epoch": 40.12,
      "learning_rate": 0.001,
      "loss": 2.4879,
      "step": 208968
    },
    {
      "epoch": 40.13,
      "learning_rate": 0.001,
      "loss": 2.4952,
      "step": 208980
    },
    {
      "epoch": 40.13,
      "learning_rate": 0.001,
      "loss": 2.4798,
      "step": 208992
    },
    {
      "epoch": 40.13,
      "learning_rate": 0.001,
      "loss": 2.486,
      "step": 209004
    },
    {
      "epoch": 40.13,
      "learning_rate": 0.001,
      "loss": 2.4872,
      "step": 209016
    },
    {
      "epoch": 40.14,
      "learning_rate": 0.001,
      "loss": 2.4923,
      "step": 209028
    },
    {
      "epoch": 40.14,
      "learning_rate": 0.001,
      "loss": 2.5015,
      "step": 209040
    },
    {
      "epoch": 40.14,
      "learning_rate": 0.001,
      "loss": 2.4986,
      "step": 209052
    },
    {
      "epoch": 40.14,
      "learning_rate": 0.001,
      "loss": 2.4891,
      "step": 209064
    },
    {
      "epoch": 40.15,
      "learning_rate": 0.001,
      "loss": 2.493,
      "step": 209076
    },
    {
      "epoch": 40.15,
      "learning_rate": 0.001,
      "loss": 2.4851,
      "step": 209088
    },
    {
      "epoch": 40.15,
      "learning_rate": 0.001,
      "loss": 2.4873,
      "step": 209100
    },
    {
      "epoch": 40.15,
      "learning_rate": 0.001,
      "loss": 2.4876,
      "step": 209112
    },
    {
      "epoch": 40.15,
      "learning_rate": 0.001,
      "loss": 2.4863,
      "step": 209124
    },
    {
      "epoch": 40.16,
      "learning_rate": 0.001,
      "loss": 2.4904,
      "step": 209136
    },
    {
      "epoch": 40.16,
      "learning_rate": 0.001,
      "loss": 2.4834,
      "step": 209148
    },
    {
      "epoch": 40.16,
      "learning_rate": 0.001,
      "loss": 2.4986,
      "step": 209160
    },
    {
      "epoch": 40.16,
      "learning_rate": 0.001,
      "loss": 2.4931,
      "step": 209172
    },
    {
      "epoch": 40.17,
      "learning_rate": 0.001,
      "loss": 2.4881,
      "step": 209184
    },
    {
      "epoch": 40.17,
      "learning_rate": 0.001,
      "loss": 2.4928,
      "step": 209196
    },
    {
      "epoch": 40.17,
      "learning_rate": 0.001,
      "loss": 2.493,
      "step": 209208
    },
    {
      "epoch": 40.17,
      "learning_rate": 0.001,
      "loss": 2.5013,
      "step": 209220
    },
    {
      "epoch": 40.18,
      "learning_rate": 0.001,
      "loss": 2.5006,
      "step": 209232
    },
    {
      "epoch": 40.18,
      "learning_rate": 0.001,
      "loss": 2.4961,
      "step": 209244
    },
    {
      "epoch": 40.18,
      "learning_rate": 0.001,
      "loss": 2.4886,
      "step": 209256
    },
    {
      "epoch": 40.18,
      "learning_rate": 0.001,
      "loss": 2.4916,
      "step": 209268
    },
    {
      "epoch": 40.18,
      "learning_rate": 0.001,
      "loss": 2.4919,
      "step": 209280
    },
    {
      "epoch": 40.19,
      "learning_rate": 0.001,
      "loss": 2.4939,
      "step": 209292
    },
    {
      "epoch": 40.19,
      "learning_rate": 0.001,
      "loss": 2.485,
      "step": 209304
    },
    {
      "epoch": 40.19,
      "learning_rate": 0.001,
      "loss": 2.4927,
      "step": 209316
    },
    {
      "epoch": 40.19,
      "learning_rate": 0.001,
      "loss": 2.4931,
      "step": 209328
    },
    {
      "epoch": 40.2,
      "learning_rate": 0.001,
      "loss": 2.4862,
      "step": 209340
    },
    {
      "epoch": 40.2,
      "learning_rate": 0.001,
      "loss": 2.4988,
      "step": 209352
    },
    {
      "epoch": 40.2,
      "learning_rate": 0.001,
      "loss": 2.49,
      "step": 209364
    },
    {
      "epoch": 40.2,
      "eval_ag_news_accuracy": 0.3309375,
      "eval_ag_news_bleu_score": 4.93318129896034,
      "eval_ag_news_bleu_score_sem": 0.14533783890757565,
      "eval_ag_news_emb_cos_sim": 0.8203567862510681,
      "eval_ag_news_emb_cos_sim_sem": 0.006500523131344789,
      "eval_ag_news_emb_top1_equal": 0.2734375,
      "eval_ag_news_emb_top1_equal_sem": 0.03955156411760461,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4776253700256348,
      "eval_ag_news_n_ngrams_match_1": 14.488,
      "eval_ag_news_n_ngrams_match_2": 3.296,
      "eval_ag_news_n_ngrams_match_3": 0.918,
      "eval_ag_news_num_pred_words": 47.072,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.38273369250156,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35990483215168867,
      "eval_ag_news_runtime": 12.1129,
      "eval_ag_news_samples_per_second": 41.278,
      "eval_ag_news_steps_per_second": 0.083,
      "eval_ag_news_token_set_f1": 0.3590367377856191,
      "eval_ag_news_token_set_f1_sem": 0.004419847666648293,
      "eval_ag_news_token_set_precision": 0.3464645656915929,
      "eval_ag_news_token_set_recall": 0.3877209631188465,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 209375
    },
    {
      "epoch": 40.2,
      "eval_anthropic_toxic_prompts_accuracy": 0.1156875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.194501195207348,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12205462381697112,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6905585527420044,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008911199059800427,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.20143723487854,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.276,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.958,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.73,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.008,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.567814555048678,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21425931687079672,
      "eval_anthropic_toxic_prompts_runtime": 11.5404,
      "eval_anthropic_toxic_prompts_samples_per_second": 43.326,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.087,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35961137061853843,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006596553207682877,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.43994764336799463,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3301284961525737,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 209375
    },
    {
      "epoch": 40.2,
      "eval_arxiv_accuracy": 0.35215625,
      "eval_arxiv_bleu_score": 4.452252721548929,
      "eval_arxiv_bleu_score_sem": 0.13454578809668796,
      "eval_arxiv_emb_cos_sim": 0.7796381711959839,
      "eval_arxiv_emb_cos_sim_sem": 0.00762268692421439,
      "eval_arxiv_emb_top1_equal": 0.3359375,
      "eval_arxiv_emb_top1_equal_sem": 0.04191137143408563,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3422701358795166,
      "eval_arxiv_n_ngrams_match_1": 15.41,
      "eval_arxiv_n_ngrams_match_2": 3.11,
      "eval_arxiv_n_ngrams_match_3": 0.722,
      "eval_arxiv_num_pred_words": 40.47,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.283260726359238,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3708061192118505,
      "eval_arxiv_runtime": 12.7394,
      "eval_arxiv_samples_per_second": 39.248,
      "eval_arxiv_steps_per_second": 0.078,
      "eval_arxiv_token_set_f1": 0.3624315697913239,
      "eval_arxiv_token_set_f1_sem": 0.004341473938293956,
      "eval_arxiv_token_set_precision": 0.3137251735981584,
      "eval_arxiv_token_set_recall": 0.44873390944587865,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 209375
    },
    {
      "epoch": 40.2,
      "eval_python_code_alpaca_accuracy": 0.16234375,
      "eval_python_code_alpaca_bleu_score": 4.58743821535871,
      "eval_python_code_alpaca_bleu_score_sem": 0.1477392500255441,
      "eval_python_code_alpaca_emb_cos_sim": 0.7528814077377319,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008492655829395944,
      "eval_python_code_alpaca_emb_top1_equal": 0.125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8829312324523926,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.81,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.876,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.0,
      "eval_python_code_alpaca_num_pred_words": 43.536,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.866567561323127,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3308213686440464,
      "eval_python_code_alpaca_runtime": 11.4652,
      "eval_python_code_alpaca_samples_per_second": 43.61,
      "eval_python_code_alpaca_steps_per_second": 0.087,
      "eval_python_code_alpaca_token_set_f1": 0.47938306180475776,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0053381083713815326,
      "eval_python_code_alpaca_token_set_precision": 0.5344018065093825,
      "eval_python_code_alpaca_token_set_recall": 0.457908204204466,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 209375
    },
    {
      "epoch": 40.2,
      "eval_wikibio_accuracy": 0.33221875,
      "eval_wikibio_bleu_score": 6.122986326382544,
      "eval_wikibio_bleu_score_sem": 0.2108694764063836,
      "eval_wikibio_emb_cos_sim": 0.745849609375,
      "eval_wikibio_emb_cos_sim_sem": 0.00949193258289794,
      "eval_wikibio_emb_top1_equal": 0.2890625,
      "eval_wikibio_emb_top1_equal_sem": 0.04022626667363519,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6151468753814697,
      "eval_wikibio_n_ngrams_match_1": 10.006,
      "eval_wikibio_n_ngrams_match_2": 3.364,
      "eval_wikibio_n_ngrams_match_3": 1.246,
      "eval_wikibio_num_pred_words": 35.648,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.15680294449252,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.356341779339303,
      "eval_wikibio_runtime": 12.0963,
      "eval_wikibio_samples_per_second": 41.335,
      "eval_wikibio_steps_per_second": 0.083,
      "eval_wikibio_token_set_f1": 0.3223301674895014,
      "eval_wikibio_token_set_f1_sem": 0.005406018528662601,
      "eval_wikibio_token_set_precision": 0.32741973151775566,
      "eval_wikibio_token_set_recall": 0.335332420786525,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 209375
    },
    {
      "epoch": 40.2,
      "eval_nq_accuracy": 0.5385,
      "eval_nq_bleu_score": 11.904968635578221,
      "eval_nq_bleu_score_sem": 0.4694043231357101,
      "eval_nq_emb_cos_sim": 0.8366987705230713,
      "eval_nq_emb_cos_sim_sem": 0.006706703538626558,
      "eval_nq_emb_top1_equal": 0.34375,
      "eval_nq_emb_top1_equal_sem": 0.04214578430296913,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1219308376312256,
      "eval_nq_n_ngrams_match_1": 23.432,
      "eval_nq_n_ngrams_match_2": 8.68,
      "eval_nq_n_ngrams_match_3": 3.964,
      "eval_nq_num_pred_words": 49.208,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.347239101258122,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4552547819811946,
      "eval_nq_runtime": 12.2671,
      "eval_nq_samples_per_second": 40.759,
      "eval_nq_steps_per_second": 0.082,
      "eval_nq_token_set_f1": 0.4698852687233232,
      "eval_nq_token_set_f1_sem": 0.004933105360904908,
      "eval_nq_token_set_precision": 0.42628357462327926,
      "eval_nq_token_set_recall": 0.5323010559740611,
      "eval_nq_true_num_tokens": 64.0,
      "step": 209375
    },
    {
      "epoch": 40.2,
      "learning_rate": 0.001,
      "loss": 2.4962,
      "step": 209376
    },
    {
      "epoch": 40.21,
      "learning_rate": 0.001,
      "loss": 2.4984,
      "step": 209388
    },
    {
      "epoch": 40.21,
      "learning_rate": 0.001,
      "loss": 2.5088,
      "step": 209400
    },
    {
      "epoch": 40.21,
      "learning_rate": 0.001,
      "loss": 2.4959,
      "step": 209412
    },
    {
      "epoch": 40.21,
      "learning_rate": 0.001,
      "loss": 2.4842,
      "step": 209424
    },
    {
      "epoch": 40.21,
      "learning_rate": 0.001,
      "loss": 2.4953,
      "step": 209436
    },
    {
      "epoch": 40.22,
      "learning_rate": 0.001,
      "loss": 2.4906,
      "step": 209448
    },
    {
      "epoch": 40.22,
      "learning_rate": 0.001,
      "loss": 2.4912,
      "step": 209460
    },
    {
      "epoch": 40.22,
      "learning_rate": 0.001,
      "loss": 2.4849,
      "step": 209472
    },
    {
      "epoch": 40.22,
      "learning_rate": 0.001,
      "loss": 2.5075,
      "step": 209484
    },
    {
      "epoch": 40.23,
      "learning_rate": 0.001,
      "loss": 2.4945,
      "step": 209496
    },
    {
      "epoch": 40.23,
      "learning_rate": 0.001,
      "loss": 2.492,
      "step": 209508
    },
    {
      "epoch": 40.23,
      "learning_rate": 0.001,
      "loss": 2.4938,
      "step": 209520
    },
    {
      "epoch": 40.23,
      "learning_rate": 0.001,
      "loss": 2.4954,
      "step": 209532
    },
    {
      "epoch": 40.24,
      "learning_rate": 0.001,
      "loss": 2.4865,
      "step": 209544
    },
    {
      "epoch": 40.24,
      "learning_rate": 0.001,
      "loss": 2.4852,
      "step": 209556
    },
    {
      "epoch": 40.24,
      "learning_rate": 0.001,
      "loss": 2.4981,
      "step": 209568
    },
    {
      "epoch": 40.24,
      "learning_rate": 0.001,
      "loss": 2.4969,
      "step": 209580
    },
    {
      "epoch": 40.24,
      "learning_rate": 0.001,
      "loss": 2.4839,
      "step": 209592
    },
    {
      "epoch": 40.25,
      "learning_rate": 0.001,
      "loss": 2.5008,
      "step": 209604
    },
    {
      "epoch": 40.25,
      "learning_rate": 0.001,
      "loss": 2.4896,
      "step": 209616
    },
    {
      "epoch": 40.25,
      "learning_rate": 0.001,
      "loss": 2.494,
      "step": 209628
    },
    {
      "epoch": 40.25,
      "learning_rate": 0.001,
      "loss": 2.4956,
      "step": 209640
    },
    {
      "epoch": 40.26,
      "learning_rate": 0.001,
      "loss": 2.4879,
      "step": 209652
    },
    {
      "epoch": 40.26,
      "learning_rate": 0.001,
      "loss": 2.4991,
      "step": 209664
    },
    {
      "epoch": 40.26,
      "learning_rate": 0.001,
      "loss": 2.5009,
      "step": 209676
    },
    {
      "epoch": 40.26,
      "learning_rate": 0.001,
      "loss": 2.4913,
      "step": 209688
    },
    {
      "epoch": 40.26,
      "learning_rate": 0.001,
      "loss": 2.4911,
      "step": 209700
    },
    {
      "epoch": 40.27,
      "learning_rate": 0.001,
      "loss": 2.5031,
      "step": 209712
    },
    {
      "epoch": 40.27,
      "learning_rate": 0.001,
      "loss": 2.481,
      "step": 209724
    },
    {
      "epoch": 40.27,
      "learning_rate": 0.001,
      "loss": 2.4987,
      "step": 209736
    },
    {
      "epoch": 40.27,
      "learning_rate": 0.001,
      "loss": 2.501,
      "step": 209748
    },
    {
      "epoch": 40.28,
      "learning_rate": 0.001,
      "loss": 2.4854,
      "step": 209760
    },
    {
      "epoch": 40.28,
      "learning_rate": 0.001,
      "loss": 2.4991,
      "step": 209772
    },
    {
      "epoch": 40.28,
      "learning_rate": 0.001,
      "loss": 2.4913,
      "step": 209784
    },
    {
      "epoch": 40.28,
      "learning_rate": 0.001,
      "loss": 2.5034,
      "step": 209796
    },
    {
      "epoch": 40.29,
      "learning_rate": 0.001,
      "loss": 2.4982,
      "step": 209808
    },
    {
      "epoch": 40.29,
      "learning_rate": 0.001,
      "loss": 2.5002,
      "step": 209820
    },
    {
      "epoch": 40.29,
      "learning_rate": 0.001,
      "loss": 2.4911,
      "step": 209832
    },
    {
      "epoch": 40.29,
      "learning_rate": 0.001,
      "loss": 2.4936,
      "step": 209844
    },
    {
      "epoch": 40.29,
      "learning_rate": 0.001,
      "loss": 2.4979,
      "step": 209856
    },
    {
      "epoch": 40.3,
      "learning_rate": 0.001,
      "loss": 2.4885,
      "step": 209868
    },
    {
      "epoch": 40.3,
      "learning_rate": 0.001,
      "loss": 2.4951,
      "step": 209880
    },
    {
      "epoch": 40.3,
      "learning_rate": 0.001,
      "loss": 2.48,
      "step": 209892
    },
    {
      "epoch": 40.3,
      "learning_rate": 0.001,
      "loss": 2.4951,
      "step": 209904
    },
    {
      "epoch": 40.31,
      "learning_rate": 0.001,
      "loss": 2.5018,
      "step": 209916
    },
    {
      "epoch": 40.31,
      "learning_rate": 0.001,
      "loss": 2.4855,
      "step": 209928
    },
    {
      "epoch": 40.31,
      "learning_rate": 0.001,
      "loss": 2.4949,
      "step": 209940
    },
    {
      "epoch": 40.31,
      "learning_rate": 0.001,
      "loss": 2.489,
      "step": 209952
    },
    {
      "epoch": 40.32,
      "learning_rate": 0.001,
      "loss": 2.4968,
      "step": 209964
    },
    {
      "epoch": 40.32,
      "learning_rate": 0.001,
      "loss": 2.4877,
      "step": 209976
    },
    {
      "epoch": 40.32,
      "learning_rate": 0.001,
      "loss": 2.4998,
      "step": 209988
    },
    {
      "epoch": 40.32,
      "learning_rate": 0.001,
      "loss": 2.4889,
      "step": 210000
    },
    {
      "epoch": 40.32,
      "eval_ag_news_accuracy": 0.32790625,
      "eval_ag_news_bleu_score": 4.80583148150224,
      "eval_ag_news_bleu_score_sem": 0.14883307171151608,
      "eval_ag_news_emb_cos_sim": 0.8192859292030334,
      "eval_ag_news_emb_cos_sim_sem": 0.006844483562042194,
      "eval_ag_news_emb_top1_equal": 0.296875,
      "eval_ag_news_emb_top1_equal_sem": 0.04054163310179599,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4955286979675293,
      "eval_ag_news_n_ngrams_match_1": 14.352,
      "eval_ag_news_n_ngrams_match_2": 3.178,
      "eval_ag_news_n_ngrams_match_3": 0.862,
      "eval_ag_news_num_pred_words": 46.744,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.96771330924418,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3562794744503931,
      "eval_ag_news_runtime": 11.7802,
      "eval_ag_news_samples_per_second": 42.444,
      "eval_ag_news_steps_per_second": 0.085,
      "eval_ag_news_token_set_f1": 0.35605547466433163,
      "eval_ag_news_token_set_f1_sem": 0.00449296281684269,
      "eval_ag_news_token_set_precision": 0.34346409945266354,
      "eval_ag_news_token_set_recall": 0.3853818908189327,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 210000
    },
    {
      "epoch": 40.32,
      "eval_anthropic_toxic_prompts_accuracy": 0.1156875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.343361252546647,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.13079115699973715,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6850472092628479,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008981338001167634,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2146494388580322,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.314,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.012,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.812,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.922,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.894563314756958,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.22100386270129746,
      "eval_anthropic_toxic_prompts_runtime": 11.1469,
      "eval_anthropic_toxic_prompts_samples_per_second": 44.856,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.09,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3556333968218707,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006553275851375397,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4440497811808492,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3227870203113184,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 210000
    },
    {
      "epoch": 40.32,
      "eval_arxiv_accuracy": 0.351375,
      "eval_arxiv_bleu_score": 4.426529597736935,
      "eval_arxiv_bleu_score_sem": 0.12933387731600798,
      "eval_arxiv_emb_cos_sim": 0.7829951047897339,
      "eval_arxiv_emb_cos_sim_sem": 0.006709320594501281,
      "eval_arxiv_emb_top1_equal": 0.2890625,
      "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.35552978515625,
      "eval_arxiv_n_ngrams_match_1": 15.472,
      "eval_arxiv_n_ngrams_match_2": 3.062,
      "eval_arxiv_n_ngrams_match_3": 0.684,
      "eval_arxiv_num_pred_words": 40.708,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.660784227322647,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.37010746942971606,
      "eval_arxiv_runtime": 12.8819,
      "eval_arxiv_samples_per_second": 38.814,
      "eval_arxiv_steps_per_second": 0.078,
      "eval_arxiv_token_set_f1": 0.36062436532149145,
      "eval_arxiv_token_set_f1_sem": 0.004291935635760588,
      "eval_arxiv_token_set_precision": 0.3148144243616023,
      "eval_arxiv_token_set_recall": 0.43898269360850617,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 210000
    },
    {
      "epoch": 40.32,
      "eval_python_code_alpaca_accuracy": 0.1619375,
      "eval_python_code_alpaca_bleu_score": 4.8255838334559575,
      "eval_python_code_alpaca_bleu_score_sem": 0.1555355140434654,
      "eval_python_code_alpaca_emb_cos_sim": 0.7616056203842163,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009041864168351817,
      "eval_python_code_alpaca_emb_top1_equal": 0.109375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8678431510925293,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.03,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.062,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.042,
      "eval_python_code_alpaca_num_pred_words": 43.044,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.599018810603624,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3436341632969879,
      "eval_python_code_alpaca_runtime": 11.2051,
      "eval_python_code_alpaca_samples_per_second": 44.623,
      "eval_python_code_alpaca_steps_per_second": 0.089,
      "eval_python_code_alpaca_token_set_f1": 0.4795116049501576,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005590784476489017,
      "eval_python_code_alpaca_token_set_precision": 0.5465008965189574,
      "eval_python_code_alpaca_token_set_recall": 0.4483892761876059,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 210000
    },
    {
      "epoch": 40.32,
      "eval_wikibio_accuracy": 0.3320625,
      "eval_wikibio_bleu_score": 6.137502554745872,
      "eval_wikibio_bleu_score_sem": 0.21732014561507812,
      "eval_wikibio_emb_cos_sim": 0.7399588823318481,
      "eval_wikibio_emb_cos_sim_sem": 0.009743779100541262,
      "eval_wikibio_emb_top1_equal": 0.1796875,
      "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.634054660797119,
      "eval_wikibio_n_ngrams_match_1": 10.158,
      "eval_wikibio_n_ngrams_match_2": 3.422,
      "eval_wikibio_n_ngrams_match_3": 1.254,
      "eval_wikibio_num_pred_words": 35.796,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.8660397202038,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.35955470888347896,
      "eval_wikibio_runtime": 11.2374,
      "eval_wikibio_samples_per_second": 44.494,
      "eval_wikibio_steps_per_second": 0.089,
      "eval_wikibio_token_set_f1": 0.32491266562385956,
      "eval_wikibio_token_set_f1_sem": 0.005370835752497085,
      "eval_wikibio_token_set_precision": 0.33108912271739827,
      "eval_wikibio_token_set_recall": 0.3356568324562391,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 210000
    },
    {
      "epoch": 40.32,
      "eval_nq_accuracy": 0.5374375,
      "eval_nq_bleu_score": 12.006375269684451,
      "eval_nq_bleu_score_sem": 0.47693535751820915,
      "eval_nq_emb_cos_sim": 0.8355339765548706,
      "eval_nq_emb_cos_sim_sem": 0.007053370587251773,
      "eval_nq_emb_top1_equal": 0.2734375,
      "eval_nq_emb_top1_equal_sem": 0.03955156411760461,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1234610080718994,
      "eval_nq_n_ngrams_match_1": 23.29,
      "eval_nq_n_ngrams_match_2": 8.666,
      "eval_nq_n_ngrams_match_3": 3.988,
      "eval_nq_num_pred_words": 48.744,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.360021576981262,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4538973253726808,
      "eval_nq_runtime": 11.7989,
      "eval_nq_samples_per_second": 42.377,
      "eval_nq_steps_per_second": 0.085,
      "eval_nq_token_set_f1": 0.46452968421213203,
      "eval_nq_token_set_f1_sem": 0.004976013658556225,
      "eval_nq_token_set_precision": 0.42438179938582704,
      "eval_nq_token_set_recall": 0.5202218240750814,
      "eval_nq_true_num_tokens": 64.0,
      "step": 210000
    },
    {
      "epoch": 40.32,
      "learning_rate": 0.001,
      "loss": 2.495,
      "step": 210012
    },
    {
      "epoch": 40.33,
      "learning_rate": 0.001,
      "loss": 2.4981,
      "step": 210024
    },
    {
      "epoch": 40.33,
      "learning_rate": 0.001,
      "loss": 2.4994,
      "step": 210036
    },
    {
      "epoch": 40.33,
      "learning_rate": 0.001,
      "loss": 2.4988,
      "step": 210048
    },
    {
      "epoch": 40.33,
      "learning_rate": 0.001,
      "loss": 2.4969,
      "step": 210060
    },
    {
      "epoch": 40.34,
      "learning_rate": 0.001,
      "loss": 2.4906,
      "step": 210072
    },
    {
      "epoch": 40.34,
      "learning_rate": 0.001,
      "loss": 2.4912,
      "step": 210084
    },
    {
      "epoch": 40.34,
      "learning_rate": 0.001,
      "loss": 2.4957,
      "step": 210096
    },
    {
      "epoch": 40.34,
      "learning_rate": 0.001,
      "loss": 2.4957,
      "step": 210108
    },
    {
      "epoch": 40.35,
      "learning_rate": 0.001,
      "loss": 2.4965,
      "step": 210120
    },
    {
      "epoch": 40.35,
      "learning_rate": 0.001,
      "loss": 2.4946,
      "step": 210132
    },
    {
      "epoch": 40.35,
      "learning_rate": 0.001,
      "loss": 2.497,
      "step": 210144
    },
    {
      "epoch": 40.35,
      "learning_rate": 0.001,
      "loss": 2.4925,
      "step": 210156
    },
    {
      "epoch": 40.35,
      "learning_rate": 0.001,
      "loss": 2.4966,
      "step": 210168
    },
    {
      "epoch": 40.36,
      "learning_rate": 0.001,
      "loss": 2.5001,
      "step": 210180
    },
    {
      "epoch": 40.36,
      "learning_rate": 0.001,
      "loss": 2.4943,
      "step": 210192
    },
    {
      "epoch": 40.36,
      "learning_rate": 0.001,
      "loss": 2.4923,
      "step": 210204
    },
    {
      "epoch": 40.36,
      "learning_rate": 0.001,
      "loss": 2.4911,
      "step": 210216
    },
    {
      "epoch": 40.37,
      "learning_rate": 0.001,
      "loss": 2.4894,
      "step": 210228
    },
    {
      "epoch": 40.37,
      "learning_rate": 0.001,
      "loss": 2.4941,
      "step": 210240
    },
    {
      "epoch": 40.37,
      "learning_rate": 0.001,
      "loss": 2.4959,
      "step": 210252
    },
    {
      "epoch": 40.37,
      "learning_rate": 0.001,
      "loss": 2.4988,
      "step": 210264
    },
    {
      "epoch": 40.38,
      "learning_rate": 0.001,
      "loss": 2.4937,
      "step": 210276
    },
    {
      "epoch": 40.38,
      "learning_rate": 0.001,
      "loss": 2.5067,
      "step": 210288
    },
    {
      "epoch": 40.38,
      "learning_rate": 0.001,
      "loss": 2.4982,
      "step": 210300
    },
    {
      "epoch": 40.38,
      "learning_rate": 0.001,
      "loss": 2.4943,
      "step": 210312
    },
    {
      "epoch": 40.38,
      "learning_rate": 0.001,
      "loss": 2.4969,
      "step": 210324
    },
    {
      "epoch": 40.39,
      "learning_rate": 0.001,
      "loss": 2.5031,
      "step": 210336
    },
    {
      "epoch": 40.39,
      "learning_rate": 0.001,
      "loss": 2.5006,
      "step": 210348
    },
    {
      "epoch": 40.39,
      "learning_rate": 0.001,
      "loss": 2.4944,
      "step": 210360
    },
    {
      "epoch": 40.39,
      "learning_rate": 0.001,
      "loss": 2.4933,
      "step": 210372
    },
    {
      "epoch": 40.4,
      "learning_rate": 0.001,
      "loss": 2.4974,
      "step": 210384
    },
    {
      "epoch": 40.4,
      "learning_rate": 0.001,
      "loss": 2.5061,
      "step": 210396
    },
    {
      "epoch": 40.4,
      "learning_rate": 0.001,
      "loss": 2.4884,
      "step": 210408
    },
    {
      "epoch": 40.4,
      "learning_rate": 0.001,
      "loss": 2.4967,
      "step": 210420
    },
    {
      "epoch": 40.41,
      "learning_rate": 0.001,
      "loss": 2.4899,
      "step": 210432
    },
    {
      "epoch": 40.41,
      "learning_rate": 0.001,
      "loss": 2.5007,
      "step": 210444
    },
    {
      "epoch": 40.41,
      "learning_rate": 0.001,
      "loss": 2.501,
      "step": 210456
    },
    {
      "epoch": 40.41,
      "learning_rate": 0.001,
      "loss": 2.4941,
      "step": 210468
    },
    {
      "epoch": 40.41,
      "learning_rate": 0.001,
      "loss": 2.4993,
      "step": 210480
    },
    {
      "epoch": 40.42,
      "learning_rate": 0.001,
      "loss": 2.4834,
      "step": 210492
    },
    {
      "epoch": 40.42,
      "learning_rate": 0.001,
      "loss": 2.4981,
      "step": 210504
    },
    {
      "epoch": 40.42,
      "learning_rate": 0.001,
      "loss": 2.4917,
      "step": 210516
    },
    {
      "epoch": 40.42,
      "learning_rate": 0.001,
      "loss": 2.5036,
      "step": 210528
    },
    {
      "epoch": 40.43,
      "learning_rate": 0.001,
      "loss": 2.4883,
      "step": 210540
    },
    {
      "epoch": 40.43,
      "learning_rate": 0.001,
      "loss": 2.5039,
      "step": 210552
    },
    {
      "epoch": 40.43,
      "learning_rate": 0.001,
      "loss": 2.4921,
      "step": 210564
    },
    {
      "epoch": 40.43,
      "learning_rate": 0.001,
      "loss": 2.5046,
      "step": 210576
    },
    {
      "epoch": 40.44,
      "learning_rate": 0.001,
      "loss": 2.4961,
      "step": 210588
    },
    {
      "epoch": 40.44,
      "learning_rate": 0.001,
      "loss": 2.4977,
      "step": 210600
    },
    {
      "epoch": 40.44,
      "learning_rate": 0.001,
      "loss": 2.4921,
      "step": 210612
    },
    {
      "epoch": 40.44,
      "learning_rate": 0.001,
      "loss": 2.4956,
      "step": 210624
    },
    {
      "epoch": 40.44,
      "eval_ag_news_accuracy": 0.32871875,
      "eval_ag_news_bleu_score": 4.876870398780209,
      "eval_ag_news_bleu_score_sem": 0.16241775785277915,
      "eval_ag_news_emb_cos_sim": 0.8137305974960327,
      "eval_ag_news_emb_cos_sim_sem": 0.00768045247282111,
      "eval_ag_news_emb_top1_equal": 0.25,
      "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4638900756835938,
      "eval_ag_news_n_ngrams_match_1": 14.358,
      "eval_ag_news_n_ngrams_match_2": 3.22,
      "eval_ag_news_n_ngrams_match_3": 0.864,
      "eval_ag_news_num_pred_words": 46.612,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.940988012002432,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35835874044039284,
      "eval_ag_news_runtime": 11.7547,
      "eval_ag_news_samples_per_second": 42.536,
      "eval_ag_news_steps_per_second": 0.085,
      "eval_ag_news_token_set_f1": 0.35823911175592515,
      "eval_ag_news_token_set_f1_sem": 0.004558974469125087,
      "eval_ag_news_token_set_precision": 0.3435106189570707,
      "eval_ag_news_token_set_recall": 0.3883246332005096,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 210625
    },
    {
      "epoch": 40.44,
      "eval_anthropic_toxic_prompts_accuracy": 0.1143125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1847753912167023,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1245610900900696,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6829575896263123,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008382243598643055,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.201392889022827,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.216,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.902,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.714,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.05,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.566725098445858,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2157115303601157,
      "eval_anthropic_toxic_prompts_runtime": 10.9879,
      "eval_anthropic_toxic_prompts_samples_per_second": 45.504,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.091,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3595051590713531,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006581567480878276,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.43888456085591127,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3318332029080335,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 210625
    },
    {
      "epoch": 40.44,
      "eval_arxiv_accuracy": 0.35271875,
      "eval_arxiv_bleu_score": 4.338077270623405,
      "eval_arxiv_bleu_score_sem": 0.12700943965583625,
      "eval_arxiv_emb_cos_sim": 0.7769384384155273,
      "eval_arxiv_emb_cos_sim_sem": 0.006924786479345607,
      "eval_arxiv_emb_top1_equal": 0.2734375,
      "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3294804096221924,
      "eval_arxiv_n_ngrams_match_1": 15.314,
      "eval_arxiv_n_ngrams_match_2": 2.94,
      "eval_arxiv_n_ngrams_match_3": 0.672,
      "eval_arxiv_num_pred_words": 40.278,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.923828980377685,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36808475488458736,
      "eval_arxiv_runtime": 12.9984,
      "eval_arxiv_samples_per_second": 38.466,
      "eval_arxiv_steps_per_second": 0.077,
      "eval_arxiv_token_set_f1": 0.3600837828079317,
      "eval_arxiv_token_set_f1_sem": 0.0042502580140088885,
      "eval_arxiv_token_set_precision": 0.31187614693706206,
      "eval_arxiv_token_set_recall": 0.44790754064248495,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 210625
    },
    {
      "epoch": 40.44,
      "eval_python_code_alpaca_accuracy": 0.16315625,
      "eval_python_code_alpaca_bleu_score": 4.907965020543179,
      "eval_python_code_alpaca_bleu_score_sem": 0.15581937041705646,
      "eval_python_code_alpaca_emb_cos_sim": 0.7710789442062378,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007235904636885408,
      "eval_python_code_alpaca_emb_top1_equal": 0.1328125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.850116729736328,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.08,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.158,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.126,
      "eval_python_code_alpaca_num_pred_words": 44.12,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.28979995656837,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3388770115783863,
      "eval_python_code_alpaca_runtime": 11.5358,
      "eval_python_code_alpaca_samples_per_second": 43.343,
      "eval_python_code_alpaca_steps_per_second": 0.087,
      "eval_python_code_alpaca_token_set_f1": 0.4852917335105785,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005660214690095791,
      "eval_python_code_alpaca_token_set_precision": 0.5487585492618948,
      "eval_python_code_alpaca_token_set_recall": 0.4561044806844883,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 210625
    },
    {
      "epoch": 40.44,
      "eval_wikibio_accuracy": 0.332,
      "eval_wikibio_bleu_score": 5.828536278432289,
      "eval_wikibio_bleu_score_sem": 0.22581346194596846,
      "eval_wikibio_emb_cos_sim": 0.7287882566452026,
      "eval_wikibio_emb_cos_sim_sem": 0.010525572188826472,
      "eval_wikibio_emb_top1_equal": 0.21875,
      "eval_wikibio_emb_top1_equal_sem": 0.03668319712192295,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6336963176727295,
      "eval_wikibio_n_ngrams_match_1": 9.594,
      "eval_wikibio_n_ngrams_match_2": 3.22,
      "eval_wikibio_n_ngrams_match_3": 1.24,
      "eval_wikibio_num_pred_words": 34.668,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.852473116117025,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3414386348689038,
      "eval_wikibio_runtime": 11.1153,
      "eval_wikibio_samples_per_second": 44.983,
      "eval_wikibio_steps_per_second": 0.09,
      "eval_wikibio_token_set_f1": 0.30615204940000623,
      "eval_wikibio_token_set_f1_sem": 0.006072646876585744,
      "eval_wikibio_token_set_precision": 0.312479302205344,
      "eval_wikibio_token_set_recall": 0.32157828067846506,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 210625
    },
    {
      "epoch": 40.44,
      "eval_nq_accuracy": 0.53746875,
      "eval_nq_bleu_score": 12.100714747366794,
      "eval_nq_bleu_score_sem": 0.4895064940662709,
      "eval_nq_emb_cos_sim": 0.8344129323959351,
      "eval_nq_emb_cos_sim_sem": 0.007559017524093605,
      "eval_nq_emb_top1_equal": 0.3203125,
      "eval_nq_emb_top1_equal_sem": 0.041403754790620424,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1229686737060547,
      "eval_nq_n_ngrams_match_1": 23.416,
      "eval_nq_n_ngrams_match_2": 8.66,
      "eval_nq_n_ngrams_match_3": 4.064,
      "eval_nq_num_pred_words": 49.138,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.355906664099342,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.453806256172518,
      "eval_nq_runtime": 13.3251,
      "eval_nq_samples_per_second": 37.523,
      "eval_nq_steps_per_second": 0.075,
      "eval_nq_token_set_f1": 0.46764387993165013,
      "eval_nq_token_set_f1_sem": 0.005044550602033417,
      "eval_nq_token_set_precision": 0.426408823021345,
      "eval_nq_token_set_recall": 0.5270325611115051,
      "eval_nq_true_num_tokens": 64.0,
      "step": 210625
    },
    {
      "epoch": 40.44,
      "learning_rate": 0.001,
      "loss": 2.492,
      "step": 210636
    },
    {
      "epoch": 40.45,
      "learning_rate": 0.001,
      "loss": 2.4994,
      "step": 210648
    },
    {
      "epoch": 40.45,
      "learning_rate": 0.001,
      "loss": 2.4972,
      "step": 210660
    },
    {
      "epoch": 40.45,
      "learning_rate": 0.001,
      "loss": 2.4869,
      "step": 210672
    },
    {
      "epoch": 40.45,
      "learning_rate": 0.001,
      "loss": 2.4948,
      "step": 210684
    },
    {
      "epoch": 40.46,
      "learning_rate": 0.001,
      "loss": 2.4881,
      "step": 210696
    },
    {
      "epoch": 40.46,
      "learning_rate": 0.001,
      "loss": 2.5013,
      "step": 210708
    },
    {
      "epoch": 40.46,
      "learning_rate": 0.001,
      "loss": 2.4981,
      "step": 210720
    },
    {
      "epoch": 40.46,
      "learning_rate": 0.001,
      "loss": 2.4971,
      "step": 210732
    },
    {
      "epoch": 40.47,
      "learning_rate": 0.001,
      "loss": 2.4905,
      "step": 210744
    },
    {
      "epoch": 40.47,
      "learning_rate": 0.001,
      "loss": 2.4942,
      "step": 210756
    },
    {
      "epoch": 40.47,
      "learning_rate": 0.001,
      "loss": 2.4934,
      "step": 210768
    },
    {
      "epoch": 40.47,
      "learning_rate": 0.001,
      "loss": 2.5043,
      "step": 210780
    },
    {
      "epoch": 40.47,
      "learning_rate": 0.001,
      "loss": 2.4997,
      "step": 210792
    },
    {
      "epoch": 40.48,
      "learning_rate": 0.001,
      "loss": 2.4995,
      "step": 210804
    },
    {
      "epoch": 40.48,
      "learning_rate": 0.001,
      "loss": 2.4828,
      "step": 210816
    },
    {
      "epoch": 40.48,
      "learning_rate": 0.001,
      "loss": 2.4928,
      "step": 210828
    },
    {
      "epoch": 40.48,
      "learning_rate": 0.001,
      "loss": 2.5021,
      "step": 210840
    },
    {
      "epoch": 40.49,
      "learning_rate": 0.001,
      "loss": 2.4906,
      "step": 210852
    },
    {
      "epoch": 40.49,
      "learning_rate": 0.001,
      "loss": 2.4952,
      "step": 210864
    },
    {
      "epoch": 40.49,
      "learning_rate": 0.001,
      "loss": 2.5012,
      "step": 210876
    },
    {
      "epoch": 40.49,
      "learning_rate": 0.001,
      "loss": 2.4972,
      "step": 210888
    },
    {
      "epoch": 40.5,
      "learning_rate": 0.001,
      "loss": 2.5052,
      "step": 210900
    },
    {
      "epoch": 40.5,
      "learning_rate": 0.001,
      "loss": 2.4957,
      "step": 210912
    },
    {
      "epoch": 40.5,
      "learning_rate": 0.001,
      "loss": 2.4983,
      "step": 210924
    },
    {
      "epoch": 40.5,
      "learning_rate": 0.001,
      "loss": 2.4913,
      "step": 210936
    },
    {
      "epoch": 40.5,
      "learning_rate": 0.001,
      "loss": 2.493,
      "step": 210948
    },
    {
      "epoch": 40.51,
      "learning_rate": 0.001,
      "loss": 2.4934,
      "step": 210960
    },
    {
      "epoch": 40.51,
      "learning_rate": 0.001,
      "loss": 2.4925,
      "step": 210972
    },
    {
      "epoch": 40.51,
      "learning_rate": 0.001,
      "loss": 2.4823,
      "step": 210984
    },
    {
      "epoch": 40.51,
      "learning_rate": 0.001,
      "loss": 2.497,
      "step": 210996
    },
    {
      "epoch": 40.52,
      "learning_rate": 0.001,
      "loss": 2.4963,
      "step": 211008
    },
    {
      "epoch": 40.52,
      "learning_rate": 0.001,
      "loss": 2.5011,
      "step": 211020
    },
    {
      "epoch": 40.52,
      "learning_rate": 0.001,
      "loss": 2.4966,
      "step": 211032
    },
    {
      "epoch": 40.52,
      "learning_rate": 0.001,
      "loss": 2.4983,
      "step": 211044
    },
    {
      "epoch": 40.53,
      "learning_rate": 0.001,
      "loss": 2.5001,
      "step": 211056
    },
    {
      "epoch": 40.53,
      "learning_rate": 0.001,
      "loss": 2.4891,
      "step": 211068
    },
    {
      "epoch": 40.53,
      "learning_rate": 0.001,
      "loss": 2.4943,
      "step": 211080
    },
    {
      "epoch": 40.53,
      "learning_rate": 0.001,
      "loss": 2.4964,
      "step": 211092
    },
    {
      "epoch": 40.53,
      "learning_rate": 0.001,
      "loss": 2.4974,
      "step": 211104
    },
    {
      "epoch": 40.54,
      "learning_rate": 0.001,
      "loss": 2.4976,
      "step": 211116
    },
    {
      "epoch": 40.54,
      "learning_rate": 0.001,
      "loss": 2.4939,
      "step": 211128
    },
    {
      "epoch": 40.54,
      "learning_rate": 0.001,
      "loss": 2.4975,
      "step": 211140
    },
    {
      "epoch": 40.54,
      "learning_rate": 0.001,
      "loss": 2.4961,
      "step": 211152
    },
    {
      "epoch": 40.55,
      "learning_rate": 0.001,
      "loss": 2.5008,
      "step": 211164
    },
    {
      "epoch": 40.55,
      "learning_rate": 0.001,
      "loss": 2.4949,
      "step": 211176
    },
    {
      "epoch": 40.55,
      "learning_rate": 0.001,
      "loss": 2.4964,
      "step": 211188
    },
    {
      "epoch": 40.55,
      "learning_rate": 0.001,
      "loss": 2.4949,
      "step": 211200
    },
    {
      "epoch": 40.56,
      "learning_rate": 0.001,
      "loss": 2.4976,
      "step": 211212
    },
    {
      "epoch": 40.56,
      "learning_rate": 0.001,
      "loss": 2.5006,
      "step": 211224
    },
    {
      "epoch": 40.56,
      "learning_rate": 0.001,
      "loss": 2.5006,
      "step": 211236
    },
    {
      "epoch": 40.56,
      "learning_rate": 0.001,
      "loss": 2.495,
      "step": 211248
    },
    {
      "epoch": 40.56,
      "eval_ag_news_accuracy": 0.330625,
      "eval_ag_news_bleu_score": 4.965679517767526,
      "eval_ag_news_bleu_score_sem": 0.15529164176969681,
      "eval_ag_news_emb_cos_sim": 0.8184776902198792,
      "eval_ag_news_emb_cos_sim_sem": 0.006984504612418496,
      "eval_ag_news_emb_top1_equal": 0.2421875,
      "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4651143550872803,
      "eval_ag_news_n_ngrams_match_1": 14.518,
      "eval_ag_news_n_ngrams_match_2": 3.28,
      "eval_ag_news_n_ngrams_match_3": 0.914,
      "eval_ag_news_num_pred_words": 46.82,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.980116653066233,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3601847621843889,
      "eval_ag_news_runtime": 12.4137,
      "eval_ag_news_samples_per_second": 40.278,
      "eval_ag_news_steps_per_second": 0.081,
      "eval_ag_news_token_set_f1": 0.35821609908491003,
      "eval_ag_news_token_set_f1_sem": 0.004377098564057673,
      "eval_ag_news_token_set_precision": 0.34504259764844536,
      "eval_ag_news_token_set_recall": 0.38826048122376766,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 211250
    },
    {
      "epoch": 40.56,
      "eval_anthropic_toxic_prompts_accuracy": 0.114375,
      "eval_anthropic_toxic_prompts_bleu_score": 3.229517019320023,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1241547252813052,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6804816722869873,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008320458294359607,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.219191312789917,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.288,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.972,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.732,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.672,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.007888442331616,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.22019143045454434,
      "eval_anthropic_toxic_prompts_runtime": 12.1742,
      "eval_anthropic_toxic_prompts_samples_per_second": 41.07,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.082,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3629537146769006,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00648898051165861,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4443750867682475,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3334166465013034,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 211250
    },
    {
      "epoch": 40.56,
      "eval_arxiv_accuracy": 0.3526875,
      "eval_arxiv_bleu_score": 4.517229432060944,
      "eval_arxiv_bleu_score_sem": 0.13048247904619614,
      "eval_arxiv_emb_cos_sim": 0.7832765579223633,
      "eval_arxiv_emb_cos_sim_sem": 0.00662021622984868,
      "eval_arxiv_emb_top1_equal": 0.3359375,
      "eval_arxiv_emb_top1_equal_sem": 0.04191137143408563,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3431384563446045,
      "eval_arxiv_n_ngrams_match_1": 15.442,
      "eval_arxiv_n_ngrams_match_2": 3.068,
      "eval_arxiv_n_ngrams_match_3": 0.716,
      "eval_arxiv_num_pred_words": 40.462,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.307830326066725,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.37094627656375234,
      "eval_arxiv_runtime": 11.6376,
      "eval_arxiv_samples_per_second": 42.964,
      "eval_arxiv_steps_per_second": 0.086,
      "eval_arxiv_token_set_f1": 0.3637689808034788,
      "eval_arxiv_token_set_f1_sem": 0.0041864600168504094,
      "eval_arxiv_token_set_precision": 0.31430136831508204,
      "eval_arxiv_token_set_recall": 0.4516966199853352,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 211250
    },
    {
      "epoch": 40.56,
      "eval_python_code_alpaca_accuracy": 0.162875,
      "eval_python_code_alpaca_bleu_score": 5.001133545530016,
      "eval_python_code_alpaca_bleu_score_sem": 0.16316511733482023,
      "eval_python_code_alpaca_emb_cos_sim": 0.7707309722900391,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.00773646168322504,
      "eval_python_code_alpaca_emb_top1_equal": 0.1171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.880523920059204,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.1,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.12,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.148,
      "eval_python_code_alpaca_num_pred_words": 43.604,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.823608880037952,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3442481904266813,
      "eval_python_code_alpaca_runtime": 16.3009,
      "eval_python_code_alpaca_samples_per_second": 30.673,
      "eval_python_code_alpaca_steps_per_second": 0.061,
      "eval_python_code_alpaca_token_set_f1": 0.48811303618622753,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0053038756078575226,
      "eval_python_code_alpaca_token_set_precision": 0.5529669880239189,
      "eval_python_code_alpaca_token_set_recall": 0.4554527505613025,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 211250
    },
    {
      "epoch": 40.56,
      "eval_wikibio_accuracy": 0.3328125,
      "eval_wikibio_bleu_score": 6.106486569138538,
      "eval_wikibio_bleu_score_sem": 0.21862486709111842,
      "eval_wikibio_emb_cos_sim": 0.7377462387084961,
      "eval_wikibio_emb_cos_sim_sem": 0.010332872187623108,
      "eval_wikibio_emb_top1_equal": 0.2421875,
      "eval_wikibio_emb_top1_equal_sem": 0.038014990119662626,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6214442253112793,
      "eval_wikibio_n_ngrams_match_1": 10.036,
      "eval_wikibio_n_ngrams_match_2": 3.37,
      "eval_wikibio_n_ngrams_match_3": 1.26,
      "eval_wikibio_num_pred_words": 35.068,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.39153064041837,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3529416400819134,
      "eval_wikibio_runtime": 12.5853,
      "eval_wikibio_samples_per_second": 39.729,
      "eval_wikibio_steps_per_second": 0.079,
      "eval_wikibio_token_set_f1": 0.3216823173080126,
      "eval_wikibio_token_set_f1_sem": 0.0057506740642656,
      "eval_wikibio_token_set_precision": 0.3279516456954584,
      "eval_wikibio_token_set_recall": 0.33241708463340364,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 211250
    },
    {
      "epoch": 40.56,
      "eval_nq_accuracy": 0.53803125,
      "eval_nq_bleu_score": 12.029555287348698,
      "eval_nq_bleu_score_sem": 0.4873451969618991,
      "eval_nq_emb_cos_sim": 0.8419448137283325,
      "eval_nq_emb_cos_sim_sem": 0.006734322895291353,
      "eval_nq_emb_top1_equal": 0.3046875,
      "eval_nq_emb_top1_equal_sem": 0.04084279867618665,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1206815242767334,
      "eval_nq_n_ngrams_match_1": 23.474,
      "eval_nq_n_ngrams_match_2": 8.722,
      "eval_nq_n_ngrams_match_3": 4.03,
      "eval_nq_num_pred_words": 48.762,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.336817295381927,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45793415195912957,
      "eval_nq_runtime": 12.8976,
      "eval_nq_samples_per_second": 38.767,
      "eval_nq_steps_per_second": 0.078,
      "eval_nq_token_set_f1": 0.4699142640313866,
      "eval_nq_token_set_f1_sem": 0.004932251966649148,
      "eval_nq_token_set_precision": 0.42773513140946984,
      "eval_nq_token_set_recall": 0.5297096013171274,
      "eval_nq_true_num_tokens": 64.0,
      "step": 211250
    },
    {
      "epoch": 40.56,
      "learning_rate": 0.001,
      "loss": 2.4972,
      "step": 211260
    },
    {
      "epoch": 40.57,
      "learning_rate": 0.001,
      "loss": 2.4879,
      "step": 211272
    },
    {
      "epoch": 40.57,
      "learning_rate": 0.001,
      "loss": 2.4899,
      "step": 211284
    },
    {
      "epoch": 40.57,
      "learning_rate": 0.001,
      "loss": 2.5088,
      "step": 211296
    },
    {
      "epoch": 40.57,
      "learning_rate": 0.001,
      "loss": 2.4872,
      "step": 211308
    },
    {
      "epoch": 40.58,
      "learning_rate": 0.001,
      "loss": 2.4866,
      "step": 211320
    },
    {
      "epoch": 40.58,
      "learning_rate": 0.001,
      "loss": 2.4954,
      "step": 211332
    },
    {
      "epoch": 40.58,
      "learning_rate": 0.001,
      "loss": 2.4885,
      "step": 211344
    },
    {
      "epoch": 40.58,
      "learning_rate": 0.001,
      "loss": 2.4885,
      "step": 211356
    },
    {
      "epoch": 40.59,
      "learning_rate": 0.001,
      "loss": 2.4959,
      "step": 211368
    },
    {
      "epoch": 40.59,
      "learning_rate": 0.001,
      "loss": 2.4951,
      "step": 211380
    },
    {
      "epoch": 40.59,
      "learning_rate": 0.001,
      "loss": 2.5028,
      "step": 211392
    },
    {
      "epoch": 40.59,
      "learning_rate": 0.001,
      "loss": 2.5044,
      "step": 211404
    },
    {
      "epoch": 40.59,
      "learning_rate": 0.001,
      "loss": 2.4948,
      "step": 211416
    },
    {
      "epoch": 40.6,
      "learning_rate": 0.001,
      "loss": 2.4938,
      "step": 211428
    },
    {
      "epoch": 40.6,
      "learning_rate": 0.001,
      "loss": 2.4973,
      "step": 211440
    },
    {
      "epoch": 40.6,
      "learning_rate": 0.001,
      "loss": 2.5054,
      "step": 211452
    },
    {
      "epoch": 40.6,
      "learning_rate": 0.001,
      "loss": 2.5031,
      "step": 211464
    },
    {
      "epoch": 40.61,
      "learning_rate": 0.001,
      "loss": 2.4952,
      "step": 211476
    },
    {
      "epoch": 40.61,
      "learning_rate": 0.001,
      "loss": 2.498,
      "step": 211488
    },
    {
      "epoch": 40.61,
      "learning_rate": 0.001,
      "loss": 2.4941,
      "step": 211500
    },
    {
      "epoch": 40.61,
      "learning_rate": 0.001,
      "loss": 2.5051,
      "step": 211512
    },
    {
      "epoch": 40.62,
      "learning_rate": 0.001,
      "loss": 2.4979,
      "step": 211524
    },
    {
      "epoch": 40.62,
      "learning_rate": 0.001,
      "loss": 2.5072,
      "step": 211536
    },
    {
      "epoch": 40.62,
      "learning_rate": 0.001,
      "loss": 2.492,
      "step": 211548
    },
    {
      "epoch": 40.62,
      "learning_rate": 0.001,
      "loss": 2.4809,
      "step": 211560
    },
    {
      "epoch": 40.62,
      "learning_rate": 0.001,
      "loss": 2.5034,
      "step": 211572
    },
    {
      "epoch": 40.63,
      "learning_rate": 0.001,
      "loss": 2.4977,
      "step": 211584
    },
    {
      "epoch": 40.63,
      "learning_rate": 0.001,
      "loss": 2.4972,
      "step": 211596
    },
    {
      "epoch": 40.63,
      "learning_rate": 0.001,
      "loss": 2.5017,
      "step": 211608
    },
    {
      "epoch": 40.63,
      "learning_rate": 0.001,
      "loss": 2.4981,
      "step": 211620
    },
    {
      "epoch": 40.64,
      "learning_rate": 0.001,
      "loss": 2.5049,
      "step": 211632
    },
    {
      "epoch": 40.64,
      "learning_rate": 0.001,
      "loss": 2.5152,
      "step": 211644
    },
    {
      "epoch": 40.64,
      "learning_rate": 0.001,
      "loss": 2.4919,
      "step": 211656
    },
    {
      "epoch": 40.64,
      "learning_rate": 0.001,
      "loss": 2.5047,
      "step": 211668
    },
    {
      "epoch": 40.65,
      "learning_rate": 0.001,
      "loss": 2.4889,
      "step": 211680
    },
    {
      "epoch": 40.65,
      "learning_rate": 0.001,
      "loss": 2.5006,
      "step": 211692
    },
    {
      "epoch": 40.65,
      "learning_rate": 0.001,
      "loss": 2.5043,
      "step": 211704
    },
    {
      "epoch": 40.65,
      "learning_rate": 0.001,
      "loss": 2.5145,
      "step": 211716
    },
    {
      "epoch": 40.65,
      "learning_rate": 0.001,
      "loss": 2.4987,
      "step": 211728
    },
    {
      "epoch": 40.66,
      "learning_rate": 0.001,
      "loss": 2.4935,
      "step": 211740
    },
    {
      "epoch": 40.66,
      "learning_rate": 0.001,
      "loss": 2.5008,
      "step": 211752
    },
    {
      "epoch": 40.66,
      "learning_rate": 0.001,
      "loss": 2.4926,
      "step": 211764
    },
    {
      "epoch": 40.66,
      "learning_rate": 0.001,
      "loss": 2.4896,
      "step": 211776
    },
    {
      "epoch": 40.67,
      "learning_rate": 0.001,
      "loss": 2.4963,
      "step": 211788
    },
    {
      "epoch": 40.67,
      "learning_rate": 0.001,
      "loss": 2.5058,
      "step": 211800
    },
    {
      "epoch": 40.67,
      "learning_rate": 0.001,
      "loss": 2.5102,
      "step": 211812
    },
    {
      "epoch": 40.67,
      "learning_rate": 0.001,
      "loss": 2.5032,
      "step": 211824
    },
    {
      "epoch": 40.68,
      "learning_rate": 0.001,
      "loss": 2.5014,
      "step": 211836
    },
    {
      "epoch": 40.68,
      "learning_rate": 0.001,
      "loss": 2.5016,
      "step": 211848
    },
    {
      "epoch": 40.68,
      "learning_rate": 0.001,
      "loss": 2.5096,
      "step": 211860
    },
    {
      "epoch": 40.68,
      "learning_rate": 0.001,
      "loss": 2.4947,
      "step": 211872
    },
    {
      "epoch": 40.68,
      "eval_ag_news_accuracy": 0.32915625,
      "eval_ag_news_bleu_score": 4.963234534569914,
      "eval_ag_news_bleu_score_sem": 0.15203737496492756,
      "eval_ag_news_emb_cos_sim": 0.8209718465805054,
      "eval_ag_news_emb_cos_sim_sem": 0.007165501619170499,
      "eval_ag_news_emb_top1_equal": 0.2421875,
      "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4764633178710938,
      "eval_ag_news_n_ngrams_match_1": 14.484,
      "eval_ag_news_n_ngrams_match_2": 3.296,
      "eval_ag_news_n_ngrams_match_3": 0.972,
      "eval_ag_news_num_pred_words": 47.052,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.34512512283607,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35927382853782835,
      "eval_ag_news_runtime": 20.4873,
      "eval_ag_news_samples_per_second": 24.405,
      "eval_ag_news_steps_per_second": 0.049,
      "eval_ag_news_token_set_f1": 0.3579980060028512,
      "eval_ag_news_token_set_f1_sem": 0.004656055630614914,
      "eval_ag_news_token_set_precision": 0.34694101676323597,
      "eval_ag_news_token_set_recall": 0.3829433068540424,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 211875
    },
    {
      "epoch": 40.68,
      "eval_anthropic_toxic_prompts_accuracy": 0.11615625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.376417473185111,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12940743142741185,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6825761198997498,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008438446421132331,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.190232753753662,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.34,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.03,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.812,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.524,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.29408132370793,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.22075433661742308,
      "eval_anthropic_toxic_prompts_runtime": 11.4515,
      "eval_anthropic_toxic_prompts_samples_per_second": 43.662,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.087,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35710008178358466,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0063458533161736864,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4496051392700254,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3225220007767997,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 211875
    },
    {
      "epoch": 40.68,
      "eval_arxiv_accuracy": 0.35378125,
      "eval_arxiv_bleu_score": 4.480396452198746,
      "eval_arxiv_bleu_score_sem": 0.1302892952645812,
      "eval_arxiv_emb_cos_sim": 0.7887937426567078,
      "eval_arxiv_emb_cos_sim_sem": 0.006822198415892059,
      "eval_arxiv_emb_top1_equal": 0.2578125,
      "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3442654609680176,
      "eval_arxiv_n_ngrams_match_1": 15.622,
      "eval_arxiv_n_ngrams_match_2": 3.06,
      "eval_arxiv_n_ngrams_match_3": 0.688,
      "eval_arxiv_num_pred_words": 41.222,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.339751365924066,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3742452813832927,
      "eval_arxiv_runtime": 26.3743,
      "eval_arxiv_samples_per_second": 18.958,
      "eval_arxiv_steps_per_second": 0.038,
      "eval_arxiv_token_set_f1": 0.367423677481192,
      "eval_arxiv_token_set_f1_sem": 0.0041650053318129715,
      "eval_arxiv_token_set_precision": 0.3198981863267829,
      "eval_arxiv_token_set_recall": 0.4484298164276399,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 211875
    },
    {
      "epoch": 40.68,
      "eval_python_code_alpaca_accuracy": 0.16390625,
      "eval_python_code_alpaca_bleu_score": 4.834341631975883,
      "eval_python_code_alpaca_bleu_score_sem": 0.15402381560850545,
      "eval_python_code_alpaca_emb_cos_sim": 0.7638465166091919,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008511363761778717,
      "eval_python_code_alpaca_emb_top1_equal": 0.125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.861112117767334,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.108,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.126,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.05,
      "eval_python_code_alpaca_num_pred_words": 43.992,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.480957013195052,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.34007841519793347,
      "eval_python_code_alpaca_runtime": 26.318,
      "eval_python_code_alpaca_samples_per_second": 18.998,
      "eval_python_code_alpaca_steps_per_second": 0.038,
      "eval_python_code_alpaca_token_set_f1": 0.4885419612175739,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0052848286232211,
      "eval_python_code_alpaca_token_set_precision": 0.5541858940783554,
      "eval_python_code_alpaca_token_set_recall": 0.45751941396856666,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 211875
    },
    {
      "epoch": 40.68,
      "eval_wikibio_accuracy": 0.3301875,
      "eval_wikibio_bleu_score": 6.323464112115377,
      "eval_wikibio_bleu_score_sem": 0.2315125932544155,
      "eval_wikibio_emb_cos_sim": 0.747446596622467,
      "eval_wikibio_emb_cos_sim_sem": 0.008913036398977697,
      "eval_wikibio_emb_top1_equal": 0.1875,
      "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6288974285125732,
      "eval_wikibio_n_ngrams_match_1": 10.132,
      "eval_wikibio_n_ngrams_match_2": 3.516,
      "eval_wikibio_n_ngrams_match_3": 1.32,
      "eval_wikibio_num_pred_words": 35.56,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.671258455501224,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3644290219383398,
      "eval_wikibio_runtime": 16.338,
      "eval_wikibio_samples_per_second": 30.603,
      "eval_wikibio_steps_per_second": 0.061,
      "eval_wikibio_token_set_f1": 0.32812273040857015,
      "eval_wikibio_token_set_f1_sem": 0.005243822949007464,
      "eval_wikibio_token_set_precision": 0.3338040701242717,
      "eval_wikibio_token_set_recall": 0.34078901371443304,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 211875
    },
    {
      "epoch": 40.68,
      "eval_nq_accuracy": 0.53696875,
      "eval_nq_bleu_score": 11.799364175949483,
      "eval_nq_bleu_score_sem": 0.4728314380828865,
      "eval_nq_emb_cos_sim": 0.8347982168197632,
      "eval_nq_emb_cos_sim_sem": 0.007110457835015271,
      "eval_nq_emb_top1_equal": 0.3125,
      "eval_nq_emb_top1_equal_sem": 0.041130074229814934,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1179277896881104,
      "eval_nq_n_ngrams_match_1": 23.418,
      "eval_nq_n_ngrams_match_2": 8.656,
      "eval_nq_n_ngrams_match_3": 3.922,
      "eval_nq_num_pred_words": 49.098,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.313891493510692,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.455719516647935,
      "eval_nq_runtime": 16.0765,
      "eval_nq_samples_per_second": 31.101,
      "eval_nq_steps_per_second": 0.062,
      "eval_nq_token_set_f1": 0.4669491230711507,
      "eval_nq_token_set_f1_sem": 0.004873035977873829,
      "eval_nq_token_set_precision": 0.42769601919580363,
      "eval_nq_token_set_recall": 0.5229055021960608,
      "eval_nq_true_num_tokens": 64.0,
      "step": 211875
    },
    {
      "epoch": 40.68,
      "learning_rate": 0.001,
      "loss": 2.4934,
      "step": 211884
    },
    {
      "epoch": 40.69,
      "learning_rate": 0.001,
      "loss": 2.5003,
      "step": 211896
    },
    {
      "epoch": 40.69,
      "learning_rate": 0.001,
      "loss": 2.4955,
      "step": 211908
    },
    {
      "epoch": 40.69,
      "learning_rate": 0.001,
      "loss": 2.497,
      "step": 211920
    },
    {
      "epoch": 40.69,
      "learning_rate": 0.001,
      "loss": 2.4994,
      "step": 211932
    },
    {
      "epoch": 40.7,
      "learning_rate": 0.001,
      "loss": 2.4985,
      "step": 211944
    },
    {
      "epoch": 40.7,
      "learning_rate": 0.001,
      "loss": 2.4938,
      "step": 211956
    },
    {
      "epoch": 40.7,
      "learning_rate": 0.001,
      "loss": 2.4953,
      "step": 211968
    },
    {
      "epoch": 40.7,
      "learning_rate": 0.001,
      "loss": 2.4882,
      "step": 211980
    },
    {
      "epoch": 40.71,
      "learning_rate": 0.001,
      "loss": 2.4949,
      "step": 211992
    },
    {
      "epoch": 40.71,
      "learning_rate": 0.001,
      "loss": 2.4999,
      "step": 212004
    },
    {
      "epoch": 40.71,
      "learning_rate": 0.001,
      "loss": 2.4917,
      "step": 212016
    },
    {
      "epoch": 40.71,
      "learning_rate": 0.001,
      "loss": 2.488,
      "step": 212028
    },
    {
      "epoch": 40.71,
      "learning_rate": 0.001,
      "loss": 2.4934,
      "step": 212040
    },
    {
      "epoch": 40.72,
      "learning_rate": 0.001,
      "loss": 2.497,
      "step": 212052
    },
    {
      "epoch": 40.72,
      "learning_rate": 0.001,
      "loss": 2.4884,
      "step": 212064
    },
    {
      "epoch": 40.72,
      "learning_rate": 0.001,
      "loss": 2.5069,
      "step": 212076
    },
    {
      "epoch": 40.72,
      "learning_rate": 0.001,
      "loss": 2.4908,
      "step": 212088
    },
    {
      "epoch": 40.73,
      "learning_rate": 0.001,
      "loss": 2.5007,
      "step": 212100
    },
    {
      "epoch": 40.73,
      "learning_rate": 0.001,
      "loss": 2.4938,
      "step": 212112
    },
    {
      "epoch": 40.73,
      "learning_rate": 0.001,
      "loss": 2.5021,
      "step": 212124
    },
    {
      "epoch": 40.73,
      "learning_rate": 0.001,
      "loss": 2.4996,
      "step": 212136
    },
    {
      "epoch": 40.74,
      "learning_rate": 0.001,
      "loss": 2.4915,
      "step": 212148
    },
    {
      "epoch": 40.74,
      "learning_rate": 0.001,
      "loss": 2.4958,
      "step": 212160
    },
    {
      "epoch": 40.74,
      "learning_rate": 0.001,
      "loss": 2.4996,
      "step": 212172
    },
    {
      "epoch": 40.74,
      "learning_rate": 0.001,
      "loss": 2.5015,
      "step": 212184
    },
    {
      "epoch": 40.74,
      "learning_rate": 0.001,
      "loss": 2.5064,
      "step": 212196
    },
    {
      "epoch": 40.75,
      "learning_rate": 0.001,
      "loss": 2.4932,
      "step": 212208
    },
    {
      "epoch": 40.75,
      "learning_rate": 0.001,
      "loss": 2.4986,
      "step": 212220
    },
    {
      "epoch": 40.75,
      "learning_rate": 0.001,
      "loss": 2.4984,
      "step": 212232
    },
    {
      "epoch": 40.75,
      "learning_rate": 0.001,
      "loss": 2.5046,
      "step": 212244
    },
    {
      "epoch": 40.76,
      "learning_rate": 0.001,
      "loss": 2.4927,
      "step": 212256
    },
    {
      "epoch": 40.76,
      "learning_rate": 0.001,
      "loss": 2.4955,
      "step": 212268
    },
    {
      "epoch": 40.76,
      "learning_rate": 0.001,
      "loss": 2.5014,
      "step": 212280
    },
    {
      "epoch": 40.76,
      "learning_rate": 0.001,
      "loss": 2.5026,
      "step": 212292
    },
    {
      "epoch": 40.76,
      "learning_rate": 0.001,
      "loss": 2.4996,
      "step": 212304
    },
    {
      "epoch": 40.77,
      "learning_rate": 0.001,
      "loss": 2.489,
      "step": 212316
    },
    {
      "epoch": 40.77,
      "learning_rate": 0.001,
      "loss": 2.4987,
      "step": 212328
    },
    {
      "epoch": 40.77,
      "learning_rate": 0.001,
      "loss": 2.5016,
      "step": 212340
    },
    {
      "epoch": 40.77,
      "learning_rate": 0.001,
      "loss": 2.4975,
      "step": 212352
    },
    {
      "epoch": 40.78,
      "learning_rate": 0.001,
      "loss": 2.5102,
      "step": 212364
    },
    {
      "epoch": 40.78,
      "learning_rate": 0.001,
      "loss": 2.5007,
      "step": 212376
    },
    {
      "epoch": 40.78,
      "learning_rate": 0.001,
      "loss": 2.5162,
      "step": 212388
    },
    {
      "epoch": 40.78,
      "learning_rate": 0.001,
      "loss": 2.4996,
      "step": 212400
    },
    {
      "epoch": 40.79,
      "learning_rate": 0.001,
      "loss": 2.4987,
      "step": 212412
    },
    {
      "epoch": 40.79,
      "learning_rate": 0.001,
      "loss": 2.5069,
      "step": 212424
    },
    {
      "epoch": 40.79,
      "learning_rate": 0.001,
      "loss": 2.5003,
      "step": 212436
    },
    {
      "epoch": 40.79,
      "learning_rate": 0.001,
      "loss": 2.4959,
      "step": 212448
    },
    {
      "epoch": 40.79,
      "learning_rate": 0.001,
      "loss": 2.4978,
      "step": 212460
    },
    {
      "epoch": 40.8,
      "learning_rate": 0.001,
      "loss": 2.497,
      "step": 212472
    },
    {
      "epoch": 40.8,
      "learning_rate": 0.001,
      "loss": 2.5003,
      "step": 212484
    },
    {
      "epoch": 40.8,
      "learning_rate": 0.001,
      "loss": 2.4951,
      "step": 212496
    },
    {
      "epoch": 40.8,
      "eval_ag_news_accuracy": 0.32740625,
      "eval_ag_news_bleu_score": 5.073949490606959,
      "eval_ag_news_bleu_score_sem": 0.16189017421293195,
      "eval_ag_news_emb_cos_sim": 0.8260356187820435,
      "eval_ag_news_emb_cos_sim_sem": 0.006121638109755535,
      "eval_ag_news_emb_top1_equal": 0.265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.47394061088562,
      "eval_ag_news_n_ngrams_match_1": 14.348,
      "eval_ag_news_n_ngrams_match_2": 3.276,
      "eval_ag_news_n_ngrams_match_3": 0.982,
      "eval_ag_news_num_pred_words": 46.828,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.26363068625445,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35707282301627,
      "eval_ag_news_runtime": 26.7904,
      "eval_ag_news_samples_per_second": 18.663,
      "eval_ag_news_steps_per_second": 0.037,
      "eval_ag_news_token_set_f1": 0.3575342032398384,
      "eval_ag_news_token_set_f1_sem": 0.004328800450529145,
      "eval_ag_news_token_set_precision": 0.3437397156310016,
      "eval_ag_news_token_set_recall": 0.38607168588250695,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 212500
    },
    {
      "epoch": 40.8,
      "eval_anthropic_toxic_prompts_accuracy": 0.11584375,
      "eval_anthropic_toxic_prompts_bleu_score": 3.19271293440135,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12423840186488055,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6878588199615479,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00838356068765902,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.1866867542266846,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.252,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.962,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.72,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.862,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.208087080706633,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21844141793597383,
      "eval_anthropic_toxic_prompts_runtime": 11.4823,
      "eval_anthropic_toxic_prompts_samples_per_second": 43.545,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.087,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3653211047980055,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006858512618122479,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4382184239518757,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.34065660787244145,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 212500
    },
    {
      "epoch": 40.8,
      "eval_arxiv_accuracy": 0.35275,
      "eval_arxiv_bleu_score": 4.513127925483866,
      "eval_arxiv_bleu_score_sem": 0.13612349484737696,
      "eval_arxiv_emb_cos_sim": 0.7828484773635864,
      "eval_arxiv_emb_cos_sim_sem": 0.006214240004290033,
      "eval_arxiv_emb_top1_equal": 0.3125,
      "eval_arxiv_emb_top1_equal_sem": 0.041130074229814934,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3370327949523926,
      "eval_arxiv_n_ngrams_match_1": 15.622,
      "eval_arxiv_n_ngrams_match_2": 3.19,
      "eval_arxiv_n_ngrams_match_3": 0.702,
      "eval_arxiv_num_pred_words": 40.366,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.1355188723438,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3742057430507527,
      "eval_arxiv_runtime": 11.7402,
      "eval_arxiv_samples_per_second": 42.589,
      "eval_arxiv_steps_per_second": 0.085,
      "eval_arxiv_token_set_f1": 0.3688973608504254,
      "eval_arxiv_token_set_f1_sem": 0.00418976189933825,
      "eval_arxiv_token_set_precision": 0.31977340586560815,
      "eval_arxiv_token_set_recall": 0.457249922942953,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 212500
    },
    {
      "epoch": 40.8,
      "eval_python_code_alpaca_accuracy": 0.16359375,
      "eval_python_code_alpaca_bleu_score": 4.545406947168154,
      "eval_python_code_alpaca_bleu_score_sem": 0.1547740315439224,
      "eval_python_code_alpaca_emb_cos_sim": 0.7672972083091736,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.0075867962485293315,
      "eval_python_code_alpaca_emb_top1_equal": 0.09375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.025864720141013958,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.838118076324463,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.884,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.832,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.968,
      "eval_python_code_alpaca_num_pred_words": 44.42,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.083585262664368,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3330134344414717,
      "eval_python_code_alpaca_runtime": 32.4593,
      "eval_python_code_alpaca_samples_per_second": 15.404,
      "eval_python_code_alpaca_steps_per_second": 0.031,
      "eval_python_code_alpaca_token_set_f1": 0.4852851572535872,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0054490215000587185,
      "eval_python_code_alpaca_token_set_precision": 0.5401105876679381,
      "eval_python_code_alpaca_token_set_recall": 0.4626911120535161,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 212500
    },
    {
      "epoch": 40.8,
      "eval_wikibio_accuracy": 0.3296875,
      "eval_wikibio_bleu_score": 6.147854636696207,
      "eval_wikibio_bleu_score_sem": 0.2178778092055402,
      "eval_wikibio_emb_cos_sim": 0.7475197315216064,
      "eval_wikibio_emb_cos_sim_sem": 0.009752180811374106,
      "eval_wikibio_emb_top1_equal": 0.1796875,
      "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6456449031829834,
      "eval_wikibio_n_ngrams_match_1": 9.974,
      "eval_wikibio_n_ngrams_match_2": 3.418,
      "eval_wikibio_n_ngrams_match_3": 1.256,
      "eval_wikibio_num_pred_words": 35.402,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 38.30746949623982,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.35711404812557335,
      "eval_wikibio_runtime": 21.0921,
      "eval_wikibio_samples_per_second": 23.706,
      "eval_wikibio_steps_per_second": 0.047,
      "eval_wikibio_token_set_f1": 0.32296697502696603,
      "eval_wikibio_token_set_f1_sem": 0.00566851255044716,
      "eval_wikibio_token_set_precision": 0.3255429715328488,
      "eval_wikibio_token_set_recall": 0.33631945041421923,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 212500
    },
    {
      "epoch": 40.8,
      "eval_nq_accuracy": 0.53625,
      "eval_nq_bleu_score": 12.112637320579347,
      "eval_nq_bleu_score_sem": 0.49196328655898786,
      "eval_nq_emb_cos_sim": 0.8384393453598022,
      "eval_nq_emb_cos_sim_sem": 0.007336762045371977,
      "eval_nq_emb_top1_equal": 0.2578125,
      "eval_nq_emb_top1_equal_sem": 0.038815656435002115,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1238081455230713,
      "eval_nq_n_ngrams_match_1": 23.568,
      "eval_nq_n_ngrams_match_2": 8.814,
      "eval_nq_n_ngrams_match_3": 4.056,
      "eval_nq_num_pred_words": 49.302,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.362924157331262,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4588801477255383,
      "eval_nq_runtime": 11.7801,
      "eval_nq_samples_per_second": 42.445,
      "eval_nq_steps_per_second": 0.085,
      "eval_nq_token_set_f1": 0.47183306649742235,
      "eval_nq_token_set_f1_sem": 0.0048326883239400275,
      "eval_nq_token_set_precision": 0.4294888847624133,
      "eval_nq_token_set_recall": 0.5321096533546643,
      "eval_nq_true_num_tokens": 64.0,
      "step": 212500
    },
    {
      "epoch": 40.8,
      "learning_rate": 0.001,
      "loss": 2.5024,
      "step": 212508
    },
    {
      "epoch": 40.81,
      "learning_rate": 0.001,
      "loss": 2.4855,
      "step": 212520
    },
    {
      "epoch": 40.81,
      "learning_rate": 0.001,
      "loss": 2.502,
      "step": 212532
    },
    {
      "epoch": 40.81,
      "learning_rate": 0.001,
      "loss": 2.4964,
      "step": 212544
    },
    {
      "epoch": 40.81,
      "learning_rate": 0.001,
      "loss": 2.497,
      "step": 212556
    },
    {
      "epoch": 40.82,
      "learning_rate": 0.001,
      "loss": 2.5005,
      "step": 212568
    },
    {
      "epoch": 40.82,
      "learning_rate": 0.001,
      "loss": 2.4897,
      "step": 212580
    },
    {
      "epoch": 40.82,
      "learning_rate": 0.001,
      "loss": 2.4962,
      "step": 212592
    },
    {
      "epoch": 40.82,
      "learning_rate": 0.001,
      "loss": 2.5058,
      "step": 212604
    },
    {
      "epoch": 40.82,
      "learning_rate": 0.001,
      "loss": 2.5049,
      "step": 212616
    },
    {
      "epoch": 40.83,
      "learning_rate": 0.001,
      "loss": 2.4935,
      "step": 212628
    },
    {
      "epoch": 40.83,
      "learning_rate": 0.001,
      "loss": 2.505,
      "step": 212640
    },
    {
      "epoch": 40.83,
      "learning_rate": 0.001,
      "loss": 2.4965,
      "step": 212652
    },
    {
      "epoch": 40.83,
      "learning_rate": 0.001,
      "loss": 2.5106,
      "step": 212664
    },
    {
      "epoch": 40.84,
      "learning_rate": 0.001,
      "loss": 2.4992,
      "step": 212676
    },
    {
      "epoch": 40.84,
      "learning_rate": 0.001,
      "loss": 2.494,
      "step": 212688
    },
    {
      "epoch": 40.84,
      "learning_rate": 0.001,
      "loss": 2.496,
      "step": 212700
    },
    {
      "epoch": 40.84,
      "learning_rate": 0.001,
      "loss": 2.4952,
      "step": 212712
    },
    {
      "epoch": 40.85,
      "learning_rate": 0.001,
      "loss": 2.4912,
      "step": 212724
    },
    {
      "epoch": 40.85,
      "learning_rate": 0.001,
      "loss": 2.4887,
      "step": 212736
    },
    {
      "epoch": 40.85,
      "learning_rate": 0.001,
      "loss": 2.4902,
      "step": 212748
    },
    {
      "epoch": 40.85,
      "learning_rate": 0.001,
      "loss": 2.4976,
      "step": 212760
    },
    {
      "epoch": 40.85,
      "learning_rate": 0.001,
      "loss": 2.4966,
      "step": 212772
    },
    {
      "epoch": 40.86,
      "learning_rate": 0.001,
      "loss": 2.5008,
      "step": 212784
    },
    {
      "epoch": 40.86,
      "learning_rate": 0.001,
      "loss": 2.5015,
      "step": 212796
    },
    {
      "epoch": 40.86,
      "learning_rate": 0.001,
      "loss": 2.4955,
      "step": 212808
    },
    {
      "epoch": 40.86,
      "learning_rate": 0.001,
      "loss": 2.4985,
      "step": 212820
    },
    {
      "epoch": 40.87,
      "learning_rate": 0.001,
      "loss": 2.4951,
      "step": 212832
    },
    {
      "epoch": 40.87,
      "learning_rate": 0.001,
      "loss": 2.4967,
      "step": 212844
    },
    {
      "epoch": 40.87,
      "learning_rate": 0.001,
      "loss": 2.4977,
      "step": 212856
    },
    {
      "epoch": 40.87,
      "learning_rate": 0.001,
      "loss": 2.5028,
      "step": 212868
    },
    {
      "epoch": 40.88,
      "learning_rate": 0.001,
      "loss": 2.5021,
      "step": 212880
    },
    {
      "epoch": 40.88,
      "learning_rate": 0.001,
      "loss": 2.5063,
      "step": 212892
    },
    {
      "epoch": 40.88,
      "learning_rate": 0.001,
      "loss": 2.4876,
      "step": 212904
    },
    {
      "epoch": 40.88,
      "learning_rate": 0.001,
      "loss": 2.5101,
      "step": 212916
    },
    {
      "epoch": 40.88,
      "learning_rate": 0.001,
      "loss": 2.5051,
      "step": 212928
    },
    {
      "epoch": 40.89,
      "learning_rate": 0.001,
      "loss": 2.5,
      "step": 212940
    },
    {
      "epoch": 40.89,
      "learning_rate": 0.001,
      "loss": 2.4999,
      "step": 212952
    },
    {
      "epoch": 40.89,
      "learning_rate": 0.001,
      "loss": 2.4971,
      "step": 212964
    },
    {
      "epoch": 40.89,
      "learning_rate": 0.001,
      "loss": 2.5031,
      "step": 212976
    },
    {
      "epoch": 40.9,
      "learning_rate": 0.001,
      "loss": 2.4901,
      "step": 212988
    },
    {
      "epoch": 40.9,
      "learning_rate": 0.001,
      "loss": 2.5052,
      "step": 213000
    },
    {
      "epoch": 40.9,
      "learning_rate": 0.001,
      "loss": 2.5029,
      "step": 213012
    },
    {
      "epoch": 40.9,
      "learning_rate": 0.001,
      "loss": 2.4926,
      "step": 213024
    },
    {
      "epoch": 40.91,
      "learning_rate": 0.001,
      "loss": 2.4972,
      "step": 213036
    },
    {
      "epoch": 40.91,
      "learning_rate": 0.001,
      "loss": 2.5053,
      "step": 213048
    },
    {
      "epoch": 40.91,
      "learning_rate": 0.001,
      "loss": 2.5054,
      "step": 213060
    },
    {
      "epoch": 40.91,
      "learning_rate": 0.001,
      "loss": 2.5007,
      "step": 213072
    },
    {
      "epoch": 40.91,
      "learning_rate": 0.001,
      "loss": 2.5036,
      "step": 213084
    },
    {
      "epoch": 40.92,
      "learning_rate": 0.001,
      "loss": 2.5072,
      "step": 213096
    },
    {
      "epoch": 40.92,
      "learning_rate": 0.001,
      "loss": 2.5095,
      "step": 213108
    },
    {
      "epoch": 40.92,
      "learning_rate": 0.001,
      "loss": 2.4957,
      "step": 213120
    },
    {
      "epoch": 40.92,
      "eval_ag_news_accuracy": 0.3271875,
      "eval_ag_news_bleu_score": 5.169108115754358,
      "eval_ag_news_bleu_score_sem": 0.16503923262174292,
      "eval_ag_news_emb_cos_sim": 0.8210017681121826,
      "eval_ag_news_emb_cos_sim_sem": 0.007500360964767584,
      "eval_ag_news_emb_top1_equal": 0.21875,
      "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4736239910125732,
      "eval_ag_news_n_ngrams_match_1": 14.552,
      "eval_ag_news_n_ngrams_match_2": 3.316,
      "eval_ag_news_n_ngrams_match_3": 1.028,
      "eval_ag_news_num_pred_words": 46.538,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.25341699661642,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3636627801571863,
      "eval_ag_news_runtime": 15.3268,
      "eval_ag_news_samples_per_second": 32.623,
      "eval_ag_news_steps_per_second": 0.065,
      "eval_ag_news_token_set_f1": 0.361360187616445,
      "eval_ag_news_token_set_f1_sem": 0.00446827457414915,
      "eval_ag_news_token_set_precision": 0.34878617505293585,
      "eval_ag_news_token_set_recall": 0.38898972593450143,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 213125
    },
    {
      "epoch": 40.92,
      "eval_anthropic_toxic_prompts_accuracy": 0.11546875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2853693684322023,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12805168652778529,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6821027994155884,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009127290561927336,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.219939947128296,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.272,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.966,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.764,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.388,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.02661721597671,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.22074704688960617,
      "eval_anthropic_toxic_prompts_runtime": 32.4558,
      "eval_anthropic_toxic_prompts_samples_per_second": 15.406,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.031,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35853575371725,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006844027464354096,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.43916205786315576,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3275761487719574,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 213125
    },
    {
      "epoch": 40.92,
      "eval_arxiv_accuracy": 0.35296875,
      "eval_arxiv_bleu_score": 4.548372064096796,
      "eval_arxiv_bleu_score_sem": 0.13928834346354316,
      "eval_arxiv_emb_cos_sim": 0.7800613641738892,
      "eval_arxiv_emb_cos_sim_sem": 0.0066088161658709905,
      "eval_arxiv_emb_top1_equal": 0.296875,
      "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3394787311553955,
      "eval_arxiv_n_ngrams_match_1": 15.768,
      "eval_arxiv_n_ngrams_match_2": 3.052,
      "eval_arxiv_n_ngrams_match_3": 0.704,
      "eval_arxiv_num_pred_words": 40.804,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.204420787036668,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3755981361737335,
      "eval_arxiv_runtime": 28.3775,
      "eval_arxiv_samples_per_second": 17.62,
      "eval_arxiv_steps_per_second": 0.035,
      "eval_arxiv_token_set_f1": 0.36887950641780737,
      "eval_arxiv_token_set_f1_sem": 0.0042013310633415,
      "eval_arxiv_token_set_precision": 0.32464162611819186,
      "eval_arxiv_token_set_recall": 0.44361441413087405,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 213125
    },
    {
      "epoch": 40.92,
      "eval_python_code_alpaca_accuracy": 0.16378125,
      "eval_python_code_alpaca_bleu_score": 4.705942055721906,
      "eval_python_code_alpaca_bleu_score_sem": 0.1418070028514918,
      "eval_python_code_alpaca_emb_cos_sim": 0.7664552927017212,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007452085042520895,
      "eval_python_code_alpaca_emb_top1_equal": 0.15625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8642737865448,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.968,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.934,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.006,
      "eval_python_code_alpaca_num_pred_words": 42.916,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.536313472465732,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.34459987645758366,
      "eval_python_code_alpaca_runtime": 12.2157,
      "eval_python_code_alpaca_samples_per_second": 40.931,
      "eval_python_code_alpaca_steps_per_second": 0.082,
      "eval_python_code_alpaca_token_set_f1": 0.48284114138059797,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005379991978509389,
      "eval_python_code_alpaca_token_set_precision": 0.542330542676281,
      "eval_python_code_alpaca_token_set_recall": 0.4588993503726998,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 213125
    },
    {
      "epoch": 40.92,
      "eval_wikibio_accuracy": 0.3299375,
      "eval_wikibio_bleu_score": 6.295265732135231,
      "eval_wikibio_bleu_score_sem": 0.2197365531556376,
      "eval_wikibio_emb_cos_sim": 0.7429490089416504,
      "eval_wikibio_emb_cos_sim_sem": 0.008816129598994516,
      "eval_wikibio_emb_top1_equal": 0.2109375,
      "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.620128870010376,
      "eval_wikibio_n_ngrams_match_1": 10.27,
      "eval_wikibio_n_ngrams_match_2": 3.51,
      "eval_wikibio_n_ngrams_match_3": 1.348,
      "eval_wikibio_num_pred_words": 36.102,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.34237982486106,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36273745766227117,
      "eval_wikibio_runtime": 28.0698,
      "eval_wikibio_samples_per_second": 17.813,
      "eval_wikibio_steps_per_second": 0.036,
      "eval_wikibio_token_set_f1": 0.32618856732670787,
      "eval_wikibio_token_set_f1_sem": 0.005601880546756807,
      "eval_wikibio_token_set_precision": 0.33269052837779683,
      "eval_wikibio_token_set_recall": 0.33711766931600834,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 213125
    },
    {
      "epoch": 40.92,
      "eval_nq_accuracy": 0.537375,
      "eval_nq_bleu_score": 12.43024418502401,
      "eval_nq_bleu_score_sem": 0.5122337884247374,
      "eval_nq_emb_cos_sim": 0.8392021656036377,
      "eval_nq_emb_cos_sim_sem": 0.0067571704384513055,
      "eval_nq_emb_top1_equal": 0.3046875,
      "eval_nq_emb_top1_equal_sem": 0.04084279867618665,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1223866939544678,
      "eval_nq_n_ngrams_match_1": 23.656,
      "eval_nq_n_ngrams_match_2": 8.896,
      "eval_nq_n_ngrams_match_3": 4.224,
      "eval_nq_num_pred_words": 49.348,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.351045110414807,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4572380599396225,
      "eval_nq_runtime": 33.0366,
      "eval_nq_samples_per_second": 15.135,
      "eval_nq_steps_per_second": 0.03,
      "eval_nq_token_set_f1": 0.4705994427112349,
      "eval_nq_token_set_f1_sem": 0.005034228319134849,
      "eval_nq_token_set_precision": 0.43036213735376244,
      "eval_nq_token_set_recall": 0.5268105817699161,
      "eval_nq_true_num_tokens": 64.0,
      "step": 213125
    },
    {
      "epoch": 40.92,
      "learning_rate": 0.001,
      "loss": 2.5095,
      "step": 213132
    },
    {
      "epoch": 40.93,
      "learning_rate": 0.001,
      "loss": 2.486,
      "step": 213144
    },
    {
      "epoch": 40.93,
      "learning_rate": 0.001,
      "loss": 2.505,
      "step": 213156
    },
    {
      "epoch": 40.93,
      "learning_rate": 0.001,
      "loss": 2.4973,
      "step": 213168
    },
    {
      "epoch": 40.93,
      "learning_rate": 0.001,
      "loss": 2.4981,
      "step": 213180
    },
    {
      "epoch": 40.94,
      "learning_rate": 0.001,
      "loss": 2.4981,
      "step": 213192
    },
    {
      "epoch": 40.94,
      "learning_rate": 0.001,
      "loss": 2.4916,
      "step": 213204
    },
    {
      "epoch": 40.94,
      "learning_rate": 0.001,
      "loss": 2.5043,
      "step": 213216
    },
    {
      "epoch": 40.94,
      "learning_rate": 0.001,
      "loss": 2.4997,
      "step": 213228
    },
    {
      "epoch": 40.94,
      "learning_rate": 0.001,
      "loss": 2.4959,
      "step": 213240
    },
    {
      "epoch": 40.95,
      "learning_rate": 0.001,
      "loss": 2.495,
      "step": 213252
    },
    {
      "epoch": 40.95,
      "learning_rate": 0.001,
      "loss": 2.4889,
      "step": 213264
    },
    {
      "epoch": 40.95,
      "learning_rate": 0.001,
      "loss": 2.4981,
      "step": 213276
    },
    {
      "epoch": 40.95,
      "learning_rate": 0.001,
      "loss": 2.4916,
      "step": 213288
    },
    {
      "epoch": 40.96,
      "learning_rate": 0.001,
      "loss": 2.4957,
      "step": 213300
    },
    {
      "epoch": 40.96,
      "learning_rate": 0.001,
      "loss": 2.4963,
      "step": 213312
    },
    {
      "epoch": 40.96,
      "learning_rate": 0.001,
      "loss": 2.499,
      "step": 213324
    },
    {
      "epoch": 40.96,
      "learning_rate": 0.001,
      "loss": 2.5075,
      "step": 213336
    },
    {
      "epoch": 40.97,
      "learning_rate": 0.001,
      "loss": 2.5006,
      "step": 213348
    },
    {
      "epoch": 40.97,
      "learning_rate": 0.001,
      "loss": 2.5003,
      "step": 213360
    },
    {
      "epoch": 40.97,
      "learning_rate": 0.001,
      "loss": 2.5009,
      "step": 213372
    },
    {
      "epoch": 40.97,
      "learning_rate": 0.001,
      "loss": 2.489,
      "step": 213384
    },
    {
      "epoch": 40.97,
      "learning_rate": 0.001,
      "loss": 2.5017,
      "step": 213396
    },
    {
      "epoch": 40.98,
      "learning_rate": 0.001,
      "loss": 2.499,
      "step": 213408
    },
    {
      "epoch": 40.98,
      "learning_rate": 0.001,
      "loss": 2.4959,
      "step": 213420
    },
    {
      "epoch": 40.98,
      "learning_rate": 0.001,
      "loss": 2.5032,
      "step": 213432
    },
    {
      "epoch": 40.98,
      "learning_rate": 0.001,
      "loss": 2.4935,
      "step": 213444
    },
    {
      "epoch": 40.99,
      "learning_rate": 0.001,
      "loss": 2.5032,
      "step": 213456
    },
    {
      "epoch": 40.99,
      "learning_rate": 0.001,
      "loss": 2.4987,
      "step": 213468
    },
    {
      "epoch": 40.99,
      "learning_rate": 0.001,
      "loss": 2.4983,
      "step": 213480
    },
    {
      "epoch": 40.99,
      "learning_rate": 0.001,
      "loss": 2.5033,
      "step": 213492
    },
    {
      "epoch": 41.0,
      "learning_rate": 0.001,
      "loss": 2.4971,
      "step": 213504
    },
    {
      "epoch": 41.0,
      "learning_rate": 0.001,
      "loss": 2.5026,
      "step": 213516
    },
    {
      "epoch": 41.0,
      "learning_rate": 0.001,
      "loss": 2.5,
      "step": 213528
    },
    {
      "epoch": 41.0,
      "learning_rate": 0.001,
      "loss": 2.4786,
      "step": 213540
    },
    {
      "epoch": 41.0,
      "learning_rate": 0.001,
      "loss": 2.4699,
      "step": 213552
    },
    {
      "epoch": 41.01,
      "learning_rate": 0.001,
      "loss": 2.4927,
      "step": 213564
    },
    {
      "epoch": 41.01,
      "learning_rate": 0.001,
      "loss": 2.4773,
      "step": 213576
    },
    {
      "epoch": 41.01,
      "learning_rate": 0.001,
      "loss": 2.4786,
      "step": 213588
    },
    {
      "epoch": 41.01,
      "learning_rate": 0.001,
      "loss": 2.4773,
      "step": 213600
    },
    {
      "epoch": 41.02,
      "learning_rate": 0.001,
      "loss": 2.4748,
      "step": 213612
    },
    {
      "epoch": 41.02,
      "learning_rate": 0.001,
      "loss": 2.4832,
      "step": 213624
    },
    {
      "epoch": 41.02,
      "learning_rate": 0.001,
      "loss": 2.4879,
      "step": 213636
    },
    {
      "epoch": 41.02,
      "learning_rate": 0.001,
      "loss": 2.4792,
      "step": 213648
    },
    {
      "epoch": 41.03,
      "learning_rate": 0.001,
      "loss": 2.4926,
      "step": 213660
    },
    {
      "epoch": 41.03,
      "learning_rate": 0.001,
      "loss": 2.4888,
      "step": 213672
    },
    {
      "epoch": 41.03,
      "learning_rate": 0.001,
      "loss": 2.4781,
      "step": 213684
    },
    {
      "epoch": 41.03,
      "learning_rate": 0.001,
      "loss": 2.4839,
      "step": 213696
    },
    {
      "epoch": 41.03,
      "learning_rate": 0.001,
      "loss": 2.4831,
      "step": 213708
    },
    {
      "epoch": 41.04,
      "learning_rate": 0.001,
      "loss": 2.4796,
      "step": 213720
    },
    {
      "epoch": 41.04,
      "learning_rate": 0.001,
      "loss": 2.4789,
      "step": 213732
    },
    {
      "epoch": 41.04,
      "learning_rate": 0.001,
      "loss": 2.4845,
      "step": 213744
    },
    {
      "epoch": 41.04,
      "eval_ag_news_accuracy": 0.33009375,
      "eval_ag_news_bleu_score": 5.021169210371341,
      "eval_ag_news_bleu_score_sem": 0.15638525229831882,
      "eval_ag_news_emb_cos_sim": 0.8251120448112488,
      "eval_ag_news_emb_cos_sim_sem": 0.0059141320523791955,
      "eval_ag_news_emb_top1_equal": 0.2421875,
      "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.470844030380249,
      "eval_ag_news_n_ngrams_match_1": 14.6,
      "eval_ag_news_n_ngrams_match_2": 3.308,
      "eval_ag_news_n_ngrams_match_3": 0.956,
      "eval_ag_news_num_pred_words": 47.136,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.163878281823266,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.36112328265591154,
      "eval_ag_news_runtime": 28.624,
      "eval_ag_news_samples_per_second": 17.468,
      "eval_ag_news_steps_per_second": 0.035,
      "eval_ag_news_token_set_f1": 0.36095395962152366,
      "eval_ag_news_token_set_f1_sem": 0.004518493952908497,
      "eval_ag_news_token_set_precision": 0.34960525361713585,
      "eval_ag_news_token_set_recall": 0.3858555758987579,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 213750
    },
    {
      "epoch": 41.04,
      "eval_anthropic_toxic_prompts_accuracy": 0.1164375,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2782936240360314,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12083623366515364,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6817408800125122,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009024195419202642,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.1967227458953857,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.362,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.024,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.778,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.138,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.45226246254783,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2198347171276814,
      "eval_anthropic_toxic_prompts_runtime": 29.821,
      "eval_anthropic_toxic_prompts_samples_per_second": 16.767,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.034,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3570814592866485,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006393325882212609,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44961072120712753,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32034972562121095,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 213750
    },
    {
      "epoch": 41.04,
      "eval_arxiv_accuracy": 0.35471875,
      "eval_arxiv_bleu_score": 4.395262377424749,
      "eval_arxiv_bleu_score_sem": 0.12285051178584978,
      "eval_arxiv_emb_cos_sim": 0.7829989790916443,
      "eval_arxiv_emb_cos_sim_sem": 0.007437658307984517,
      "eval_arxiv_emb_top1_equal": 0.34375,
      "eval_arxiv_emb_top1_equal_sem": 0.04214578430296913,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.333362579345703,
      "eval_arxiv_n_ngrams_match_1": 15.882,
      "eval_arxiv_n_ngrams_match_2": 3.064,
      "eval_arxiv_n_ngrams_match_3": 0.628,
      "eval_arxiv_num_pred_words": 41.092,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.032444719762797,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3798240210446542,
      "eval_arxiv_runtime": 28.8102,
      "eval_arxiv_samples_per_second": 17.355,
      "eval_arxiv_steps_per_second": 0.035,
      "eval_arxiv_token_set_f1": 0.36853587317567066,
      "eval_arxiv_token_set_f1_sem": 0.004078921244420197,
      "eval_arxiv_token_set_precision": 0.3225207895017115,
      "eval_arxiv_token_set_recall": 0.4442408650522827,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 213750
    },
    {
      "epoch": 41.04,
      "eval_python_code_alpaca_accuracy": 0.16084375,
      "eval_python_code_alpaca_bleu_score": 4.601435887369982,
      "eval_python_code_alpaca_bleu_score_sem": 0.13867339875978243,
      "eval_python_code_alpaca_emb_cos_sim": 0.7690881490707397,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007758508436263286,
      "eval_python_code_alpaca_emb_top1_equal": 0.1953125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.035178457165496856,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8940842151641846,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.992,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.936,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.948,
      "eval_python_code_alpaca_num_pred_words": 43.204,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 18.066948425516664,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.34104713195191094,
      "eval_python_code_alpaca_runtime": 32.3754,
      "eval_python_code_alpaca_samples_per_second": 15.444,
      "eval_python_code_alpaca_steps_per_second": 0.031,
      "eval_python_code_alpaca_token_set_f1": 0.4810809371042541,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005493721279410755,
      "eval_python_code_alpaca_token_set_precision": 0.5410872989249331,
      "eval_python_code_alpaca_token_set_recall": 0.45792149389525905,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 213750
    },
    {
      "epoch": 41.04,
      "eval_wikibio_accuracy": 0.3305625,
      "eval_wikibio_bleu_score": 6.193185836495677,
      "eval_wikibio_bleu_score_sem": 0.2152254404163372,
      "eval_wikibio_emb_cos_sim": 0.7507250905036926,
      "eval_wikibio_emb_cos_sim_sem": 0.008622931079598611,
      "eval_wikibio_emb_top1_equal": 0.203125,
      "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6053049564361572,
      "eval_wikibio_n_ngrams_match_1": 10.338,
      "eval_wikibio_n_ngrams_match_2": 3.484,
      "eval_wikibio_n_ngrams_match_3": 1.318,
      "eval_wikibio_num_pred_words": 36.028,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 36.7929023789624,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3664291891461958,
      "eval_wikibio_runtime": 23.4455,
      "eval_wikibio_samples_per_second": 21.326,
      "eval_wikibio_steps_per_second": 0.043,
      "eval_wikibio_token_set_f1": 0.3289246859551673,
      "eval_wikibio_token_set_f1_sem": 0.005232198835774688,
      "eval_wikibio_token_set_precision": 0.337068872071351,
      "eval_wikibio_token_set_recall": 0.3361057086222319,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 213750
    },
    {
      "epoch": 41.04,
      "eval_nq_accuracy": 0.5374375,
      "eval_nq_bleu_score": 12.379697384449983,
      "eval_nq_bleu_score_sem": 0.49119424103565773,
      "eval_nq_emb_cos_sim": 0.8429065942764282,
      "eval_nq_emb_cos_sim_sem": 0.006428461898558779,
      "eval_nq_emb_top1_equal": 0.3203125,
      "eval_nq_emb_top1_equal_sem": 0.041403754790620424,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.113908290863037,
      "eval_nq_n_ngrams_match_1": 23.766,
      "eval_nq_n_ngrams_match_2": 8.868,
      "eval_nq_n_ngrams_match_3": 4.178,
      "eval_nq_num_pred_words": 49.446,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.28054088768326,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4611481037840349,
      "eval_nq_runtime": 31.6604,
      "eval_nq_samples_per_second": 15.793,
      "eval_nq_steps_per_second": 0.032,
      "eval_nq_token_set_f1": 0.4730361680173183,
      "eval_nq_token_set_f1_sem": 0.005038475836783473,
      "eval_nq_token_set_precision": 0.4335635735805142,
      "eval_nq_token_set_recall": 0.5272752864067703,
      "eval_nq_true_num_tokens": 64.0,
      "step": 213750
    },
    {
      "epoch": 41.04,
      "learning_rate": 0.001,
      "loss": 2.4884,
      "step": 213756
    },
    {
      "epoch": 41.05,
      "learning_rate": 0.001,
      "loss": 2.4833,
      "step": 213768
    },
    {
      "epoch": 41.05,
      "learning_rate": 0.001,
      "loss": 2.4934,
      "step": 213780
    },
    {
      "epoch": 41.05,
      "learning_rate": 0.001,
      "loss": 2.4945,
      "step": 213792
    },
    {
      "epoch": 41.05,
      "learning_rate": 0.001,
      "loss": 2.4871,
      "step": 213804
    },
    {
      "epoch": 41.06,
      "learning_rate": 0.001,
      "loss": 2.4818,
      "step": 213816
    },
    {
      "epoch": 41.06,
      "learning_rate": 0.001,
      "loss": 2.4852,
      "step": 213828
    },
    {
      "epoch": 41.06,
      "learning_rate": 0.001,
      "loss": 2.4806,
      "step": 213840
    },
    {
      "epoch": 41.06,
      "learning_rate": 0.001,
      "loss": 2.4818,
      "step": 213852
    },
    {
      "epoch": 41.06,
      "learning_rate": 0.001,
      "loss": 2.4854,
      "step": 213864
    },
    {
      "epoch": 41.07,
      "learning_rate": 0.001,
      "loss": 2.4884,
      "step": 213876
    },
    {
      "epoch": 41.07,
      "learning_rate": 0.001,
      "loss": 2.4881,
      "step": 213888
    },
    {
      "epoch": 41.07,
      "learning_rate": 0.001,
      "loss": 2.4837,
      "step": 213900
    },
    {
      "epoch": 41.07,
      "learning_rate": 0.001,
      "loss": 2.4941,
      "step": 213912
    },
    {
      "epoch": 41.08,
      "learning_rate": 0.001,
      "loss": 2.4933,
      "step": 213924
    },
    {
      "epoch": 41.08,
      "learning_rate": 0.001,
      "loss": 2.4844,
      "step": 213936
    },
    {
      "epoch": 41.08,
      "learning_rate": 0.001,
      "loss": 2.4826,
      "step": 213948
    },
    {
      "epoch": 41.08,
      "learning_rate": 0.001,
      "loss": 2.4823,
      "step": 213960
    },
    {
      "epoch": 41.09,
      "learning_rate": 0.001,
      "loss": 2.4817,
      "step": 213972
    },
    {
      "epoch": 41.09,
      "learning_rate": 0.001,
      "loss": 2.4832,
      "step": 213984
    },
    {
      "epoch": 41.09,
      "learning_rate": 0.001,
      "loss": 2.4915,
      "step": 213996
    },
    {
      "epoch": 41.09,
      "learning_rate": 0.001,
      "loss": 2.4804,
      "step": 214008
    },
    {
      "epoch": 41.09,
      "learning_rate": 0.001,
      "loss": 2.4854,
      "step": 214020
    },
    {
      "epoch": 41.1,
      "learning_rate": 0.001,
      "loss": 2.4814,
      "step": 214032
    },
    {
      "epoch": 41.1,
      "learning_rate": 0.001,
      "loss": 2.4821,
      "step": 214044
    },
    {
      "epoch": 41.1,
      "learning_rate": 0.001,
      "loss": 2.4818,
      "step": 214056
    },
    {
      "epoch": 41.1,
      "learning_rate": 0.001,
      "loss": 2.4817,
      "step": 214068
    },
    {
      "epoch": 41.11,
      "learning_rate": 0.001,
      "loss": 2.4862,
      "step": 214080
    },
    {
      "epoch": 41.11,
      "learning_rate": 0.001,
      "loss": 2.488,
      "step": 214092
    },
    {
      "epoch": 41.11,
      "learning_rate": 0.001,
      "loss": 2.4796,
      "step": 214104
    },
    {
      "epoch": 41.11,
      "learning_rate": 0.001,
      "loss": 2.4825,
      "step": 214116
    },
    {
      "epoch": 41.12,
      "learning_rate": 0.001,
      "loss": 2.4911,
      "step": 214128
    },
    {
      "epoch": 41.12,
      "learning_rate": 0.001,
      "loss": 2.4936,
      "step": 214140
    },
    {
      "epoch": 41.12,
      "learning_rate": 0.001,
      "loss": 2.4788,
      "step": 214152
    },
    {
      "epoch": 41.12,
      "learning_rate": 0.001,
      "loss": 2.4853,
      "step": 214164
    },
    {
      "epoch": 41.12,
      "learning_rate": 0.001,
      "loss": 2.4811,
      "step": 214176
    },
    {
      "epoch": 41.13,
      "learning_rate": 0.001,
      "loss": 2.4826,
      "step": 214188
    },
    {
      "epoch": 41.13,
      "learning_rate": 0.001,
      "loss": 2.4925,
      "step": 214200
    },
    {
      "epoch": 41.13,
      "learning_rate": 0.001,
      "loss": 2.489,
      "step": 214212
    },
    {
      "epoch": 41.13,
      "learning_rate": 0.001,
      "loss": 2.4808,
      "step": 214224
    },
    {
      "epoch": 41.14,
      "learning_rate": 0.001,
      "loss": 2.4916,
      "step": 214236
    },
    {
      "epoch": 41.14,
      "learning_rate": 0.001,
      "loss": 2.4872,
      "step": 214248
    },
    {
      "epoch": 41.14,
      "learning_rate": 0.001,
      "loss": 2.4914,
      "step": 214260
    },
    {
      "epoch": 41.14,
      "learning_rate": 0.001,
      "loss": 2.4915,
      "step": 214272
    },
    {
      "epoch": 41.15,
      "learning_rate": 0.001,
      "loss": 2.4862,
      "step": 214284
    },
    {
      "epoch": 41.15,
      "learning_rate": 0.001,
      "loss": 2.48,
      "step": 214296
    },
    {
      "epoch": 41.15,
      "learning_rate": 0.001,
      "loss": 2.484,
      "step": 214308
    },
    {
      "epoch": 41.15,
      "learning_rate": 0.001,
      "loss": 2.4872,
      "step": 214320
    },
    {
      "epoch": 41.15,
      "learning_rate": 0.001,
      "loss": 2.4854,
      "step": 214332
    },
    {
      "epoch": 41.16,
      "learning_rate": 0.001,
      "loss": 2.485,
      "step": 214344
    },
    {
      "epoch": 41.16,
      "learning_rate": 0.001,
      "loss": 2.4852,
      "step": 214356
    },
    {
      "epoch": 41.16,
      "learning_rate": 0.001,
      "loss": 2.4856,
      "step": 214368
    },
    {
      "epoch": 41.16,
      "eval_ag_news_accuracy": 0.32984375,
      "eval_ag_news_bleu_score": 5.0500648045856416,
      "eval_ag_news_bleu_score_sem": 0.15145892087035598,
      "eval_ag_news_emb_cos_sim": 0.8266647458076477,
      "eval_ag_news_emb_cos_sim_sem": 0.006768569843884487,
      "eval_ag_news_emb_top1_equal": 0.2734375,
      "eval_ag_news_emb_top1_equal_sem": 0.03955156411760461,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4688570499420166,
      "eval_ag_news_n_ngrams_match_1": 14.59,
      "eval_ag_news_n_ngrams_match_2": 3.314,
      "eval_ag_news_n_ngrams_match_3": 0.962,
      "eval_ag_news_num_pred_words": 46.63,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.10003273579078,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3645330028525393,
      "eval_ag_news_runtime": 14.5066,
      "eval_ag_news_samples_per_second": 34.467,
      "eval_ag_news_steps_per_second": 0.069,
      "eval_ag_news_token_set_f1": 0.36426206718323795,
      "eval_ag_news_token_set_f1_sem": 0.004448607279050028,
      "eval_ag_news_token_set_precision": 0.35096366226675657,
      "eval_ag_news_token_set_recall": 0.3927617778315918,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 214375
    },
    {
      "epoch": 41.16,
      "eval_anthropic_toxic_prompts_accuracy": 0.11621875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1590549159351045,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11324066800428384,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6883586049079895,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008477277498045532,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.1922554969787598,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.348,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.998,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.72,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.438,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.343271745139432,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21667823639138195,
      "eval_anthropic_toxic_prompts_runtime": 16.8899,
      "eval_anthropic_toxic_prompts_samples_per_second": 29.603,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.059,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3609796755322638,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0062846575196215115,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44631097934195413,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3291285110048185,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 214375
    },
    {
      "epoch": 41.16,
      "eval_arxiv_accuracy": 0.35440625,
      "eval_arxiv_bleu_score": 4.517363137791126,
      "eval_arxiv_bleu_score_sem": 0.14084658776327824,
      "eval_arxiv_emb_cos_sim": 0.7761874794960022,
      "eval_arxiv_emb_cos_sim_sem": 0.007372744258743644,
      "eval_arxiv_emb_top1_equal": 0.3125,
      "eval_arxiv_emb_top1_equal_sem": 0.041130074229814934,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3308255672454834,
      "eval_arxiv_n_ngrams_match_1": 15.536,
      "eval_arxiv_n_ngrams_match_2": 3.086,
      "eval_arxiv_n_ngrams_match_3": 0.706,
      "eval_arxiv_num_pred_words": 39.976,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.961416206506318,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3733102903340708,
      "eval_arxiv_runtime": 18.8546,
      "eval_arxiv_samples_per_second": 26.519,
      "eval_arxiv_steps_per_second": 0.053,
      "eval_arxiv_token_set_f1": 0.36435647260772625,
      "eval_arxiv_token_set_f1_sem": 0.004353442177504389,
      "eval_arxiv_token_set_precision": 0.31615323023331926,
      "eval_arxiv_token_set_recall": 0.4490499772382298,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 214375
    },
    {
      "epoch": 41.16,
      "eval_python_code_alpaca_accuracy": 0.16225,
      "eval_python_code_alpaca_bleu_score": 4.7667569386599205,
      "eval_python_code_alpaca_bleu_score_sem": 0.15668912231222806,
      "eval_python_code_alpaca_emb_cos_sim": 0.760850191116333,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008738534616707929,
      "eval_python_code_alpaca_emb_top1_equal": 0.15625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.859790802001953,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.938,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.014,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.044,
      "eval_python_code_alpaca_num_pred_words": 43.26,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.45787440216678,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33944286564659687,
      "eval_python_code_alpaca_runtime": 28.3846,
      "eval_python_code_alpaca_samples_per_second": 17.615,
      "eval_python_code_alpaca_steps_per_second": 0.035,
      "eval_python_code_alpaca_token_set_f1": 0.4826520430671495,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005614066338067491,
      "eval_python_code_alpaca_token_set_precision": 0.543396768205565,
      "eval_python_code_alpaca_token_set_recall": 0.4575205354507373,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 214375
    },
    {
      "epoch": 41.16,
      "eval_wikibio_accuracy": 0.32696875,
      "eval_wikibio_bleu_score": 6.3706395457308505,
      "eval_wikibio_bleu_score_sem": 0.2218275256674258,
      "eval_wikibio_emb_cos_sim": 0.7428708672523499,
      "eval_wikibio_emb_cos_sim_sem": 0.009724521283494323,
      "eval_wikibio_emb_top1_equal": 0.2265625,
      "eval_wikibio_emb_top1_equal_sem": 0.03714537682851538,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6633644104003906,
      "eval_wikibio_n_ngrams_match_1": 10.508,
      "eval_wikibio_n_ngrams_match_2": 3.566,
      "eval_wikibio_n_ngrams_match_3": 1.326,
      "eval_wikibio_num_pred_words": 36.516,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 38.99230856509684,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3714831456641676,
      "eval_wikibio_runtime": 30.2693,
      "eval_wikibio_samples_per_second": 16.518,
      "eval_wikibio_steps_per_second": 0.033,
      "eval_wikibio_token_set_f1": 0.33239982870428225,
      "eval_wikibio_token_set_f1_sem": 0.005208510645716343,
      "eval_wikibio_token_set_precision": 0.3424158931721413,
      "eval_wikibio_token_set_recall": 0.3388729467404177,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 214375
    },
    {
      "epoch": 41.16,
      "eval_nq_accuracy": 0.536125,
      "eval_nq_bleu_score": 11.95739626544232,
      "eval_nq_bleu_score_sem": 0.4800209515731253,
      "eval_nq_emb_cos_sim": 0.8345988392829895,
      "eval_nq_emb_cos_sim_sem": 0.0072397742444143135,
      "eval_nq_emb_top1_equal": 0.296875,
      "eval_nq_emb_top1_equal_sem": 0.04054163310179599,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1194307804107666,
      "eval_nq_n_ngrams_match_1": 23.534,
      "eval_nq_n_ngrams_match_2": 8.686,
      "eval_nq_n_ngrams_match_3": 3.956,
      "eval_nq_num_pred_words": 48.94,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.326396590462897,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4577215320803478,
      "eval_nq_runtime": 19.8451,
      "eval_nq_samples_per_second": 25.195,
      "eval_nq_steps_per_second": 0.05,
      "eval_nq_token_set_f1": 0.4708188621334759,
      "eval_nq_token_set_f1_sem": 0.0049491715082104135,
      "eval_nq_token_set_precision": 0.429296364658933,
      "eval_nq_token_set_recall": 0.5289907161663872,
      "eval_nq_true_num_tokens": 64.0,
      "step": 214375
    },
    {
      "epoch": 41.16,
      "learning_rate": 0.001,
      "loss": 2.4881,
      "step": 214380
    },
    {
      "epoch": 41.17,
      "learning_rate": 0.001,
      "loss": 2.4882,
      "step": 214392
    },
    {
      "epoch": 41.17,
      "learning_rate": 0.001,
      "loss": 2.4927,
      "step": 214404
    },
    {
      "epoch": 41.17,
      "learning_rate": 0.001,
      "loss": 2.4843,
      "step": 214416
    },
    {
      "epoch": 41.17,
      "learning_rate": 0.001,
      "loss": 2.4956,
      "step": 214428
    },
    {
      "epoch": 41.18,
      "learning_rate": 0.001,
      "loss": 2.4861,
      "step": 214440
    },
    {
      "epoch": 41.18,
      "learning_rate": 0.001,
      "loss": 2.4861,
      "step": 214452
    },
    {
      "epoch": 41.18,
      "learning_rate": 0.001,
      "loss": 2.4906,
      "step": 214464
    },
    {
      "epoch": 41.18,
      "learning_rate": 0.001,
      "loss": 2.4928,
      "step": 214476
    },
    {
      "epoch": 41.18,
      "learning_rate": 0.001,
      "loss": 2.4807,
      "step": 214488
    },
    {
      "epoch": 41.19,
      "learning_rate": 0.001,
      "loss": 2.4943,
      "step": 214500
    },
    {
      "epoch": 41.19,
      "learning_rate": 0.001,
      "loss": 2.4871,
      "step": 214512
    },
    {
      "epoch": 41.19,
      "learning_rate": 0.001,
      "loss": 2.5022,
      "step": 214524
    },
    {
      "epoch": 41.19,
      "learning_rate": 0.001,
      "loss": 2.4959,
      "step": 214536
    },
    {
      "epoch": 41.2,
      "learning_rate": 0.001,
      "loss": 2.4906,
      "step": 214548
    },
    {
      "epoch": 41.2,
      "learning_rate": 0.001,
      "loss": 2.4894,
      "step": 214560
    },
    {
      "epoch": 41.2,
      "learning_rate": 0.001,
      "loss": 2.4936,
      "step": 214572
    },
    {
      "epoch": 41.2,
      "learning_rate": 0.001,
      "loss": 2.4886,
      "step": 214584
    },
    {
      "epoch": 41.21,
      "learning_rate": 0.001,
      "loss": 2.4827,
      "step": 214596
    },
    {
      "epoch": 41.21,
      "learning_rate": 0.001,
      "loss": 2.4926,
      "step": 214608
    },
    {
      "epoch": 41.21,
      "learning_rate": 0.001,
      "loss": 2.494,
      "step": 214620
    },
    {
      "epoch": 41.21,
      "learning_rate": 0.001,
      "loss": 2.4905,
      "step": 214632
    },
    {
      "epoch": 41.21,
      "learning_rate": 0.001,
      "loss": 2.4908,
      "step": 214644
    },
    {
      "epoch": 41.22,
      "learning_rate": 0.001,
      "loss": 2.4879,
      "step": 214656
    },
    {
      "epoch": 41.22,
      "learning_rate": 0.001,
      "loss": 2.4878,
      "step": 214668
    },
    {
      "epoch": 41.22,
      "learning_rate": 0.001,
      "loss": 2.4845,
      "step": 214680
    },
    {
      "epoch": 41.22,
      "learning_rate": 0.001,
      "loss": 2.4912,
      "step": 214692
    },
    {
      "epoch": 41.23,
      "learning_rate": 0.001,
      "loss": 2.481,
      "step": 214704
    },
    {
      "epoch": 41.23,
      "learning_rate": 0.001,
      "loss": 2.4861,
      "step": 214716
    },
    {
      "epoch": 41.23,
      "learning_rate": 0.001,
      "loss": 2.4959,
      "step": 214728
    },
    {
      "epoch": 41.23,
      "learning_rate": 0.001,
      "loss": 2.4886,
      "step": 214740
    },
    {
      "epoch": 41.24,
      "learning_rate": 0.001,
      "loss": 2.4915,
      "step": 214752
    },
    {
      "epoch": 41.24,
      "learning_rate": 0.001,
      "loss": 2.4826,
      "step": 214764
    },
    {
      "epoch": 41.24,
      "learning_rate": 0.001,
      "loss": 2.4907,
      "step": 214776
    },
    {
      "epoch": 41.24,
      "learning_rate": 0.001,
      "loss": 2.4911,
      "step": 214788
    },
    {
      "epoch": 41.24,
      "learning_rate": 0.001,
      "loss": 2.4851,
      "step": 214800
    },
    {
      "epoch": 41.25,
      "learning_rate": 0.001,
      "loss": 2.4806,
      "step": 214812
    },
    {
      "epoch": 41.25,
      "learning_rate": 0.001,
      "loss": 2.4917,
      "step": 214824
    },
    {
      "epoch": 41.25,
      "learning_rate": 0.001,
      "loss": 2.4946,
      "step": 214836
    },
    {
      "epoch": 41.25,
      "learning_rate": 0.001,
      "loss": 2.4933,
      "step": 214848
    },
    {
      "epoch": 41.26,
      "learning_rate": 0.001,
      "loss": 2.4866,
      "step": 214860
    },
    {
      "epoch": 41.26,
      "learning_rate": 0.001,
      "loss": 2.4895,
      "step": 214872
    },
    {
      "epoch": 41.26,
      "learning_rate": 0.001,
      "loss": 2.4984,
      "step": 214884
    },
    {
      "epoch": 41.26,
      "learning_rate": 0.001,
      "loss": 2.4864,
      "step": 214896
    },
    {
      "epoch": 41.26,
      "learning_rate": 0.001,
      "loss": 2.4893,
      "step": 214908
    },
    {
      "epoch": 41.27,
      "learning_rate": 0.001,
      "loss": 2.4911,
      "step": 214920
    },
    {
      "epoch": 41.27,
      "learning_rate": 0.001,
      "loss": 2.4968,
      "step": 214932
    },
    {
      "epoch": 41.27,
      "learning_rate": 0.001,
      "loss": 2.4851,
      "step": 214944
    },
    {
      "epoch": 41.27,
      "learning_rate": 0.001,
      "loss": 2.4888,
      "step": 214956
    },
    {
      "epoch": 41.28,
      "learning_rate": 0.001,
      "loss": 2.483,
      "step": 214968
    },
    {
      "epoch": 41.28,
      "learning_rate": 0.001,
      "loss": 2.4931,
      "step": 214980
    },
    {
      "epoch": 41.28,
      "learning_rate": 0.001,
      "loss": 2.4943,
      "step": 214992
    },
    {
      "epoch": 41.28,
      "eval_ag_news_accuracy": 0.3286875,
      "eval_ag_news_bleu_score": 4.911106548055742,
      "eval_ag_news_bleu_score_sem": 0.14961079662746388,
      "eval_ag_news_emb_cos_sim": 0.8296934366226196,
      "eval_ag_news_emb_cos_sim_sem": 0.0058848492122872366,
      "eval_ag_news_emb_top1_equal": 0.2890625,
      "eval_ag_news_emb_top1_equal_sem": 0.04022626667363519,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4643592834472656,
      "eval_ag_news_n_ngrams_match_1": 14.346,
      "eval_ag_news_n_ngrams_match_2": 3.224,
      "eval_ag_news_n_ngrams_match_3": 0.902,
      "eval_ag_news_num_pred_words": 46.742,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.955978488105877,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3581928938976931,
      "eval_ag_news_runtime": 15.8479,
      "eval_ag_news_samples_per_second": 31.55,
      "eval_ag_news_steps_per_second": 0.063,
      "eval_ag_news_token_set_f1": 0.3568847740016171,
      "eval_ag_news_token_set_f1_sem": 0.004431182206229447,
      "eval_ag_news_token_set_precision": 0.3438032532362278,
      "eval_ag_news_token_set_recall": 0.3854205918315783,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 215000
    },
    {
      "epoch": 41.28,
      "eval_anthropic_toxic_prompts_accuracy": 0.1150625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.0956788601167107,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11819492799749814,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6837218403816223,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009700210454437634,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.20171856880188,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.32,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.884,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.684,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.362,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.574727287052745,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21645741784773337,
      "eval_anthropic_toxic_prompts_runtime": 27.1114,
      "eval_anthropic_toxic_prompts_samples_per_second": 18.442,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.037,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35735044911598524,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006285558259126718,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44404995470070413,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3261554018538246,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 215000
    },
    {
      "epoch": 41.28,
      "eval_arxiv_accuracy": 0.353625,
      "eval_arxiv_bleu_score": 4.539136264421966,
      "eval_arxiv_bleu_score_sem": 0.12932764220855839,
      "eval_arxiv_emb_cos_sim": 0.7805837988853455,
      "eval_arxiv_emb_cos_sim_sem": 0.006615535295485938,
      "eval_arxiv_emb_top1_equal": 0.2578125,
      "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3242313861846924,
      "eval_arxiv_n_ngrams_match_1": 15.794,
      "eval_arxiv_n_ngrams_match_2": 3.102,
      "eval_arxiv_n_ngrams_match_3": 0.7,
      "eval_arxiv_num_pred_words": 40.87,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.777640157526058,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.37585548490655346,
      "eval_arxiv_runtime": 33.5625,
      "eval_arxiv_samples_per_second": 14.898,
      "eval_arxiv_steps_per_second": 0.03,
      "eval_arxiv_token_set_f1": 0.3692386947731102,
      "eval_arxiv_token_set_f1_sem": 0.0042843514083066355,
      "eval_arxiv_token_set_precision": 0.32221066340479737,
      "eval_arxiv_token_set_recall": 0.447671855288058,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 215000
    },
    {
      "epoch": 41.28,
      "eval_python_code_alpaca_accuracy": 0.164625,
      "eval_python_code_alpaca_bleu_score": 4.733642276204919,
      "eval_python_code_alpaca_bleu_score_sem": 0.15469169785408335,
      "eval_python_code_alpaca_emb_cos_sim": 0.7663385272026062,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007827949320452032,
      "eval_python_code_alpaca_emb_top1_equal": 0.1640625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8512251377105713,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.014,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.952,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.024,
      "eval_python_code_alpaca_num_pred_words": 43.668,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.308974733487993,
      "eval_python_code_alpaca_pred_num_tokens": 62.9921875,
      "eval_python_code_alpaca_rouge_score": 0.3413899149257977,
      "eval_python_code_alpaca_runtime": 16.8678,
      "eval_python_code_alpaca_samples_per_second": 29.642,
      "eval_python_code_alpaca_steps_per_second": 0.059,
      "eval_python_code_alpaca_token_set_f1": 0.4822155775355044,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0054798389208696825,
      "eval_python_code_alpaca_token_set_precision": 0.5454143116803508,
      "eval_python_code_alpaca_token_set_recall": 0.45402405975938065,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 215000
    },
    {
      "epoch": 41.28,
      "eval_wikibio_accuracy": 0.33246875,
      "eval_wikibio_bleu_score": 6.354835710628173,
      "eval_wikibio_bleu_score_sem": 0.2192777670804854,
      "eval_wikibio_emb_cos_sim": 0.7575595378875732,
      "eval_wikibio_emb_cos_sim_sem": 0.008641644280337448,
      "eval_wikibio_emb_top1_equal": 0.265625,
      "eval_wikibio_emb_top1_equal_sem": 0.03919146934646163,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6177446842193604,
      "eval_wikibio_n_ngrams_match_1": 10.304,
      "eval_wikibio_n_ngrams_match_2": 3.564,
      "eval_wikibio_n_ngrams_match_3": 1.338,
      "eval_wikibio_num_pred_words": 36.224,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.25345470260978,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36866684470874084,
      "eval_wikibio_runtime": 17.4541,
      "eval_wikibio_samples_per_second": 28.646,
      "eval_wikibio_steps_per_second": 0.057,
      "eval_wikibio_token_set_f1": 0.32775999538201855,
      "eval_wikibio_token_set_f1_sem": 0.005387724717493739,
      "eval_wikibio_token_set_precision": 0.33604664044376237,
      "eval_wikibio_token_set_recall": 0.3355184819412624,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 215000
    },
    {
      "epoch": 41.28,
      "eval_nq_accuracy": 0.53690625,
      "eval_nq_bleu_score": 12.167901512556057,
      "eval_nq_bleu_score_sem": 0.48271683559208606,
      "eval_nq_emb_cos_sim": 0.8382583260536194,
      "eval_nq_emb_cos_sim_sem": 0.007045135488179449,
      "eval_nq_emb_top1_equal": 0.296875,
      "eval_nq_emb_top1_equal_sem": 0.04054163310179599,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1166770458221436,
      "eval_nq_n_ngrams_match_1": 23.418,
      "eval_nq_n_ngrams_match_2": 8.758,
      "eval_nq_n_ngrams_match_3": 4.096,
      "eval_nq_num_pred_words": 49.038,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.303499444973093,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45391975960684405,
      "eval_nq_runtime": 19.1117,
      "eval_nq_samples_per_second": 26.162,
      "eval_nq_steps_per_second": 0.052,
      "eval_nq_token_set_f1": 0.46770886768053815,
      "eval_nq_token_set_f1_sem": 0.004929054691161523,
      "eval_nq_token_set_precision": 0.42591227686410665,
      "eval_nq_token_set_recall": 0.5272969797722891,
      "eval_nq_true_num_tokens": 64.0,
      "step": 215000
    },
    {
      "epoch": 41.28,
      "learning_rate": 0.001,
      "loss": 2.4999,
      "step": 215004
    },
    {
      "epoch": 41.29,
      "learning_rate": 0.001,
      "loss": 2.5016,
      "step": 215016
    },
    {
      "epoch": 41.29,
      "learning_rate": 0.001,
      "loss": 2.4892,
      "step": 215028
    },
    {
      "epoch": 41.29,
      "learning_rate": 0.001,
      "loss": 2.4952,
      "step": 215040
    },
    {
      "epoch": 41.29,
      "learning_rate": 0.001,
      "loss": 2.4998,
      "step": 215052
    },
    {
      "epoch": 41.29,
      "learning_rate": 0.001,
      "loss": 2.499,
      "step": 215064
    },
    {
      "epoch": 41.3,
      "learning_rate": 0.001,
      "loss": 2.4907,
      "step": 215076
    },
    {
      "epoch": 41.3,
      "learning_rate": 0.001,
      "loss": 2.5003,
      "step": 215088
    },
    {
      "epoch": 41.3,
      "learning_rate": 0.001,
      "loss": 2.4891,
      "step": 215100
    },
    {
      "epoch": 41.3,
      "learning_rate": 0.001,
      "loss": 2.4913,
      "step": 215112
    },
    {
      "epoch": 41.31,
      "learning_rate": 0.001,
      "loss": 2.4921,
      "step": 215124
    },
    {
      "epoch": 41.31,
      "learning_rate": 0.001,
      "loss": 2.4873,
      "step": 215136
    },
    {
      "epoch": 41.31,
      "learning_rate": 0.001,
      "loss": 2.4981,
      "step": 215148
    },
    {
      "epoch": 41.31,
      "learning_rate": 0.001,
      "loss": 2.4957,
      "step": 215160
    },
    {
      "epoch": 41.32,
      "learning_rate": 0.001,
      "loss": 2.4914,
      "step": 215172
    },
    {
      "epoch": 41.32,
      "learning_rate": 0.001,
      "loss": 2.4928,
      "step": 215184
    },
    {
      "epoch": 41.32,
      "learning_rate": 0.001,
      "loss": 2.4921,
      "step": 215196
    },
    {
      "epoch": 41.32,
      "learning_rate": 0.001,
      "loss": 2.5002,
      "step": 215208
    },
    {
      "epoch": 41.32,
      "learning_rate": 0.001,
      "loss": 2.4881,
      "step": 215220
    },
    {
      "epoch": 41.33,
      "learning_rate": 0.001,
      "loss": 2.498,
      "step": 215232
    },
    {
      "epoch": 41.33,
      "learning_rate": 0.001,
      "loss": 2.4904,
      "step": 215244
    },
    {
      "epoch": 41.33,
      "learning_rate": 0.001,
      "loss": 2.4847,
      "step": 215256
    },
    {
      "epoch": 41.33,
      "learning_rate": 0.001,
      "loss": 2.4878,
      "step": 215268
    },
    {
      "epoch": 41.34,
      "learning_rate": 0.001,
      "loss": 2.4922,
      "step": 215280
    },
    {
      "epoch": 41.34,
      "learning_rate": 0.001,
      "loss": 2.4931,
      "step": 215292
    },
    {
      "epoch": 41.34,
      "learning_rate": 0.001,
      "loss": 2.5043,
      "step": 215304
    },
    {
      "epoch": 41.34,
      "learning_rate": 0.001,
      "loss": 2.4951,
      "step": 215316
    },
    {
      "epoch": 41.35,
      "learning_rate": 0.001,
      "loss": 2.4905,
      "step": 215328
    },
    {
      "epoch": 41.35,
      "learning_rate": 0.001,
      "loss": 2.495,
      "step": 215340
    },
    {
      "epoch": 41.35,
      "learning_rate": 0.001,
      "loss": 2.4877,
      "step": 215352
    },
    {
      "epoch": 41.35,
      "learning_rate": 0.001,
      "loss": 2.488,
      "step": 215364
    },
    {
      "epoch": 41.35,
      "learning_rate": 0.001,
      "loss": 2.4932,
      "step": 215376
    },
    {
      "epoch": 41.36,
      "learning_rate": 0.001,
      "loss": 2.4934,
      "step": 215388
    },
    {
      "epoch": 41.36,
      "learning_rate": 0.001,
      "loss": 2.4879,
      "step": 215400
    },
    {
      "epoch": 41.36,
      "learning_rate": 0.001,
      "loss": 2.4966,
      "step": 215412
    },
    {
      "epoch": 41.36,
      "learning_rate": 0.001,
      "loss": 2.4932,
      "step": 215424
    },
    {
      "epoch": 41.37,
      "learning_rate": 0.001,
      "loss": 2.4862,
      "step": 215436
    },
    {
      "epoch": 41.37,
      "learning_rate": 0.001,
      "loss": 2.4944,
      "step": 215448
    },
    {
      "epoch": 41.37,
      "learning_rate": 0.001,
      "loss": 2.4851,
      "step": 215460
    },
    {
      "epoch": 41.37,
      "learning_rate": 0.001,
      "loss": 2.4911,
      "step": 215472
    },
    {
      "epoch": 41.38,
      "learning_rate": 0.001,
      "loss": 2.4921,
      "step": 215484
    },
    {
      "epoch": 41.38,
      "learning_rate": 0.001,
      "loss": 2.4922,
      "step": 215496
    },
    {
      "epoch": 41.38,
      "learning_rate": 0.001,
      "loss": 2.4923,
      "step": 215508
    },
    {
      "epoch": 41.38,
      "learning_rate": 0.001,
      "loss": 2.5,
      "step": 215520
    },
    {
      "epoch": 41.38,
      "learning_rate": 0.001,
      "loss": 2.4849,
      "step": 215532
    },
    {
      "epoch": 41.39,
      "learning_rate": 0.001,
      "loss": 2.4989,
      "step": 215544
    },
    {
      "epoch": 41.39,
      "learning_rate": 0.001,
      "loss": 2.4926,
      "step": 215556
    },
    {
      "epoch": 41.39,
      "learning_rate": 0.001,
      "loss": 2.4834,
      "step": 215568
    },
    {
      "epoch": 41.39,
      "learning_rate": 0.001,
      "loss": 2.4939,
      "step": 215580
    },
    {
      "epoch": 41.4,
      "learning_rate": 0.001,
      "loss": 2.4952,
      "step": 215592
    },
    {
      "epoch": 41.4,
      "learning_rate": 0.001,
      "loss": 2.4946,
      "step": 215604
    },
    {
      "epoch": 41.4,
      "learning_rate": 0.001,
      "loss": 2.4889,
      "step": 215616
    },
    {
      "epoch": 41.4,
      "eval_ag_news_accuracy": 0.32859375,
      "eval_ag_news_bleu_score": 4.894812245394238,
      "eval_ag_news_bleu_score_sem": 0.1531967238026543,
      "eval_ag_news_emb_cos_sim": 0.8191208243370056,
      "eval_ag_news_emb_cos_sim_sem": 0.006856653464549715,
      "eval_ag_news_emb_top1_equal": 0.2890625,
      "eval_ag_news_emb_top1_equal_sem": 0.04022626667363519,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4753947257995605,
      "eval_ag_news_n_ngrams_match_1": 14.278,
      "eval_ag_news_n_ngrams_match_2": 3.196,
      "eval_ag_news_n_ngrams_match_3": 0.934,
      "eval_ag_news_num_pred_words": 46.552,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.310579839272364,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35686990856875433,
      "eval_ag_news_runtime": 18.7791,
      "eval_ag_news_samples_per_second": 26.625,
      "eval_ag_news_steps_per_second": 0.053,
      "eval_ag_news_token_set_f1": 0.355913359291341,
      "eval_ag_news_token_set_f1_sem": 0.00457827121597649,
      "eval_ag_news_token_set_precision": 0.34176478967173335,
      "eval_ag_news_token_set_recall": 0.3846986494841942,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 215625
    },
    {
      "epoch": 41.4,
      "eval_anthropic_toxic_prompts_accuracy": 0.11503125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.0877630092569657,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1131846609571181,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6885554790496826,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009313639559895713,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.178467273712158,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.258,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.948,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.714,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.186,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.009924692248806,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21387175087449456,
      "eval_anthropic_toxic_prompts_runtime": 10.8721,
      "eval_anthropic_toxic_prompts_samples_per_second": 45.989,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.092,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35715467711469967,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0067049714524682075,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.43857833331093654,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32829719280060493,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 215625
    },
    {
      "epoch": 41.4,
      "eval_arxiv_accuracy": 0.35271875,
      "eval_arxiv_bleu_score": 4.370168208140656,
      "eval_arxiv_bleu_score_sem": 0.12384095741061274,
      "eval_arxiv_emb_cos_sim": 0.7805910110473633,
      "eval_arxiv_emb_cos_sim_sem": 0.0067356505210194465,
      "eval_arxiv_emb_top1_equal": 0.2734375,
      "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.338520050048828,
      "eval_arxiv_n_ngrams_match_1": 15.608,
      "eval_arxiv_n_ngrams_match_2": 2.986,
      "eval_arxiv_n_ngrams_match_3": 0.652,
      "eval_arxiv_num_pred_words": 40.362,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.177394698476608,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.37313406751281053,
      "eval_arxiv_runtime": 19.0748,
      "eval_arxiv_samples_per_second": 26.213,
      "eval_arxiv_steps_per_second": 0.052,
      "eval_arxiv_token_set_f1": 0.3670502874880372,
      "eval_arxiv_token_set_f1_sem": 0.004256057944297466,
      "eval_arxiv_token_set_precision": 0.3192087679268113,
      "eval_arxiv_token_set_recall": 0.4474315570824344,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 215625
    },
    {
      "epoch": 41.4,
      "eval_python_code_alpaca_accuracy": 0.163375,
      "eval_python_code_alpaca_bleu_score": 4.609929005459897,
      "eval_python_code_alpaca_bleu_score_sem": 0.15030918062664822,
      "eval_python_code_alpaca_emb_cos_sim": 0.7570693492889404,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008847097654482404,
      "eval_python_code_alpaca_emb_top1_equal": 0.1015625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.026804565886848545,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8363194465637207,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.792,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.898,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.99,
      "eval_python_code_alpaca_num_pred_words": 43.442,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.052885834519543,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3333159120233796,
      "eval_python_code_alpaca_runtime": 22.6613,
      "eval_python_code_alpaca_samples_per_second": 22.064,
      "eval_python_code_alpaca_steps_per_second": 0.044,
      "eval_python_code_alpaca_token_set_f1": 0.48357523990704065,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005586495130759468,
      "eval_python_code_alpaca_token_set_precision": 0.5363463926542905,
      "eval_python_code_alpaca_token_set_recall": 0.46347628627462434,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 215625
    },
    {
      "epoch": 41.4,
      "eval_wikibio_accuracy": 0.3279375,
      "eval_wikibio_bleu_score": 6.334611971164382,
      "eval_wikibio_bleu_score_sem": 0.22534530571102912,
      "eval_wikibio_emb_cos_sim": 0.7648400068283081,
      "eval_wikibio_emb_cos_sim_sem": 0.00739425890781944,
      "eval_wikibio_emb_top1_equal": 0.2578125,
      "eval_wikibio_emb_top1_equal_sem": 0.038815656435002115,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6433780193328857,
      "eval_wikibio_n_ngrams_match_1": 10.366,
      "eval_wikibio_n_ngrams_match_2": 3.534,
      "eval_wikibio_n_ngrams_match_3": 1.392,
      "eval_wikibio_num_pred_words": 36.416,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 38.22072926446075,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.364576780067592,
      "eval_wikibio_runtime": 12.419,
      "eval_wikibio_samples_per_second": 40.261,
      "eval_wikibio_steps_per_second": 0.081,
      "eval_wikibio_token_set_f1": 0.32913321843529936,
      "eval_wikibio_token_set_f1_sem": 0.005290374367772688,
      "eval_wikibio_token_set_precision": 0.33872084585488477,
      "eval_wikibio_token_set_recall": 0.3356276853334591,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 215625
    },
    {
      "epoch": 41.4,
      "eval_nq_accuracy": 0.5360625,
      "eval_nq_bleu_score": 12.354282355795416,
      "eval_nq_bleu_score_sem": 0.4942688778690195,
      "eval_nq_emb_cos_sim": 0.8390867710113525,
      "eval_nq_emb_cos_sim_sem": 0.007167073564911053,
      "eval_nq_emb_top1_equal": 0.34375,
      "eval_nq_emb_top1_equal_sem": 0.04214578430296913,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.117419958114624,
      "eval_nq_n_ngrams_match_1": 23.732,
      "eval_nq_n_ngrams_match_2": 8.874,
      "eval_nq_n_ngrams_match_3": 4.138,
      "eval_nq_num_pred_words": 49.026,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.30967050877712,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.46196326280853994,
      "eval_nq_runtime": 15.8886,
      "eval_nq_samples_per_second": 31.469,
      "eval_nq_steps_per_second": 0.063,
      "eval_nq_token_set_f1": 0.47229959673054156,
      "eval_nq_token_set_f1_sem": 0.004810090559089124,
      "eval_nq_token_set_precision": 0.43130310019788043,
      "eval_nq_token_set_recall": 0.529068390268912,
      "eval_nq_true_num_tokens": 64.0,
      "step": 215625
    },
    {
      "epoch": 41.4,
      "learning_rate": 0.001,
      "loss": 2.4934,
      "step": 215628
    },
    {
      "epoch": 41.41,
      "learning_rate": 0.001,
      "loss": 2.4925,
      "step": 215640
    },
    {
      "epoch": 41.41,
      "learning_rate": 0.001,
      "loss": 2.5066,
      "step": 215652
    },
    {
      "epoch": 41.41,
      "learning_rate": 0.001,
      "loss": 2.497,
      "step": 215664
    },
    {
      "epoch": 41.41,
      "learning_rate": 0.001,
      "loss": 2.5014,
      "step": 215676
    },
    {
      "epoch": 41.41,
      "learning_rate": 0.001,
      "loss": 2.4979,
      "step": 215688
    },
    {
      "epoch": 41.42,
      "learning_rate": 0.001,
      "loss": 2.4957,
      "step": 215700
    },
    {
      "epoch": 41.42,
      "learning_rate": 0.001,
      "loss": 2.4964,
      "step": 215712
    },
    {
      "epoch": 41.42,
      "learning_rate": 0.001,
      "loss": 2.4857,
      "step": 215724
    },
    {
      "epoch": 41.42,
      "learning_rate": 0.001,
      "loss": 2.4943,
      "step": 215736
    },
    {
      "epoch": 41.43,
      "learning_rate": 0.001,
      "loss": 2.4816,
      "step": 215748
    },
    {
      "epoch": 41.43,
      "learning_rate": 0.001,
      "loss": 2.4904,
      "step": 215760
    },
    {
      "epoch": 41.43,
      "learning_rate": 0.001,
      "loss": 2.5005,
      "step": 215772
    },
    {
      "epoch": 41.43,
      "learning_rate": 0.001,
      "loss": 2.5018,
      "step": 215784
    },
    {
      "epoch": 41.44,
      "learning_rate": 0.001,
      "loss": 2.4957,
      "step": 215796
    },
    {
      "epoch": 41.44,
      "learning_rate": 0.001,
      "loss": 2.4928,
      "step": 215808
    },
    {
      "epoch": 41.44,
      "learning_rate": 0.001,
      "loss": 2.4946,
      "step": 215820
    },
    {
      "epoch": 41.44,
      "learning_rate": 0.001,
      "loss": 2.4956,
      "step": 215832
    },
    {
      "epoch": 41.44,
      "learning_rate": 0.001,
      "loss": 2.501,
      "step": 215844
    },
    {
      "epoch": 41.45,
      "learning_rate": 0.001,
      "loss": 2.4967,
      "step": 215856
    },
    {
      "epoch": 41.45,
      "learning_rate": 0.001,
      "loss": 2.4867,
      "step": 215868
    },
    {
      "epoch": 41.45,
      "learning_rate": 0.001,
      "loss": 2.4938,
      "step": 215880
    },
    {
      "epoch": 41.45,
      "learning_rate": 0.001,
      "loss": 2.5051,
      "step": 215892
    },
    {
      "epoch": 41.46,
      "learning_rate": 0.001,
      "loss": 2.5038,
      "step": 215904
    },
    {
      "epoch": 41.46,
      "learning_rate": 0.001,
      "loss": 2.4873,
      "step": 215916
    },
    {
      "epoch": 41.46,
      "learning_rate": 0.001,
      "loss": 2.4908,
      "step": 215928
    },
    {
      "epoch": 41.46,
      "learning_rate": 0.001,
      "loss": 2.4857,
      "step": 215940
    },
    {
      "epoch": 41.47,
      "learning_rate": 0.001,
      "loss": 2.4987,
      "step": 215952
    },
    {
      "epoch": 41.47,
      "learning_rate": 0.001,
      "loss": 2.4982,
      "step": 215964
    },
    {
      "epoch": 41.47,
      "learning_rate": 0.001,
      "loss": 2.4896,
      "step": 215976
    },
    {
      "epoch": 41.47,
      "learning_rate": 0.001,
      "loss": 2.4756,
      "step": 215988
    },
    {
      "epoch": 41.47,
      "learning_rate": 0.001,
      "loss": 2.4973,
      "step": 216000
    },
    {
      "epoch": 41.48,
      "learning_rate": 0.001,
      "loss": 2.5027,
      "step": 216012
    },
    {
      "epoch": 41.48,
      "learning_rate": 0.001,
      "loss": 2.493,
      "step": 216024
    },
    {
      "epoch": 41.48,
      "learning_rate": 0.001,
      "loss": 2.4894,
      "step": 216036
    },
    {
      "epoch": 41.48,
      "learning_rate": 0.001,
      "loss": 2.4945,
      "step": 216048
    },
    {
      "epoch": 41.49,
      "learning_rate": 0.001,
      "loss": 2.5012,
      "step": 216060
    },
    {
      "epoch": 41.49,
      "learning_rate": 0.001,
      "loss": 2.5093,
      "step": 216072
    },
    {
      "epoch": 41.49,
      "learning_rate": 0.001,
      "loss": 2.4894,
      "step": 216084
    },
    {
      "epoch": 41.49,
      "learning_rate": 0.001,
      "loss": 2.4885,
      "step": 216096
    },
    {
      "epoch": 41.5,
      "learning_rate": 0.001,
      "loss": 2.4896,
      "step": 216108
    },
    {
      "epoch": 41.5,
      "learning_rate": 0.001,
      "loss": 2.4943,
      "step": 216120
    },
    {
      "epoch": 41.5,
      "learning_rate": 0.001,
      "loss": 2.4981,
      "step": 216132
    },
    {
      "epoch": 41.5,
      "learning_rate": 0.001,
      "loss": 2.4963,
      "step": 216144
    },
    {
      "epoch": 41.5,
      "learning_rate": 0.001,
      "loss": 2.4938,
      "step": 216156
    },
    {
      "epoch": 41.51,
      "learning_rate": 0.001,
      "loss": 2.4992,
      "step": 216168
    },
    {
      "epoch": 41.51,
      "learning_rate": 0.001,
      "loss": 2.5087,
      "step": 216180
    },
    {
      "epoch": 41.51,
      "learning_rate": 0.001,
      "loss": 2.4987,
      "step": 216192
    },
    {
      "epoch": 41.51,
      "learning_rate": 0.001,
      "loss": 2.4862,
      "step": 216204
    },
    {
      "epoch": 41.52,
      "learning_rate": 0.001,
      "loss": 2.4928,
      "step": 216216
    },
    {
      "epoch": 41.52,
      "learning_rate": 0.001,
      "loss": 2.4911,
      "step": 216228
    },
    {
      "epoch": 41.52,
      "learning_rate": 0.001,
      "loss": 2.4846,
      "step": 216240
    },
    {
      "epoch": 41.52,
      "eval_ag_news_accuracy": 0.32965625,
      "eval_ag_news_bleu_score": 4.975507926974853,
      "eval_ag_news_bleu_score_sem": 0.16282958648646803,
      "eval_ag_news_emb_cos_sim": 0.819546103477478,
      "eval_ag_news_emb_cos_sim_sem": 0.006797881659934797,
      "eval_ag_news_emb_top1_equal": 0.296875,
      "eval_ag_news_emb_top1_equal_sem": 0.04054163310179599,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.461921453475952,
      "eval_ag_news_n_ngrams_match_1": 14.562,
      "eval_ag_news_n_ngrams_match_2": 3.268,
      "eval_ag_news_n_ngrams_match_3": 0.958,
      "eval_ag_news_num_pred_words": 46.948,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.878170126297825,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.36062709276859495,
      "eval_ag_news_runtime": 16.6294,
      "eval_ag_news_samples_per_second": 30.067,
      "eval_ag_news_steps_per_second": 0.06,
      "eval_ag_news_token_set_f1": 0.3587889977136666,
      "eval_ag_news_token_set_f1_sem": 0.004344475204137445,
      "eval_ag_news_token_set_precision": 0.34800964407998963,
      "eval_ag_news_token_set_recall": 0.38283276987761256,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 216250
    },
    {
      "epoch": 41.52,
      "eval_anthropic_toxic_prompts_accuracy": 0.11634375,
      "eval_anthropic_toxic_prompts_bleu_score": 3.154174211135589,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12056889723607998,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6729702353477478,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009214522025999235,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.179611921310425,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.166,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.924,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.726,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.774,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.037423330004746,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21408151939438294,
      "eval_anthropic_toxic_prompts_runtime": 11.596,
      "eval_anthropic_toxic_prompts_samples_per_second": 43.118,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.086,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35509446971774833,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006623083033777618,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4339126628588507,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3270439195951161,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 216250
    },
    {
      "epoch": 41.52,
      "eval_arxiv_accuracy": 0.35446875,
      "eval_arxiv_bleu_score": 4.6568916794792266,
      "eval_arxiv_bleu_score_sem": 0.13509631913722348,
      "eval_arxiv_emb_cos_sim": 0.7880792617797852,
      "eval_arxiv_emb_cos_sim_sem": 0.007674974699603709,
      "eval_arxiv_emb_top1_equal": 0.3125,
      "eval_arxiv_emb_top1_equal_sem": 0.041130074229814934,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.325824499130249,
      "eval_arxiv_n_ngrams_match_1": 15.91,
      "eval_arxiv_n_ngrams_match_2": 3.252,
      "eval_arxiv_n_ngrams_match_3": 0.754,
      "eval_arxiv_num_pred_words": 41.314,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.82192834433291,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3792920545631109,
      "eval_arxiv_runtime": 12.6402,
      "eval_arxiv_samples_per_second": 39.556,
      "eval_arxiv_steps_per_second": 0.079,
      "eval_arxiv_token_set_f1": 0.3700239828577745,
      "eval_arxiv_token_set_f1_sem": 0.00429736415187324,
      "eval_arxiv_token_set_precision": 0.3253997234839459,
      "eval_arxiv_token_set_recall": 0.44514193106922234,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 216250
    },
    {
      "epoch": 41.52,
      "eval_python_code_alpaca_accuracy": 0.16440625,
      "eval_python_code_alpaca_bleu_score": 4.758354599599039,
      "eval_python_code_alpaca_bleu_score_sem": 0.14804902514791038,
      "eval_python_code_alpaca_emb_cos_sim": 0.7701547145843506,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007143228985366015,
      "eval_python_code_alpaca_emb_top1_equal": 0.15625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8359646797180176,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.98,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.964,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.992,
      "eval_python_code_alpaca_num_pred_words": 42.522,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.04683710900897,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.341995131297409,
      "eval_python_code_alpaca_runtime": 12.2223,
      "eval_python_code_alpaca_samples_per_second": 40.909,
      "eval_python_code_alpaca_steps_per_second": 0.082,
      "eval_python_code_alpaca_token_set_f1": 0.48032662453752833,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005443734611848907,
      "eval_python_code_alpaca_token_set_precision": 0.545062432059744,
      "eval_python_code_alpaca_token_set_recall": 0.4513832325149613,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 216250
    },
    {
      "epoch": 41.52,
      "eval_wikibio_accuracy": 0.33096875,
      "eval_wikibio_bleu_score": 6.133919956002912,
      "eval_wikibio_bleu_score_sem": 0.22972306645858473,
      "eval_wikibio_emb_cos_sim": 0.7307310104370117,
      "eval_wikibio_emb_cos_sim_sem": 0.010424130651361321,
      "eval_wikibio_emb_top1_equal": 0.2109375,
      "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6156070232391357,
      "eval_wikibio_n_ngrams_match_1": 10.212,
      "eval_wikibio_n_ngrams_match_2": 3.484,
      "eval_wikibio_n_ngrams_match_3": 1.32,
      "eval_wikibio_num_pred_words": 35.858,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.17390450208594,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3567853559953841,
      "eval_wikibio_runtime": 11.6035,
      "eval_wikibio_samples_per_second": 43.09,
      "eval_wikibio_steps_per_second": 0.086,
      "eval_wikibio_token_set_f1": 0.32210554516742845,
      "eval_wikibio_token_set_f1_sem": 0.005619655367232886,
      "eval_wikibio_token_set_precision": 0.3316311373522093,
      "eval_wikibio_token_set_recall": 0.330802615239244,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 216250
    },
    {
      "epoch": 41.52,
      "eval_nq_accuracy": 0.537375,
      "eval_nq_bleu_score": 12.47472674728733,
      "eval_nq_bleu_score_sem": 0.5044904634964353,
      "eval_nq_emb_cos_sim": 0.8439424633979797,
      "eval_nq_emb_cos_sim_sem": 0.006934180616751981,
      "eval_nq_emb_top1_equal": 0.3125,
      "eval_nq_emb_top1_equal_sem": 0.041130074229814934,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1153202056884766,
      "eval_nq_n_ngrams_match_1": 23.6,
      "eval_nq_n_ngrams_match_2": 8.864,
      "eval_nq_n_ngrams_match_3": 4.226,
      "eval_nq_num_pred_words": 49.226,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.292240563654603,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4573645291481141,
      "eval_nq_runtime": 12.5055,
      "eval_nq_samples_per_second": 39.982,
      "eval_nq_steps_per_second": 0.08,
      "eval_nq_token_set_f1": 0.46811466852358713,
      "eval_nq_token_set_f1_sem": 0.004987963608760948,
      "eval_nq_token_set_precision": 0.429554826873254,
      "eval_nq_token_set_recall": 0.5219791979977839,
      "eval_nq_true_num_tokens": 64.0,
      "step": 216250
    },
    {
      "epoch": 41.52,
      "learning_rate": 0.001,
      "loss": 2.4899,
      "step": 216252
    },
    {
      "epoch": 41.53,
      "learning_rate": 0.001,
      "loss": 2.4971,
      "step": 216264
    },
    {
      "epoch": 41.53,
      "learning_rate": 0.001,
      "loss": 2.4948,
      "step": 216276
    },
    {
      "epoch": 41.53,
      "learning_rate": 0.001,
      "loss": 2.4992,
      "step": 216288
    },
    {
      "epoch": 41.53,
      "learning_rate": 0.001,
      "loss": 2.4904,
      "step": 216300
    },
    {
      "epoch": 41.53,
      "learning_rate": 0.001,
      "loss": 2.49,
      "step": 216312
    },
    {
      "epoch": 41.54,
      "learning_rate": 0.001,
      "loss": 2.4907,
      "step": 216324
    },
    {
      "epoch": 41.54,
      "learning_rate": 0.001,
      "loss": 2.4955,
      "step": 216336
    },
    {
      "epoch": 41.54,
      "learning_rate": 0.001,
      "loss": 2.5025,
      "step": 216348
    },
    {
      "epoch": 41.54,
      "learning_rate": 0.001,
      "loss": 2.5004,
      "step": 216360
    },
    {
      "epoch": 41.55,
      "learning_rate": 0.001,
      "loss": 2.4932,
      "step": 216372
    },
    {
      "epoch": 41.55,
      "learning_rate": 0.001,
      "loss": 2.4889,
      "step": 216384
    },
    {
      "epoch": 41.55,
      "learning_rate": 0.001,
      "loss": 2.4913,
      "step": 216396
    },
    {
      "epoch": 41.55,
      "learning_rate": 0.001,
      "loss": 2.4963,
      "step": 216408
    },
    {
      "epoch": 41.56,
      "learning_rate": 0.001,
      "loss": 2.4861,
      "step": 216420
    },
    {
      "epoch": 41.56,
      "learning_rate": 0.001,
      "loss": 2.4901,
      "step": 216432
    },
    {
      "epoch": 41.56,
      "learning_rate": 0.001,
      "loss": 2.4909,
      "step": 216444
    },
    {
      "epoch": 41.56,
      "learning_rate": 0.001,
      "loss": 2.4957,
      "step": 216456
    },
    {
      "epoch": 41.56,
      "learning_rate": 0.001,
      "loss": 2.4923,
      "step": 216468
    },
    {
      "epoch": 41.57,
      "learning_rate": 0.001,
      "loss": 2.4891,
      "step": 216480
    },
    {
      "epoch": 41.57,
      "learning_rate": 0.001,
      "loss": 2.4929,
      "step": 216492
    },
    {
      "epoch": 41.57,
      "learning_rate": 0.001,
      "loss": 2.5013,
      "step": 216504
    },
    {
      "epoch": 41.57,
      "learning_rate": 0.001,
      "loss": 2.506,
      "step": 216516
    },
    {
      "epoch": 41.58,
      "learning_rate": 0.001,
      "loss": 2.4921,
      "step": 216528
    },
    {
      "epoch": 41.58,
      "learning_rate": 0.001,
      "loss": 2.4916,
      "step": 216540
    },
    {
      "epoch": 41.58,
      "learning_rate": 0.001,
      "loss": 2.4908,
      "step": 216552
    },
    {
      "epoch": 41.58,
      "learning_rate": 0.001,
      "loss": 2.5005,
      "step": 216564
    },
    {
      "epoch": 41.59,
      "learning_rate": 0.001,
      "loss": 2.4812,
      "step": 216576
    },
    {
      "epoch": 41.59,
      "learning_rate": 0.001,
      "loss": 2.5033,
      "step": 216588
    },
    {
      "epoch": 41.59,
      "learning_rate": 0.001,
      "loss": 2.4965,
      "step": 216600
    },
    {
      "epoch": 41.59,
      "learning_rate": 0.001,
      "loss": 2.4955,
      "step": 216612
    },
    {
      "epoch": 41.59,
      "learning_rate": 0.001,
      "loss": 2.4804,
      "step": 216624
    },
    {
      "epoch": 41.6,
      "learning_rate": 0.001,
      "loss": 2.5018,
      "step": 216636
    },
    {
      "epoch": 41.6,
      "learning_rate": 0.001,
      "loss": 2.4963,
      "step": 216648
    },
    {
      "epoch": 41.6,
      "learning_rate": 0.001,
      "loss": 2.492,
      "step": 216660
    },
    {
      "epoch": 41.6,
      "learning_rate": 0.001,
      "loss": 2.4948,
      "step": 216672
    },
    {
      "epoch": 41.61,
      "learning_rate": 0.001,
      "loss": 2.4933,
      "step": 216684
    },
    {
      "epoch": 41.61,
      "learning_rate": 0.001,
      "loss": 2.4903,
      "step": 216696
    },
    {
      "epoch": 41.61,
      "learning_rate": 0.001,
      "loss": 2.5001,
      "step": 216708
    },
    {
      "epoch": 41.61,
      "learning_rate": 0.001,
      "loss": 2.495,
      "step": 216720
    },
    {
      "epoch": 41.62,
      "learning_rate": 0.001,
      "loss": 2.4974,
      "step": 216732
    },
    {
      "epoch": 41.62,
      "learning_rate": 0.001,
      "loss": 2.4927,
      "step": 216744
    },
    {
      "epoch": 41.62,
      "learning_rate": 0.001,
      "loss": 2.4954,
      "step": 216756
    },
    {
      "epoch": 41.62,
      "learning_rate": 0.001,
      "loss": 2.4925,
      "step": 216768
    },
    {
      "epoch": 41.62,
      "learning_rate": 0.001,
      "loss": 2.4975,
      "step": 216780
    },
    {
      "epoch": 41.63,
      "learning_rate": 0.001,
      "loss": 2.5017,
      "step": 216792
    },
    {
      "epoch": 41.63,
      "learning_rate": 0.001,
      "loss": 2.4896,
      "step": 216804
    },
    {
      "epoch": 41.63,
      "learning_rate": 0.001,
      "loss": 2.4893,
      "step": 216816
    },
    {
      "epoch": 41.63,
      "learning_rate": 0.001,
      "loss": 2.495,
      "step": 216828
    },
    {
      "epoch": 41.64,
      "learning_rate": 0.001,
      "loss": 2.4962,
      "step": 216840
    },
    {
      "epoch": 41.64,
      "learning_rate": 0.001,
      "loss": 2.4943,
      "step": 216852
    },
    {
      "epoch": 41.64,
      "learning_rate": 0.001,
      "loss": 2.5014,
      "step": 216864
    },
    {
      "epoch": 41.64,
      "eval_ag_news_accuracy": 0.32953125,
      "eval_ag_news_bleu_score": 4.887446146470891,
      "eval_ag_news_bleu_score_sem": 0.15133356751567484,
      "eval_ag_news_emb_cos_sim": 0.8248995542526245,
      "eval_ag_news_emb_cos_sim_sem": 0.006077720435062675,
      "eval_ag_news_emb_top1_equal": 0.2734375,
      "eval_ag_news_emb_top1_equal_sem": 0.03955156411760461,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.46960186958313,
      "eval_ag_news_n_ngrams_match_1": 14.484,
      "eval_ag_news_n_ngrams_match_2": 3.22,
      "eval_ag_news_n_ngrams_match_3": 0.932,
      "eval_ag_news_num_pred_words": 47.306,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.123950376711434,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3605940607231198,
      "eval_ag_news_runtime": 13.4946,
      "eval_ag_news_samples_per_second": 37.052,
      "eval_ag_news_steps_per_second": 0.074,
      "eval_ag_news_token_set_f1": 0.35857620706777227,
      "eval_ag_news_token_set_f1_sem": 0.00450405740569866,
      "eval_ag_news_token_set_precision": 0.34563491624333487,
      "eval_ag_news_token_set_recall": 0.3842417877920185,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 216875
    },
    {
      "epoch": 41.64,
      "eval_anthropic_toxic_prompts_accuracy": 0.1150625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2513411431816186,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.126892913681169,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6807286143302917,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008860512206110012,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.204263210296631,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.346,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.98,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.744,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.892,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.637340788500286,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2195885444428198,
      "eval_anthropic_toxic_prompts_runtime": 11.038,
      "eval_anthropic_toxic_prompts_samples_per_second": 45.298,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.091,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36068211898830227,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0065099890695187665,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4471870388666266,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32668230122323505,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 216875
    },
    {
      "epoch": 41.64,
      "eval_arxiv_accuracy": 0.35315625,
      "eval_arxiv_bleu_score": 4.477254602161336,
      "eval_arxiv_bleu_score_sem": 0.12972708225207144,
      "eval_arxiv_emb_cos_sim": 0.7866698503494263,
      "eval_arxiv_emb_cos_sim_sem": 0.00649461071875212,
      "eval_arxiv_emb_top1_equal": 0.28125,
      "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.334129571914673,
      "eval_arxiv_n_ngrams_match_1": 15.7,
      "eval_arxiv_n_ngrams_match_2": 3.066,
      "eval_arxiv_n_ngrams_match_3": 0.68,
      "eval_arxiv_num_pred_words": 41.072,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.053953644091038,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3719643842534974,
      "eval_arxiv_runtime": 12.3489,
      "eval_arxiv_samples_per_second": 40.49,
      "eval_arxiv_steps_per_second": 0.081,
      "eval_arxiv_token_set_f1": 0.3657464384572065,
      "eval_arxiv_token_set_f1_sem": 0.004237910241118956,
      "eval_arxiv_token_set_precision": 0.31950822512058513,
      "eval_arxiv_token_set_recall": 0.44483633726071475,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 216875
    },
    {
      "epoch": 41.64,
      "eval_python_code_alpaca_accuracy": 0.1628125,
      "eval_python_code_alpaca_bleu_score": 4.724090232263869,
      "eval_python_code_alpaca_bleu_score_sem": 0.14421954033702714,
      "eval_python_code_alpaca_emb_cos_sim": 0.7643148899078369,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.0076521653520252204,
      "eval_python_code_alpaca_emb_top1_equal": 0.140625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8600780963897705,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.016,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.97,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.988,
      "eval_python_code_alpaca_num_pred_words": 42.9,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.46289067204433,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.34516917348252074,
      "eval_python_code_alpaca_runtime": 15.4619,
      "eval_python_code_alpaca_samples_per_second": 32.338,
      "eval_python_code_alpaca_steps_per_second": 0.065,
      "eval_python_code_alpaca_token_set_f1": 0.4861581536419704,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005400821756556567,
      "eval_python_code_alpaca_token_set_precision": 0.5497370924112553,
      "eval_python_code_alpaca_token_set_recall": 0.4574293568838186,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 216875
    },
    {
      "epoch": 41.64,
      "eval_wikibio_accuracy": 0.32853125,
      "eval_wikibio_bleu_score": 6.049140432372366,
      "eval_wikibio_bleu_score_sem": 0.2281769783081609,
      "eval_wikibio_emb_cos_sim": 0.7500406503677368,
      "eval_wikibio_emb_cos_sim_sem": 0.010314869556408384,
      "eval_wikibio_emb_top1_equal": 0.2109375,
      "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.629960298538208,
      "eval_wikibio_n_ngrams_match_1": 10.076,
      "eval_wikibio_n_ngrams_match_2": 3.428,
      "eval_wikibio_n_ngrams_match_3": 1.28,
      "eval_wikibio_num_pred_words": 35.682,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.71131939295495,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3565092139045687,
      "eval_wikibio_runtime": 11.28,
      "eval_wikibio_samples_per_second": 44.326,
      "eval_wikibio_steps_per_second": 0.089,
      "eval_wikibio_token_set_f1": 0.3204860351212683,
      "eval_wikibio_token_set_f1_sem": 0.005776137581377932,
      "eval_wikibio_token_set_precision": 0.32676062238929077,
      "eval_wikibio_token_set_recall": 0.3340286328140021,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 216875
    },
    {
      "epoch": 41.64,
      "eval_nq_accuracy": 0.5380625,
      "eval_nq_bleu_score": 12.163553386979748,
      "eval_nq_bleu_score_sem": 0.490096609792054,
      "eval_nq_emb_cos_sim": 0.8416872024536133,
      "eval_nq_emb_cos_sim_sem": 0.0067169327104690535,
      "eval_nq_emb_top1_equal": 0.3046875,
      "eval_nq_emb_top1_equal_sem": 0.04084279867618665,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.117141008377075,
      "eval_nq_n_ngrams_match_1": 23.504,
      "eval_nq_n_ngrams_match_2": 8.782,
      "eval_nq_n_ngrams_match_3": 4.054,
      "eval_nq_num_pred_words": 49.368,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.307352851639534,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4560129468395967,
      "eval_nq_runtime": 11.6033,
      "eval_nq_samples_per_second": 43.091,
      "eval_nq_steps_per_second": 0.086,
      "eval_nq_token_set_f1": 0.4682685039993215,
      "eval_nq_token_set_f1_sem": 0.004955362930816411,
      "eval_nq_token_set_precision": 0.4261815413465682,
      "eval_nq_token_set_recall": 0.5267050085226828,
      "eval_nq_true_num_tokens": 64.0,
      "step": 216875
    },
    {
      "epoch": 41.64,
      "learning_rate": 0.001,
      "loss": 2.4909,
      "step": 216876
    },
    {
      "epoch": 41.65,
      "learning_rate": 0.001,
      "loss": 2.5017,
      "step": 216888
    },
    {
      "epoch": 41.65,
      "learning_rate": 0.001,
      "loss": 2.4952,
      "step": 216900
    },
    {
      "epoch": 41.65,
      "learning_rate": 0.001,
      "loss": 2.4849,
      "step": 216912
    },
    {
      "epoch": 41.65,
      "learning_rate": 0.001,
      "loss": 2.5016,
      "step": 216924
    },
    {
      "epoch": 41.65,
      "learning_rate": 0.001,
      "loss": 2.4996,
      "step": 216936
    },
    {
      "epoch": 41.66,
      "learning_rate": 0.001,
      "loss": 2.4939,
      "step": 216948
    },
    {
      "epoch": 41.66,
      "learning_rate": 0.001,
      "loss": 2.4929,
      "step": 216960
    },
    {
      "epoch": 41.66,
      "learning_rate": 0.001,
      "loss": 2.4932,
      "step": 216972
    },
    {
      "epoch": 41.66,
      "learning_rate": 0.001,
      "loss": 2.4961,
      "step": 216984
    },
    {
      "epoch": 41.67,
      "learning_rate": 0.001,
      "loss": 2.5025,
      "step": 216996
    },
    {
      "epoch": 41.67,
      "learning_rate": 0.001,
      "loss": 2.4897,
      "step": 217008
    },
    {
      "epoch": 41.67,
      "learning_rate": 0.001,
      "loss": 2.4989,
      "step": 217020
    },
    {
      "epoch": 41.67,
      "learning_rate": 0.001,
      "loss": 2.4848,
      "step": 217032
    },
    {
      "epoch": 41.68,
      "learning_rate": 0.001,
      "loss": 2.4932,
      "step": 217044
    },
    {
      "epoch": 41.68,
      "learning_rate": 0.001,
      "loss": 2.5023,
      "step": 217056
    },
    {
      "epoch": 41.68,
      "learning_rate": 0.001,
      "loss": 2.4987,
      "step": 217068
    },
    {
      "epoch": 41.68,
      "learning_rate": 0.001,
      "loss": 2.5045,
      "step": 217080
    },
    {
      "epoch": 41.68,
      "learning_rate": 0.001,
      "loss": 2.4976,
      "step": 217092
    },
    {
      "epoch": 41.69,
      "learning_rate": 0.001,
      "loss": 2.499,
      "step": 217104
    },
    {
      "epoch": 41.69,
      "learning_rate": 0.001,
      "loss": 2.4902,
      "step": 217116
    },
    {
      "epoch": 41.69,
      "learning_rate": 0.001,
      "loss": 2.4946,
      "step": 217128
    },
    {
      "epoch": 41.69,
      "learning_rate": 0.001,
      "loss": 2.5054,
      "step": 217140
    },
    {
      "epoch": 41.7,
      "learning_rate": 0.001,
      "loss": 2.4918,
      "step": 217152
    },
    {
      "epoch": 41.7,
      "learning_rate": 0.001,
      "loss": 2.4848,
      "step": 217164
    },
    {
      "epoch": 41.7,
      "learning_rate": 0.001,
      "loss": 2.4976,
      "step": 217176
    },
    {
      "epoch": 41.7,
      "learning_rate": 0.001,
      "loss": 2.5,
      "step": 217188
    },
    {
      "epoch": 41.71,
      "learning_rate": 0.001,
      "loss": 2.5003,
      "step": 217200
    },
    {
      "epoch": 41.71,
      "learning_rate": 0.001,
      "loss": 2.5036,
      "step": 217212
    },
    {
      "epoch": 41.71,
      "learning_rate": 0.001,
      "loss": 2.4982,
      "step": 217224
    },
    {
      "epoch": 41.71,
      "learning_rate": 0.001,
      "loss": 2.4861,
      "step": 217236
    },
    {
      "epoch": 41.71,
      "learning_rate": 0.001,
      "loss": 2.4874,
      "step": 217248
    },
    {
      "epoch": 41.72,
      "learning_rate": 0.001,
      "loss": 2.4999,
      "step": 217260
    },
    {
      "epoch": 41.72,
      "learning_rate": 0.001,
      "loss": 2.4939,
      "step": 217272
    },
    {
      "epoch": 41.72,
      "learning_rate": 0.001,
      "loss": 2.4943,
      "step": 217284
    },
    {
      "epoch": 41.72,
      "learning_rate": 0.001,
      "loss": 2.5002,
      "step": 217296
    },
    {
      "epoch": 41.73,
      "learning_rate": 0.001,
      "loss": 2.4919,
      "step": 217308
    },
    {
      "epoch": 41.73,
      "learning_rate": 0.001,
      "loss": 2.4904,
      "step": 217320
    },
    {
      "epoch": 41.73,
      "learning_rate": 0.001,
      "loss": 2.4987,
      "step": 217332
    },
    {
      "epoch": 41.73,
      "learning_rate": 0.001,
      "loss": 2.5019,
      "step": 217344
    },
    {
      "epoch": 41.74,
      "learning_rate": 0.001,
      "loss": 2.4954,
      "step": 217356
    },
    {
      "epoch": 41.74,
      "learning_rate": 0.001,
      "loss": 2.5048,
      "step": 217368
    },
    {
      "epoch": 41.74,
      "learning_rate": 0.001,
      "loss": 2.5052,
      "step": 217380
    },
    {
      "epoch": 41.74,
      "learning_rate": 0.001,
      "loss": 2.4931,
      "step": 217392
    },
    {
      "epoch": 41.74,
      "learning_rate": 0.001,
      "loss": 2.4942,
      "step": 217404
    },
    {
      "epoch": 41.75,
      "learning_rate": 0.001,
      "loss": 2.4974,
      "step": 217416
    },
    {
      "epoch": 41.75,
      "learning_rate": 0.001,
      "loss": 2.4979,
      "step": 217428
    },
    {
      "epoch": 41.75,
      "learning_rate": 0.001,
      "loss": 2.4901,
      "step": 217440
    },
    {
      "epoch": 41.75,
      "learning_rate": 0.001,
      "loss": 2.4973,
      "step": 217452
    },
    {
      "epoch": 41.76,
      "learning_rate": 0.001,
      "loss": 2.4932,
      "step": 217464
    },
    {
      "epoch": 41.76,
      "learning_rate": 0.001,
      "loss": 2.4917,
      "step": 217476
    },
    {
      "epoch": 41.76,
      "learning_rate": 0.001,
      "loss": 2.4906,
      "step": 217488
    },
    {
      "epoch": 41.76,
      "learning_rate": 0.001,
      "loss": 2.4964,
      "step": 217500
    },
    {
      "epoch": 41.76,
      "eval_ag_news_accuracy": 0.32859375,
      "eval_ag_news_bleu_score": 4.995583157547209,
      "eval_ag_news_bleu_score_sem": 0.1596756936655915,
      "eval_ag_news_emb_cos_sim": 0.8249098062515259,
      "eval_ag_news_emb_cos_sim_sem": 0.006520750325907478,
      "eval_ag_news_emb_top1_equal": 0.265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4698293209075928,
      "eval_ag_news_n_ngrams_match_1": 14.552,
      "eval_ag_news_n_ngrams_match_2": 3.266,
      "eval_ag_news_n_ngrams_match_3": 0.896,
      "eval_ag_news_num_pred_words": 47.264,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.13125784278652,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3614383583803509,
      "eval_ag_news_runtime": 16.9613,
      "eval_ag_news_samples_per_second": 29.479,
      "eval_ag_news_steps_per_second": 0.059,
      "eval_ag_news_token_set_f1": 0.3588871070810427,
      "eval_ag_news_token_set_f1_sem": 0.004512316867045151,
      "eval_ag_news_token_set_precision": 0.3472791618544491,
      "eval_ag_news_token_set_recall": 0.38478773813664385,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 217500
    },
    {
      "epoch": 41.76,
      "eval_anthropic_toxic_prompts_accuracy": 0.11625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1188430433525545,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11125796945082382,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6867282390594482,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009132653089855839,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.1896812915802,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.292,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.976,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.71,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.836,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.280687750182906,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21645242415468985,
      "eval_anthropic_toxic_prompts_runtime": 11.2998,
      "eval_anthropic_toxic_prompts_samples_per_second": 44.249,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.088,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3606826922836969,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006406130219475097,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44734410281782194,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32859232847931197,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 217500
    },
    {
      "epoch": 41.76,
      "eval_arxiv_accuracy": 0.35484375,
      "eval_arxiv_bleu_score": 4.349381927634081,
      "eval_arxiv_bleu_score_sem": 0.12261669041775945,
      "eval_arxiv_emb_cos_sim": 0.7795267105102539,
      "eval_arxiv_emb_cos_sim_sem": 0.006579561643192874,
      "eval_arxiv_emb_top1_equal": 0.2890625,
      "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3339083194732666,
      "eval_arxiv_n_ngrams_match_1": 15.524,
      "eval_arxiv_n_ngrams_match_2": 2.992,
      "eval_arxiv_n_ngrams_match_3": 0.63,
      "eval_arxiv_num_pred_words": 41.052,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.047747324963133,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3708006821196053,
      "eval_arxiv_runtime": 11.5819,
      "eval_arxiv_samples_per_second": 43.171,
      "eval_arxiv_steps_per_second": 0.086,
      "eval_arxiv_token_set_f1": 0.36223340003084625,
      "eval_arxiv_token_set_f1_sem": 0.004068027199631216,
      "eval_arxiv_token_set_precision": 0.31482838317608736,
      "eval_arxiv_token_set_recall": 0.4432528440056383,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 217500
    },
    {
      "epoch": 41.76,
      "eval_python_code_alpaca_accuracy": 0.16503125,
      "eval_python_code_alpaca_bleu_score": 4.934751460771646,
      "eval_python_code_alpaca_bleu_score_sem": 0.1560997454553954,
      "eval_python_code_alpaca_emb_cos_sim": 0.7645458579063416,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007737975677048893,
      "eval_python_code_alpaca_emb_top1_equal": 0.1015625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.026804565886848545,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.819257974624634,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.146,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.216,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.152,
      "eval_python_code_alpaca_num_pred_words": 44.776,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 16.76440644076113,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3379222997135297,
      "eval_python_code_alpaca_runtime": 11.3243,
      "eval_python_code_alpaca_samples_per_second": 44.153,
      "eval_python_code_alpaca_steps_per_second": 0.088,
      "eval_python_code_alpaca_token_set_f1": 0.4850597834122426,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005418792979596099,
      "eval_python_code_alpaca_token_set_precision": 0.5567010566560711,
      "eval_python_code_alpaca_token_set_recall": 0.4485105211120921,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 217500
    },
    {
      "epoch": 41.76,
      "eval_wikibio_accuracy": 0.32784375,
      "eval_wikibio_bleu_score": 6.1123385861585655,
      "eval_wikibio_bleu_score_sem": 0.22337905477189268,
      "eval_wikibio_emb_cos_sim": 0.7536593675613403,
      "eval_wikibio_emb_cos_sim_sem": 0.009892973017369255,
      "eval_wikibio_emb_top1_equal": 0.2109375,
      "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6195919513702393,
      "eval_wikibio_n_ngrams_match_1": 10.164,
      "eval_wikibio_n_ngrams_match_2": 3.456,
      "eval_wikibio_n_ngrams_match_3": 1.268,
      "eval_wikibio_num_pred_words": 36.198,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.322335386663816,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36250165984266763,
      "eval_wikibio_runtime": 11.5349,
      "eval_wikibio_samples_per_second": 43.347,
      "eval_wikibio_steps_per_second": 0.087,
      "eval_wikibio_token_set_f1": 0.3232677307099517,
      "eval_wikibio_token_set_f1_sem": 0.005515271360753612,
      "eval_wikibio_token_set_precision": 0.3308972985618345,
      "eval_wikibio_token_set_recall": 0.3323116846259824,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 217500
    },
    {
      "epoch": 41.76,
      "eval_nq_accuracy": 0.5384375,
      "eval_nq_bleu_score": 12.172826344679459,
      "eval_nq_bleu_score_sem": 0.49596154815355714,
      "eval_nq_emb_cos_sim": 0.8395051956176758,
      "eval_nq_emb_cos_sim_sem": 0.007189846033997096,
      "eval_nq_emb_top1_equal": 0.2734375,
      "eval_nq_emb_top1_equal_sem": 0.03955156411760461,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.113952159881592,
      "eval_nq_n_ngrams_match_1": 23.578,
      "eval_nq_n_ngrams_match_2": 8.722,
      "eval_nq_n_ngrams_match_3": 4.05,
      "eval_nq_num_pred_words": 49.268,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.280904154853133,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4568493131470111,
      "eval_nq_runtime": 11.7851,
      "eval_nq_samples_per_second": 42.426,
      "eval_nq_steps_per_second": 0.085,
      "eval_nq_token_set_f1": 0.46812867341981784,
      "eval_nq_token_set_f1_sem": 0.004992514109594052,
      "eval_nq_token_set_precision": 0.42765309099848514,
      "eval_nq_token_set_recall": 0.5251882959590899,
      "eval_nq_true_num_tokens": 64.0,
      "step": 217500
    },
    {
      "epoch": 41.76,
      "learning_rate": 0.001,
      "loss": 2.4909,
      "step": 217512
    },
    {
      "epoch": 41.77,
      "learning_rate": 0.001,
      "loss": 2.5007,
      "step": 217524
    },
    {
      "epoch": 41.77,
      "learning_rate": 0.001,
      "loss": 2.4953,
      "step": 217536
    },
    {
      "epoch": 41.77,
      "learning_rate": 0.001,
      "loss": 2.4964,
      "step": 217548
    },
    {
      "epoch": 41.77,
      "learning_rate": 0.001,
      "loss": 2.505,
      "step": 217560
    },
    {
      "epoch": 41.78,
      "learning_rate": 0.001,
      "loss": 2.4982,
      "step": 217572
    },
    {
      "epoch": 41.78,
      "learning_rate": 0.001,
      "loss": 2.4969,
      "step": 217584
    },
    {
      "epoch": 41.78,
      "learning_rate": 0.001,
      "loss": 2.4883,
      "step": 217596
    },
    {
      "epoch": 41.78,
      "learning_rate": 0.001,
      "loss": 2.4952,
      "step": 217608
    },
    {
      "epoch": 41.79,
      "learning_rate": 0.001,
      "loss": 2.4916,
      "step": 217620
    },
    {
      "epoch": 41.79,
      "learning_rate": 0.001,
      "loss": 2.4938,
      "step": 217632
    },
    {
      "epoch": 41.79,
      "learning_rate": 0.001,
      "loss": 2.503,
      "step": 217644
    },
    {
      "epoch": 41.79,
      "learning_rate": 0.001,
      "loss": 2.4935,
      "step": 217656
    },
    {
      "epoch": 41.79,
      "learning_rate": 0.001,
      "loss": 2.4977,
      "step": 217668
    },
    {
      "epoch": 41.8,
      "learning_rate": 0.001,
      "loss": 2.5007,
      "step": 217680
    },
    {
      "epoch": 41.8,
      "learning_rate": 0.001,
      "loss": 2.494,
      "step": 217692
    },
    {
      "epoch": 41.8,
      "learning_rate": 0.001,
      "loss": 2.5008,
      "step": 217704
    },
    {
      "epoch": 41.8,
      "learning_rate": 0.001,
      "loss": 2.5012,
      "step": 217716
    },
    {
      "epoch": 41.81,
      "learning_rate": 0.001,
      "loss": 2.4973,
      "step": 217728
    },
    {
      "epoch": 41.81,
      "learning_rate": 0.001,
      "loss": 2.4982,
      "step": 217740
    },
    {
      "epoch": 41.81,
      "learning_rate": 0.001,
      "loss": 2.4948,
      "step": 217752
    },
    {
      "epoch": 41.81,
      "learning_rate": 0.001,
      "loss": 2.4992,
      "step": 217764
    },
    {
      "epoch": 41.82,
      "learning_rate": 0.001,
      "loss": 2.4899,
      "step": 217776
    },
    {
      "epoch": 41.82,
      "learning_rate": 0.001,
      "loss": 2.5042,
      "step": 217788
    },
    {
      "epoch": 41.82,
      "learning_rate": 0.001,
      "loss": 2.4941,
      "step": 217800
    },
    {
      "epoch": 41.82,
      "learning_rate": 0.001,
      "loss": 2.4998,
      "step": 217812
    },
    {
      "epoch": 41.82,
      "learning_rate": 0.001,
      "loss": 2.4964,
      "step": 217824
    },
    {
      "epoch": 41.83,
      "learning_rate": 0.001,
      "loss": 2.4965,
      "step": 217836
    },
    {
      "epoch": 41.83,
      "learning_rate": 0.001,
      "loss": 2.5,
      "step": 217848
    },
    {
      "epoch": 41.83,
      "learning_rate": 0.001,
      "loss": 2.491,
      "step": 217860
    },
    {
      "epoch": 41.83,
      "learning_rate": 0.001,
      "loss": 2.5027,
      "step": 217872
    },
    {
      "epoch": 41.84,
      "learning_rate": 0.001,
      "loss": 2.491,
      "step": 217884
    },
    {
      "epoch": 41.84,
      "learning_rate": 0.001,
      "loss": 2.4938,
      "step": 217896
    },
    {
      "epoch": 41.84,
      "learning_rate": 0.001,
      "loss": 2.5026,
      "step": 217908
    },
    {
      "epoch": 41.84,
      "learning_rate": 0.001,
      "loss": 2.4898,
      "step": 217920
    },
    {
      "epoch": 41.85,
      "learning_rate": 0.001,
      "loss": 2.4919,
      "step": 217932
    },
    {
      "epoch": 41.85,
      "learning_rate": 0.001,
      "loss": 2.4905,
      "step": 217944
    },
    {
      "epoch": 41.85,
      "learning_rate": 0.001,
      "loss": 2.4972,
      "step": 217956
    },
    {
      "epoch": 41.85,
      "learning_rate": 0.001,
      "loss": 2.491,
      "step": 217968
    },
    {
      "epoch": 41.85,
      "learning_rate": 0.001,
      "loss": 2.5036,
      "step": 217980
    },
    {
      "epoch": 41.86,
      "learning_rate": 0.001,
      "loss": 2.4929,
      "step": 217992
    },
    {
      "epoch": 41.86,
      "learning_rate": 0.001,
      "loss": 2.4933,
      "step": 218004
    },
    {
      "epoch": 41.86,
      "learning_rate": 0.001,
      "loss": 2.4928,
      "step": 218016
    },
    {
      "epoch": 41.86,
      "learning_rate": 0.001,
      "loss": 2.5059,
      "step": 218028
    },
    {
      "epoch": 41.87,
      "learning_rate": 0.001,
      "loss": 2.4944,
      "step": 218040
    },
    {
      "epoch": 41.87,
      "learning_rate": 0.001,
      "loss": 2.4975,
      "step": 218052
    },
    {
      "epoch": 41.87,
      "learning_rate": 0.001,
      "loss": 2.4972,
      "step": 218064
    },
    {
      "epoch": 41.87,
      "learning_rate": 0.001,
      "loss": 2.4938,
      "step": 218076
    },
    {
      "epoch": 41.88,
      "learning_rate": 0.001,
      "loss": 2.498,
      "step": 218088
    },
    {
      "epoch": 41.88,
      "learning_rate": 0.001,
      "loss": 2.4986,
      "step": 218100
    },
    {
      "epoch": 41.88,
      "learning_rate": 0.001,
      "loss": 2.5018,
      "step": 218112
    },
    {
      "epoch": 41.88,
      "learning_rate": 0.001,
      "loss": 2.4975,
      "step": 218124
    },
    {
      "epoch": 41.88,
      "eval_ag_news_accuracy": 0.3290625,
      "eval_ag_news_bleu_score": 5.09083726930891,
      "eval_ag_news_bleu_score_sem": 0.15831070234832995,
      "eval_ag_news_emb_cos_sim": 0.8283110857009888,
      "eval_ag_news_emb_cos_sim_sem": 0.005814721466176664,
      "eval_ag_news_emb_top1_equal": 0.2109375,
      "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4672694206237793,
      "eval_ag_news_n_ngrams_match_1": 14.598,
      "eval_ag_news_n_ngrams_match_2": 3.33,
      "eval_ag_news_n_ngrams_match_3": 0.968,
      "eval_ag_news_num_pred_words": 46.674,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.049110216441534,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3625376925528374,
      "eval_ag_news_runtime": 12.3344,
      "eval_ag_news_samples_per_second": 40.537,
      "eval_ag_news_steps_per_second": 0.081,
      "eval_ag_news_token_set_f1": 0.3626152811554046,
      "eval_ag_news_token_set_f1_sem": 0.004335993821420875,
      "eval_ag_news_token_set_precision": 0.34905871442261815,
      "eval_ag_news_token_set_recall": 0.38912115175818046,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 218125
    },
    {
      "epoch": 41.88,
      "eval_anthropic_toxic_prompts_accuracy": 0.1161875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.249123508989452,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12067826666755373,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6899729371070862,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00905122274435476,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.1833913326263428,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.418,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.024,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.764,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.594,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.128442530899807,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2208748617019638,
      "eval_anthropic_toxic_prompts_runtime": 11.1337,
      "eval_anthropic_toxic_prompts_samples_per_second": 44.909,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.09,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3657788793315223,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006582693540461152,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4565585661968616,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.33091982162053835,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 218125
    },
    {
      "epoch": 41.88,
      "eval_arxiv_accuracy": 0.353375,
      "eval_arxiv_bleu_score": 4.370257328071202,
      "eval_arxiv_bleu_score_sem": 0.1334749586855463,
      "eval_arxiv_emb_cos_sim": 0.7769792079925537,
      "eval_arxiv_emb_cos_sim_sem": 0.007613431739699201,
      "eval_arxiv_emb_top1_equal": 0.2890625,
      "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.322906732559204,
      "eval_arxiv_n_ngrams_match_1": 15.376,
      "eval_arxiv_n_ngrams_match_2": 3.012,
      "eval_arxiv_n_ngrams_match_3": 0.686,
      "eval_arxiv_num_pred_words": 40.202,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.740868765839455,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36781700390140043,
      "eval_arxiv_runtime": 13.6106,
      "eval_arxiv_samples_per_second": 36.736,
      "eval_arxiv_steps_per_second": 0.073,
      "eval_arxiv_token_set_f1": 0.36137802291831084,
      "eval_arxiv_token_set_f1_sem": 0.0043563007602366866,
      "eval_arxiv_token_set_precision": 0.31262204657687714,
      "eval_arxiv_token_set_recall": 0.4488679895227799,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 218125
    },
    {
      "epoch": 41.88,
      "eval_python_code_alpaca_accuracy": 0.16365625,
      "eval_python_code_alpaca_bleu_score": 4.749160743792896,
      "eval_python_code_alpaca_bleu_score_sem": 0.14927533323434977,
      "eval_python_code_alpaca_emb_cos_sim": 0.7650162577629089,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007978047418889484,
      "eval_python_code_alpaca_emb_top1_equal": 0.1015625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.026804565886848545,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8195087909698486,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.836,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.984,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.996,
      "eval_python_code_alpaca_num_pred_words": 42.47,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 16.76861175527306,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3406014475336757,
      "eval_python_code_alpaca_runtime": 16.6532,
      "eval_python_code_alpaca_samples_per_second": 30.024,
      "eval_python_code_alpaca_steps_per_second": 0.06,
      "eval_python_code_alpaca_token_set_f1": 0.48095663814900064,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005580266254246061,
      "eval_python_code_alpaca_token_set_precision": 0.5385783192099336,
      "eval_python_code_alpaca_token_set_recall": 0.45711145748608706,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 218125
    },
    {
      "epoch": 41.88,
      "eval_wikibio_accuracy": 0.32821875,
      "eval_wikibio_bleu_score": 6.353924405723115,
      "eval_wikibio_bleu_score_sem": 0.22379463979548128,
      "eval_wikibio_emb_cos_sim": 0.7559791803359985,
      "eval_wikibio_emb_cos_sim_sem": 0.009643657944814617,
      "eval_wikibio_emb_top1_equal": 0.2265625,
      "eval_wikibio_emb_top1_equal_sem": 0.03714537682851538,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.631709098815918,
      "eval_wikibio_n_ngrams_match_1": 10.236,
      "eval_wikibio_n_ngrams_match_2": 3.512,
      "eval_wikibio_n_ngrams_match_3": 1.356,
      "eval_wikibio_num_pred_words": 35.81,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.77732665872199,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36379702726230145,
      "eval_wikibio_runtime": 12.6298,
      "eval_wikibio_samples_per_second": 39.589,
      "eval_wikibio_steps_per_second": 0.079,
      "eval_wikibio_token_set_f1": 0.3260040864467825,
      "eval_wikibio_token_set_f1_sem": 0.005482797619439678,
      "eval_wikibio_token_set_precision": 0.334243061565777,
      "eval_wikibio_token_set_recall": 0.33423554439435343,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 218125
    },
    {
      "epoch": 41.88,
      "eval_nq_accuracy": 0.5383125,
      "eval_nq_bleu_score": 12.320078189725463,
      "eval_nq_bleu_score_sem": 0.5029188997077199,
      "eval_nq_emb_cos_sim": 0.8397891521453857,
      "eval_nq_emb_cos_sim_sem": 0.007033134173065972,
      "eval_nq_emb_top1_equal": 0.3046875,
      "eval_nq_emb_top1_equal_sem": 0.04084279867618665,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.116191864013672,
      "eval_nq_n_ngrams_match_1": 23.524,
      "eval_nq_n_ngrams_match_2": 8.742,
      "eval_nq_n_ngrams_match_3": 4.15,
      "eval_nq_num_pred_words": 49.086,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.29947171526534,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45623088664809053,
      "eval_nq_runtime": 12.9591,
      "eval_nq_samples_per_second": 38.583,
      "eval_nq_steps_per_second": 0.077,
      "eval_nq_token_set_f1": 0.4718029099495432,
      "eval_nq_token_set_f1_sem": 0.004986500594134429,
      "eval_nq_token_set_precision": 0.4298493940720233,
      "eval_nq_token_set_recall": 0.5313249893367641,
      "eval_nq_true_num_tokens": 64.0,
      "step": 218125
    },
    {
      "epoch": 41.88,
      "learning_rate": 0.001,
      "loss": 2.495,
      "step": 218136
    },
    {
      "epoch": 41.89,
      "learning_rate": 0.001,
      "loss": 2.4974,
      "step": 218148
    },
    {
      "epoch": 41.89,
      "learning_rate": 0.001,
      "loss": 2.5099,
      "step": 218160
    },
    {
      "epoch": 41.89,
      "learning_rate": 0.001,
      "loss": 2.4894,
      "step": 218172
    },
    {
      "epoch": 41.89,
      "learning_rate": 0.001,
      "loss": 2.5059,
      "step": 218184
    },
    {
      "epoch": 41.9,
      "learning_rate": 0.001,
      "loss": 2.4936,
      "step": 218196
    },
    {
      "epoch": 41.9,
      "learning_rate": 0.001,
      "loss": 2.5019,
      "step": 218208
    },
    {
      "epoch": 41.9,
      "learning_rate": 0.001,
      "loss": 2.5011,
      "step": 218220
    },
    {
      "epoch": 41.9,
      "learning_rate": 0.001,
      "loss": 2.4985,
      "step": 218232
    },
    {
      "epoch": 41.91,
      "learning_rate": 0.001,
      "loss": 2.503,
      "step": 218244
    },
    {
      "epoch": 41.91,
      "learning_rate": 0.001,
      "loss": 2.4943,
      "step": 218256
    },
    {
      "epoch": 41.91,
      "learning_rate": 0.001,
      "loss": 2.4942,
      "step": 218268
    },
    {
      "epoch": 41.91,
      "learning_rate": 0.001,
      "loss": 2.4905,
      "step": 218280
    },
    {
      "epoch": 41.91,
      "learning_rate": 0.001,
      "loss": 2.5003,
      "step": 218292
    },
    {
      "epoch": 41.92,
      "learning_rate": 0.001,
      "loss": 2.4985,
      "step": 218304
    },
    {
      "epoch": 41.92,
      "learning_rate": 0.001,
      "loss": 2.5054,
      "step": 218316
    },
    {
      "epoch": 41.92,
      "learning_rate": 0.001,
      "loss": 2.5015,
      "step": 218328
    },
    {
      "epoch": 41.92,
      "learning_rate": 0.001,
      "loss": 2.4952,
      "step": 218340
    },
    {
      "epoch": 41.93,
      "learning_rate": 0.001,
      "loss": 2.4981,
      "step": 218352
    },
    {
      "epoch": 41.93,
      "learning_rate": 0.001,
      "loss": 2.5031,
      "step": 218364
    },
    {
      "epoch": 41.93,
      "learning_rate": 0.001,
      "loss": 2.4884,
      "step": 218376
    },
    {
      "epoch": 41.93,
      "learning_rate": 0.001,
      "loss": 2.4912,
      "step": 218388
    },
    {
      "epoch": 41.94,
      "learning_rate": 0.001,
      "loss": 2.4987,
      "step": 218400
    },
    {
      "epoch": 41.94,
      "learning_rate": 0.001,
      "loss": 2.4995,
      "step": 218412
    },
    {
      "epoch": 41.94,
      "learning_rate": 0.001,
      "loss": 2.4977,
      "step": 218424
    },
    {
      "epoch": 41.94,
      "learning_rate": 0.001,
      "loss": 2.5057,
      "step": 218436
    },
    {
      "epoch": 41.94,
      "learning_rate": 0.001,
      "loss": 2.5041,
      "step": 218448
    },
    {
      "epoch": 41.95,
      "learning_rate": 0.001,
      "loss": 2.504,
      "step": 218460
    },
    {
      "epoch": 41.95,
      "learning_rate": 0.001,
      "loss": 2.4908,
      "step": 218472
    },
    {
      "epoch": 41.95,
      "learning_rate": 0.001,
      "loss": 2.5023,
      "step": 218484
    },
    {
      "epoch": 41.95,
      "learning_rate": 0.001,
      "loss": 2.4975,
      "step": 218496
    },
    {
      "epoch": 41.96,
      "learning_rate": 0.001,
      "loss": 2.5001,
      "step": 218508
    },
    {
      "epoch": 41.96,
      "learning_rate": 0.001,
      "loss": 2.487,
      "step": 218520
    },
    {
      "epoch": 41.96,
      "learning_rate": 0.001,
      "loss": 2.4783,
      "step": 218532
    },
    {
      "epoch": 41.96,
      "learning_rate": 0.001,
      "loss": 2.4949,
      "step": 218544
    },
    {
      "epoch": 41.97,
      "learning_rate": 0.001,
      "loss": 2.4913,
      "step": 218556
    },
    {
      "epoch": 41.97,
      "learning_rate": 0.001,
      "loss": 2.5015,
      "step": 218568
    },
    {
      "epoch": 41.97,
      "learning_rate": 0.001,
      "loss": 2.4977,
      "step": 218580
    },
    {
      "epoch": 41.97,
      "learning_rate": 0.001,
      "loss": 2.499,
      "step": 218592
    },
    {
      "epoch": 41.97,
      "learning_rate": 0.001,
      "loss": 2.4929,
      "step": 218604
    },
    {
      "epoch": 41.98,
      "learning_rate": 0.001,
      "loss": 2.4905,
      "step": 218616
    },
    {
      "epoch": 41.98,
      "learning_rate": 0.001,
      "loss": 2.4911,
      "step": 218628
    },
    {
      "epoch": 41.98,
      "learning_rate": 0.001,
      "loss": 2.5036,
      "step": 218640
    },
    {
      "epoch": 41.98,
      "learning_rate": 0.001,
      "loss": 2.5041,
      "step": 218652
    },
    {
      "epoch": 41.99,
      "learning_rate": 0.001,
      "loss": 2.4994,
      "step": 218664
    },
    {
      "epoch": 41.99,
      "learning_rate": 0.001,
      "loss": 2.5013,
      "step": 218676
    },
    {
      "epoch": 41.99,
      "learning_rate": 0.001,
      "loss": 2.4987,
      "step": 218688
    },
    {
      "epoch": 41.99,
      "learning_rate": 0.001,
      "loss": 2.5003,
      "step": 218700
    },
    {
      "epoch": 42.0,
      "learning_rate": 0.001,
      "loss": 2.4922,
      "step": 218712
    },
    {
      "epoch": 42.0,
      "learning_rate": 0.001,
      "loss": 2.5044,
      "step": 218724
    },
    {
      "epoch": 42.0,
      "learning_rate": 0.001,
      "loss": 2.4992,
      "step": 218736
    },
    {
      "epoch": 42.0,
      "learning_rate": 0.001,
      "loss": 2.4789,
      "step": 218748
    },
    {
      "epoch": 42.0,
      "eval_ag_news_accuracy": 0.32928125,
      "eval_ag_news_bleu_score": 4.891300621489941,
      "eval_ag_news_bleu_score_sem": 0.1568683167658272,
      "eval_ag_news_emb_cos_sim": 0.8244635462760925,
      "eval_ag_news_emb_cos_sim_sem": 0.0069586988873286855,
      "eval_ag_news_emb_top1_equal": 0.265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4658851623535156,
      "eval_ag_news_n_ngrams_match_1": 14.512,
      "eval_ag_news_n_ngrams_match_2": 3.192,
      "eval_ag_news_n_ngrams_match_3": 0.886,
      "eval_ag_news_num_pred_words": 46.956,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.00477666219361,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3598947344764221,
      "eval_ag_news_runtime": 17.1695,
      "eval_ag_news_samples_per_second": 29.121,
      "eval_ag_news_steps_per_second": 0.058,
      "eval_ag_news_token_set_f1": 0.3594154402224956,
      "eval_ag_news_token_set_f1_sem": 0.004417977714066989,
      "eval_ag_news_token_set_precision": 0.34828672300609675,
      "eval_ag_news_token_set_recall": 0.3852051015499847,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 218750
    },
    {
      "epoch": 42.0,
      "eval_anthropic_toxic_prompts_accuracy": 0.1168125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1829855031755563,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12178573539836313,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6733123660087585,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009087960308277084,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.188302755355835,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.112,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.934,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.746,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.092,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.247239003018652,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21379264199978543,
      "eval_anthropic_toxic_prompts_runtime": 12.2803,
      "eval_anthropic_toxic_prompts_samples_per_second": 40.716,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.081,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35017008278401085,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006562992112271465,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4270487698133057,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3230413062917486,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 218750
    },
    {
      "epoch": 42.0,
      "eval_arxiv_accuracy": 0.35415625,
      "eval_arxiv_bleu_score": 4.431849600988413,
      "eval_arxiv_bleu_score_sem": 0.13175663319751654,
      "eval_arxiv_emb_cos_sim": 0.7872753143310547,
      "eval_arxiv_emb_cos_sim_sem": 0.006389407575057372,
      "eval_arxiv_emb_top1_equal": 0.2890625,
      "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.329688787460327,
      "eval_arxiv_n_ngrams_match_1": 15.66,
      "eval_arxiv_n_ngrams_match_2": 3.054,
      "eval_arxiv_n_ngrams_match_3": 0.67,
      "eval_arxiv_num_pred_words": 41.062,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.929648293779977,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.37350832353546937,
      "eval_arxiv_runtime": 12.5477,
      "eval_arxiv_samples_per_second": 39.848,
      "eval_arxiv_steps_per_second": 0.08,
      "eval_arxiv_token_set_f1": 0.3669420986340655,
      "eval_arxiv_token_set_f1_sem": 0.0041141701218152405,
      "eval_arxiv_token_set_precision": 0.32124725225562717,
      "eval_arxiv_token_set_recall": 0.44572007610761427,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 218750
    },
    {
      "epoch": 42.0,
      "eval_python_code_alpaca_accuracy": 0.16371875,
      "eval_python_code_alpaca_bleu_score": 4.875315255716432,
      "eval_python_code_alpaca_bleu_score_sem": 0.16528973634850896,
      "eval_python_code_alpaca_emb_cos_sim": 0.7545474767684937,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008491577133491869,
      "eval_python_code_alpaca_emb_top1_equal": 0.2109375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03620184850179216,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.826129674911499,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.728,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.974,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.01,
      "eval_python_code_alpaca_num_pred_words": 41.06,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 16.880003135353743,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.34356245234797383,
      "eval_python_code_alpaca_runtime": 12.6955,
      "eval_python_code_alpaca_samples_per_second": 39.384,
      "eval_python_code_alpaca_steps_per_second": 0.079,
      "eval_python_code_alpaca_token_set_f1": 0.4770808737470414,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005691738362953203,
      "eval_python_code_alpaca_token_set_precision": 0.5291690114545474,
      "eval_python_code_alpaca_token_set_recall": 0.46001131629935305,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 218750
    },
    {
      "epoch": 42.0,
      "eval_wikibio_accuracy": 0.33125,
      "eval_wikibio_bleu_score": 6.2129605813949595,
      "eval_wikibio_bleu_score_sem": 0.22906099806481456,
      "eval_wikibio_emb_cos_sim": 0.7505396604537964,
      "eval_wikibio_emb_cos_sim_sem": 0.009477432749921176,
      "eval_wikibio_emb_top1_equal": 0.203125,
      "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.5982353687286377,
      "eval_wikibio_n_ngrams_match_1": 10.306,
      "eval_wikibio_n_ngrams_match_2": 3.488,
      "eval_wikibio_n_ngrams_match_3": 1.31,
      "eval_wikibio_num_pred_words": 35.792,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 36.533709003250955,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3629200904000062,
      "eval_wikibio_runtime": 12.2875,
      "eval_wikibio_samples_per_second": 40.692,
      "eval_wikibio_steps_per_second": 0.081,
      "eval_wikibio_token_set_f1": 0.32620654746320593,
      "eval_wikibio_token_set_f1_sem": 0.005560672309609381,
      "eval_wikibio_token_set_precision": 0.33631133800354357,
      "eval_wikibio_token_set_recall": 0.3338787244091316,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 218750
    },
    {
      "epoch": 42.0,
      "eval_nq_accuracy": 0.538,
      "eval_nq_bleu_score": 12.353996683991667,
      "eval_nq_bleu_score_sem": 0.49252432605877994,
      "eval_nq_emb_cos_sim": 0.8390676975250244,
      "eval_nq_emb_cos_sim_sem": 0.007353442977759179,
      "eval_nq_emb_top1_equal": 0.3125,
      "eval_nq_emb_top1_equal_sem": 0.041130074229814934,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1182310581207275,
      "eval_nq_n_ngrams_match_1": 23.764,
      "eval_nq_n_ngrams_match_2": 8.856,
      "eval_nq_n_ngrams_match_3": 4.15,
      "eval_nq_num_pred_words": 49.232,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.316413216713073,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4603732456239009,
      "eval_nq_runtime": 12.8752,
      "eval_nq_samples_per_second": 38.834,
      "eval_nq_steps_per_second": 0.078,
      "eval_nq_token_set_f1": 0.4729860581120128,
      "eval_nq_token_set_f1_sem": 0.0049555346143281475,
      "eval_nq_token_set_precision": 0.43302175884646615,
      "eval_nq_token_set_recall": 0.5286954614644978,
      "eval_nq_true_num_tokens": 64.0,
      "step": 218750
    },
    {
      "epoch": 42.0,
      "learning_rate": 0.001,
      "loss": 2.4787,
      "step": 218760
    },
    {
      "epoch": 42.01,
      "learning_rate": 0.001,
      "loss": 2.4757,
      "step": 218772
    },
    {
      "epoch": 42.01,
      "learning_rate": 0.001,
      "loss": 2.4868,
      "step": 218784
    },
    {
      "epoch": 42.01,
      "learning_rate": 0.001,
      "loss": 2.4872,
      "step": 218796
    },
    {
      "epoch": 42.01,
      "learning_rate": 0.001,
      "loss": 2.4886,
      "step": 218808
    },
    {
      "epoch": 42.02,
      "learning_rate": 0.001,
      "loss": 2.4827,
      "step": 218820
    },
    {
      "epoch": 42.02,
      "learning_rate": 0.001,
      "loss": 2.4819,
      "step": 218832
    },
    {
      "epoch": 42.02,
      "learning_rate": 0.001,
      "loss": 2.4843,
      "step": 218844
    },
    {
      "epoch": 42.02,
      "learning_rate": 0.001,
      "loss": 2.4746,
      "step": 218856
    },
    {
      "epoch": 42.03,
      "learning_rate": 0.001,
      "loss": 2.4775,
      "step": 218868
    },
    {
      "epoch": 42.03,
      "learning_rate": 0.001,
      "loss": 2.4868,
      "step": 218880
    },
    {
      "epoch": 42.03,
      "learning_rate": 0.001,
      "loss": 2.487,
      "step": 218892
    },
    {
      "epoch": 42.03,
      "learning_rate": 0.001,
      "loss": 2.4888,
      "step": 218904
    },
    {
      "epoch": 42.03,
      "learning_rate": 0.001,
      "loss": 2.4864,
      "step": 218916
    },
    {
      "epoch": 42.04,
      "learning_rate": 0.001,
      "loss": 2.4878,
      "step": 218928
    },
    {
      "epoch": 42.04,
      "learning_rate": 0.001,
      "loss": 2.4854,
      "step": 218940
    },
    {
      "epoch": 42.04,
      "learning_rate": 0.001,
      "loss": 2.4797,
      "step": 218952
    },
    {
      "epoch": 42.04,
      "learning_rate": 0.001,
      "loss": 2.4799,
      "step": 218964
    },
    {
      "epoch": 42.05,
      "learning_rate": 0.001,
      "loss": 2.4896,
      "step": 218976
    },
    {
      "epoch": 42.05,
      "learning_rate": 0.001,
      "loss": 2.4841,
      "step": 218988
    },
    {
      "epoch": 42.05,
      "learning_rate": 0.001,
      "loss": 2.4751,
      "step": 219000
    },
    {
      "epoch": 42.05,
      "learning_rate": 0.001,
      "loss": 2.4909,
      "step": 219012
    },
    {
      "epoch": 42.06,
      "learning_rate": 0.001,
      "loss": 2.4897,
      "step": 219024
    },
    {
      "epoch": 42.06,
      "learning_rate": 0.001,
      "loss": 2.4802,
      "step": 219036
    },
    {
      "epoch": 42.06,
      "learning_rate": 0.001,
      "loss": 2.4865,
      "step": 219048
    },
    {
      "epoch": 42.06,
      "learning_rate": 0.001,
      "loss": 2.4826,
      "step": 219060
    },
    {
      "epoch": 42.06,
      "learning_rate": 0.001,
      "loss": 2.4834,
      "step": 219072
    },
    {
      "epoch": 42.07,
      "learning_rate": 0.001,
      "loss": 2.4766,
      "step": 219084
    },
    {
      "epoch": 42.07,
      "learning_rate": 0.001,
      "loss": 2.4736,
      "step": 219096
    },
    {
      "epoch": 42.07,
      "learning_rate": 0.001,
      "loss": 2.4831,
      "step": 219108
    },
    {
      "epoch": 42.07,
      "learning_rate": 0.001,
      "loss": 2.478,
      "step": 219120
    },
    {
      "epoch": 42.08,
      "learning_rate": 0.001,
      "loss": 2.4764,
      "step": 219132
    },
    {
      "epoch": 42.08,
      "learning_rate": 0.001,
      "loss": 2.4736,
      "step": 219144
    },
    {
      "epoch": 42.08,
      "learning_rate": 0.001,
      "loss": 2.4886,
      "step": 219156
    },
    {
      "epoch": 42.08,
      "learning_rate": 0.001,
      "loss": 2.4852,
      "step": 219168
    },
    {
      "epoch": 42.09,
      "learning_rate": 0.001,
      "loss": 2.4773,
      "step": 219180
    },
    {
      "epoch": 42.09,
      "learning_rate": 0.001,
      "loss": 2.492,
      "step": 219192
    },
    {
      "epoch": 42.09,
      "learning_rate": 0.001,
      "loss": 2.4844,
      "step": 219204
    },
    {
      "epoch": 42.09,
      "learning_rate": 0.001,
      "loss": 2.4845,
      "step": 219216
    },
    {
      "epoch": 42.09,
      "learning_rate": 0.001,
      "loss": 2.4776,
      "step": 219228
    },
    {
      "epoch": 42.1,
      "learning_rate": 0.001,
      "loss": 2.4837,
      "step": 219240
    },
    {
      "epoch": 42.1,
      "learning_rate": 0.001,
      "loss": 2.4804,
      "step": 219252
    },
    {
      "epoch": 42.1,
      "learning_rate": 0.001,
      "loss": 2.4896,
      "step": 219264
    },
    {
      "epoch": 42.1,
      "learning_rate": 0.001,
      "loss": 2.4778,
      "step": 219276
    },
    {
      "epoch": 42.11,
      "learning_rate": 0.001,
      "loss": 2.4787,
      "step": 219288
    },
    {
      "epoch": 42.11,
      "learning_rate": 0.001,
      "loss": 2.4822,
      "step": 219300
    },
    {
      "epoch": 42.11,
      "learning_rate": 0.001,
      "loss": 2.4924,
      "step": 219312
    },
    {
      "epoch": 42.11,
      "learning_rate": 0.001,
      "loss": 2.4941,
      "step": 219324
    },
    {
      "epoch": 42.12,
      "learning_rate": 0.001,
      "loss": 2.4764,
      "step": 219336
    },
    {
      "epoch": 42.12,
      "learning_rate": 0.001,
      "loss": 2.4773,
      "step": 219348
    },
    {
      "epoch": 42.12,
      "learning_rate": 0.001,
      "loss": 2.4799,
      "step": 219360
    },
    {
      "epoch": 42.12,
      "learning_rate": 0.001,
      "loss": 2.485,
      "step": 219372
    },
    {
      "epoch": 42.12,
      "eval_ag_news_accuracy": 0.32878125,
      "eval_ag_news_bleu_score": 5.114055657840925,
      "eval_ag_news_bleu_score_sem": 0.16190495147979014,
      "eval_ag_news_emb_cos_sim": 0.8220123648643494,
      "eval_ag_news_emb_cos_sim_sem": 0.006110483682879323,
      "eval_ag_news_emb_top1_equal": 0.2578125,
      "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4679293632507324,
      "eval_ag_news_n_ngrams_match_1": 14.56,
      "eval_ag_news_n_ngrams_match_2": 3.366,
      "eval_ag_news_n_ngrams_match_3": 0.984,
      "eval_ag_news_num_pred_words": 46.9,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.07026777104748,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3620513106323513,
      "eval_ag_news_runtime": 13.2317,
      "eval_ag_news_samples_per_second": 37.788,
      "eval_ag_news_steps_per_second": 0.076,
      "eval_ag_news_token_set_f1": 0.36444018936295913,
      "eval_ag_news_token_set_f1_sem": 0.0044494099152123006,
      "eval_ag_news_token_set_precision": 0.349512383981734,
      "eval_ag_news_token_set_recall": 0.39478669348646933,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 219375
    },
    {
      "epoch": 42.12,
      "eval_anthropic_toxic_prompts_accuracy": 0.1151875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2868874737624294,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12704543539871685,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6880490779876709,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009382956637716959,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.196662187576294,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.374,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.02,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.782,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.414,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.450781719471212,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21942776916217527,
      "eval_anthropic_toxic_prompts_runtime": 11.4068,
      "eval_anthropic_toxic_prompts_samples_per_second": 43.833,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.088,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3635019726377188,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0064806904481003276,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44988551061375176,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32919869055265,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 219375
    },
    {
      "epoch": 42.12,
      "eval_arxiv_accuracy": 0.35246875,
      "eval_arxiv_bleu_score": 4.6956854491173035,
      "eval_arxiv_bleu_score_sem": 0.1368974649284805,
      "eval_arxiv_emb_cos_sim": 0.7871077656745911,
      "eval_arxiv_emb_cos_sim_sem": 0.006543580088365714,
      "eval_arxiv_emb_top1_equal": 0.28125,
      "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3312876224517822,
      "eval_arxiv_n_ngrams_match_1": 15.858,
      "eval_arxiv_n_ngrams_match_2": 3.182,
      "eval_arxiv_n_ngrams_match_3": 0.748,
      "eval_arxiv_num_pred_words": 40.916,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.974338909711257,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3790483686474596,
      "eval_arxiv_runtime": 18.2242,
      "eval_arxiv_samples_per_second": 27.436,
      "eval_arxiv_steps_per_second": 0.055,
      "eval_arxiv_token_set_f1": 0.3694431494366954,
      "eval_arxiv_token_set_f1_sem": 0.004190429298376747,
      "eval_arxiv_token_set_precision": 0.32267483759534554,
      "eval_arxiv_token_set_recall": 0.44660387072495344,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 219375
    },
    {
      "epoch": 42.12,
      "eval_python_code_alpaca_accuracy": 0.16253125,
      "eval_python_code_alpaca_bleu_score": 4.720674963220425,
      "eval_python_code_alpaca_bleu_score_sem": 0.13931244265866943,
      "eval_python_code_alpaca_emb_cos_sim": 0.7657042741775513,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007414526932141625,
      "eval_python_code_alpaca_emb_top1_equal": 0.140625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8257317543029785,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.92,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.93,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.966,
      "eval_python_code_alpaca_num_pred_words": 42.614,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 16.873287570453744,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3425676640681087,
      "eval_python_code_alpaca_runtime": 11.8163,
      "eval_python_code_alpaca_samples_per_second": 42.314,
      "eval_python_code_alpaca_steps_per_second": 0.085,
      "eval_python_code_alpaca_token_set_f1": 0.48370720603233985,
      "eval_python_code_alpaca_token_set_f1_sem": 0.00529003908739912,
      "eval_python_code_alpaca_token_set_precision": 0.5418550952657617,
      "eval_python_code_alpaca_token_set_recall": 0.4564278057183093,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 219375
    },
    {
      "epoch": 42.12,
      "eval_wikibio_accuracy": 0.3291875,
      "eval_wikibio_bleu_score": 6.206690478962144,
      "eval_wikibio_bleu_score_sem": 0.21878327489493257,
      "eval_wikibio_emb_cos_sim": 0.7545444369316101,
      "eval_wikibio_emb_cos_sim_sem": 0.00849759688883934,
      "eval_wikibio_emb_top1_equal": 0.203125,
      "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6319236755371094,
      "eval_wikibio_n_ngrams_match_1": 10.206,
      "eval_wikibio_n_ngrams_match_2": 3.424,
      "eval_wikibio_n_ngrams_match_3": 1.292,
      "eval_wikibio_num_pred_words": 35.738,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.78543366336793,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3640439718529963,
      "eval_wikibio_runtime": 11.3557,
      "eval_wikibio_samples_per_second": 44.031,
      "eval_wikibio_steps_per_second": 0.088,
      "eval_wikibio_token_set_f1": 0.3267677720836948,
      "eval_wikibio_token_set_f1_sem": 0.005445527984017967,
      "eval_wikibio_token_set_precision": 0.3333888306024795,
      "eval_wikibio_token_set_recall": 0.3353166533567367,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 219375
    },
    {
      "epoch": 42.12,
      "eval_nq_accuracy": 0.5368125,
      "eval_nq_bleu_score": 12.255347474851035,
      "eval_nq_bleu_score_sem": 0.4991846005084359,
      "eval_nq_emb_cos_sim": 0.8426265716552734,
      "eval_nq_emb_cos_sim_sem": 0.007043752544712686,
      "eval_nq_emb_top1_equal": 0.328125,
      "eval_nq_emb_top1_equal_sem": 0.041664103776406315,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.117433547973633,
      "eval_nq_n_ngrams_match_1": 23.562,
      "eval_nq_n_ngrams_match_2": 8.758,
      "eval_nq_n_ngrams_match_3": 4.092,
      "eval_nq_num_pred_words": 49.042,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.309783436795081,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45805297250126337,
      "eval_nq_runtime": 11.9233,
      "eval_nq_samples_per_second": 41.935,
      "eval_nq_steps_per_second": 0.084,
      "eval_nq_token_set_f1": 0.47193520466452954,
      "eval_nq_token_set_f1_sem": 0.005136736506925819,
      "eval_nq_token_set_precision": 0.42909744487728674,
      "eval_nq_token_set_recall": 0.532314281496284,
      "eval_nq_true_num_tokens": 64.0,
      "step": 219375
    },
    {
      "epoch": 42.12,
      "learning_rate": 0.001,
      "loss": 2.4889,
      "step": 219384
    },
    {
      "epoch": 42.13,
      "learning_rate": 0.001,
      "loss": 2.4926,
      "step": 219396
    },
    {
      "epoch": 42.13,
      "learning_rate": 0.001,
      "loss": 2.4858,
      "step": 219408
    },
    {
      "epoch": 42.13,
      "learning_rate": 0.001,
      "loss": 2.487,
      "step": 219420
    },
    {
      "epoch": 42.13,
      "learning_rate": 0.001,
      "loss": 2.4821,
      "step": 219432
    },
    {
      "epoch": 42.14,
      "learning_rate": 0.001,
      "loss": 2.4885,
      "step": 219444
    },
    {
      "epoch": 42.14,
      "learning_rate": 0.001,
      "loss": 2.4848,
      "step": 219456
    },
    {
      "epoch": 42.14,
      "learning_rate": 0.001,
      "loss": 2.4963,
      "step": 219468
    },
    {
      "epoch": 42.14,
      "learning_rate": 0.001,
      "loss": 2.4846,
      "step": 219480
    },
    {
      "epoch": 42.15,
      "learning_rate": 0.001,
      "loss": 2.4854,
      "step": 219492
    },
    {
      "epoch": 42.15,
      "learning_rate": 0.001,
      "loss": 2.4906,
      "step": 219504
    },
    {
      "epoch": 42.15,
      "learning_rate": 0.001,
      "loss": 2.4797,
      "step": 219516
    },
    {
      "epoch": 42.15,
      "learning_rate": 0.001,
      "loss": 2.492,
      "step": 219528
    },
    {
      "epoch": 42.15,
      "learning_rate": 0.001,
      "loss": 2.4924,
      "step": 219540
    },
    {
      "epoch": 42.16,
      "learning_rate": 0.001,
      "loss": 2.4921,
      "step": 219552
    },
    {
      "epoch": 42.16,
      "learning_rate": 0.001,
      "loss": 2.4914,
      "step": 219564
    },
    {
      "epoch": 42.16,
      "learning_rate": 0.001,
      "loss": 2.4905,
      "step": 219576
    },
    {
      "epoch": 42.16,
      "learning_rate": 0.001,
      "loss": 2.4887,
      "step": 219588
    },
    {
      "epoch": 42.17,
      "learning_rate": 0.001,
      "loss": 2.4852,
      "step": 219600
    },
    {
      "epoch": 42.17,
      "learning_rate": 0.001,
      "loss": 2.4886,
      "step": 219612
    },
    {
      "epoch": 42.17,
      "learning_rate": 0.001,
      "loss": 2.4894,
      "step": 219624
    },
    {
      "epoch": 42.17,
      "learning_rate": 0.001,
      "loss": 2.479,
      "step": 219636
    },
    {
      "epoch": 42.18,
      "learning_rate": 0.001,
      "loss": 2.4916,
      "step": 219648
    },
    {
      "epoch": 42.18,
      "learning_rate": 0.001,
      "loss": 2.4848,
      "step": 219660
    },
    {
      "epoch": 42.18,
      "learning_rate": 0.001,
      "loss": 2.4874,
      "step": 219672
    },
    {
      "epoch": 42.18,
      "learning_rate": 0.001,
      "loss": 2.4853,
      "step": 219684
    },
    {
      "epoch": 42.18,
      "learning_rate": 0.001,
      "loss": 2.49,
      "step": 219696
    },
    {
      "epoch": 42.19,
      "learning_rate": 0.001,
      "loss": 2.4933,
      "step": 219708
    },
    {
      "epoch": 42.19,
      "learning_rate": 0.001,
      "loss": 2.4869,
      "step": 219720
    },
    {
      "epoch": 42.19,
      "learning_rate": 0.001,
      "loss": 2.5017,
      "step": 219732
    },
    {
      "epoch": 42.19,
      "learning_rate": 0.001,
      "loss": 2.4857,
      "step": 219744
    },
    {
      "epoch": 42.2,
      "learning_rate": 0.001,
      "loss": 2.4916,
      "step": 219756
    },
    {
      "epoch": 42.2,
      "learning_rate": 0.001,
      "loss": 2.4807,
      "step": 219768
    },
    {
      "epoch": 42.2,
      "learning_rate": 0.001,
      "loss": 2.4923,
      "step": 219780
    },
    {
      "epoch": 42.2,
      "learning_rate": 0.001,
      "loss": 2.4864,
      "step": 219792
    },
    {
      "epoch": 42.21,
      "learning_rate": 0.001,
      "loss": 2.4824,
      "step": 219804
    },
    {
      "epoch": 42.21,
      "learning_rate": 0.001,
      "loss": 2.4801,
      "step": 219816
    },
    {
      "epoch": 42.21,
      "learning_rate": 0.001,
      "loss": 2.4904,
      "step": 219828
    },
    {
      "epoch": 42.21,
      "learning_rate": 0.001,
      "loss": 2.4901,
      "step": 219840
    },
    {
      "epoch": 42.21,
      "learning_rate": 0.001,
      "loss": 2.4928,
      "step": 219852
    },
    {
      "epoch": 42.22,
      "learning_rate": 0.001,
      "loss": 2.4876,
      "step": 219864
    },
    {
      "epoch": 42.22,
      "learning_rate": 0.001,
      "loss": 2.4819,
      "step": 219876
    },
    {
      "epoch": 42.22,
      "learning_rate": 0.001,
      "loss": 2.4843,
      "step": 219888
    },
    {
      "epoch": 42.22,
      "learning_rate": 0.001,
      "loss": 2.4732,
      "step": 219900
    },
    {
      "epoch": 42.23,
      "learning_rate": 0.001,
      "loss": 2.4917,
      "step": 219912
    },
    {
      "epoch": 42.23,
      "learning_rate": 0.001,
      "loss": 2.4858,
      "step": 219924
    },
    {
      "epoch": 42.23,
      "learning_rate": 0.001,
      "loss": 2.487,
      "step": 219936
    },
    {
      "epoch": 42.23,
      "learning_rate": 0.001,
      "loss": 2.4817,
      "step": 219948
    },
    {
      "epoch": 42.24,
      "learning_rate": 0.001,
      "loss": 2.491,
      "step": 219960
    },
    {
      "epoch": 42.24,
      "learning_rate": 0.001,
      "loss": 2.4836,
      "step": 219972
    },
    {
      "epoch": 42.24,
      "learning_rate": 0.001,
      "loss": 2.4956,
      "step": 219984
    },
    {
      "epoch": 42.24,
      "learning_rate": 0.001,
      "loss": 2.4866,
      "step": 219996
    },
    {
      "epoch": 42.24,
      "eval_ag_news_accuracy": 0.32940625,
      "eval_ag_news_bleu_score": 5.135285862552853,
      "eval_ag_news_bleu_score_sem": 0.16595851834441983,
      "eval_ag_news_emb_cos_sim": 0.821050226688385,
      "eval_ag_news_emb_cos_sim_sem": 0.0068386936387280104,
      "eval_ag_news_emb_top1_equal": 0.265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4684741497039795,
      "eval_ag_news_n_ngrams_match_1": 14.552,
      "eval_ag_news_n_ngrams_match_2": 3.33,
      "eval_ag_news_n_ngrams_match_3": 0.97,
      "eval_ag_news_num_pred_words": 46.664,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.087743978449446,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3603146158118864,
      "eval_ag_news_runtime": 26.1457,
      "eval_ag_news_samples_per_second": 19.124,
      "eval_ag_news_steps_per_second": 0.038,
      "eval_ag_news_token_set_f1": 0.3619990764633275,
      "eval_ag_news_token_set_f1_sem": 0.004462129280319432,
      "eval_ag_news_token_set_precision": 0.3492692383491201,
      "eval_ag_news_token_set_recall": 0.3900119421807912,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 220000
    },
    {
      "epoch": 42.24,
      "eval_anthropic_toxic_prompts_accuracy": 0.11453125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.324966670128112,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1391054499045368,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6809598207473755,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009369215448013392,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.210698127746582,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.252,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.962,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.762,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.896,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.79639123206612,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21624338666186077,
      "eval_anthropic_toxic_prompts_runtime": 14.8659,
      "eval_anthropic_toxic_prompts_samples_per_second": 33.634,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.067,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3519435219790378,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006549468492754389,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.43621953616353665,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3205663962370167,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 220000
    },
    {
      "epoch": 42.24,
      "eval_arxiv_accuracy": 0.3528125,
      "eval_arxiv_bleu_score": 4.68695975337914,
      "eval_arxiv_bleu_score_sem": 0.1430225954552886,
      "eval_arxiv_emb_cos_sim": 0.7879422903060913,
      "eval_arxiv_emb_cos_sim_sem": 0.00641595218708514,
      "eval_arxiv_emb_top1_equal": 0.28125,
      "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.336101770401001,
      "eval_arxiv_n_ngrams_match_1": 15.572,
      "eval_arxiv_n_ngrams_match_2": 3.19,
      "eval_arxiv_n_ngrams_match_3": 0.794,
      "eval_arxiv_num_pred_words": 40.648,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.10933620375242,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.37193294828325585,
      "eval_arxiv_runtime": 11.6836,
      "eval_arxiv_samples_per_second": 42.795,
      "eval_arxiv_steps_per_second": 0.086,
      "eval_arxiv_token_set_f1": 0.36365067851263927,
      "eval_arxiv_token_set_f1_sem": 0.004470472987750251,
      "eval_arxiv_token_set_precision": 0.31817153992694336,
      "eval_arxiv_token_set_recall": 0.4402469038092684,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 220000
    },
    {
      "epoch": 42.24,
      "eval_python_code_alpaca_accuracy": 0.1643125,
      "eval_python_code_alpaca_bleu_score": 4.671901115767498,
      "eval_python_code_alpaca_bleu_score_sem": 0.14884401069773442,
      "eval_python_code_alpaca_emb_cos_sim": 0.765325129032135,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.00721018583967065,
      "eval_python_code_alpaca_emb_top1_equal": 0.1953125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.035178457165496856,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8322031497955322,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.838,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.88,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.956,
      "eval_python_code_alpaca_num_pred_words": 42.322,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 16.982835368834017,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.34017329993780643,
      "eval_python_code_alpaca_runtime": 11.3858,
      "eval_python_code_alpaca_samples_per_second": 43.914,
      "eval_python_code_alpaca_steps_per_second": 0.088,
      "eval_python_code_alpaca_token_set_f1": 0.4764276407302975,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005412071359097353,
      "eval_python_code_alpaca_token_set_precision": 0.5376840820111548,
      "eval_python_code_alpaca_token_set_recall": 0.44977870489549737,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 220000
    },
    {
      "epoch": 42.24,
      "eval_wikibio_accuracy": 0.32946875,
      "eval_wikibio_bleu_score": 6.1818557344923555,
      "eval_wikibio_bleu_score_sem": 0.2245235752092188,
      "eval_wikibio_emb_cos_sim": 0.7415040731430054,
      "eval_wikibio_emb_cos_sim_sem": 0.008858960016704697,
      "eval_wikibio_emb_top1_equal": 0.171875,
      "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6164400577545166,
      "eval_wikibio_n_ngrams_match_1": 9.936,
      "eval_wikibio_n_ngrams_match_2": 3.396,
      "eval_wikibio_n_ngrams_match_3": 1.304,
      "eval_wikibio_num_pred_words": 35.448,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.204884549540516,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.35385059861310764,
      "eval_wikibio_runtime": 21.3734,
      "eval_wikibio_samples_per_second": 23.394,
      "eval_wikibio_steps_per_second": 0.047,
      "eval_wikibio_token_set_f1": 0.3195351922274151,
      "eval_wikibio_token_set_f1_sem": 0.005707029002276072,
      "eval_wikibio_token_set_precision": 0.32460346752223623,
      "eval_wikibio_token_set_recall": 0.3310073936346742,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 220000
    },
    {
      "epoch": 42.24,
      "eval_nq_accuracy": 0.53684375,
      "eval_nq_bleu_score": 12.067484525172185,
      "eval_nq_bleu_score_sem": 0.49549202108640733,
      "eval_nq_emb_cos_sim": 0.8371459245681763,
      "eval_nq_emb_cos_sim_sem": 0.0076807949159652614,
      "eval_nq_emb_top1_equal": 0.3203125,
      "eval_nq_emb_top1_equal_sem": 0.041403754790620424,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.115579605102539,
      "eval_nq_n_ngrams_match_1": 23.578,
      "eval_nq_n_ngrams_match_2": 8.74,
      "eval_nq_n_ngrams_match_3": 4.02,
      "eval_nq_num_pred_words": 49.024,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.29439184500658,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45796722880794505,
      "eval_nq_runtime": 16.4204,
      "eval_nq_samples_per_second": 30.45,
      "eval_nq_steps_per_second": 0.061,
      "eval_nq_token_set_f1": 0.4707648551827902,
      "eval_nq_token_set_f1_sem": 0.005015695960147839,
      "eval_nq_token_set_precision": 0.4303408368698218,
      "eval_nq_token_set_recall": 0.5277757233312265,
      "eval_nq_true_num_tokens": 64.0,
      "step": 220000
    },
    {
      "epoch": 42.24,
      "learning_rate": 0.001,
      "loss": 2.4859,
      "step": 220008
    },
    {
      "epoch": 42.25,
      "learning_rate": 0.001,
      "loss": 2.4816,
      "step": 220020
    },
    {
      "epoch": 42.25,
      "learning_rate": 0.001,
      "loss": 2.4957,
      "step": 220032
    },
    {
      "epoch": 42.25,
      "learning_rate": 0.001,
      "loss": 2.4816,
      "step": 220044
    },
    {
      "epoch": 42.25,
      "learning_rate": 0.001,
      "loss": 2.4827,
      "step": 220056
    },
    {
      "epoch": 42.26,
      "learning_rate": 0.001,
      "loss": 2.4906,
      "step": 220068
    },
    {
      "epoch": 42.26,
      "learning_rate": 0.001,
      "loss": 2.4908,
      "step": 220080
    },
    {
      "epoch": 42.26,
      "learning_rate": 0.001,
      "loss": 2.4913,
      "step": 220092
    },
    {
      "epoch": 42.26,
      "learning_rate": 0.001,
      "loss": 2.4832,
      "step": 220104
    },
    {
      "epoch": 42.26,
      "learning_rate": 0.001,
      "loss": 2.4919,
      "step": 220116
    },
    {
      "epoch": 42.27,
      "learning_rate": 0.001,
      "loss": 2.4939,
      "step": 220128
    },
    {
      "epoch": 42.27,
      "learning_rate": 0.001,
      "loss": 2.4907,
      "step": 220140
    },
    {
      "epoch": 42.27,
      "learning_rate": 0.001,
      "loss": 2.4827,
      "step": 220152
    },
    {
      "epoch": 42.27,
      "learning_rate": 0.001,
      "loss": 2.4874,
      "step": 220164
    },
    {
      "epoch": 42.28,
      "learning_rate": 0.001,
      "loss": 2.4892,
      "step": 220176
    },
    {
      "epoch": 42.28,
      "learning_rate": 0.001,
      "loss": 2.4895,
      "step": 220188
    },
    {
      "epoch": 42.28,
      "learning_rate": 0.001,
      "loss": 2.4901,
      "step": 220200
    },
    {
      "epoch": 42.28,
      "learning_rate": 0.001,
      "loss": 2.4892,
      "step": 220212
    },
    {
      "epoch": 42.29,
      "learning_rate": 0.001,
      "loss": 2.4874,
      "step": 220224
    },
    {
      "epoch": 42.29,
      "learning_rate": 0.001,
      "loss": 2.4909,
      "step": 220236
    },
    {
      "epoch": 42.29,
      "learning_rate": 0.001,
      "loss": 2.4812,
      "step": 220248
    },
    {
      "epoch": 42.29,
      "learning_rate": 0.001,
      "loss": 2.4969,
      "step": 220260
    },
    {
      "epoch": 42.29,
      "learning_rate": 0.001,
      "loss": 2.4959,
      "step": 220272
    },
    {
      "epoch": 42.3,
      "learning_rate": 0.001,
      "loss": 2.4832,
      "step": 220284
    },
    {
      "epoch": 42.3,
      "learning_rate": 0.001,
      "loss": 2.4848,
      "step": 220296
    },
    {
      "epoch": 42.3,
      "learning_rate": 0.001,
      "loss": 2.4895,
      "step": 220308
    },
    {
      "epoch": 42.3,
      "learning_rate": 0.001,
      "loss": 2.4848,
      "step": 220320
    },
    {
      "epoch": 42.31,
      "learning_rate": 0.001,
      "loss": 2.4824,
      "step": 220332
    },
    {
      "epoch": 42.31,
      "learning_rate": 0.001,
      "loss": 2.4938,
      "step": 220344
    },
    {
      "epoch": 42.31,
      "learning_rate": 0.001,
      "loss": 2.4978,
      "step": 220356
    },
    {
      "epoch": 42.31,
      "learning_rate": 0.001,
      "loss": 2.4825,
      "step": 220368
    },
    {
      "epoch": 42.32,
      "learning_rate": 0.001,
      "loss": 2.4894,
      "step": 220380
    },
    {
      "epoch": 42.32,
      "learning_rate": 0.001,
      "loss": 2.4951,
      "step": 220392
    },
    {
      "epoch": 42.32,
      "learning_rate": 0.001,
      "loss": 2.4949,
      "step": 220404
    },
    {
      "epoch": 42.32,
      "learning_rate": 0.001,
      "loss": 2.4819,
      "step": 220416
    },
    {
      "epoch": 42.32,
      "learning_rate": 0.001,
      "loss": 2.4848,
      "step": 220428
    },
    {
      "epoch": 42.33,
      "learning_rate": 0.001,
      "loss": 2.4862,
      "step": 220440
    },
    {
      "epoch": 42.33,
      "learning_rate": 0.001,
      "loss": 2.4857,
      "step": 220452
    },
    {
      "epoch": 42.33,
      "learning_rate": 0.001,
      "loss": 2.4872,
      "step": 220464
    },
    {
      "epoch": 42.33,
      "learning_rate": 0.001,
      "loss": 2.4996,
      "step": 220476
    },
    {
      "epoch": 42.34,
      "learning_rate": 0.001,
      "loss": 2.4955,
      "step": 220488
    },
    {
      "epoch": 42.34,
      "learning_rate": 0.001,
      "loss": 2.4863,
      "step": 220500
    },
    {
      "epoch": 42.34,
      "learning_rate": 0.001,
      "loss": 2.4893,
      "step": 220512
    },
    {
      "epoch": 42.34,
      "learning_rate": 0.001,
      "loss": 2.4971,
      "step": 220524
    },
    {
      "epoch": 42.35,
      "learning_rate": 0.001,
      "loss": 2.4875,
      "step": 220536
    },
    {
      "epoch": 42.35,
      "learning_rate": 0.001,
      "loss": 2.4854,
      "step": 220548
    },
    {
      "epoch": 42.35,
      "learning_rate": 0.001,
      "loss": 2.4892,
      "step": 220560
    },
    {
      "epoch": 42.35,
      "learning_rate": 0.001,
      "loss": 2.4917,
      "step": 220572
    },
    {
      "epoch": 42.35,
      "learning_rate": 0.001,
      "loss": 2.4821,
      "step": 220584
    },
    {
      "epoch": 42.36,
      "learning_rate": 0.001,
      "loss": 2.4875,
      "step": 220596
    },
    {
      "epoch": 42.36,
      "learning_rate": 0.001,
      "loss": 2.4892,
      "step": 220608
    },
    {
      "epoch": 42.36,
      "learning_rate": 0.001,
      "loss": 2.4733,
      "step": 220620
    },
    {
      "epoch": 42.36,
      "eval_ag_news_accuracy": 0.3286875,
      "eval_ag_news_bleu_score": 4.913234779707554,
      "eval_ag_news_bleu_score_sem": 0.15325418224317922,
      "eval_ag_news_emb_cos_sim": 0.82138991355896,
      "eval_ag_news_emb_cos_sim_sem": 0.006465480002441504,
      "eval_ag_news_emb_top1_equal": 0.234375,
      "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4734721183776855,
      "eval_ag_news_n_ngrams_match_1": 14.586,
      "eval_ag_news_n_ngrams_match_2": 3.304,
      "eval_ag_news_n_ngrams_match_3": 0.922,
      "eval_ag_news_num_pred_words": 46.986,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.24851895714151,
      "eval_ag_news_pred_num_tokens": 62.9921875,
      "eval_ag_news_rouge_score": 0.3621328017137787,
      "eval_ag_news_runtime": 14.4859,
      "eval_ag_news_samples_per_second": 34.516,
      "eval_ag_news_steps_per_second": 0.069,
      "eval_ag_news_token_set_f1": 0.3589860213247812,
      "eval_ag_news_token_set_f1_sem": 0.004384040337489738,
      "eval_ag_news_token_set_precision": 0.3479854436396126,
      "eval_ag_news_token_set_recall": 0.3850237597480226,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 220625
    },
    {
      "epoch": 42.36,
      "eval_anthropic_toxic_prompts_accuracy": 0.11646875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2037401470384888,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1185331013462973,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6836092472076416,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008281643022514598,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2020986080169678,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.362,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.018,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.74,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.23,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.584068422008286,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21838122962380246,
      "eval_anthropic_toxic_prompts_runtime": 11.6961,
      "eval_anthropic_toxic_prompts_samples_per_second": 42.749,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.085,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35550090106319376,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006414531472548422,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4481323566292048,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3191696051240382,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 220625
    },
    {
      "epoch": 42.36,
      "eval_arxiv_accuracy": 0.35146875,
      "eval_arxiv_bleu_score": 4.384790524916859,
      "eval_arxiv_bleu_score_sem": 0.12775255899959273,
      "eval_arxiv_emb_cos_sim": 0.779697835445404,
      "eval_arxiv_emb_cos_sim_sem": 0.006949149991962936,
      "eval_arxiv_emb_top1_equal": 0.3125,
      "eval_arxiv_emb_top1_equal_sem": 0.041130074229814934,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3369674682617188,
      "eval_arxiv_n_ngrams_match_1": 15.404,
      "eval_arxiv_n_ngrams_match_2": 3.042,
      "eval_arxiv_n_ngrams_match_3": 0.66,
      "eval_arxiv_num_pred_words": 40.726,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.133680932039407,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3685214404913864,
      "eval_arxiv_runtime": 11.7416,
      "eval_arxiv_samples_per_second": 42.583,
      "eval_arxiv_steps_per_second": 0.085,
      "eval_arxiv_token_set_f1": 0.3619226048821038,
      "eval_arxiv_token_set_f1_sem": 0.004209469035555156,
      "eval_arxiv_token_set_precision": 0.315969204757354,
      "eval_arxiv_token_set_recall": 0.4399705064946582,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 220625
    },
    {
      "epoch": 42.36,
      "eval_python_code_alpaca_accuracy": 0.16396875,
      "eval_python_code_alpaca_bleu_score": 5.05735299034849,
      "eval_python_code_alpaca_bleu_score_sem": 0.1673952538857668,
      "eval_python_code_alpaca_emb_cos_sim": 0.7657151222229004,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009257155538901527,
      "eval_python_code_alpaca_emb_top1_equal": 0.1484375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8134825229644775,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.068,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.156,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.15,
      "eval_python_code_alpaca_num_pred_words": 43.436,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 16.667863479709474,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3423279830812419,
      "eval_python_code_alpaca_runtime": 11.2088,
      "eval_python_code_alpaca_samples_per_second": 44.608,
      "eval_python_code_alpaca_steps_per_second": 0.089,
      "eval_python_code_alpaca_token_set_f1": 0.4855619679600474,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005572310532327872,
      "eval_python_code_alpaca_token_set_precision": 0.5522563722957164,
      "eval_python_code_alpaca_token_set_recall": 0.45931423398540877,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 220625
    },
    {
      "epoch": 42.36,
      "eval_wikibio_accuracy": 0.33259375,
      "eval_wikibio_bleu_score": 6.074715963795568,
      "eval_wikibio_bleu_score_sem": 0.22514948267978072,
      "eval_wikibio_emb_cos_sim": 0.736054003238678,
      "eval_wikibio_emb_cos_sim_sem": 0.010288878121767323,
      "eval_wikibio_emb_top1_equal": 0.2109375,
      "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.631265163421631,
      "eval_wikibio_n_ngrams_match_1": 10.006,
      "eval_wikibio_n_ngrams_match_2": 3.396,
      "eval_wikibio_n_ngrams_match_3": 1.24,
      "eval_wikibio_num_pred_words": 35.608,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.76055968831781,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3518882309754411,
      "eval_wikibio_runtime": 11.181,
      "eval_wikibio_samples_per_second": 44.719,
      "eval_wikibio_steps_per_second": 0.089,
      "eval_wikibio_token_set_f1": 0.3202194496593714,
      "eval_wikibio_token_set_f1_sem": 0.005526329548682177,
      "eval_wikibio_token_set_precision": 0.32614185234820303,
      "eval_wikibio_token_set_recall": 0.3333646391040292,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 220625
    },
    {
      "epoch": 42.36,
      "eval_nq_accuracy": 0.53759375,
      "eval_nq_bleu_score": 12.222270247136262,
      "eval_nq_bleu_score_sem": 0.48775874131268104,
      "eval_nq_emb_cos_sim": 0.8407552242279053,
      "eval_nq_emb_cos_sim_sem": 0.007149006396334548,
      "eval_nq_emb_top1_equal": 0.28125,
      "eval_nq_emb_top1_equal_sem": 0.039896367485272234,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.115541458129883,
      "eval_nq_n_ngrams_match_1": 23.468,
      "eval_nq_n_ngrams_match_2": 8.758,
      "eval_nq_n_ngrams_match_3": 4.104,
      "eval_nq_num_pred_words": 49.096,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.294075445102555,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4557016637875644,
      "eval_nq_runtime": 14.2294,
      "eval_nq_samples_per_second": 35.139,
      "eval_nq_steps_per_second": 0.07,
      "eval_nq_token_set_f1": 0.46947814500519336,
      "eval_nq_token_set_f1_sem": 0.0049933643153937815,
      "eval_nq_token_set_precision": 0.42926104990642944,
      "eval_nq_token_set_recall": 0.5266418809022172,
      "eval_nq_true_num_tokens": 64.0,
      "step": 220625
    },
    {
      "epoch": 42.36,
      "learning_rate": 0.001,
      "loss": 2.482,
      "step": 220632
    },
    {
      "epoch": 42.37,
      "learning_rate": 0.001,
      "loss": 2.4894,
      "step": 220644
    },
    {
      "epoch": 42.37,
      "learning_rate": 0.001,
      "loss": 2.4907,
      "step": 220656
    },
    {
      "epoch": 42.37,
      "learning_rate": 0.001,
      "loss": 2.4839,
      "step": 220668
    },
    {
      "epoch": 42.37,
      "learning_rate": 0.001,
      "loss": 2.5,
      "step": 220680
    },
    {
      "epoch": 42.38,
      "learning_rate": 0.001,
      "loss": 2.4933,
      "step": 220692
    },
    {
      "epoch": 42.38,
      "learning_rate": 0.001,
      "loss": 2.493,
      "step": 220704
    },
    {
      "epoch": 42.38,
      "learning_rate": 0.001,
      "loss": 2.4875,
      "step": 220716
    },
    {
      "epoch": 42.38,
      "learning_rate": 0.001,
      "loss": 2.4913,
      "step": 220728
    },
    {
      "epoch": 42.38,
      "learning_rate": 0.001,
      "loss": 2.488,
      "step": 220740
    },
    {
      "epoch": 42.39,
      "learning_rate": 0.001,
      "loss": 2.4912,
      "step": 220752
    },
    {
      "epoch": 42.39,
      "learning_rate": 0.001,
      "loss": 2.4937,
      "step": 220764
    },
    {
      "epoch": 42.39,
      "learning_rate": 0.001,
      "loss": 2.4784,
      "step": 220776
    },
    {
      "epoch": 42.39,
      "learning_rate": 0.001,
      "loss": 2.4809,
      "step": 220788
    },
    {
      "epoch": 42.4,
      "learning_rate": 0.001,
      "loss": 2.4874,
      "step": 220800
    },
    {
      "epoch": 42.4,
      "learning_rate": 0.001,
      "loss": 2.4906,
      "step": 220812
    },
    {
      "epoch": 42.4,
      "learning_rate": 0.001,
      "loss": 2.4938,
      "step": 220824
    },
    {
      "epoch": 42.4,
      "learning_rate": 0.001,
      "loss": 2.4941,
      "step": 220836
    },
    {
      "epoch": 42.41,
      "learning_rate": 0.001,
      "loss": 2.4918,
      "step": 220848
    },
    {
      "epoch": 42.41,
      "learning_rate": 0.001,
      "loss": 2.4837,
      "step": 220860
    },
    {
      "epoch": 42.41,
      "learning_rate": 0.001,
      "loss": 2.4847,
      "step": 220872
    },
    {
      "epoch": 42.41,
      "learning_rate": 0.001,
      "loss": 2.4845,
      "step": 220884
    },
    {
      "epoch": 42.41,
      "learning_rate": 0.001,
      "loss": 2.4936,
      "step": 220896
    },
    {
      "epoch": 42.42,
      "learning_rate": 0.001,
      "loss": 2.489,
      "step": 220908
    },
    {
      "epoch": 42.42,
      "learning_rate": 0.001,
      "loss": 2.5041,
      "step": 220920
    },
    {
      "epoch": 42.42,
      "learning_rate": 0.001,
      "loss": 2.4937,
      "step": 220932
    },
    {
      "epoch": 42.42,
      "learning_rate": 0.001,
      "loss": 2.4901,
      "step": 220944
    },
    {
      "epoch": 42.43,
      "learning_rate": 0.001,
      "loss": 2.4788,
      "step": 220956
    },
    {
      "epoch": 42.43,
      "learning_rate": 0.001,
      "loss": 2.4869,
      "step": 220968
    },
    {
      "epoch": 42.43,
      "learning_rate": 0.001,
      "loss": 2.4905,
      "step": 220980
    },
    {
      "epoch": 42.43,
      "learning_rate": 0.001,
      "loss": 2.5006,
      "step": 220992
    },
    {
      "epoch": 42.44,
      "learning_rate": 0.001,
      "loss": 2.4864,
      "step": 221004
    },
    {
      "epoch": 42.44,
      "learning_rate": 0.001,
      "loss": 2.4858,
      "step": 221016
    },
    {
      "epoch": 42.44,
      "learning_rate": 0.001,
      "loss": 2.4887,
      "step": 221028
    },
    {
      "epoch": 42.44,
      "learning_rate": 0.001,
      "loss": 2.4902,
      "step": 221040
    },
    {
      "epoch": 42.44,
      "learning_rate": 0.001,
      "loss": 2.494,
      "step": 221052
    },
    {
      "epoch": 42.45,
      "learning_rate": 0.001,
      "loss": 2.4999,
      "step": 221064
    },
    {
      "epoch": 42.45,
      "learning_rate": 0.001,
      "loss": 2.4785,
      "step": 221076
    },
    {
      "epoch": 42.45,
      "learning_rate": 0.001,
      "loss": 2.5008,
      "step": 221088
    },
    {
      "epoch": 42.45,
      "learning_rate": 0.001,
      "loss": 2.4887,
      "step": 221100
    },
    {
      "epoch": 42.46,
      "learning_rate": 0.001,
      "loss": 2.4879,
      "step": 221112
    },
    {
      "epoch": 42.46,
      "learning_rate": 0.001,
      "loss": 2.4955,
      "step": 221124
    },
    {
      "epoch": 42.46,
      "learning_rate": 0.001,
      "loss": 2.4915,
      "step": 221136
    },
    {
      "epoch": 42.46,
      "learning_rate": 0.001,
      "loss": 2.4936,
      "step": 221148
    },
    {
      "epoch": 42.47,
      "learning_rate": 0.001,
      "loss": 2.4819,
      "step": 221160
    },
    {
      "epoch": 42.47,
      "learning_rate": 0.001,
      "loss": 2.499,
      "step": 221172
    },
    {
      "epoch": 42.47,
      "learning_rate": 0.001,
      "loss": 2.4982,
      "step": 221184
    },
    {
      "epoch": 42.47,
      "learning_rate": 0.001,
      "loss": 2.49,
      "step": 221196
    },
    {
      "epoch": 42.47,
      "learning_rate": 0.001,
      "loss": 2.4861,
      "step": 221208
    },
    {
      "epoch": 42.48,
      "learning_rate": 0.001,
      "loss": 2.4945,
      "step": 221220
    },
    {
      "epoch": 42.48,
      "learning_rate": 0.001,
      "loss": 2.4859,
      "step": 221232
    },
    {
      "epoch": 42.48,
      "learning_rate": 0.001,
      "loss": 2.4977,
      "step": 221244
    },
    {
      "epoch": 42.48,
      "eval_ag_news_accuracy": 0.33,
      "eval_ag_news_bleu_score": 4.964829589879801,
      "eval_ag_news_bleu_score_sem": 0.16182026323247872,
      "eval_ag_news_emb_cos_sim": 0.8185749053955078,
      "eval_ag_news_emb_cos_sim_sem": 0.007249959952319283,
      "eval_ag_news_emb_top1_equal": 0.2421875,
      "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.456552267074585,
      "eval_ag_news_n_ngrams_match_1": 14.35,
      "eval_ag_news_n_ngrams_match_2": 3.244,
      "eval_ag_news_n_ngrams_match_3": 0.908,
      "eval_ag_news_num_pred_words": 46.246,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.70746896202341,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35957403037743746,
      "eval_ag_news_runtime": 17.2026,
      "eval_ag_news_samples_per_second": 29.065,
      "eval_ag_news_steps_per_second": 0.058,
      "eval_ag_news_token_set_f1": 0.35961498353933935,
      "eval_ag_news_token_set_f1_sem": 0.004551093483797757,
      "eval_ag_news_token_set_precision": 0.34512260518668486,
      "eval_ag_news_token_set_recall": 0.38994156327149254,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 221250
    },
    {
      "epoch": 42.48,
      "eval_anthropic_toxic_prompts_accuracy": 0.116125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.048372009850658,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11413168733340211,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6789529323577881,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009790593712524737,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.140625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030847557647994725,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.1827456951141357,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.214,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.86,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.686,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.83,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.11286933115267,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21617793979208577,
      "eval_anthropic_toxic_prompts_runtime": 16.3683,
      "eval_anthropic_toxic_prompts_samples_per_second": 30.547,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.061,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.354242424612089,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0066501806268472845,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.438145795568399,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32164073704444873,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 221250
    },
    {
      "epoch": 42.48,
      "eval_arxiv_accuracy": 0.35203125,
      "eval_arxiv_bleu_score": 4.501454770698891,
      "eval_arxiv_bleu_score_sem": 0.1312538419078761,
      "eval_arxiv_emb_cos_sim": 0.7829389572143555,
      "eval_arxiv_emb_cos_sim_sem": 0.006906137157424045,
      "eval_arxiv_emb_top1_equal": 0.25,
      "eval_arxiv_emb_top1_equal_sem": 0.03842366440207048,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.328486442565918,
      "eval_arxiv_n_ngrams_match_1": 15.558,
      "eval_arxiv_n_ngrams_match_2": 3.088,
      "eval_arxiv_n_ngrams_match_3": 0.692,
      "eval_arxiv_num_pred_words": 40.954,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.896087403676802,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.371630732942738,
      "eval_arxiv_runtime": 12.1434,
      "eval_arxiv_samples_per_second": 41.175,
      "eval_arxiv_steps_per_second": 0.082,
      "eval_arxiv_token_set_f1": 0.36436814703754683,
      "eval_arxiv_token_set_f1_sem": 0.004064773325112701,
      "eval_arxiv_token_set_precision": 0.3164516263537375,
      "eval_arxiv_token_set_recall": 0.4460516047433709,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 221250
    },
    {
      "epoch": 42.48,
      "eval_python_code_alpaca_accuracy": 0.163625,
      "eval_python_code_alpaca_bleu_score": 4.863885875968682,
      "eval_python_code_alpaca_bleu_score_sem": 0.1701844172174006,
      "eval_python_code_alpaca_emb_cos_sim": 0.7633397579193115,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007786308234123268,
      "eval_python_code_alpaca_emb_top1_equal": 0.1796875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.034068008879424266,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8259034156799316,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.764,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.944,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.016,
      "eval_python_code_alpaca_num_pred_words": 42.318,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 16.876184310853922,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33722470945843286,
      "eval_python_code_alpaca_runtime": 11.545,
      "eval_python_code_alpaca_samples_per_second": 43.309,
      "eval_python_code_alpaca_steps_per_second": 0.087,
      "eval_python_code_alpaca_token_set_f1": 0.47371346511410956,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0059670758444496565,
      "eval_python_code_alpaca_token_set_precision": 0.5317447831843691,
      "eval_python_code_alpaca_token_set_recall": 0.44921655113881387,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 221250
    },
    {
      "epoch": 42.48,
      "eval_wikibio_accuracy": 0.3293125,
      "eval_wikibio_bleu_score": 6.241991363100509,
      "eval_wikibio_bleu_score_sem": 0.2148057709867536,
      "eval_wikibio_emb_cos_sim": 0.7499901652336121,
      "eval_wikibio_emb_cos_sim_sem": 0.0094670567226534,
      "eval_wikibio_emb_top1_equal": 0.234375,
      "eval_wikibio_emb_top1_equal_sem": 0.03758909358128201,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6126034259796143,
      "eval_wikibio_n_ngrams_match_1": 10.35,
      "eval_wikibio_n_ngrams_match_2": 3.526,
      "eval_wikibio_n_ngrams_match_3": 1.3,
      "eval_wikibio_num_pred_words": 36.06,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.06241658062203,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3647641482252846,
      "eval_wikibio_runtime": 16.1776,
      "eval_wikibio_samples_per_second": 30.907,
      "eval_wikibio_steps_per_second": 0.062,
      "eval_wikibio_token_set_f1": 0.32839988170065676,
      "eval_wikibio_token_set_f1_sem": 0.005525039758339359,
      "eval_wikibio_token_set_precision": 0.33716157196704494,
      "eval_wikibio_token_set_recall": 0.3387721449018883,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 221250
    },
    {
      "epoch": 42.48,
      "eval_nq_accuracy": 0.5365625,
      "eval_nq_bleu_score": 12.330745345704118,
      "eval_nq_bleu_score_sem": 0.48487521165461384,
      "eval_nq_emb_cos_sim": 0.8425266742706299,
      "eval_nq_emb_cos_sim_sem": 0.0063902017797339995,
      "eval_nq_emb_top1_equal": 0.3046875,
      "eval_nq_emb_top1_equal_sem": 0.04084279867618665,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1134583950042725,
      "eval_nq_n_ngrams_match_1": 23.716,
      "eval_nq_n_ngrams_match_2": 8.846,
      "eval_nq_n_ngrams_match_3": 4.17,
      "eval_nq_num_pred_words": 48.884,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.276816344520654,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.46056870759133217,
      "eval_nq_runtime": 16.7213,
      "eval_nq_samples_per_second": 29.902,
      "eval_nq_steps_per_second": 0.06,
      "eval_nq_token_set_f1": 0.4712618744237596,
      "eval_nq_token_set_f1_sem": 0.0049357688121397765,
      "eval_nq_token_set_precision": 0.4312205787074414,
      "eval_nq_token_set_recall": 0.5283920948121197,
      "eval_nq_true_num_tokens": 64.0,
      "step": 221250
    },
    {
      "epoch": 42.48,
      "learning_rate": 0.001,
      "loss": 2.4895,
      "step": 221256
    },
    {
      "epoch": 42.49,
      "learning_rate": 0.001,
      "loss": 2.4808,
      "step": 221268
    },
    {
      "epoch": 42.49,
      "learning_rate": 0.001,
      "loss": 2.4876,
      "step": 221280
    },
    {
      "epoch": 42.49,
      "learning_rate": 0.001,
      "loss": 2.4813,
      "step": 221292
    },
    {
      "epoch": 42.49,
      "learning_rate": 0.001,
      "loss": 2.4916,
      "step": 221304
    },
    {
      "epoch": 42.5,
      "learning_rate": 0.001,
      "loss": 2.4841,
      "step": 221316
    },
    {
      "epoch": 42.5,
      "learning_rate": 0.001,
      "loss": 2.4829,
      "step": 221328
    },
    {
      "epoch": 42.5,
      "learning_rate": 0.001,
      "loss": 2.4951,
      "step": 221340
    },
    {
      "epoch": 42.5,
      "learning_rate": 0.001,
      "loss": 2.4926,
      "step": 221352
    },
    {
      "epoch": 42.5,
      "learning_rate": 0.001,
      "loss": 2.4879,
      "step": 221364
    },
    {
      "epoch": 42.51,
      "learning_rate": 0.001,
      "loss": 2.5077,
      "step": 221376
    },
    {
      "epoch": 42.51,
      "learning_rate": 0.001,
      "loss": 2.5,
      "step": 221388
    },
    {
      "epoch": 42.51,
      "learning_rate": 0.001,
      "loss": 2.4837,
      "step": 221400
    },
    {
      "epoch": 42.51,
      "learning_rate": 0.001,
      "loss": 2.4955,
      "step": 221412
    },
    {
      "epoch": 42.52,
      "learning_rate": 0.001,
      "loss": 2.499,
      "step": 221424
    },
    {
      "epoch": 42.52,
      "learning_rate": 0.001,
      "loss": 2.4981,
      "step": 221436
    },
    {
      "epoch": 42.52,
      "learning_rate": 0.001,
      "loss": 2.4979,
      "step": 221448
    },
    {
      "epoch": 42.52,
      "learning_rate": 0.001,
      "loss": 2.4957,
      "step": 221460
    },
    {
      "epoch": 42.53,
      "learning_rate": 0.001,
      "loss": 2.4968,
      "step": 221472
    },
    {
      "epoch": 42.53,
      "learning_rate": 0.001,
      "loss": 2.4996,
      "step": 221484
    },
    {
      "epoch": 42.53,
      "learning_rate": 0.001,
      "loss": 2.4917,
      "step": 221496
    },
    {
      "epoch": 42.53,
      "learning_rate": 0.001,
      "loss": 2.4899,
      "step": 221508
    },
    {
      "epoch": 42.53,
      "learning_rate": 0.001,
      "loss": 2.4755,
      "step": 221520
    },
    {
      "epoch": 42.54,
      "learning_rate": 0.001,
      "loss": 2.4937,
      "step": 221532
    },
    {
      "epoch": 42.54,
      "learning_rate": 0.001,
      "loss": 2.4897,
      "step": 221544
    },
    {
      "epoch": 42.54,
      "learning_rate": 0.001,
      "loss": 2.4946,
      "step": 221556
    },
    {
      "epoch": 42.54,
      "learning_rate": 0.001,
      "loss": 2.4818,
      "step": 221568
    },
    {
      "epoch": 42.55,
      "learning_rate": 0.001,
      "loss": 2.4883,
      "step": 221580
    },
    {
      "epoch": 42.55,
      "learning_rate": 0.001,
      "loss": 2.4844,
      "step": 221592
    },
    {
      "epoch": 42.55,
      "learning_rate": 0.001,
      "loss": 2.4941,
      "step": 221604
    },
    {
      "epoch": 42.55,
      "learning_rate": 0.001,
      "loss": 2.4896,
      "step": 221616
    },
    {
      "epoch": 42.56,
      "learning_rate": 0.001,
      "loss": 2.4895,
      "step": 221628
    },
    {
      "epoch": 42.56,
      "learning_rate": 0.001,
      "loss": 2.486,
      "step": 221640
    },
    {
      "epoch": 42.56,
      "learning_rate": 0.001,
      "loss": 2.4953,
      "step": 221652
    },
    {
      "epoch": 42.56,
      "learning_rate": 0.001,
      "loss": 2.4936,
      "step": 221664
    },
    {
      "epoch": 42.56,
      "learning_rate": 0.001,
      "loss": 2.4725,
      "step": 221676
    },
    {
      "epoch": 42.57,
      "learning_rate": 0.001,
      "loss": 2.4938,
      "step": 221688
    },
    {
      "epoch": 42.57,
      "learning_rate": 0.001,
      "loss": 2.4979,
      "step": 221700
    },
    {
      "epoch": 42.57,
      "learning_rate": 0.001,
      "loss": 2.4929,
      "step": 221712
    },
    {
      "epoch": 42.57,
      "learning_rate": 0.001,
      "loss": 2.4905,
      "step": 221724
    },
    {
      "epoch": 42.58,
      "learning_rate": 0.001,
      "loss": 2.497,
      "step": 221736
    },
    {
      "epoch": 42.58,
      "learning_rate": 0.001,
      "loss": 2.486,
      "step": 221748
    },
    {
      "epoch": 42.58,
      "learning_rate": 0.001,
      "loss": 2.4935,
      "step": 221760
    },
    {
      "epoch": 42.58,
      "learning_rate": 0.001,
      "loss": 2.4879,
      "step": 221772
    },
    {
      "epoch": 42.59,
      "learning_rate": 0.001,
      "loss": 2.4922,
      "step": 221784
    },
    {
      "epoch": 42.59,
      "learning_rate": 0.001,
      "loss": 2.4919,
      "step": 221796
    },
    {
      "epoch": 42.59,
      "learning_rate": 0.001,
      "loss": 2.489,
      "step": 221808
    },
    {
      "epoch": 42.59,
      "learning_rate": 0.001,
      "loss": 2.5003,
      "step": 221820
    },
    {
      "epoch": 42.59,
      "learning_rate": 0.001,
      "loss": 2.4852,
      "step": 221832
    },
    {
      "epoch": 42.6,
      "learning_rate": 0.001,
      "loss": 2.5075,
      "step": 221844
    },
    {
      "epoch": 42.6,
      "learning_rate": 0.001,
      "loss": 2.4947,
      "step": 221856
    },
    {
      "epoch": 42.6,
      "learning_rate": 0.001,
      "loss": 2.4823,
      "step": 221868
    },
    {
      "epoch": 42.6,
      "eval_ag_news_accuracy": 0.3305,
      "eval_ag_news_bleu_score": 5.048784266999561,
      "eval_ag_news_bleu_score_sem": 0.16230843330009173,
      "eval_ag_news_emb_cos_sim": 0.8227436542510986,
      "eval_ag_news_emb_cos_sim_sem": 0.007101127576426172,
      "eval_ag_news_emb_top1_equal": 0.2578125,
      "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4607505798339844,
      "eval_ag_news_n_ngrams_match_1": 14.472,
      "eval_ag_news_n_ngrams_match_2": 3.342,
      "eval_ag_news_n_ngrams_match_3": 0.97,
      "eval_ag_news_num_pred_words": 46.588,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.840866660227118,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3608835502818998,
      "eval_ag_news_runtime": 14.6856,
      "eval_ag_news_samples_per_second": 34.047,
      "eval_ag_news_steps_per_second": 0.068,
      "eval_ag_news_token_set_f1": 0.36106274917629305,
      "eval_ag_news_token_set_f1_sem": 0.004426777841001614,
      "eval_ag_news_token_set_precision": 0.34680449603887004,
      "eval_ag_news_token_set_recall": 0.39176875324193483,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 221875
    },
    {
      "epoch": 42.6,
      "eval_anthropic_toxic_prompts_accuracy": 0.11553125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2235452102844517,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12004648662748921,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.684950590133667,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008897843777178536,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.1883480548858643,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.332,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.02,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.76,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.476,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.248337416428615,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21765704624815113,
      "eval_anthropic_toxic_prompts_runtime": 11.6409,
      "eval_anthropic_toxic_prompts_samples_per_second": 42.952,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.086,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3647078563306401,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0066032271600874725,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4453078299479659,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3354832502442887,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 221875
    },
    {
      "epoch": 42.6,
      "eval_arxiv_accuracy": 0.35296875,
      "eval_arxiv_bleu_score": 4.534394328629161,
      "eval_arxiv_bleu_score_sem": 0.13158237441585904,
      "eval_arxiv_emb_cos_sim": 0.7838080525398254,
      "eval_arxiv_emb_cos_sim_sem": 0.007122407783657126,
      "eval_arxiv_emb_top1_equal": 0.3046875,
      "eval_arxiv_emb_top1_equal_sem": 0.04084279867618665,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3306009769439697,
      "eval_arxiv_n_ngrams_match_1": 15.654,
      "eval_arxiv_n_ngrams_match_2": 3.18,
      "eval_arxiv_n_ngrams_match_3": 0.708,
      "eval_arxiv_num_pred_words": 40.408,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.95513704875511,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3742262138438309,
      "eval_arxiv_runtime": 11.9256,
      "eval_arxiv_samples_per_second": 41.927,
      "eval_arxiv_steps_per_second": 0.084,
      "eval_arxiv_token_set_f1": 0.3705539020906193,
      "eval_arxiv_token_set_f1_sem": 0.00426309678106626,
      "eval_arxiv_token_set_precision": 0.322085981949491,
      "eval_arxiv_token_set_recall": 0.455209287825526,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 221875
    },
    {
      "epoch": 42.6,
      "eval_python_code_alpaca_accuracy": 0.1619375,
      "eval_python_code_alpaca_bleu_score": 4.9155830441087,
      "eval_python_code_alpaca_bleu_score_sem": 0.1696741164803445,
      "eval_python_code_alpaca_emb_cos_sim": 0.7698432207107544,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.0065826884125167765,
      "eval_python_code_alpaca_emb_top1_equal": 0.1875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.034634623208270626,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.839216709136963,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.05,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.086,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.128,
      "eval_python_code_alpaca_num_pred_words": 44.22,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.102364163658642,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.34120464766921027,
      "eval_python_code_alpaca_runtime": 11.4107,
      "eval_python_code_alpaca_samples_per_second": 43.818,
      "eval_python_code_alpaca_steps_per_second": 0.088,
      "eval_python_code_alpaca_token_set_f1": 0.4888048798247338,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005491730440376083,
      "eval_python_code_alpaca_token_set_precision": 0.5493986762846206,
      "eval_python_code_alpaca_token_set_recall": 0.4604317583363051,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 221875
    },
    {
      "epoch": 42.6,
      "eval_wikibio_accuracy": 0.32846875,
      "eval_wikibio_bleu_score": 6.021263570128182,
      "eval_wikibio_bleu_score_sem": 0.21727071297480205,
      "eval_wikibio_emb_cos_sim": 0.7418859601020813,
      "eval_wikibio_emb_cos_sim_sem": 0.010263068445410464,
      "eval_wikibio_emb_top1_equal": 0.265625,
      "eval_wikibio_emb_top1_equal_sem": 0.03919146934646163,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6263315677642822,
      "eval_wikibio_n_ngrams_match_1": 10.074,
      "eval_wikibio_n_ngrams_match_2": 3.432,
      "eval_wikibio_n_ngrams_match_3": 1.246,
      "eval_wikibio_num_pred_words": 35.63,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.57472315312593,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3582569460966102,
      "eval_wikibio_runtime": 12.5989,
      "eval_wikibio_samples_per_second": 39.686,
      "eval_wikibio_steps_per_second": 0.079,
      "eval_wikibio_token_set_f1": 0.31979591122633777,
      "eval_wikibio_token_set_f1_sem": 0.00554928472390408,
      "eval_wikibio_token_set_precision": 0.32734950004508384,
      "eval_wikibio_token_set_recall": 0.33355790427242027,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 221875
    },
    {
      "epoch": 42.6,
      "eval_nq_accuracy": 0.5363125,
      "eval_nq_bleu_score": 12.380004795370223,
      "eval_nq_bleu_score_sem": 0.4742240830847173,
      "eval_nq_emb_cos_sim": 0.8485004901885986,
      "eval_nq_emb_cos_sim_sem": 0.006622361109311179,
      "eval_nq_emb_top1_equal": 0.3359375,
      "eval_nq_emb_top1_equal_sem": 0.04191137143408563,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.114377498626709,
      "eval_nq_n_ngrams_match_1": 23.794,
      "eval_nq_n_ngrams_match_2": 8.91,
      "eval_nq_n_ngrams_match_3": 4.146,
      "eval_nq_num_pred_words": 49.326,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.284427093402813,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4616164035626542,
      "eval_nq_runtime": 12.6641,
      "eval_nq_samples_per_second": 39.482,
      "eval_nq_steps_per_second": 0.079,
      "eval_nq_token_set_f1": 0.4738818381738094,
      "eval_nq_token_set_f1_sem": 0.004879494647536723,
      "eval_nq_token_set_precision": 0.4329971561392119,
      "eval_nq_token_set_recall": 0.5316145906152132,
      "eval_nq_true_num_tokens": 64.0,
      "step": 221875
    },
    {
      "epoch": 42.6,
      "learning_rate": 0.001,
      "loss": 2.4933,
      "step": 221880
    },
    {
      "epoch": 42.61,
      "learning_rate": 0.001,
      "loss": 2.4891,
      "step": 221892
    },
    {
      "epoch": 42.61,
      "learning_rate": 0.001,
      "loss": 2.4878,
      "step": 221904
    },
    {
      "epoch": 42.61,
      "learning_rate": 0.001,
      "loss": 2.4844,
      "step": 221916
    },
    {
      "epoch": 42.61,
      "learning_rate": 0.001,
      "loss": 2.4849,
      "step": 221928
    },
    {
      "epoch": 42.62,
      "learning_rate": 0.001,
      "loss": 2.4939,
      "step": 221940
    },
    {
      "epoch": 42.62,
      "learning_rate": 0.001,
      "loss": 2.491,
      "step": 221952
    },
    {
      "epoch": 42.62,
      "learning_rate": 0.001,
      "loss": 2.4909,
      "step": 221964
    },
    {
      "epoch": 42.62,
      "learning_rate": 0.001,
      "loss": 2.4922,
      "step": 221976
    },
    {
      "epoch": 42.62,
      "learning_rate": 0.001,
      "loss": 2.486,
      "step": 221988
    },
    {
      "epoch": 42.63,
      "learning_rate": 0.001,
      "loss": 2.4929,
      "step": 222000
    },
    {
      "epoch": 42.63,
      "learning_rate": 0.001,
      "loss": 2.486,
      "step": 222012
    },
    {
      "epoch": 42.63,
      "learning_rate": 0.001,
      "loss": 2.4931,
      "step": 222024
    },
    {
      "epoch": 42.63,
      "learning_rate": 0.001,
      "loss": 2.4866,
      "step": 222036
    },
    {
      "epoch": 42.64,
      "learning_rate": 0.001,
      "loss": 2.4946,
      "step": 222048
    },
    {
      "epoch": 42.64,
      "learning_rate": 0.001,
      "loss": 2.4972,
      "step": 222060
    },
    {
      "epoch": 42.64,
      "learning_rate": 0.001,
      "loss": 2.4828,
      "step": 222072
    },
    {
      "epoch": 42.64,
      "learning_rate": 0.001,
      "loss": 2.4884,
      "step": 222084
    },
    {
      "epoch": 42.65,
      "learning_rate": 0.001,
      "loss": 2.4868,
      "step": 222096
    },
    {
      "epoch": 42.65,
      "learning_rate": 0.001,
      "loss": 2.4875,
      "step": 222108
    },
    {
      "epoch": 42.65,
      "learning_rate": 0.001,
      "loss": 2.4861,
      "step": 222120
    },
    {
      "epoch": 42.65,
      "learning_rate": 0.001,
      "loss": 2.4994,
      "step": 222132
    },
    {
      "epoch": 42.65,
      "learning_rate": 0.001,
      "loss": 2.4888,
      "step": 222144
    },
    {
      "epoch": 42.66,
      "learning_rate": 0.001,
      "loss": 2.49,
      "step": 222156
    },
    {
      "epoch": 42.66,
      "learning_rate": 0.001,
      "loss": 2.4876,
      "step": 222168
    },
    {
      "epoch": 42.66,
      "learning_rate": 0.001,
      "loss": 2.4892,
      "step": 222180
    },
    {
      "epoch": 42.66,
      "learning_rate": 0.001,
      "loss": 2.4983,
      "step": 222192
    },
    {
      "epoch": 42.67,
      "learning_rate": 0.001,
      "loss": 2.4931,
      "step": 222204
    },
    {
      "epoch": 42.67,
      "learning_rate": 0.001,
      "loss": 2.4908,
      "step": 222216
    },
    {
      "epoch": 42.67,
      "learning_rate": 0.001,
      "loss": 2.4932,
      "step": 222228
    },
    {
      "epoch": 42.67,
      "learning_rate": 0.001,
      "loss": 2.4958,
      "step": 222240
    },
    {
      "epoch": 42.68,
      "learning_rate": 0.001,
      "loss": 2.4879,
      "step": 222252
    },
    {
      "epoch": 42.68,
      "learning_rate": 0.001,
      "loss": 2.4874,
      "step": 222264
    },
    {
      "epoch": 42.68,
      "learning_rate": 0.001,
      "loss": 2.4899,
      "step": 222276
    },
    {
      "epoch": 42.68,
      "learning_rate": 0.001,
      "loss": 2.4774,
      "step": 222288
    },
    {
      "epoch": 42.68,
      "learning_rate": 0.001,
      "loss": 2.492,
      "step": 222300
    },
    {
      "epoch": 42.69,
      "learning_rate": 0.001,
      "loss": 2.4904,
      "step": 222312
    },
    {
      "epoch": 42.69,
      "learning_rate": 0.001,
      "loss": 2.4995,
      "step": 222324
    },
    {
      "epoch": 42.69,
      "learning_rate": 0.001,
      "loss": 2.4873,
      "step": 222336
    },
    {
      "epoch": 42.69,
      "learning_rate": 0.001,
      "loss": 2.5018,
      "step": 222348
    },
    {
      "epoch": 42.7,
      "learning_rate": 0.001,
      "loss": 2.4944,
      "step": 222360
    },
    {
      "epoch": 42.7,
      "learning_rate": 0.001,
      "loss": 2.4814,
      "step": 222372
    },
    {
      "epoch": 42.7,
      "learning_rate": 0.001,
      "loss": 2.4875,
      "step": 222384
    },
    {
      "epoch": 42.7,
      "learning_rate": 0.001,
      "loss": 2.4927,
      "step": 222396
    },
    {
      "epoch": 42.71,
      "learning_rate": 0.001,
      "loss": 2.4881,
      "step": 222408
    },
    {
      "epoch": 42.71,
      "learning_rate": 0.001,
      "loss": 2.4801,
      "step": 222420
    },
    {
      "epoch": 42.71,
      "learning_rate": 0.001,
      "loss": 2.4963,
      "step": 222432
    },
    {
      "epoch": 42.71,
      "learning_rate": 0.001,
      "loss": 2.4879,
      "step": 222444
    },
    {
      "epoch": 42.71,
      "learning_rate": 0.001,
      "loss": 2.4814,
      "step": 222456
    },
    {
      "epoch": 42.72,
      "learning_rate": 0.001,
      "loss": 2.4909,
      "step": 222468
    },
    {
      "epoch": 42.72,
      "learning_rate": 0.001,
      "loss": 2.4906,
      "step": 222480
    },
    {
      "epoch": 42.72,
      "learning_rate": 0.001,
      "loss": 2.4832,
      "step": 222492
    },
    {
      "epoch": 42.72,
      "eval_ag_news_accuracy": 0.3299375,
      "eval_ag_news_bleu_score": 4.957281724314242,
      "eval_ag_news_bleu_score_sem": 0.1576714803685843,
      "eval_ag_news_emb_cos_sim": 0.8234381079673767,
      "eval_ag_news_emb_cos_sim_sem": 0.0070108898566753316,
      "eval_ag_news_emb_top1_equal": 0.28125,
      "eval_ag_news_emb_top1_equal_sem": 0.039896367485272234,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4598145484924316,
      "eval_ag_news_n_ngrams_match_1": 14.418,
      "eval_ag_news_n_ngrams_match_2": 3.27,
      "eval_ag_news_n_ngrams_match_3": 0.916,
      "eval_ag_news_num_pred_words": 46.814,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.81107655550185,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.36017806505021455,
      "eval_ag_news_runtime": 12.0675,
      "eval_ag_news_samples_per_second": 41.433,
      "eval_ag_news_steps_per_second": 0.083,
      "eval_ag_news_token_set_f1": 0.3598791809241332,
      "eval_ag_news_token_set_f1_sem": 0.0044815531472226855,
      "eval_ag_news_token_set_precision": 0.34719809803693247,
      "eval_ag_news_token_set_recall": 0.38810075036977765,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 222500
    },
    {
      "epoch": 42.72,
      "eval_anthropic_toxic_prompts_accuracy": 0.11553125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.243367604952888,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12098285126094625,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6762825846672058,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00964989567839423,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.1925556659698486,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.288,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.016,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.768,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.55,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.350579937250426,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21756804837210206,
      "eval_anthropic_toxic_prompts_runtime": 11.5352,
      "eval_anthropic_toxic_prompts_samples_per_second": 43.345,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.087,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3608361362194086,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006606240899031136,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44403121576391713,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.33108519344120535,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 222500
    },
    {
      "epoch": 42.72,
      "eval_arxiv_accuracy": 0.35440625,
      "eval_arxiv_bleu_score": 4.711477931346029,
      "eval_arxiv_bleu_score_sem": 0.13771927043094787,
      "eval_arxiv_emb_cos_sim": 0.7978407144546509,
      "eval_arxiv_emb_cos_sim_sem": 0.00543953548908875,
      "eval_arxiv_emb_top1_equal": 0.3203125,
      "eval_arxiv_emb_top1_equal_sem": 0.041403754790620424,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.32669997215271,
      "eval_arxiv_n_ngrams_match_1": 15.898,
      "eval_arxiv_n_ngrams_match_2": 3.302,
      "eval_arxiv_n_ngrams_match_3": 0.78,
      "eval_arxiv_num_pred_words": 41.862,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.846296357243762,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.37944273905750814,
      "eval_arxiv_runtime": 11.7566,
      "eval_arxiv_samples_per_second": 42.529,
      "eval_arxiv_steps_per_second": 0.085,
      "eval_arxiv_token_set_f1": 0.3715815188929824,
      "eval_arxiv_token_set_f1_sem": 0.004152283815241085,
      "eval_arxiv_token_set_precision": 0.3245473206361563,
      "eval_arxiv_token_set_recall": 0.4483371909096113,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 222500
    },
    {
      "epoch": 42.72,
      "eval_python_code_alpaca_accuracy": 0.1616875,
      "eval_python_code_alpaca_bleu_score": 4.64954031901907,
      "eval_python_code_alpaca_bleu_score_sem": 0.1478891727972229,
      "eval_python_code_alpaca_emb_cos_sim": 0.7676364183425903,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007114602714148514,
      "eval_python_code_alpaca_emb_top1_equal": 0.1484375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8402645587921143,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.83,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.95,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.996,
      "eval_python_code_alpaca_num_pred_words": 43.75,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.120294262433127,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33306373010805557,
      "eval_python_code_alpaca_runtime": 11.2868,
      "eval_python_code_alpaca_samples_per_second": 44.299,
      "eval_python_code_alpaca_steps_per_second": 0.089,
      "eval_python_code_alpaca_token_set_f1": 0.48378070866892925,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005431199677241341,
      "eval_python_code_alpaca_token_set_precision": 0.5361468376670413,
      "eval_python_code_alpaca_token_set_recall": 0.4645847218685237,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 222500
    },
    {
      "epoch": 42.72,
      "eval_wikibio_accuracy": 0.33321875,
      "eval_wikibio_bleu_score": 6.246396044227107,
      "eval_wikibio_bleu_score_sem": 0.23267457994467938,
      "eval_wikibio_emb_cos_sim": 0.7441648244857788,
      "eval_wikibio_emb_cos_sim_sem": 0.009467850927330027,
      "eval_wikibio_emb_top1_equal": 0.25,
      "eval_wikibio_emb_top1_equal_sem": 0.03842366440207048,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6119182109832764,
      "eval_wikibio_n_ngrams_match_1": 10.27,
      "eval_wikibio_n_ngrams_match_2": 3.496,
      "eval_wikibio_n_ngrams_match_3": 1.312,
      "eval_wikibio_num_pred_words": 36.118,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.03702955575884,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3639763860351517,
      "eval_wikibio_runtime": 12.2257,
      "eval_wikibio_samples_per_second": 40.897,
      "eval_wikibio_steps_per_second": 0.082,
      "eval_wikibio_token_set_f1": 0.32728343043332825,
      "eval_wikibio_token_set_f1_sem": 0.005672756190866395,
      "eval_wikibio_token_set_precision": 0.33440982020200843,
      "eval_wikibio_token_set_recall": 0.33643955165193756,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 222500
    },
    {
      "epoch": 42.72,
      "eval_nq_accuracy": 0.5366875,
      "eval_nq_bleu_score": 12.435255426330572,
      "eval_nq_bleu_score_sem": 0.5029275654245562,
      "eval_nq_emb_cos_sim": 0.8402013182640076,
      "eval_nq_emb_cos_sim_sem": 0.006868461167577845,
      "eval_nq_emb_top1_equal": 0.328125,
      "eval_nq_emb_top1_equal_sem": 0.041664103776406315,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1157002449035645,
      "eval_nq_n_ngrams_match_1": 23.73,
      "eval_nq_n_ngrams_match_2": 8.882,
      "eval_nq_n_ngrams_match_3": 4.206,
      "eval_nq_num_pred_words": 49.256,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.295392539148946,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45838952591589066,
      "eval_nq_runtime": 13.0358,
      "eval_nq_samples_per_second": 38.356,
      "eval_nq_steps_per_second": 0.077,
      "eval_nq_token_set_f1": 0.4714923727026808,
      "eval_nq_token_set_f1_sem": 0.004964865144185788,
      "eval_nq_token_set_precision": 0.43091017470444565,
      "eval_nq_token_set_recall": 0.5288284008955988,
      "eval_nq_true_num_tokens": 64.0,
      "step": 222500
    },
    {
      "epoch": 42.72,
      "learning_rate": 0.001,
      "loss": 2.4905,
      "step": 222504
    },
    {
      "epoch": 42.73,
      "learning_rate": 0.001,
      "loss": 2.4955,
      "step": 222516
    },
    {
      "epoch": 42.73,
      "learning_rate": 0.001,
      "loss": 2.4828,
      "step": 222528
    },
    {
      "epoch": 42.73,
      "learning_rate": 0.001,
      "loss": 2.494,
      "step": 222540
    },
    {
      "epoch": 42.73,
      "learning_rate": 0.001,
      "loss": 2.4946,
      "step": 222552
    },
    {
      "epoch": 42.74,
      "learning_rate": 0.001,
      "loss": 2.4911,
      "step": 222564
    },
    {
      "epoch": 42.74,
      "learning_rate": 0.001,
      "loss": 2.4903,
      "step": 222576
    },
    {
      "epoch": 42.74,
      "learning_rate": 0.001,
      "loss": 2.4933,
      "step": 222588
    },
    {
      "epoch": 42.74,
      "learning_rate": 0.001,
      "loss": 2.5013,
      "step": 222600
    },
    {
      "epoch": 42.74,
      "learning_rate": 0.001,
      "loss": 2.4892,
      "step": 222612
    },
    {
      "epoch": 42.75,
      "learning_rate": 0.001,
      "loss": 2.4926,
      "step": 222624
    },
    {
      "epoch": 42.75,
      "learning_rate": 0.001,
      "loss": 2.4936,
      "step": 222636
    },
    {
      "epoch": 42.75,
      "learning_rate": 0.001,
      "loss": 2.4972,
      "step": 222648
    },
    {
      "epoch": 42.75,
      "learning_rate": 0.001,
      "loss": 2.4952,
      "step": 222660
    },
    {
      "epoch": 42.76,
      "learning_rate": 0.001,
      "loss": 2.4941,
      "step": 222672
    },
    {
      "epoch": 42.76,
      "learning_rate": 0.001,
      "loss": 2.499,
      "step": 222684
    },
    {
      "epoch": 42.76,
      "learning_rate": 0.001,
      "loss": 2.4929,
      "step": 222696
    },
    {
      "epoch": 42.76,
      "learning_rate": 0.001,
      "loss": 2.4908,
      "step": 222708
    },
    {
      "epoch": 42.76,
      "learning_rate": 0.001,
      "loss": 2.4992,
      "step": 222720
    },
    {
      "epoch": 42.77,
      "learning_rate": 0.001,
      "loss": 2.4933,
      "step": 222732
    },
    {
      "epoch": 42.77,
      "learning_rate": 0.001,
      "loss": 2.499,
      "step": 222744
    },
    {
      "epoch": 42.77,
      "learning_rate": 0.001,
      "loss": 2.4932,
      "step": 222756
    },
    {
      "epoch": 42.77,
      "learning_rate": 0.001,
      "loss": 2.5032,
      "step": 222768
    },
    {
      "epoch": 42.78,
      "learning_rate": 0.001,
      "loss": 2.494,
      "step": 222780
    },
    {
      "epoch": 42.78,
      "learning_rate": 0.001,
      "loss": 2.5074,
      "step": 222792
    },
    {
      "epoch": 42.78,
      "learning_rate": 0.001,
      "loss": 2.4933,
      "step": 222804
    },
    {
      "epoch": 42.78,
      "learning_rate": 0.001,
      "loss": 2.4944,
      "step": 222816
    },
    {
      "epoch": 42.79,
      "learning_rate": 0.001,
      "loss": 2.4957,
      "step": 222828
    },
    {
      "epoch": 42.79,
      "learning_rate": 0.001,
      "loss": 2.4873,
      "step": 222840
    },
    {
      "epoch": 42.79,
      "learning_rate": 0.001,
      "loss": 2.4938,
      "step": 222852
    },
    {
      "epoch": 42.79,
      "learning_rate": 0.001,
      "loss": 2.5011,
      "step": 222864
    },
    {
      "epoch": 42.79,
      "learning_rate": 0.001,
      "loss": 2.4959,
      "step": 222876
    },
    {
      "epoch": 42.8,
      "learning_rate": 0.001,
      "loss": 2.5005,
      "step": 222888
    },
    {
      "epoch": 42.8,
      "learning_rate": 0.001,
      "loss": 2.5024,
      "step": 222900
    },
    {
      "epoch": 42.8,
      "learning_rate": 0.001,
      "loss": 2.4885,
      "step": 222912
    },
    {
      "epoch": 42.8,
      "learning_rate": 0.001,
      "loss": 2.503,
      "step": 222924
    },
    {
      "epoch": 42.81,
      "learning_rate": 0.001,
      "loss": 2.4825,
      "step": 222936
    },
    {
      "epoch": 42.81,
      "learning_rate": 0.001,
      "loss": 2.4974,
      "step": 222948
    },
    {
      "epoch": 42.81,
      "learning_rate": 0.001,
      "loss": 2.5021,
      "step": 222960
    },
    {
      "epoch": 42.81,
      "learning_rate": 0.001,
      "loss": 2.4969,
      "step": 222972
    },
    {
      "epoch": 42.82,
      "learning_rate": 0.001,
      "loss": 2.4894,
      "step": 222984
    },
    {
      "epoch": 42.82,
      "learning_rate": 0.001,
      "loss": 2.4979,
      "step": 222996
    },
    {
      "epoch": 42.82,
      "learning_rate": 0.001,
      "loss": 2.4892,
      "step": 223008
    },
    {
      "epoch": 42.82,
      "learning_rate": 0.001,
      "loss": 2.4888,
      "step": 223020
    },
    {
      "epoch": 42.82,
      "learning_rate": 0.001,
      "loss": 2.4944,
      "step": 223032
    },
    {
      "epoch": 42.83,
      "learning_rate": 0.001,
      "loss": 2.4994,
      "step": 223044
    },
    {
      "epoch": 42.83,
      "learning_rate": 0.001,
      "loss": 2.4959,
      "step": 223056
    },
    {
      "epoch": 42.83,
      "learning_rate": 0.001,
      "loss": 2.492,
      "step": 223068
    },
    {
      "epoch": 42.83,
      "learning_rate": 0.001,
      "loss": 2.4981,
      "step": 223080
    },
    {
      "epoch": 42.84,
      "learning_rate": 0.001,
      "loss": 2.5015,
      "step": 223092
    },
    {
      "epoch": 42.84,
      "learning_rate": 0.001,
      "loss": 2.4949,
      "step": 223104
    },
    {
      "epoch": 42.84,
      "learning_rate": 0.001,
      "loss": 2.4931,
      "step": 223116
    },
    {
      "epoch": 42.84,
      "eval_ag_news_accuracy": 0.329625,
      "eval_ag_news_bleu_score": 5.0845973097105235,
      "eval_ag_news_bleu_score_sem": 0.16688284002521692,
      "eval_ag_news_emb_cos_sim": 0.8280059099197388,
      "eval_ag_news_emb_cos_sim_sem": 0.006455267952756216,
      "eval_ag_news_emb_top1_equal": 0.25,
      "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4598050117492676,
      "eval_ag_news_n_ngrams_match_1": 14.43,
      "eval_ag_news_n_ngrams_match_2": 3.336,
      "eval_ag_news_n_ngrams_match_3": 0.954,
      "eval_ag_news_num_pred_words": 46.694,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.81077318288156,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.36014041634686855,
      "eval_ag_news_runtime": 13.8194,
      "eval_ag_news_samples_per_second": 36.181,
      "eval_ag_news_steps_per_second": 0.072,
      "eval_ag_news_token_set_f1": 0.3611360384805379,
      "eval_ag_news_token_set_f1_sem": 0.004262939844210537,
      "eval_ag_news_token_set_precision": 0.34730017644891503,
      "eval_ag_news_token_set_recall": 0.3891751187326431,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 223125
    },
    {
      "epoch": 42.84,
      "eval_anthropic_toxic_prompts_accuracy": 0.115125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.10029998843007,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11299300448272312,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6762984991073608,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009655929263176368,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.1868436336517334,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.34,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.934,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.708,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.294,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.211885129399445,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21753376397019134,
      "eval_anthropic_toxic_prompts_runtime": 14.0482,
      "eval_anthropic_toxic_prompts_samples_per_second": 35.592,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.071,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35845704877558726,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006505158292638676,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4431830318466098,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32807588811699495,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 223125
    },
    {
      "epoch": 42.84,
      "eval_arxiv_accuracy": 0.35278125,
      "eval_arxiv_bleu_score": 4.479037340466978,
      "eval_arxiv_bleu_score_sem": 0.12893957314213272,
      "eval_arxiv_emb_cos_sim": 0.7784118056297302,
      "eval_arxiv_emb_cos_sim_sem": 0.007566188415241029,
      "eval_arxiv_emb_top1_equal": 0.2890625,
      "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3270487785339355,
      "eval_arxiv_n_ngrams_match_1": 15.732,
      "eval_arxiv_n_ngrams_match_2": 3.084,
      "eval_arxiv_n_ngrams_match_3": 0.694,
      "eval_arxiv_num_pred_words": 41.168,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.856011017275875,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3737216125632223,
      "eval_arxiv_runtime": 13.3723,
      "eval_arxiv_samples_per_second": 37.391,
      "eval_arxiv_steps_per_second": 0.075,
      "eval_arxiv_token_set_f1": 0.3697991330743102,
      "eval_arxiv_token_set_f1_sem": 0.004085243099712074,
      "eval_arxiv_token_set_precision": 0.322918915249449,
      "eval_arxiv_token_set_recall": 0.44783622908033627,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 223125
    },
    {
      "epoch": 42.84,
      "eval_python_code_alpaca_accuracy": 0.1625625,
      "eval_python_code_alpaca_bleu_score": 4.564035888879271,
      "eval_python_code_alpaca_bleu_score_sem": 0.1499901868215338,
      "eval_python_code_alpaca_emb_cos_sim": 0.7590149641036987,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.00816076717807437,
      "eval_python_code_alpaca_emb_top1_equal": 0.1640625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8386123180389404,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.664,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.886,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.974,
      "eval_python_code_alpaca_num_pred_words": 43.176,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.092030770023108,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33195857535070894,
      "eval_python_code_alpaca_runtime": 11.2848,
      "eval_python_code_alpaca_samples_per_second": 44.308,
      "eval_python_code_alpaca_steps_per_second": 0.089,
      "eval_python_code_alpaca_token_set_f1": 0.4719959598813166,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005649899719237582,
      "eval_python_code_alpaca_token_set_precision": 0.5290801912013479,
      "eval_python_code_alpaca_token_set_recall": 0.45008603053897156,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 223125
    },
    {
      "epoch": 42.84,
      "eval_wikibio_accuracy": 0.33184375,
      "eval_wikibio_bleu_score": 6.034415948747848,
      "eval_wikibio_bleu_score_sem": 0.21408917625230753,
      "eval_wikibio_emb_cos_sim": 0.7679538726806641,
      "eval_wikibio_emb_cos_sim_sem": 0.007276560540630229,
      "eval_wikibio_emb_top1_equal": 0.2265625,
      "eval_wikibio_emb_top1_equal_sem": 0.03714537682851538,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.604398012161255,
      "eval_wikibio_n_ngrams_match_1": 10.258,
      "eval_wikibio_n_ngrams_match_2": 3.46,
      "eval_wikibio_n_ngrams_match_3": 1.284,
      "eval_wikibio_num_pred_words": 36.104,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 36.7595483941818,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3629422883793961,
      "eval_wikibio_runtime": 13.8163,
      "eval_wikibio_samples_per_second": 36.189,
      "eval_wikibio_steps_per_second": 0.072,
      "eval_wikibio_token_set_f1": 0.32519809615490564,
      "eval_wikibio_token_set_f1_sem": 0.0053623959011498915,
      "eval_wikibio_token_set_precision": 0.3337896443538468,
      "eval_wikibio_token_set_recall": 0.33441333025866604,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 223125
    },
    {
      "epoch": 42.84,
      "eval_nq_accuracy": 0.5374375,
      "eval_nq_bleu_score": 12.29211278980963,
      "eval_nq_bleu_score_sem": 0.4991774962922198,
      "eval_nq_emb_cos_sim": 0.8381524682044983,
      "eval_nq_emb_cos_sim_sem": 0.007257072233005496,
      "eval_nq_emb_top1_equal": 0.28125,
      "eval_nq_emb_top1_equal_sem": 0.039896367485272234,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.115370035171509,
      "eval_nq_n_ngrams_match_1": 23.59,
      "eval_nq_n_ngrams_match_2": 8.888,
      "eval_nq_n_ngrams_match_3": 4.13,
      "eval_nq_num_pred_words": 49.33,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.292653772009963,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4579031525484577,
      "eval_nq_runtime": 12.2839,
      "eval_nq_samples_per_second": 40.704,
      "eval_nq_steps_per_second": 0.081,
      "eval_nq_token_set_f1": 0.4719038445739792,
      "eval_nq_token_set_f1_sem": 0.004926442645843498,
      "eval_nq_token_set_precision": 0.43049878518161966,
      "eval_nq_token_set_recall": 0.5304848033198145,
      "eval_nq_true_num_tokens": 64.0,
      "step": 223125
    },
    {
      "epoch": 42.84,
      "learning_rate": 0.001,
      "loss": 2.5022,
      "step": 223128
    },
    {
      "epoch": 42.85,
      "learning_rate": 0.001,
      "loss": 2.5021,
      "step": 223140
    },
    {
      "epoch": 42.85,
      "learning_rate": 0.001,
      "loss": 2.4945,
      "step": 223152
    },
    {
      "epoch": 42.85,
      "learning_rate": 0.001,
      "loss": 2.4923,
      "step": 223164
    },
    {
      "epoch": 42.85,
      "learning_rate": 0.001,
      "loss": 2.4947,
      "step": 223176
    },
    {
      "epoch": 42.85,
      "learning_rate": 0.001,
      "loss": 2.5024,
      "step": 223188
    },
    {
      "epoch": 42.86,
      "learning_rate": 0.001,
      "loss": 2.5001,
      "step": 223200
    },
    {
      "epoch": 42.86,
      "learning_rate": 0.001,
      "loss": 2.4959,
      "step": 223212
    },
    {
      "epoch": 42.86,
      "learning_rate": 0.001,
      "loss": 2.4997,
      "step": 223224
    },
    {
      "epoch": 42.86,
      "learning_rate": 0.001,
      "loss": 2.5043,
      "step": 223236
    },
    {
      "epoch": 42.87,
      "learning_rate": 0.001,
      "loss": 2.5008,
      "step": 223248
    },
    {
      "epoch": 42.87,
      "learning_rate": 0.001,
      "loss": 2.4956,
      "step": 223260
    },
    {
      "epoch": 42.87,
      "learning_rate": 0.001,
      "loss": 2.4939,
      "step": 223272
    },
    {
      "epoch": 42.87,
      "learning_rate": 0.001,
      "loss": 2.4914,
      "step": 223284
    },
    {
      "epoch": 42.88,
      "learning_rate": 0.001,
      "loss": 2.4862,
      "step": 223296
    },
    {
      "epoch": 42.88,
      "learning_rate": 0.001,
      "loss": 2.495,
      "step": 223308
    },
    {
      "epoch": 42.88,
      "learning_rate": 0.001,
      "loss": 2.5024,
      "step": 223320
    },
    {
      "epoch": 42.88,
      "learning_rate": 0.001,
      "loss": 2.4967,
      "step": 223332
    },
    {
      "epoch": 42.88,
      "learning_rate": 0.001,
      "loss": 2.5049,
      "step": 223344
    },
    {
      "epoch": 42.89,
      "learning_rate": 0.001,
      "loss": 2.5024,
      "step": 223356
    },
    {
      "epoch": 42.89,
      "learning_rate": 0.001,
      "loss": 2.5034,
      "step": 223368
    },
    {
      "epoch": 42.89,
      "learning_rate": 0.001,
      "loss": 2.4931,
      "step": 223380
    },
    {
      "epoch": 42.89,
      "learning_rate": 0.001,
      "loss": 2.4871,
      "step": 223392
    },
    {
      "epoch": 42.9,
      "learning_rate": 0.001,
      "loss": 2.4927,
      "step": 223404
    },
    {
      "epoch": 42.9,
      "learning_rate": 0.001,
      "loss": 2.4896,
      "step": 223416
    },
    {
      "epoch": 42.9,
      "learning_rate": 0.001,
      "loss": 2.4987,
      "step": 223428
    },
    {
      "epoch": 42.9,
      "learning_rate": 0.001,
      "loss": 2.5005,
      "step": 223440
    },
    {
      "epoch": 42.91,
      "learning_rate": 0.001,
      "loss": 2.5011,
      "step": 223452
    },
    {
      "epoch": 42.91,
      "learning_rate": 0.001,
      "loss": 2.5093,
      "step": 223464
    },
    {
      "epoch": 42.91,
      "learning_rate": 0.001,
      "loss": 2.503,
      "step": 223476
    },
    {
      "epoch": 42.91,
      "learning_rate": 0.001,
      "loss": 2.4922,
      "step": 223488
    },
    {
      "epoch": 42.91,
      "learning_rate": 0.001,
      "loss": 2.4977,
      "step": 223500
    },
    {
      "epoch": 42.92,
      "learning_rate": 0.001,
      "loss": 2.4908,
      "step": 223512
    },
    {
      "epoch": 42.92,
      "learning_rate": 0.001,
      "loss": 2.5022,
      "step": 223524
    },
    {
      "epoch": 42.92,
      "learning_rate": 0.001,
      "loss": 2.494,
      "step": 223536
    },
    {
      "epoch": 42.92,
      "learning_rate": 0.001,
      "loss": 2.5054,
      "step": 223548
    },
    {
      "epoch": 42.93,
      "learning_rate": 0.001,
      "loss": 2.491,
      "step": 223560
    },
    {
      "epoch": 42.93,
      "learning_rate": 0.001,
      "loss": 2.4942,
      "step": 223572
    },
    {
      "epoch": 42.93,
      "learning_rate": 0.001,
      "loss": 2.4974,
      "step": 223584
    },
    {
      "epoch": 42.93,
      "learning_rate": 0.001,
      "loss": 2.4978,
      "step": 223596
    },
    {
      "epoch": 42.94,
      "learning_rate": 0.001,
      "loss": 2.4857,
      "step": 223608
    },
    {
      "epoch": 42.94,
      "learning_rate": 0.001,
      "loss": 2.4962,
      "step": 223620
    },
    {
      "epoch": 42.94,
      "learning_rate": 0.001,
      "loss": 2.5055,
      "step": 223632
    },
    {
      "epoch": 42.94,
      "learning_rate": 0.001,
      "loss": 2.4945,
      "step": 223644
    },
    {
      "epoch": 42.94,
      "learning_rate": 0.001,
      "loss": 2.5004,
      "step": 223656
    },
    {
      "epoch": 42.95,
      "learning_rate": 0.001,
      "loss": 2.5036,
      "step": 223668
    },
    {
      "epoch": 42.95,
      "learning_rate": 0.001,
      "loss": 2.5014,
      "step": 223680
    },
    {
      "epoch": 42.95,
      "learning_rate": 0.001,
      "loss": 2.501,
      "step": 223692
    },
    {
      "epoch": 42.95,
      "learning_rate": 0.001,
      "loss": 2.4881,
      "step": 223704
    },
    {
      "epoch": 42.96,
      "learning_rate": 0.001,
      "loss": 2.5019,
      "step": 223716
    },
    {
      "epoch": 42.96,
      "learning_rate": 0.001,
      "loss": 2.4905,
      "step": 223728
    },
    {
      "epoch": 42.96,
      "learning_rate": 0.001,
      "loss": 2.5061,
      "step": 223740
    },
    {
      "epoch": 42.96,
      "eval_ag_news_accuracy": 0.32746875,
      "eval_ag_news_bleu_score": 4.947097241774946,
      "eval_ag_news_bleu_score_sem": 0.1586502852056849,
      "eval_ag_news_emb_cos_sim": 0.8249166011810303,
      "eval_ag_news_emb_cos_sim_sem": 0.005872600942983266,
      "eval_ag_news_emb_top1_equal": 0.2734375,
      "eval_ag_news_emb_top1_equal_sem": 0.03955156411760461,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4830172061920166,
      "eval_ag_news_n_ngrams_match_1": 14.486,
      "eval_ag_news_n_ngrams_match_2": 3.228,
      "eval_ag_news_n_ngrams_match_3": 0.948,
      "eval_ag_news_num_pred_words": 47.162,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.557807648089735,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3581210894144573,
      "eval_ag_news_runtime": 12.0003,
      "eval_ag_news_samples_per_second": 41.666,
      "eval_ag_news_steps_per_second": 0.083,
      "eval_ag_news_token_set_f1": 0.3576250521731259,
      "eval_ag_news_token_set_f1_sem": 0.004230513746466198,
      "eval_ag_news_token_set_precision": 0.34621165517510566,
      "eval_ag_news_token_set_recall": 0.38173293351991555,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 223750
    },
    {
      "epoch": 42.96,
      "eval_anthropic_toxic_prompts_accuracy": 0.1156875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1715246929844176,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11695843213029491,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6847228407859802,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.0092048341777423,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.1856446266174316,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.496,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.016,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.76,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.972,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.182872305581288,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2197862691566087,
      "eval_anthropic_toxic_prompts_runtime": 11.1205,
      "eval_anthropic_toxic_prompts_samples_per_second": 44.962,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.09,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36024315246900496,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006542771600281045,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4580979423416351,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32226907155964357,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 223750
    },
    {
      "epoch": 42.96,
      "eval_arxiv_accuracy": 0.352375,
      "eval_arxiv_bleu_score": 4.4939667133590175,
      "eval_arxiv_bleu_score_sem": 0.1291008210131496,
      "eval_arxiv_emb_cos_sim": 0.7866687774658203,
      "eval_arxiv_emb_cos_sim_sem": 0.006545410842097907,
      "eval_arxiv_emb_top1_equal": 0.234375,
      "eval_arxiv_emb_top1_equal_sem": 0.03758909358128201,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.330798864364624,
      "eval_arxiv_n_ngrams_match_1": 15.634,
      "eval_arxiv_n_ngrams_match_2": 3.092,
      "eval_arxiv_n_ngrams_match_3": 0.698,
      "eval_arxiv_num_pred_words": 41.036,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.960669566109463,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.372381418176841,
      "eval_arxiv_runtime": 12.9733,
      "eval_arxiv_samples_per_second": 38.541,
      "eval_arxiv_steps_per_second": 0.077,
      "eval_arxiv_token_set_f1": 0.3645117870270614,
      "eval_arxiv_token_set_f1_sem": 0.004115987123984987,
      "eval_arxiv_token_set_precision": 0.3182601159337704,
      "eval_arxiv_token_set_recall": 0.4422560015471147,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 223750
    },
    {
      "epoch": 42.96,
      "eval_python_code_alpaca_accuracy": 0.1630625,
      "eval_python_code_alpaca_bleu_score": 4.851000368179831,
      "eval_python_code_alpaca_bleu_score_sem": 0.15934825409434514,
      "eval_python_code_alpaca_emb_cos_sim": 0.7696998715400696,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007736320096155824,
      "eval_python_code_alpaca_emb_top1_equal": 0.1640625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.846017599105835,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.116,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.044,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.066,
      "eval_python_code_alpaca_num_pred_words": 43.738,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.21907186884294,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.345137885265147,
      "eval_python_code_alpaca_runtime": 10.997,
      "eval_python_code_alpaca_samples_per_second": 45.467,
      "eval_python_code_alpaca_steps_per_second": 0.091,
      "eval_python_code_alpaca_token_set_f1": 0.4828662986513079,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005473060353367226,
      "eval_python_code_alpaca_token_set_precision": 0.5516071864190307,
      "eval_python_code_alpaca_token_set_recall": 0.44712544174120716,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 223750
    },
    {
      "epoch": 42.96,
      "eval_wikibio_accuracy": 0.328625,
      "eval_wikibio_bleu_score": 6.278204611582496,
      "eval_wikibio_bleu_score_sem": 0.22794619497576218,
      "eval_wikibio_emb_cos_sim": 0.7612966895103455,
      "eval_wikibio_emb_cos_sim_sem": 0.00870326099722685,
      "eval_wikibio_emb_top1_equal": 0.265625,
      "eval_wikibio_emb_top1_equal_sem": 0.03919146934646163,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.65120267868042,
      "eval_wikibio_n_ngrams_match_1": 10.618,
      "eval_wikibio_n_ngrams_match_2": 3.63,
      "eval_wikibio_n_ngrams_match_3": 1.366,
      "eval_wikibio_num_pred_words": 37.756,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 38.52096654635519,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.37142049220597984,
      "eval_wikibio_runtime": 11.4259,
      "eval_wikibio_samples_per_second": 43.76,
      "eval_wikibio_steps_per_second": 0.088,
      "eval_wikibio_token_set_f1": 0.329697247099366,
      "eval_wikibio_token_set_f1_sem": 0.004978581454158171,
      "eval_wikibio_token_set_precision": 0.34606009155171524,
      "eval_wikibio_token_set_recall": 0.32908385631534526,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 223750
    },
    {
      "epoch": 42.96,
      "eval_nq_accuracy": 0.537,
      "eval_nq_bleu_score": 11.926327075427237,
      "eval_nq_bleu_score_sem": 0.48943310267388396,
      "eval_nq_emb_cos_sim": 0.8354673385620117,
      "eval_nq_emb_cos_sim_sem": 0.007611759695193432,
      "eval_nq_emb_top1_equal": 0.234375,
      "eval_nq_emb_top1_equal_sem": 0.03758909358128201,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.117015838623047,
      "eval_nq_n_ngrams_match_1": 23.436,
      "eval_nq_n_ngrams_match_2": 8.714,
      "eval_nq_n_ngrams_match_3": 3.988,
      "eval_nq_num_pred_words": 49.166,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.306313087401342,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.454532471944197,
      "eval_nq_runtime": 12.5535,
      "eval_nq_samples_per_second": 39.83,
      "eval_nq_steps_per_second": 0.08,
      "eval_nq_token_set_f1": 0.46579723138269247,
      "eval_nq_token_set_f1_sem": 0.0050853466978272865,
      "eval_nq_token_set_precision": 0.4265639900605402,
      "eval_nq_token_set_recall": 0.5221931161974545,
      "eval_nq_true_num_tokens": 64.0,
      "step": 223750
    },
    {
      "epoch": 42.96,
      "learning_rate": 0.001,
      "loss": 2.5118,
      "step": 223752
    },
    {
      "epoch": 42.97,
      "learning_rate": 0.001,
      "loss": 2.5042,
      "step": 223764
    },
    {
      "epoch": 42.97,
      "learning_rate": 0.001,
      "loss": 2.4995,
      "step": 223776
    },
    {
      "epoch": 42.97,
      "learning_rate": 0.001,
      "loss": 2.5067,
      "step": 223788
    },
    {
      "epoch": 42.97,
      "learning_rate": 0.001,
      "loss": 2.4888,
      "step": 223800
    },
    {
      "epoch": 42.97,
      "learning_rate": 0.001,
      "loss": 2.506,
      "step": 223812
    },
    {
      "epoch": 42.98,
      "learning_rate": 0.001,
      "loss": 2.4975,
      "step": 223824
    },
    {
      "epoch": 42.98,
      "learning_rate": 0.001,
      "loss": 2.4995,
      "step": 223836
    },
    {
      "epoch": 42.98,
      "learning_rate": 0.001,
      "loss": 2.4892,
      "step": 223848
    },
    {
      "epoch": 42.98,
      "learning_rate": 0.001,
      "loss": 2.502,
      "step": 223860
    },
    {
      "epoch": 42.99,
      "learning_rate": 0.001,
      "loss": 2.4868,
      "step": 223872
    },
    {
      "epoch": 42.99,
      "learning_rate": 0.001,
      "loss": 2.4865,
      "step": 223884
    },
    {
      "epoch": 42.99,
      "learning_rate": 0.001,
      "loss": 2.4866,
      "step": 223896
    },
    {
      "epoch": 42.99,
      "learning_rate": 0.001,
      "loss": 2.5021,
      "step": 223908
    },
    {
      "epoch": 43.0,
      "learning_rate": 0.001,
      "loss": 2.495,
      "step": 223920
    },
    {
      "epoch": 43.0,
      "learning_rate": 0.001,
      "loss": 2.496,
      "step": 223932
    },
    {
      "epoch": 43.0,
      "learning_rate": 0.001,
      "loss": 2.4994,
      "step": 223944
    },
    {
      "epoch": 43.0,
      "learning_rate": 0.001,
      "loss": 2.4875,
      "step": 223956
    },
    {
      "epoch": 43.0,
      "learning_rate": 0.001,
      "loss": 2.4875,
      "step": 223968
    },
    {
      "epoch": 43.01,
      "learning_rate": 0.001,
      "loss": 2.4927,
      "step": 223980
    },
    {
      "epoch": 43.01,
      "learning_rate": 0.001,
      "loss": 2.486,
      "step": 223992
    },
    {
      "epoch": 43.01,
      "learning_rate": 0.001,
      "loss": 2.4894,
      "step": 224004
    },
    {
      "epoch": 43.01,
      "learning_rate": 0.001,
      "loss": 2.4851,
      "step": 224016
    },
    {
      "epoch": 43.02,
      "learning_rate": 0.001,
      "loss": 2.488,
      "step": 224028
    },
    {
      "epoch": 43.02,
      "learning_rate": 0.001,
      "loss": 2.4863,
      "step": 224040
    },
    {
      "epoch": 43.02,
      "learning_rate": 0.001,
      "loss": 2.4851,
      "step": 224052
    },
    {
      "epoch": 43.02,
      "learning_rate": 0.001,
      "loss": 2.4823,
      "step": 224064
    },
    {
      "epoch": 43.03,
      "learning_rate": 0.001,
      "loss": 2.4769,
      "step": 224076
    },
    {
      "epoch": 43.03,
      "learning_rate": 0.001,
      "loss": 2.4853,
      "step": 224088
    },
    {
      "epoch": 43.03,
      "learning_rate": 0.001,
      "loss": 2.4896,
      "step": 224100
    },
    {
      "epoch": 43.03,
      "learning_rate": 0.001,
      "loss": 2.486,
      "step": 224112
    },
    {
      "epoch": 43.03,
      "learning_rate": 0.001,
      "loss": 2.4921,
      "step": 224124
    },
    {
      "epoch": 43.04,
      "learning_rate": 0.001,
      "loss": 2.4873,
      "step": 224136
    },
    {
      "epoch": 43.04,
      "learning_rate": 0.001,
      "loss": 2.4853,
      "step": 224148
    },
    {
      "epoch": 43.04,
      "learning_rate": 0.001,
      "loss": 2.4892,
      "step": 224160
    },
    {
      "epoch": 43.04,
      "learning_rate": 0.001,
      "loss": 2.4863,
      "step": 224172
    },
    {
      "epoch": 43.05,
      "learning_rate": 0.001,
      "loss": 2.4672,
      "step": 224184
    },
    {
      "epoch": 43.05,
      "learning_rate": 0.001,
      "loss": 2.4839,
      "step": 224196
    },
    {
      "epoch": 43.05,
      "learning_rate": 0.001,
      "loss": 2.4793,
      "step": 224208
    },
    {
      "epoch": 43.05,
      "learning_rate": 0.001,
      "loss": 2.4791,
      "step": 224220
    },
    {
      "epoch": 43.06,
      "learning_rate": 0.001,
      "loss": 2.4855,
      "step": 224232
    },
    {
      "epoch": 43.06,
      "learning_rate": 0.001,
      "loss": 2.473,
      "step": 224244
    },
    {
      "epoch": 43.06,
      "learning_rate": 0.001,
      "loss": 2.4867,
      "step": 224256
    },
    {
      "epoch": 43.06,
      "learning_rate": 0.001,
      "loss": 2.4875,
      "step": 224268
    },
    {
      "epoch": 43.06,
      "learning_rate": 0.001,
      "loss": 2.4837,
      "step": 224280
    },
    {
      "epoch": 43.07,
      "learning_rate": 0.001,
      "loss": 2.482,
      "step": 224292
    },
    {
      "epoch": 43.07,
      "learning_rate": 0.001,
      "loss": 2.4925,
      "step": 224304
    },
    {
      "epoch": 43.07,
      "learning_rate": 0.001,
      "loss": 2.4876,
      "step": 224316
    },
    {
      "epoch": 43.07,
      "learning_rate": 0.001,
      "loss": 2.4741,
      "step": 224328
    },
    {
      "epoch": 43.08,
      "learning_rate": 0.001,
      "loss": 2.487,
      "step": 224340
    },
    {
      "epoch": 43.08,
      "learning_rate": 0.001,
      "loss": 2.4893,
      "step": 224352
    },
    {
      "epoch": 43.08,
      "learning_rate": 0.001,
      "loss": 2.4827,
      "step": 224364
    },
    {
      "epoch": 43.08,
      "eval_ag_news_accuracy": 0.32909375,
      "eval_ag_news_bleu_score": 5.01400026409122,
      "eval_ag_news_bleu_score_sem": 0.15273230151708805,
      "eval_ag_news_emb_cos_sim": 0.8270438313484192,
      "eval_ag_news_emb_cos_sim_sem": 0.006000475798454334,
      "eval_ag_news_emb_top1_equal": 0.2890625,
      "eval_ag_news_emb_top1_equal_sem": 0.04022626667363519,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4638290405273438,
      "eval_ag_news_n_ngrams_match_1": 14.546,
      "eval_ag_news_n_ngrams_match_2": 3.248,
      "eval_ag_news_n_ngrams_match_3": 0.952,
      "eval_ag_news_num_pred_words": 46.74,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.939038548301856,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.36438620367713326,
      "eval_ag_news_runtime": 14.4062,
      "eval_ag_news_samples_per_second": 34.707,
      "eval_ag_news_steps_per_second": 0.069,
      "eval_ag_news_token_set_f1": 0.36149512209529017,
      "eval_ag_news_token_set_f1_sem": 0.004451765491663545,
      "eval_ag_news_token_set_precision": 0.35044511747507473,
      "eval_ag_news_token_set_recall": 0.3856160775602759,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 224375
    },
    {
      "epoch": 43.08,
      "eval_anthropic_toxic_prompts_accuracy": 0.11690625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2510326416432935,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12059571502026227,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6830965280532837,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008142744790524407,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.1818060874938965,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.394,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.004,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.752,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.2,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.090223336224096,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21946284326705479,
      "eval_anthropic_toxic_prompts_runtime": 13.856,
      "eval_anthropic_toxic_prompts_samples_per_second": 36.085,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.072,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36363135023222964,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006391148538094834,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4533600216063258,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.328446911816258,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 224375
    },
    {
      "epoch": 43.08,
      "eval_arxiv_accuracy": 0.353625,
      "eval_arxiv_bleu_score": 4.445070120704116,
      "eval_arxiv_bleu_score_sem": 0.1327820442948291,
      "eval_arxiv_emb_cos_sim": 0.7907286882400513,
      "eval_arxiv_emb_cos_sim_sem": 0.0059995018111270275,
      "eval_arxiv_emb_top1_equal": 0.2421875,
      "eval_arxiv_emb_top1_equal_sem": 0.038014990119662626,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3163387775421143,
      "eval_arxiv_n_ngrams_match_1": 15.802,
      "eval_arxiv_n_ngrams_match_2": 3.086,
      "eval_arxiv_n_ngrams_match_3": 0.648,
      "eval_arxiv_num_pred_words": 40.676,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.559265023092298,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.37799241386009425,
      "eval_arxiv_runtime": 14.3861,
      "eval_arxiv_samples_per_second": 34.756,
      "eval_arxiv_steps_per_second": 0.07,
      "eval_arxiv_token_set_f1": 0.3725749817647709,
      "eval_arxiv_token_set_f1_sem": 0.0043395176400665,
      "eval_arxiv_token_set_precision": 0.3257702584999924,
      "eval_arxiv_token_set_recall": 0.4500097548103646,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 224375
    },
    {
      "epoch": 43.08,
      "eval_python_code_alpaca_accuracy": 0.16240625,
      "eval_python_code_alpaca_bleu_score": 4.999721520727722,
      "eval_python_code_alpaca_bleu_score_sem": 0.1708161318990044,
      "eval_python_code_alpaca_emb_cos_sim": 0.7690632939338684,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.006823445699190179,
      "eval_python_code_alpaca_emb_top1_equal": 0.1953125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.035178457165496856,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.817096471786499,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.884,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.048,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.132,
      "eval_python_code_alpaca_num_pred_words": 42.718,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 16.72820926290417,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3441966316221813,
      "eval_python_code_alpaca_runtime": 13.8038,
      "eval_python_code_alpaca_samples_per_second": 36.222,
      "eval_python_code_alpaca_steps_per_second": 0.072,
      "eval_python_code_alpaca_token_set_f1": 0.47988166748242034,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005569815825814766,
      "eval_python_code_alpaca_token_set_precision": 0.5432677588892975,
      "eval_python_code_alpaca_token_set_recall": 0.45137410805920786,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 224375
    },
    {
      "epoch": 43.08,
      "eval_wikibio_accuracy": 0.33221875,
      "eval_wikibio_bleu_score": 6.163750457787334,
      "eval_wikibio_bleu_score_sem": 0.21996558162262686,
      "eval_wikibio_emb_cos_sim": 0.7439298629760742,
      "eval_wikibio_emb_cos_sim_sem": 0.009441347145061755,
      "eval_wikibio_emb_top1_equal": 0.1953125,
      "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.613781213760376,
      "eval_wikibio_n_ngrams_match_1": 10.222,
      "eval_wikibio_n_ngrams_match_2": 3.54,
      "eval_wikibio_n_ngrams_match_3": 1.316,
      "eval_wikibio_num_pred_words": 36.32,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.10609395828799,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.35885054629553403,
      "eval_wikibio_runtime": 14.7465,
      "eval_wikibio_samples_per_second": 33.906,
      "eval_wikibio_steps_per_second": 0.068,
      "eval_wikibio_token_set_f1": 0.32249625868086373,
      "eval_wikibio_token_set_f1_sem": 0.0055730423306869295,
      "eval_wikibio_token_set_precision": 0.33306748991328516,
      "eval_wikibio_token_set_recall": 0.3294017772412334,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 224375
    },
    {
      "epoch": 43.08,
      "eval_nq_accuracy": 0.53696875,
      "eval_nq_bleu_score": 12.32116511533254,
      "eval_nq_bleu_score_sem": 0.5030054763386901,
      "eval_nq_emb_cos_sim": 0.8386412858963013,
      "eval_nq_emb_cos_sim_sem": 0.007426268780718956,
      "eval_nq_emb_top1_equal": 0.296875,
      "eval_nq_emb_top1_equal_sem": 0.04054163310179599,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1135826110839844,
      "eval_nq_n_ngrams_match_1": 23.592,
      "eval_nq_n_ngrams_match_2": 8.828,
      "eval_nq_n_ngrams_match_3": 4.15,
      "eval_nq_num_pred_words": 48.634,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.277844522056236,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.46018523243806136,
      "eval_nq_runtime": 14.6245,
      "eval_nq_samples_per_second": 34.189,
      "eval_nq_steps_per_second": 0.068,
      "eval_nq_token_set_f1": 0.4715927619269125,
      "eval_nq_token_set_f1_sem": 0.005178589142448564,
      "eval_nq_token_set_precision": 0.4307494796876743,
      "eval_nq_token_set_recall": 0.5300929339626383,
      "eval_nq_true_num_tokens": 64.0,
      "step": 224375
    },
    {
      "epoch": 43.08,
      "learning_rate": 0.001,
      "loss": 2.4869,
      "step": 224376
    },
    {
      "epoch": 43.09,
      "learning_rate": 0.001,
      "loss": 2.4797,
      "step": 224388
    },
    {
      "epoch": 43.09,
      "learning_rate": 0.001,
      "loss": 2.4936,
      "step": 224400
    },
    {
      "epoch": 43.09,
      "learning_rate": 0.001,
      "loss": 2.4816,
      "step": 224412
    },
    {
      "epoch": 43.09,
      "learning_rate": 0.001,
      "loss": 2.4812,
      "step": 224424
    },
    {
      "epoch": 43.09,
      "learning_rate": 0.001,
      "loss": 2.4953,
      "step": 224436
    },
    {
      "epoch": 43.1,
      "learning_rate": 0.001,
      "loss": 2.4932,
      "step": 224448
    },
    {
      "epoch": 43.1,
      "learning_rate": 0.001,
      "loss": 2.4783,
      "step": 224460
    },
    {
      "epoch": 43.1,
      "learning_rate": 0.001,
      "loss": 2.4944,
      "step": 224472
    },
    {
      "epoch": 43.1,
      "learning_rate": 0.001,
      "loss": 2.4851,
      "step": 224484
    },
    {
      "epoch": 43.11,
      "learning_rate": 0.001,
      "loss": 2.485,
      "step": 224496
    },
    {
      "epoch": 43.11,
      "learning_rate": 0.001,
      "loss": 2.4843,
      "step": 224508
    },
    {
      "epoch": 43.11,
      "learning_rate": 0.001,
      "loss": 2.4743,
      "step": 224520
    },
    {
      "epoch": 43.11,
      "learning_rate": 0.001,
      "loss": 2.4843,
      "step": 224532
    },
    {
      "epoch": 43.12,
      "learning_rate": 0.001,
      "loss": 2.4974,
      "step": 224544
    },
    {
      "epoch": 43.12,
      "learning_rate": 0.001,
      "loss": 2.4938,
      "step": 224556
    },
    {
      "epoch": 43.12,
      "learning_rate": 0.001,
      "loss": 2.4849,
      "step": 224568
    },
    {
      "epoch": 43.12,
      "learning_rate": 0.001,
      "loss": 2.4895,
      "step": 224580
    },
    {
      "epoch": 43.12,
      "learning_rate": 0.001,
      "loss": 2.492,
      "step": 224592
    },
    {
      "epoch": 43.13,
      "learning_rate": 0.001,
      "loss": 2.4811,
      "step": 224604
    },
    {
      "epoch": 43.13,
      "learning_rate": 0.001,
      "loss": 2.4854,
      "step": 224616
    },
    {
      "epoch": 43.13,
      "learning_rate": 0.001,
      "loss": 2.4885,
      "step": 224628
    },
    {
      "epoch": 43.13,
      "learning_rate": 0.001,
      "loss": 2.4839,
      "step": 224640
    },
    {
      "epoch": 43.14,
      "learning_rate": 0.001,
      "loss": 2.4865,
      "step": 224652
    },
    {
      "epoch": 43.14,
      "learning_rate": 0.001,
      "loss": 2.4858,
      "step": 224664
    },
    {
      "epoch": 43.14,
      "learning_rate": 0.001,
      "loss": 2.4787,
      "step": 224676
    },
    {
      "epoch": 43.14,
      "learning_rate": 0.001,
      "loss": 2.4872,
      "step": 224688
    },
    {
      "epoch": 43.15,
      "learning_rate": 0.001,
      "loss": 2.4821,
      "step": 224700
    },
    {
      "epoch": 43.15,
      "learning_rate": 0.001,
      "loss": 2.4899,
      "step": 224712
    },
    {
      "epoch": 43.15,
      "learning_rate": 0.001,
      "loss": 2.4864,
      "step": 224724
    },
    {
      "epoch": 43.15,
      "learning_rate": 0.001,
      "loss": 2.484,
      "step": 224736
    },
    {
      "epoch": 43.15,
      "learning_rate": 0.001,
      "loss": 2.4917,
      "step": 224748
    },
    {
      "epoch": 43.16,
      "learning_rate": 0.001,
      "loss": 2.482,
      "step": 224760
    },
    {
      "epoch": 43.16,
      "learning_rate": 0.001,
      "loss": 2.4844,
      "step": 224772
    },
    {
      "epoch": 43.16,
      "learning_rate": 0.001,
      "loss": 2.4873,
      "step": 224784
    },
    {
      "epoch": 43.16,
      "learning_rate": 0.001,
      "loss": 2.4899,
      "step": 224796
    },
    {
      "epoch": 43.17,
      "learning_rate": 0.001,
      "loss": 2.5012,
      "step": 224808
    },
    {
      "epoch": 43.17,
      "learning_rate": 0.001,
      "loss": 2.4927,
      "step": 224820
    },
    {
      "epoch": 43.17,
      "learning_rate": 0.001,
      "loss": 2.4807,
      "step": 224832
    },
    {
      "epoch": 43.17,
      "learning_rate": 0.001,
      "loss": 2.4821,
      "step": 224844
    },
    {
      "epoch": 43.18,
      "learning_rate": 0.001,
      "loss": 2.4898,
      "step": 224856
    },
    {
      "epoch": 43.18,
      "learning_rate": 0.001,
      "loss": 2.488,
      "step": 224868
    },
    {
      "epoch": 43.18,
      "learning_rate": 0.001,
      "loss": 2.4859,
      "step": 224880
    },
    {
      "epoch": 43.18,
      "learning_rate": 0.001,
      "loss": 2.4887,
      "step": 224892
    },
    {
      "epoch": 43.18,
      "learning_rate": 0.001,
      "loss": 2.4995,
      "step": 224904
    },
    {
      "epoch": 43.19,
      "learning_rate": 0.001,
      "loss": 2.4725,
      "step": 224916
    },
    {
      "epoch": 43.19,
      "learning_rate": 0.001,
      "loss": 2.4814,
      "step": 224928
    },
    {
      "epoch": 43.19,
      "learning_rate": 0.001,
      "loss": 2.4845,
      "step": 224940
    },
    {
      "epoch": 43.19,
      "learning_rate": 0.001,
      "loss": 2.4969,
      "step": 224952
    },
    {
      "epoch": 43.2,
      "learning_rate": 0.001,
      "loss": 2.4945,
      "step": 224964
    },
    {
      "epoch": 43.2,
      "learning_rate": 0.001,
      "loss": 2.4926,
      "step": 224976
    },
    {
      "epoch": 43.2,
      "learning_rate": 0.001,
      "loss": 2.4985,
      "step": 224988
    },
    {
      "epoch": 43.2,
      "learning_rate": 0.001,
      "loss": 2.4971,
      "step": 225000
    },
    {
      "epoch": 43.2,
      "eval_ag_news_accuracy": 0.32753125,
      "eval_ag_news_bleu_score": 5.211276532848724,
      "eval_ag_news_bleu_score_sem": 0.17252462750866365,
      "eval_ag_news_emb_cos_sim": 0.8245654702186584,
      "eval_ag_news_emb_cos_sim_sem": 0.006948924111196698,
      "eval_ag_news_emb_top1_equal": 0.265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.474595308303833,
      "eval_ag_news_n_ngrams_match_1": 14.406,
      "eval_ag_news_n_ngrams_match_2": 3.338,
      "eval_ag_news_n_ngrams_match_3": 0.998,
      "eval_ag_news_num_pred_words": 46.486,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.28476051803534,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3598653902184827,
      "eval_ag_news_runtime": 14.3423,
      "eval_ag_news_samples_per_second": 34.862,
      "eval_ag_news_steps_per_second": 0.07,
      "eval_ag_news_token_set_f1": 0.36276770394489916,
      "eval_ag_news_token_set_f1_sem": 0.004413243434977232,
      "eval_ag_news_token_set_precision": 0.34711509780671834,
      "eval_ag_news_token_set_recall": 0.39569878543589954,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 225000
    },
    {
      "epoch": 43.2,
      "eval_anthropic_toxic_prompts_accuracy": 0.11671875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.0833680660235285,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11897582499138239,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6771214604377747,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009058558930173688,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.1975910663604736,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.22,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.884,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.698,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.792,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.473504083394158,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21431399798056927,
      "eval_anthropic_toxic_prompts_runtime": 13.4659,
      "eval_anthropic_toxic_prompts_samples_per_second": 37.131,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.074,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3579694121206704,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006631391204199331,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4361442732108506,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3305398430656689,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 225000
    },
    {
      "epoch": 43.2,
      "eval_arxiv_accuracy": 0.352375,
      "eval_arxiv_bleu_score": 4.40550909040396,
      "eval_arxiv_bleu_score_sem": 0.13550989774980318,
      "eval_arxiv_emb_cos_sim": 0.7769677639007568,
      "eval_arxiv_emb_cos_sim_sem": 0.007418074511406127,
      "eval_arxiv_emb_top1_equal": 0.3203125,
      "eval_arxiv_emb_top1_equal_sem": 0.041403754790620424,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.324664831161499,
      "eval_arxiv_n_ngrams_match_1": 15.142,
      "eval_arxiv_n_ngrams_match_2": 3.048,
      "eval_arxiv_n_ngrams_match_3": 0.674,
      "eval_arxiv_num_pred_words": 39.704,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.789682845852717,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36540441175968996,
      "eval_arxiv_runtime": 14.2562,
      "eval_arxiv_samples_per_second": 35.072,
      "eval_arxiv_steps_per_second": 0.07,
      "eval_arxiv_token_set_f1": 0.3599004113280265,
      "eval_arxiv_token_set_f1_sem": 0.004475997533823196,
      "eval_arxiv_token_set_precision": 0.30979782520523863,
      "eval_arxiv_token_set_recall": 0.4507911260698354,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 225000
    },
    {
      "epoch": 43.2,
      "eval_python_code_alpaca_accuracy": 0.1651875,
      "eval_python_code_alpaca_bleu_score": 4.8531095230752275,
      "eval_python_code_alpaca_bleu_score_sem": 0.15412118315054213,
      "eval_python_code_alpaca_emb_cos_sim": 0.7674447298049927,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007438374804409203,
      "eval_python_code_alpaca_emb_top1_equal": 0.15625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8156230449676514,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.856,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.028,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.066,
      "eval_python_code_alpaca_num_pred_words": 43.416,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 16.70357962018885,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3400893734969571,
      "eval_python_code_alpaca_runtime": 13.8528,
      "eval_python_code_alpaca_samples_per_second": 36.094,
      "eval_python_code_alpaca_steps_per_second": 0.072,
      "eval_python_code_alpaca_token_set_f1": 0.4799522378756545,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005705710467907103,
      "eval_python_code_alpaca_token_set_precision": 0.5392585395325675,
      "eval_python_code_alpaca_token_set_recall": 0.4538301922814778,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 225000
    },
    {
      "epoch": 43.2,
      "eval_wikibio_accuracy": 0.32965625,
      "eval_wikibio_bleu_score": 5.801037856657294,
      "eval_wikibio_bleu_score_sem": 0.2178655353761267,
      "eval_wikibio_emb_cos_sim": 0.7327523827552795,
      "eval_wikibio_emb_cos_sim_sem": 0.010329804687304925,
      "eval_wikibio_emb_top1_equal": 0.2265625,
      "eval_wikibio_emb_top1_equal_sem": 0.03714537682851538,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.601663112640381,
      "eval_wikibio_n_ngrams_match_1": 9.698,
      "eval_wikibio_n_ngrams_match_2": 3.238,
      "eval_wikibio_n_ngrams_match_3": 1.196,
      "eval_wikibio_num_pred_words": 34.858,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 36.65915207239483,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.34634169436391915,
      "eval_wikibio_runtime": 14.9041,
      "eval_wikibio_samples_per_second": 33.548,
      "eval_wikibio_steps_per_second": 0.067,
      "eval_wikibio_token_set_f1": 0.3080959545079406,
      "eval_wikibio_token_set_f1_sem": 0.005920249124020165,
      "eval_wikibio_token_set_precision": 0.3151915658575392,
      "eval_wikibio_token_set_recall": 0.32133225752904643,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 225000
    },
    {
      "epoch": 43.2,
      "eval_nq_accuracy": 0.536625,
      "eval_nq_bleu_score": 11.7493652192864,
      "eval_nq_bleu_score_sem": 0.48137986508988995,
      "eval_nq_emb_cos_sim": 0.8319485187530518,
      "eval_nq_emb_cos_sim_sem": 0.00709220627397653,
      "eval_nq_emb_top1_equal": 0.3203125,
      "eval_nq_emb_top1_equal_sem": 0.041403754790620424,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1160738468170166,
      "eval_nq_n_ngrams_match_1": 23.244,
      "eval_nq_n_ngrams_match_2": 8.56,
      "eval_nq_n_ngrams_match_3": 3.894,
      "eval_nq_num_pred_words": 48.84,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.298492292675277,
      "eval_nq_pred_num_tokens": 62.9609375,
      "eval_nq_rouge_score": 0.45197330265622393,
      "eval_nq_runtime": 14.8599,
      "eval_nq_samples_per_second": 33.648,
      "eval_nq_steps_per_second": 0.067,
      "eval_nq_token_set_f1": 0.46469942027029504,
      "eval_nq_token_set_f1_sem": 0.004887356329479464,
      "eval_nq_token_set_precision": 0.42270919429197945,
      "eval_nq_token_set_recall": 0.5245490890095952,
      "eval_nq_true_num_tokens": 64.0,
      "step": 225000
    },
    {
      "epoch": 43.21,
      "learning_rate": 0.001,
      "loss": 2.4849,
      "step": 225012
    },
    {
      "epoch": 43.21,
      "learning_rate": 0.001,
      "loss": 2.4849,
      "step": 225024
    },
    {
      "epoch": 43.21,
      "learning_rate": 0.001,
      "loss": 2.4939,
      "step": 225036
    },
    {
      "epoch": 43.21,
      "learning_rate": 0.001,
      "loss": 2.4851,
      "step": 225048
    },
    {
      "epoch": 43.21,
      "learning_rate": 0.001,
      "loss": 2.4804,
      "step": 225060
    },
    {
      "epoch": 43.22,
      "learning_rate": 0.001,
      "loss": 2.4871,
      "step": 225072
    },
    {
      "epoch": 43.22,
      "learning_rate": 0.001,
      "loss": 2.4916,
      "step": 225084
    },
    {
      "epoch": 43.22,
      "learning_rate": 0.001,
      "loss": 2.4863,
      "step": 225096
    },
    {
      "epoch": 43.22,
      "learning_rate": 0.001,
      "loss": 2.4949,
      "step": 225108
    },
    {
      "epoch": 43.23,
      "learning_rate": 0.001,
      "loss": 2.4913,
      "step": 225120
    },
    {
      "epoch": 43.23,
      "learning_rate": 0.001,
      "loss": 2.4847,
      "step": 225132
    },
    {
      "epoch": 43.23,
      "learning_rate": 0.001,
      "loss": 2.4938,
      "step": 225144
    },
    {
      "epoch": 43.23,
      "learning_rate": 0.001,
      "loss": 2.4783,
      "step": 225156
    },
    {
      "epoch": 43.24,
      "learning_rate": 0.001,
      "loss": 2.482,
      "step": 225168
    },
    {
      "epoch": 43.24,
      "learning_rate": 0.001,
      "loss": 2.4865,
      "step": 225180
    },
    {
      "epoch": 43.24,
      "learning_rate": 0.001,
      "loss": 2.482,
      "step": 225192
    },
    {
      "epoch": 43.24,
      "learning_rate": 0.001,
      "loss": 2.5016,
      "step": 225204
    },
    {
      "epoch": 43.24,
      "learning_rate": 0.001,
      "loss": 2.4863,
      "step": 225216
    },
    {
      "epoch": 43.25,
      "learning_rate": 0.001,
      "loss": 2.4894,
      "step": 225228
    },
    {
      "epoch": 43.25,
      "learning_rate": 0.001,
      "loss": 2.4785,
      "step": 225240
    },
    {
      "epoch": 43.25,
      "learning_rate": 0.001,
      "loss": 2.4854,
      "step": 225252
    },
    {
      "epoch": 43.25,
      "learning_rate": 0.001,
      "loss": 2.4821,
      "step": 225264
    },
    {
      "epoch": 43.26,
      "learning_rate": 0.001,
      "loss": 2.4842,
      "step": 225276
    },
    {
      "epoch": 43.26,
      "learning_rate": 0.001,
      "loss": 2.4786,
      "step": 225288
    },
    {
      "epoch": 43.26,
      "learning_rate": 0.001,
      "loss": 2.4882,
      "step": 225300
    },
    {
      "epoch": 43.26,
      "learning_rate": 0.001,
      "loss": 2.4883,
      "step": 225312
    },
    {
      "epoch": 43.26,
      "learning_rate": 0.001,
      "loss": 2.4834,
      "step": 225324
    },
    {
      "epoch": 43.27,
      "learning_rate": 0.001,
      "loss": 2.4815,
      "step": 225336
    },
    {
      "epoch": 43.27,
      "learning_rate": 0.001,
      "loss": 2.4958,
      "step": 225348
    },
    {
      "epoch": 43.27,
      "learning_rate": 0.001,
      "loss": 2.4777,
      "step": 225360
    },
    {
      "epoch": 43.27,
      "learning_rate": 0.001,
      "loss": 2.486,
      "step": 225372
    },
    {
      "epoch": 43.28,
      "learning_rate": 0.001,
      "loss": 2.4927,
      "step": 225384
    },
    {
      "epoch": 43.28,
      "learning_rate": 0.001,
      "loss": 2.4834,
      "step": 225396
    },
    {
      "epoch": 43.28,
      "learning_rate": 0.001,
      "loss": 2.4858,
      "step": 225408
    },
    {
      "epoch": 43.28,
      "learning_rate": 0.001,
      "loss": 2.4873,
      "step": 225420
    },
    {
      "epoch": 43.29,
      "learning_rate": 0.001,
      "loss": 2.4909,
      "step": 225432
    },
    {
      "epoch": 43.29,
      "learning_rate": 0.001,
      "loss": 2.4895,
      "step": 225444
    },
    {
      "epoch": 43.29,
      "learning_rate": 0.001,
      "loss": 2.4966,
      "step": 225456
    },
    {
      "epoch": 43.29,
      "learning_rate": 0.001,
      "loss": 2.4887,
      "step": 225468
    },
    {
      "epoch": 43.29,
      "learning_rate": 0.001,
      "loss": 2.4959,
      "step": 225480
    },
    {
      "epoch": 43.3,
      "learning_rate": 0.001,
      "loss": 2.4885,
      "step": 225492
    },
    {
      "epoch": 43.3,
      "learning_rate": 0.001,
      "loss": 2.4793,
      "step": 225504
    },
    {
      "epoch": 43.3,
      "learning_rate": 0.001,
      "loss": 2.4823,
      "step": 225516
    },
    {
      "epoch": 43.3,
      "learning_rate": 0.001,
      "loss": 2.4876,
      "step": 225528
    },
    {
      "epoch": 43.31,
      "learning_rate": 0.001,
      "loss": 2.4878,
      "step": 225540
    },
    {
      "epoch": 43.31,
      "learning_rate": 0.001,
      "loss": 2.4941,
      "step": 225552
    },
    {
      "epoch": 43.31,
      "learning_rate": 0.001,
      "loss": 2.4939,
      "step": 225564
    },
    {
      "epoch": 43.31,
      "learning_rate": 0.001,
      "loss": 2.4925,
      "step": 225576
    },
    {
      "epoch": 43.32,
      "learning_rate": 0.001,
      "loss": 2.4919,
      "step": 225588
    },
    {
      "epoch": 43.32,
      "learning_rate": 0.001,
      "loss": 2.4894,
      "step": 225600
    },
    {
      "epoch": 43.32,
      "learning_rate": 0.001,
      "loss": 2.4865,
      "step": 225612
    },
    {
      "epoch": 43.32,
      "learning_rate": 0.001,
      "loss": 2.4821,
      "step": 225624
    },
    {
      "epoch": 43.32,
      "eval_ag_news_accuracy": 0.329625,
      "eval_ag_news_bleu_score": 5.007209680712018,
      "eval_ag_news_bleu_score_sem": 0.15787546553026713,
      "eval_ag_news_emb_cos_sim": 0.8209670782089233,
      "eval_ag_news_emb_cos_sim_sem": 0.006888270844922473,
      "eval_ag_news_emb_top1_equal": 0.265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4715840816497803,
      "eval_ag_news_n_ngrams_match_1": 14.374,
      "eval_ag_news_n_ngrams_match_2": 3.198,
      "eval_ag_news_n_ngrams_match_3": 0.92,
      "eval_ag_news_num_pred_words": 46.454,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.18769001064209,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3594543254780415,
      "eval_ag_news_runtime": 14.1571,
      "eval_ag_news_samples_per_second": 35.318,
      "eval_ag_news_steps_per_second": 0.071,
      "eval_ag_news_token_set_f1": 0.35642959025874726,
      "eval_ag_news_token_set_f1_sem": 0.004416355497977135,
      "eval_ag_news_token_set_precision": 0.34342920762898865,
      "eval_ag_news_token_set_recall": 0.38472586235289224,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 225625
    },
    {
      "epoch": 43.32,
      "eval_anthropic_toxic_prompts_accuracy": 0.11671875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.177581048954006,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11991898957594833,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6827985644340515,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009172370567676785,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2150938510894775,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.244,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.934,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.734,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.874,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.905629221920645,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21636918304036196,
      "eval_anthropic_toxic_prompts_runtime": 13.6736,
      "eval_anthropic_toxic_prompts_samples_per_second": 36.567,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.073,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3588128479446289,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006505869840539364,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44135623305194505,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32780924040399356,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 225625
    },
    {
      "epoch": 43.32,
      "eval_arxiv_accuracy": 0.35303125,
      "eval_arxiv_bleu_score": 4.732146080760891,
      "eval_arxiv_bleu_score_sem": 0.13805026088782762,
      "eval_arxiv_emb_cos_sim": 0.7880035638809204,
      "eval_arxiv_emb_cos_sim_sem": 0.00648094855438951,
      "eval_arxiv_emb_top1_equal": 0.2265625,
      "eval_arxiv_emb_top1_equal_sem": 0.03714537682851538,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3333144187927246,
      "eval_arxiv_n_ngrams_match_1": 15.832,
      "eval_arxiv_n_ngrams_match_2": 3.256,
      "eval_arxiv_n_ngrams_match_3": 0.788,
      "eval_arxiv_num_pred_words": 40.95,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.031094694233,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.37843801869766436,
      "eval_arxiv_runtime": 13.7925,
      "eval_arxiv_samples_per_second": 36.252,
      "eval_arxiv_steps_per_second": 0.073,
      "eval_arxiv_token_set_f1": 0.37021707299515594,
      "eval_arxiv_token_set_f1_sem": 0.004351910512045278,
      "eval_arxiv_token_set_precision": 0.32470253058240534,
      "eval_arxiv_token_set_recall": 0.44605730211835404,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 225625
    },
    {
      "epoch": 43.32,
      "eval_python_code_alpaca_accuracy": 0.16225,
      "eval_python_code_alpaca_bleu_score": 4.50693783852999,
      "eval_python_code_alpaca_bleu_score_sem": 0.13918322989300988,
      "eval_python_code_alpaca_emb_cos_sim": 0.75345778465271,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008189933455788417,
      "eval_python_code_alpaca_emb_top1_equal": 0.1484375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.847130060195923,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.782,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.868,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.918,
      "eval_python_code_alpaca_num_pred_words": 42.844,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.238238075157213,
      "eval_python_code_alpaca_pred_num_tokens": 62.9921875,
      "eval_python_code_alpaca_rouge_score": 0.3344091486976238,
      "eval_python_code_alpaca_runtime": 15.4286,
      "eval_python_code_alpaca_samples_per_second": 32.407,
      "eval_python_code_alpaca_steps_per_second": 0.065,
      "eval_python_code_alpaca_token_set_f1": 0.47689932860094325,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005593843710542998,
      "eval_python_code_alpaca_token_set_precision": 0.5333945379294924,
      "eval_python_code_alpaca_token_set_recall": 0.4560878928593189,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 225625
    },
    {
      "epoch": 43.32,
      "eval_wikibio_accuracy": 0.32946875,
      "eval_wikibio_bleu_score": 6.262314292456134,
      "eval_wikibio_bleu_score_sem": 0.22480394580126178,
      "eval_wikibio_emb_cos_sim": 0.7432674765586853,
      "eval_wikibio_emb_cos_sim_sem": 0.009720175548286147,
      "eval_wikibio_emb_top1_equal": 0.203125,
      "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6118967533111572,
      "eval_wikibio_n_ngrams_match_1": 10.194,
      "eval_wikibio_n_ngrams_match_2": 3.538,
      "eval_wikibio_n_ngrams_match_3": 1.36,
      "eval_wikibio_num_pred_words": 36.174,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.036234835848816,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3594818662399503,
      "eval_wikibio_runtime": 13.6846,
      "eval_wikibio_samples_per_second": 36.537,
      "eval_wikibio_steps_per_second": 0.073,
      "eval_wikibio_token_set_f1": 0.3231579133748274,
      "eval_wikibio_token_set_f1_sem": 0.005596099174528171,
      "eval_wikibio_token_set_precision": 0.33064577235064085,
      "eval_wikibio_token_set_recall": 0.3356395695646333,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 225625
    },
    {
      "epoch": 43.32,
      "eval_nq_accuracy": 0.536625,
      "eval_nq_bleu_score": 12.052785832010397,
      "eval_nq_bleu_score_sem": 0.48510503214570616,
      "eval_nq_emb_cos_sim": 0.8349946737289429,
      "eval_nq_emb_cos_sim_sem": 0.007090471009197996,
      "eval_nq_emb_top1_equal": 0.28125,
      "eval_nq_emb_top1_equal_sem": 0.039896367485272234,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.112039566040039,
      "eval_nq_n_ngrams_match_1": 23.56,
      "eval_nq_n_ngrams_match_2": 8.664,
      "eval_nq_n_ngrams_match_3": 4.04,
      "eval_nq_num_pred_words": 49.128,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.265081284749389,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4570524754387344,
      "eval_nq_runtime": 15.1352,
      "eval_nq_samples_per_second": 33.036,
      "eval_nq_steps_per_second": 0.066,
      "eval_nq_token_set_f1": 0.4705470357107136,
      "eval_nq_token_set_f1_sem": 0.004879032714876459,
      "eval_nq_token_set_precision": 0.4311862561023063,
      "eval_nq_token_set_recall": 0.5261074936810667,
      "eval_nq_true_num_tokens": 64.0,
      "step": 225625
    },
    {
      "epoch": 43.32,
      "learning_rate": 0.001,
      "loss": 2.4848,
      "step": 225636
    },
    {
      "epoch": 43.33,
      "learning_rate": 0.001,
      "loss": 2.4822,
      "step": 225648
    },
    {
      "epoch": 43.33,
      "learning_rate": 0.001,
      "loss": 2.482,
      "step": 225660
    },
    {
      "epoch": 43.33,
      "learning_rate": 0.001,
      "loss": 2.4874,
      "step": 225672
    },
    {
      "epoch": 43.33,
      "learning_rate": 0.001,
      "loss": 2.4837,
      "step": 225684
    },
    {
      "epoch": 43.34,
      "learning_rate": 0.001,
      "loss": 2.4887,
      "step": 225696
    },
    {
      "epoch": 43.34,
      "learning_rate": 0.001,
      "loss": 2.4845,
      "step": 225708
    },
    {
      "epoch": 43.34,
      "learning_rate": 0.001,
      "loss": 2.4826,
      "step": 225720
    },
    {
      "epoch": 43.34,
      "learning_rate": 0.001,
      "loss": 2.4921,
      "step": 225732
    },
    {
      "epoch": 43.35,
      "learning_rate": 0.001,
      "loss": 2.484,
      "step": 225744
    },
    {
      "epoch": 43.35,
      "learning_rate": 0.001,
      "loss": 2.4909,
      "step": 225756
    },
    {
      "epoch": 43.35,
      "learning_rate": 0.001,
      "loss": 2.4858,
      "step": 225768
    },
    {
      "epoch": 43.35,
      "learning_rate": 0.001,
      "loss": 2.4974,
      "step": 225780
    },
    {
      "epoch": 43.35,
      "learning_rate": 0.001,
      "loss": 2.4867,
      "step": 225792
    },
    {
      "epoch": 43.36,
      "learning_rate": 0.001,
      "loss": 2.4893,
      "step": 225804
    },
    {
      "epoch": 43.36,
      "learning_rate": 0.001,
      "loss": 2.4868,
      "step": 225816
    },
    {
      "epoch": 43.36,
      "learning_rate": 0.001,
      "loss": 2.4924,
      "step": 225828
    },
    {
      "epoch": 43.36,
      "learning_rate": 0.001,
      "loss": 2.4828,
      "step": 225840
    },
    {
      "epoch": 43.37,
      "learning_rate": 0.001,
      "loss": 2.4825,
      "step": 225852
    },
    {
      "epoch": 43.37,
      "learning_rate": 0.001,
      "loss": 2.4872,
      "step": 225864
    },
    {
      "epoch": 43.37,
      "learning_rate": 0.001,
      "loss": 2.4841,
      "step": 225876
    },
    {
      "epoch": 43.37,
      "learning_rate": 0.001,
      "loss": 2.4877,
      "step": 225888
    },
    {
      "epoch": 43.38,
      "learning_rate": 0.001,
      "loss": 2.4932,
      "step": 225900
    },
    {
      "epoch": 43.38,
      "learning_rate": 0.001,
      "loss": 2.4829,
      "step": 225912
    },
    {
      "epoch": 43.38,
      "learning_rate": 0.001,
      "loss": 2.4906,
      "step": 225924
    },
    {
      "epoch": 43.38,
      "learning_rate": 0.001,
      "loss": 2.4914,
      "step": 225936
    },
    {
      "epoch": 43.38,
      "learning_rate": 0.001,
      "loss": 2.4917,
      "step": 225948
    },
    {
      "epoch": 43.39,
      "learning_rate": 0.001,
      "loss": 2.4894,
      "step": 225960
    },
    {
      "epoch": 43.39,
      "learning_rate": 0.001,
      "loss": 2.4762,
      "step": 225972
    },
    {
      "epoch": 43.39,
      "learning_rate": 0.001,
      "loss": 2.4856,
      "step": 225984
    },
    {
      "epoch": 43.39,
      "learning_rate": 0.001,
      "loss": 2.4869,
      "step": 225996
    },
    {
      "epoch": 43.4,
      "learning_rate": 0.001,
      "loss": 2.4842,
      "step": 226008
    },
    {
      "epoch": 43.4,
      "learning_rate": 0.001,
      "loss": 2.495,
      "step": 226020
    },
    {
      "epoch": 43.4,
      "learning_rate": 0.001,
      "loss": 2.4875,
      "step": 226032
    },
    {
      "epoch": 43.4,
      "learning_rate": 0.001,
      "loss": 2.4864,
      "step": 226044
    },
    {
      "epoch": 43.41,
      "learning_rate": 0.001,
      "loss": 2.4947,
      "step": 226056
    },
    {
      "epoch": 43.41,
      "learning_rate": 0.001,
      "loss": 2.4903,
      "step": 226068
    },
    {
      "epoch": 43.41,
      "learning_rate": 0.001,
      "loss": 2.4833,
      "step": 226080
    },
    {
      "epoch": 43.41,
      "learning_rate": 0.001,
      "loss": 2.498,
      "step": 226092
    },
    {
      "epoch": 43.41,
      "learning_rate": 0.001,
      "loss": 2.4903,
      "step": 226104
    },
    {
      "epoch": 43.42,
      "learning_rate": 0.001,
      "loss": 2.4988,
      "step": 226116
    },
    {
      "epoch": 43.42,
      "learning_rate": 0.001,
      "loss": 2.4925,
      "step": 226128
    },
    {
      "epoch": 43.42,
      "learning_rate": 0.001,
      "loss": 2.4941,
      "step": 226140
    },
    {
      "epoch": 43.42,
      "learning_rate": 0.001,
      "loss": 2.4852,
      "step": 226152
    },
    {
      "epoch": 43.43,
      "learning_rate": 0.001,
      "loss": 2.499,
      "step": 226164
    },
    {
      "epoch": 43.43,
      "learning_rate": 0.001,
      "loss": 2.5042,
      "step": 226176
    },
    {
      "epoch": 43.43,
      "learning_rate": 0.001,
      "loss": 2.4803,
      "step": 226188
    },
    {
      "epoch": 43.43,
      "learning_rate": 0.001,
      "loss": 2.4825,
      "step": 226200
    },
    {
      "epoch": 43.44,
      "learning_rate": 0.001,
      "loss": 2.4942,
      "step": 226212
    },
    {
      "epoch": 43.44,
      "learning_rate": 0.001,
      "loss": 2.4888,
      "step": 226224
    },
    {
      "epoch": 43.44,
      "learning_rate": 0.001,
      "loss": 2.4943,
      "step": 226236
    },
    {
      "epoch": 43.44,
      "learning_rate": 0.001,
      "loss": 2.4892,
      "step": 226248
    },
    {
      "epoch": 43.44,
      "eval_ag_news_accuracy": 0.32978125,
      "eval_ag_news_bleu_score": 5.093676275547452,
      "eval_ag_news_bleu_score_sem": 0.16487175943824905,
      "eval_ag_news_emb_cos_sim": 0.8234910368919373,
      "eval_ag_news_emb_cos_sim_sem": 0.006803550411059513,
      "eval_ag_news_emb_top1_equal": 0.2421875,
      "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4731106758117676,
      "eval_ag_news_n_ngrams_match_1": 14.422,
      "eval_ag_news_n_ngrams_match_2": 3.276,
      "eval_ag_news_n_ngrams_match_3": 0.948,
      "eval_ag_news_num_pred_words": 46.11,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.23686507593382,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3623755152636988,
      "eval_ag_news_runtime": 14.5685,
      "eval_ag_news_samples_per_second": 34.321,
      "eval_ag_news_steps_per_second": 0.069,
      "eval_ag_news_token_set_f1": 0.36232721276381796,
      "eval_ag_news_token_set_f1_sem": 0.0043315819491174315,
      "eval_ag_news_token_set_precision": 0.3472226476482931,
      "eval_ag_news_token_set_recall": 0.39376838695199123,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 226250
    },
    {
      "epoch": 43.44,
      "eval_anthropic_toxic_prompts_accuracy": 0.11546875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1573747343869236,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11883577326787367,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6744633913040161,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011031932987914956,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.20275616645813,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.348,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.954,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.734,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.214,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.600239199750323,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21727550300527126,
      "eval_anthropic_toxic_prompts_runtime": 18.2577,
      "eval_anthropic_toxic_prompts_samples_per_second": 27.386,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.055,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35934558975278397,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00662503788648407,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44541038352047774,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32663969420704203,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 226250
    },
    {
      "epoch": 43.44,
      "eval_arxiv_accuracy": 0.3524375,
      "eval_arxiv_bleu_score": 4.478099066227136,
      "eval_arxiv_bleu_score_sem": 0.1369632925480413,
      "eval_arxiv_emb_cos_sim": 0.7743604183197021,
      "eval_arxiv_emb_cos_sim_sem": 0.008632045335589091,
      "eval_arxiv_emb_top1_equal": 0.2734375,
      "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3318378925323486,
      "eval_arxiv_n_ngrams_match_1": 15.556,
      "eval_arxiv_n_ngrams_match_2": 3.076,
      "eval_arxiv_n_ngrams_match_3": 0.714,
      "eval_arxiv_num_pred_words": 40.726,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.98973658748906,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36978181886539296,
      "eval_arxiv_runtime": 19.7036,
      "eval_arxiv_samples_per_second": 25.376,
      "eval_arxiv_steps_per_second": 0.051,
      "eval_arxiv_token_set_f1": 0.3635901665162223,
      "eval_arxiv_token_set_f1_sem": 0.004390762577594476,
      "eval_arxiv_token_set_precision": 0.3169895731092615,
      "eval_arxiv_token_set_recall": 0.444598054307409,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 226250
    },
    {
      "epoch": 43.44,
      "eval_python_code_alpaca_accuracy": 0.1635,
      "eval_python_code_alpaca_bleu_score": 4.708267183592054,
      "eval_python_code_alpaca_bleu_score_sem": 0.14815326850460622,
      "eval_python_code_alpaca_emb_cos_sim": 0.7634607553482056,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.00831298183856048,
      "eval_python_code_alpaca_emb_top1_equal": 0.1328125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.816610813140869,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.89,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.938,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.974,
      "eval_python_code_alpaca_num_pred_words": 42.422,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 16.720087035924255,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.34077917287328185,
      "eval_python_code_alpaca_runtime": 21.2743,
      "eval_python_code_alpaca_samples_per_second": 23.503,
      "eval_python_code_alpaca_steps_per_second": 0.047,
      "eval_python_code_alpaca_token_set_f1": 0.4823328657908486,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005527231693143531,
      "eval_python_code_alpaca_token_set_precision": 0.5427530424492416,
      "eval_python_code_alpaca_token_set_recall": 0.4568862758040123,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 226250
    },
    {
      "epoch": 43.44,
      "eval_wikibio_accuracy": 0.33146875,
      "eval_wikibio_bleu_score": 6.2041723659798595,
      "eval_wikibio_bleu_score_sem": 0.21787592440732506,
      "eval_wikibio_emb_cos_sim": 0.7340708374977112,
      "eval_wikibio_emb_cos_sim_sem": 0.009662068215079783,
      "eval_wikibio_emb_top1_equal": 0.2265625,
      "eval_wikibio_emb_top1_equal_sem": 0.03714537682851538,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6151347160339355,
      "eval_wikibio_n_ngrams_match_1": 9.918,
      "eval_wikibio_n_ngrams_match_2": 3.428,
      "eval_wikibio_n_ngrams_match_3": 1.326,
      "eval_wikibio_num_pred_words": 35.022,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.15635114475906,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.35082655990224754,
      "eval_wikibio_runtime": 20.946,
      "eval_wikibio_samples_per_second": 23.871,
      "eval_wikibio_steps_per_second": 0.048,
      "eval_wikibio_token_set_f1": 0.3189537830900528,
      "eval_wikibio_token_set_f1_sem": 0.005816014437589874,
      "eval_wikibio_token_set_precision": 0.3236274637690813,
      "eval_wikibio_token_set_recall": 0.3344237077877823,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 226250
    },
    {
      "epoch": 43.44,
      "eval_nq_accuracy": 0.5366875,
      "eval_nq_bleu_score": 12.050509228876638,
      "eval_nq_bleu_score_sem": 0.4944644192355625,
      "eval_nq_emb_cos_sim": 0.834195613861084,
      "eval_nq_emb_cos_sim_sem": 0.007457853892410824,
      "eval_nq_emb_top1_equal": 0.3203125,
      "eval_nq_emb_top1_equal_sem": 0.041403754790620424,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1161155700683594,
      "eval_nq_n_ngrams_match_1": 23.55,
      "eval_nq_n_ngrams_match_2": 8.74,
      "eval_nq_n_ngrams_match_3": 4.052,
      "eval_nq_num_pred_words": 49.232,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.298838539978203,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45668360891146675,
      "eval_nq_runtime": 14.6674,
      "eval_nq_samples_per_second": 34.089,
      "eval_nq_steps_per_second": 0.068,
      "eval_nq_token_set_f1": 0.4686929296459197,
      "eval_nq_token_set_f1_sem": 0.004831937761692989,
      "eval_nq_token_set_precision": 0.42789368343855533,
      "eval_nq_token_set_recall": 0.5263362551753112,
      "eval_nq_true_num_tokens": 64.0,
      "step": 226250
    },
    {
      "epoch": 43.44,
      "learning_rate": 0.001,
      "loss": 2.4955,
      "step": 226260
    },
    {
      "epoch": 43.45,
      "learning_rate": 0.001,
      "loss": 2.491,
      "step": 226272
    },
    {
      "epoch": 43.45,
      "learning_rate": 0.001,
      "loss": 2.4913,
      "step": 226284
    },
    {
      "epoch": 43.45,
      "learning_rate": 0.001,
      "loss": 2.4905,
      "step": 226296
    },
    {
      "epoch": 43.45,
      "learning_rate": 0.001,
      "loss": 2.4845,
      "step": 226308
    },
    {
      "epoch": 43.46,
      "learning_rate": 0.001,
      "loss": 2.4824,
      "step": 226320
    },
    {
      "epoch": 43.46,
      "learning_rate": 0.001,
      "loss": 2.4927,
      "step": 226332
    },
    {
      "epoch": 43.46,
      "learning_rate": 0.001,
      "loss": 2.4934,
      "step": 226344
    },
    {
      "epoch": 43.46,
      "learning_rate": 0.001,
      "loss": 2.493,
      "step": 226356
    },
    {
      "epoch": 43.47,
      "learning_rate": 0.001,
      "loss": 2.4929,
      "step": 226368
    },
    {
      "epoch": 43.47,
      "learning_rate": 0.001,
      "loss": 2.495,
      "step": 226380
    },
    {
      "epoch": 43.47,
      "learning_rate": 0.001,
      "loss": 2.4924,
      "step": 226392
    },
    {
      "epoch": 43.47,
      "learning_rate": 0.001,
      "loss": 2.4883,
      "step": 226404
    },
    {
      "epoch": 43.47,
      "learning_rate": 0.001,
      "loss": 2.4942,
      "step": 226416
    },
    {
      "epoch": 43.48,
      "learning_rate": 0.001,
      "loss": 2.5025,
      "step": 226428
    },
    {
      "epoch": 43.48,
      "learning_rate": 0.001,
      "loss": 2.4824,
      "step": 226440
    },
    {
      "epoch": 43.48,
      "learning_rate": 0.001,
      "loss": 2.4816,
      "step": 226452
    },
    {
      "epoch": 43.48,
      "learning_rate": 0.001,
      "loss": 2.4933,
      "step": 226464
    },
    {
      "epoch": 43.49,
      "learning_rate": 0.001,
      "loss": 2.491,
      "step": 226476
    },
    {
      "epoch": 43.49,
      "learning_rate": 0.001,
      "loss": 2.493,
      "step": 226488
    },
    {
      "epoch": 43.49,
      "learning_rate": 0.001,
      "loss": 2.4891,
      "step": 226500
    },
    {
      "epoch": 43.49,
      "learning_rate": 0.001,
      "loss": 2.4997,
      "step": 226512
    },
    {
      "epoch": 43.5,
      "learning_rate": 0.001,
      "loss": 2.4935,
      "step": 226524
    },
    {
      "epoch": 43.5,
      "learning_rate": 0.001,
      "loss": 2.4895,
      "step": 226536
    },
    {
      "epoch": 43.5,
      "learning_rate": 0.001,
      "loss": 2.4991,
      "step": 226548
    },
    {
      "epoch": 43.5,
      "learning_rate": 0.001,
      "loss": 2.4898,
      "step": 226560
    },
    {
      "epoch": 43.5,
      "learning_rate": 0.001,
      "loss": 2.4879,
      "step": 226572
    },
    {
      "epoch": 43.51,
      "learning_rate": 0.001,
      "loss": 2.4925,
      "step": 226584
    },
    {
      "epoch": 43.51,
      "learning_rate": 0.001,
      "loss": 2.4963,
      "step": 226596
    },
    {
      "epoch": 43.51,
      "learning_rate": 0.001,
      "loss": 2.4957,
      "step": 226608
    },
    {
      "epoch": 43.51,
      "learning_rate": 0.001,
      "loss": 2.4988,
      "step": 226620
    },
    {
      "epoch": 43.52,
      "learning_rate": 0.001,
      "loss": 2.4859,
      "step": 226632
    },
    {
      "epoch": 43.52,
      "learning_rate": 0.001,
      "loss": 2.497,
      "step": 226644
    },
    {
      "epoch": 43.52,
      "learning_rate": 0.001,
      "loss": 2.491,
      "step": 226656
    },
    {
      "epoch": 43.52,
      "learning_rate": 0.001,
      "loss": 2.4951,
      "step": 226668
    },
    {
      "epoch": 43.53,
      "learning_rate": 0.001,
      "loss": 2.4941,
      "step": 226680
    },
    {
      "epoch": 43.53,
      "learning_rate": 0.001,
      "loss": 2.4967,
      "step": 226692
    },
    {
      "epoch": 43.53,
      "learning_rate": 0.001,
      "loss": 2.4932,
      "step": 226704
    },
    {
      "epoch": 43.53,
      "learning_rate": 0.001,
      "loss": 2.4808,
      "step": 226716
    },
    {
      "epoch": 43.53,
      "learning_rate": 0.001,
      "loss": 2.4908,
      "step": 226728
    },
    {
      "epoch": 43.54,
      "learning_rate": 0.001,
      "loss": 2.4903,
      "step": 226740
    },
    {
      "epoch": 43.54,
      "learning_rate": 0.001,
      "loss": 2.4883,
      "step": 226752
    },
    {
      "epoch": 43.54,
      "learning_rate": 0.001,
      "loss": 2.4861,
      "step": 226764
    },
    {
      "epoch": 43.54,
      "learning_rate": 0.001,
      "loss": 2.493,
      "step": 226776
    },
    {
      "epoch": 43.55,
      "learning_rate": 0.001,
      "loss": 2.5003,
      "step": 226788
    },
    {
      "epoch": 43.55,
      "learning_rate": 0.001,
      "loss": 2.4928,
      "step": 226800
    },
    {
      "epoch": 43.55,
      "learning_rate": 0.001,
      "loss": 2.4797,
      "step": 226812
    },
    {
      "epoch": 43.55,
      "learning_rate": 0.001,
      "loss": 2.4882,
      "step": 226824
    },
    {
      "epoch": 43.56,
      "learning_rate": 0.001,
      "loss": 2.4933,
      "step": 226836
    },
    {
      "epoch": 43.56,
      "learning_rate": 0.001,
      "loss": 2.4915,
      "step": 226848
    },
    {
      "epoch": 43.56,
      "learning_rate": 0.001,
      "loss": 2.4951,
      "step": 226860
    },
    {
      "epoch": 43.56,
      "learning_rate": 0.001,
      "loss": 2.4945,
      "step": 226872
    },
    {
      "epoch": 43.56,
      "eval_ag_news_accuracy": 0.33009375,
      "eval_ag_news_bleu_score": 5.18683700258882,
      "eval_ag_news_bleu_score_sem": 0.16658588927471646,
      "eval_ag_news_emb_cos_sim": 0.8264328241348267,
      "eval_ag_news_emb_cos_sim_sem": 0.006069782998107959,
      "eval_ag_news_emb_top1_equal": 0.25,
      "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4740750789642334,
      "eval_ag_news_n_ngrams_match_1": 14.654,
      "eval_ag_news_n_ngrams_match_2": 3.392,
      "eval_ag_news_n_ngrams_match_3": 0.994,
      "eval_ag_news_num_pred_words": 47.484,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.26796940638506,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3635652825803162,
      "eval_ag_news_runtime": 15.423,
      "eval_ag_news_samples_per_second": 32.419,
      "eval_ag_news_steps_per_second": 0.065,
      "eval_ag_news_token_set_f1": 0.36300772399473347,
      "eval_ag_news_token_set_f1_sem": 0.004374503068819877,
      "eval_ag_news_token_set_precision": 0.35095653459588133,
      "eval_ag_news_token_set_recall": 0.3905892320102781,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 226875
    },
    {
      "epoch": 43.56,
      "eval_anthropic_toxic_prompts_accuracy": 0.11665625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2283255266058406,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12286237103688849,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6831443309783936,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008379857033346125,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.191474199295044,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.48,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.028,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.768,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.892,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.324259831260143,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.22156940193532354,
      "eval_anthropic_toxic_prompts_runtime": 20.7528,
      "eval_anthropic_toxic_prompts_samples_per_second": 24.093,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.048,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3656214365592688,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0066540252995934655,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4576689392150905,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32922829780264373,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 226875
    },
    {
      "epoch": 43.56,
      "eval_arxiv_accuracy": 0.35121875,
      "eval_arxiv_bleu_score": 4.775489376411273,
      "eval_arxiv_bleu_score_sem": 0.1408563374964588,
      "eval_arxiv_emb_cos_sim": 0.7955030202865601,
      "eval_arxiv_emb_cos_sim_sem": 0.005562560822803033,
      "eval_arxiv_emb_top1_equal": 0.2578125,
      "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3303866386413574,
      "eval_arxiv_n_ngrams_match_1": 16.012,
      "eval_arxiv_n_ngrams_match_2": 3.28,
      "eval_arxiv_n_ngrams_match_3": 0.792,
      "eval_arxiv_num_pred_words": 41.938,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.949145834227096,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.37894337090325375,
      "eval_arxiv_runtime": 14.2924,
      "eval_arxiv_samples_per_second": 34.984,
      "eval_arxiv_steps_per_second": 0.07,
      "eval_arxiv_token_set_f1": 0.3711074617169576,
      "eval_arxiv_token_set_f1_sem": 0.00393800402090114,
      "eval_arxiv_token_set_precision": 0.32621156166361315,
      "eval_arxiv_token_set_recall": 0.44156517817519597,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 226875
    },
    {
      "epoch": 43.56,
      "eval_python_code_alpaca_accuracy": 0.16315625,
      "eval_python_code_alpaca_bleu_score": 4.59472098123169,
      "eval_python_code_alpaca_bleu_score_sem": 0.13986066710656597,
      "eval_python_code_alpaca_emb_cos_sim": 0.7682456374168396,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.006673759191070215,
      "eval_python_code_alpaca_emb_top1_equal": 0.1171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.842379093170166,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.988,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.944,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.97,
      "eval_python_code_alpaca_num_pred_words": 43.942,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.156534014811434,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33862023276952685,
      "eval_python_code_alpaca_runtime": 14.7645,
      "eval_python_code_alpaca_samples_per_second": 33.865,
      "eval_python_code_alpaca_steps_per_second": 0.068,
      "eval_python_code_alpaca_token_set_f1": 0.48263569981446547,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0054130602868705255,
      "eval_python_code_alpaca_token_set_precision": 0.5457914115905191,
      "eval_python_code_alpaca_token_set_recall": 0.45169799507626196,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 226875
    },
    {
      "epoch": 43.56,
      "eval_wikibio_accuracy": 0.32953125,
      "eval_wikibio_bleu_score": 6.245418193755065,
      "eval_wikibio_bleu_score_sem": 0.21070933554292692,
      "eval_wikibio_emb_cos_sim": 0.7470102310180664,
      "eval_wikibio_emb_cos_sim_sem": 0.008467278158236323,
      "eval_wikibio_emb_top1_equal": 0.1640625,
      "eval_wikibio_emb_top1_equal_sem": 0.03286167651298939,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.647217273712158,
      "eval_wikibio_n_ngrams_match_1": 10.476,
      "eval_wikibio_n_ngrams_match_2": 3.57,
      "eval_wikibio_n_ngrams_match_3": 1.324,
      "eval_wikibio_num_pred_words": 36.954,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 38.36775041187095,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36972855906308055,
      "eval_wikibio_runtime": 13.4114,
      "eval_wikibio_samples_per_second": 37.282,
      "eval_wikibio_steps_per_second": 0.075,
      "eval_wikibio_token_set_f1": 0.3321133123839618,
      "eval_wikibio_token_set_f1_sem": 0.005077694500044495,
      "eval_wikibio_token_set_precision": 0.34170982891271984,
      "eval_wikibio_token_set_recall": 0.3383033697172483,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 226875
    },
    {
      "epoch": 43.56,
      "eval_nq_accuracy": 0.536375,
      "eval_nq_bleu_score": 12.183205604592198,
      "eval_nq_bleu_score_sem": 0.4936275432903302,
      "eval_nq_emb_cos_sim": 0.838262677192688,
      "eval_nq_emb_cos_sim_sem": 0.006786116103755178,
      "eval_nq_emb_top1_equal": 0.328125,
      "eval_nq_emb_top1_equal_sem": 0.041664103776406315,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.113842248916626,
      "eval_nq_n_ngrams_match_1": 23.624,
      "eval_nq_n_ngrams_match_2": 8.814,
      "eval_nq_n_ngrams_match_3": 4.112,
      "eval_nq_num_pred_words": 48.95,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.279994042703253,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45859884429649944,
      "eval_nq_runtime": 15.0148,
      "eval_nq_samples_per_second": 33.3,
      "eval_nq_steps_per_second": 0.067,
      "eval_nq_token_set_f1": 0.47102018114009236,
      "eval_nq_token_set_f1_sem": 0.00488656495573706,
      "eval_nq_token_set_precision": 0.42948122136816175,
      "eval_nq_token_set_recall": 0.5287281667290759,
      "eval_nq_true_num_tokens": 64.0,
      "step": 226875
    },
    {
      "epoch": 43.56,
      "learning_rate": 0.001,
      "loss": 2.4964,
      "step": 226884
    },
    {
      "epoch": 43.57,
      "learning_rate": 0.001,
      "loss": 2.4964,
      "step": 226896
    },
    {
      "epoch": 43.57,
      "learning_rate": 0.001,
      "loss": 2.4819,
      "step": 226908
    },
    {
      "epoch": 43.57,
      "learning_rate": 0.001,
      "loss": 2.4918,
      "step": 226920
    },
    {
      "epoch": 43.57,
      "learning_rate": 0.001,
      "loss": 2.4955,
      "step": 226932
    },
    {
      "epoch": 43.58,
      "learning_rate": 0.001,
      "loss": 2.4933,
      "step": 226944
    },
    {
      "epoch": 43.58,
      "learning_rate": 0.001,
      "loss": 2.4844,
      "step": 226956
    },
    {
      "epoch": 43.58,
      "learning_rate": 0.001,
      "loss": 2.4911,
      "step": 226968
    },
    {
      "epoch": 43.58,
      "learning_rate": 0.001,
      "loss": 2.4893,
      "step": 226980
    },
    {
      "epoch": 43.59,
      "learning_rate": 0.001,
      "loss": 2.4926,
      "step": 226992
    },
    {
      "epoch": 43.59,
      "learning_rate": 0.001,
      "loss": 2.4849,
      "step": 227004
    },
    {
      "epoch": 43.59,
      "learning_rate": 0.001,
      "loss": 2.4972,
      "step": 227016
    },
    {
      "epoch": 43.59,
      "learning_rate": 0.001,
      "loss": 2.4904,
      "step": 227028
    },
    {
      "epoch": 43.59,
      "learning_rate": 0.001,
      "loss": 2.4847,
      "step": 227040
    },
    {
      "epoch": 43.6,
      "learning_rate": 0.001,
      "loss": 2.4967,
      "step": 227052
    },
    {
      "epoch": 43.6,
      "learning_rate": 0.001,
      "loss": 2.4927,
      "step": 227064
    },
    {
      "epoch": 43.6,
      "learning_rate": 0.001,
      "loss": 2.4886,
      "step": 227076
    },
    {
      "epoch": 43.6,
      "learning_rate": 0.001,
      "loss": 2.4924,
      "step": 227088
    },
    {
      "epoch": 43.61,
      "learning_rate": 0.001,
      "loss": 2.4918,
      "step": 227100
    },
    {
      "epoch": 43.61,
      "learning_rate": 0.001,
      "loss": 2.4936,
      "step": 227112
    },
    {
      "epoch": 43.61,
      "learning_rate": 0.001,
      "loss": 2.4959,
      "step": 227124
    },
    {
      "epoch": 43.61,
      "learning_rate": 0.001,
      "loss": 2.4958,
      "step": 227136
    },
    {
      "epoch": 43.62,
      "learning_rate": 0.001,
      "loss": 2.4919,
      "step": 227148
    },
    {
      "epoch": 43.62,
      "learning_rate": 0.001,
      "loss": 2.4952,
      "step": 227160
    },
    {
      "epoch": 43.62,
      "learning_rate": 0.001,
      "loss": 2.4994,
      "step": 227172
    },
    {
      "epoch": 43.62,
      "learning_rate": 0.001,
      "loss": 2.4994,
      "step": 227184
    },
    {
      "epoch": 43.62,
      "learning_rate": 0.001,
      "loss": 2.4902,
      "step": 227196
    },
    {
      "epoch": 43.63,
      "learning_rate": 0.001,
      "loss": 2.4859,
      "step": 227208
    },
    {
      "epoch": 43.63,
      "learning_rate": 0.001,
      "loss": 2.4906,
      "step": 227220
    },
    {
      "epoch": 43.63,
      "learning_rate": 0.001,
      "loss": 2.493,
      "step": 227232
    },
    {
      "epoch": 43.63,
      "learning_rate": 0.001,
      "loss": 2.4837,
      "step": 227244
    },
    {
      "epoch": 43.64,
      "learning_rate": 0.001,
      "loss": 2.4945,
      "step": 227256
    },
    {
      "epoch": 43.64,
      "learning_rate": 0.001,
      "loss": 2.4883,
      "step": 227268
    },
    {
      "epoch": 43.64,
      "learning_rate": 0.001,
      "loss": 2.4877,
      "step": 227280
    },
    {
      "epoch": 43.64,
      "learning_rate": 0.001,
      "loss": 2.5003,
      "step": 227292
    },
    {
      "epoch": 43.65,
      "learning_rate": 0.001,
      "loss": 2.4878,
      "step": 227304
    },
    {
      "epoch": 43.65,
      "learning_rate": 0.001,
      "loss": 2.4961,
      "step": 227316
    },
    {
      "epoch": 43.65,
      "learning_rate": 0.001,
      "loss": 2.4949,
      "step": 227328
    },
    {
      "epoch": 43.65,
      "learning_rate": 0.001,
      "loss": 2.4941,
      "step": 227340
    },
    {
      "epoch": 43.65,
      "learning_rate": 0.001,
      "loss": 2.4849,
      "step": 227352
    },
    {
      "epoch": 43.66,
      "learning_rate": 0.001,
      "loss": 2.4806,
      "step": 227364
    },
    {
      "epoch": 43.66,
      "learning_rate": 0.001,
      "loss": 2.4924,
      "step": 227376
    },
    {
      "epoch": 43.66,
      "learning_rate": 0.001,
      "loss": 2.496,
      "step": 227388
    },
    {
      "epoch": 43.66,
      "learning_rate": 0.001,
      "loss": 2.4899,
      "step": 227400
    },
    {
      "epoch": 43.67,
      "learning_rate": 0.001,
      "loss": 2.4922,
      "step": 227412
    },
    {
      "epoch": 43.67,
      "learning_rate": 0.001,
      "loss": 2.5019,
      "step": 227424
    },
    {
      "epoch": 43.67,
      "learning_rate": 0.001,
      "loss": 2.4828,
      "step": 227436
    },
    {
      "epoch": 43.67,
      "learning_rate": 0.001,
      "loss": 2.488,
      "step": 227448
    },
    {
      "epoch": 43.68,
      "learning_rate": 0.001,
      "loss": 2.4967,
      "step": 227460
    },
    {
      "epoch": 43.68,
      "learning_rate": 0.001,
      "loss": 2.4885,
      "step": 227472
    },
    {
      "epoch": 43.68,
      "learning_rate": 0.001,
      "loss": 2.4862,
      "step": 227484
    },
    {
      "epoch": 43.68,
      "learning_rate": 0.001,
      "loss": 2.4945,
      "step": 227496
    },
    {
      "epoch": 43.68,
      "eval_ag_news_accuracy": 0.328875,
      "eval_ag_news_bleu_score": 5.286246946761519,
      "eval_ag_news_bleu_score_sem": 0.17030936643136907,
      "eval_ag_news_emb_cos_sim": 0.8202008008956909,
      "eval_ag_news_emb_cos_sim_sem": 0.0076707830638104,
      "eval_ag_news_emb_top1_equal": 0.3046875,
      "eval_ag_news_emb_top1_equal_sem": 0.04084279867618665,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4668161869049072,
      "eval_ag_news_n_ngrams_match_1": 14.472,
      "eval_ag_news_n_ngrams_match_2": 3.366,
      "eval_ag_news_n_ngrams_match_3": 0.982,
      "eval_ag_news_num_pred_words": 46.132,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.03458777031133,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3630351606190766,
      "eval_ag_news_runtime": 14.7917,
      "eval_ag_news_samples_per_second": 33.803,
      "eval_ag_news_steps_per_second": 0.068,
      "eval_ag_news_token_set_f1": 0.3604192821392029,
      "eval_ag_news_token_set_f1_sem": 0.004494872211112179,
      "eval_ag_news_token_set_precision": 0.3464709988923379,
      "eval_ag_news_token_set_recall": 0.39039131299477525,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 227500
    },
    {
      "epoch": 43.68,
      "eval_anthropic_toxic_prompts_accuracy": 0.11578125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.3022331245236614,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12581546669826427,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6830217242240906,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008841839176586163,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.210660219192505,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.414,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.048,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.786,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.246,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.79545125454489,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.22190634144100985,
      "eval_anthropic_toxic_prompts_runtime": 14.2403,
      "eval_anthropic_toxic_prompts_samples_per_second": 35.112,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.07,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3617547289111145,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00640227351897671,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44939880893583517,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3286458522756195,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 227500
    },
    {
      "epoch": 43.68,
      "eval_arxiv_accuracy": 0.35290625,
      "eval_arxiv_bleu_score": 4.65388300473452,
      "eval_arxiv_bleu_score_sem": 0.14106568293216215,
      "eval_arxiv_emb_cos_sim": 0.7914077639579773,
      "eval_arxiv_emb_cos_sim_sem": 0.007106011342497372,
      "eval_arxiv_emb_top1_equal": 0.28125,
      "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3320858478546143,
      "eval_arxiv_n_ngrams_match_1": 15.488,
      "eval_arxiv_n_ngrams_match_2": 3.122,
      "eval_arxiv_n_ngrams_match_3": 0.778,
      "eval_arxiv_num_pred_words": 40.636,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.99667765214614,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.37164831168300744,
      "eval_arxiv_runtime": 13.837,
      "eval_arxiv_samples_per_second": 36.135,
      "eval_arxiv_steps_per_second": 0.072,
      "eval_arxiv_token_set_f1": 0.36418839645459333,
      "eval_arxiv_token_set_f1_sem": 0.00426535713540038,
      "eval_arxiv_token_set_precision": 0.31721653582457804,
      "eval_arxiv_token_set_recall": 0.44178076341031575,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 227500
    },
    {
      "epoch": 43.68,
      "eval_python_code_alpaca_accuracy": 0.16353125,
      "eval_python_code_alpaca_bleu_score": 4.67672936228896,
      "eval_python_code_alpaca_bleu_score_sem": 0.15054175835514544,
      "eval_python_code_alpaca_emb_cos_sim": 0.7713525295257568,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007235496997834966,
      "eval_python_code_alpaca_emb_top1_equal": 0.171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03347745514062371,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8360798358917236,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.832,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.89,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.978,
      "eval_python_code_alpaca_num_pred_words": 43.102,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.04880027057715,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3408123858704276,
      "eval_python_code_alpaca_runtime": 15.0396,
      "eval_python_code_alpaca_samples_per_second": 33.246,
      "eval_python_code_alpaca_steps_per_second": 0.066,
      "eval_python_code_alpaca_token_set_f1": 0.4810487994843043,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005479756175395937,
      "eval_python_code_alpaca_token_set_precision": 0.5396984804612589,
      "eval_python_code_alpaca_token_set_recall": 0.45620973247543833,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 227500
    },
    {
      "epoch": 43.68,
      "eval_wikibio_accuracy": 0.33303125,
      "eval_wikibio_bleu_score": 5.925104478803174,
      "eval_wikibio_bleu_score_sem": 0.218554434822167,
      "eval_wikibio_emb_cos_sim": 0.7368950843811035,
      "eval_wikibio_emb_cos_sim_sem": 0.01096662512905831,
      "eval_wikibio_emb_top1_equal": 0.1953125,
      "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6045265197753906,
      "eval_wikibio_n_ngrams_match_1": 9.858,
      "eval_wikibio_n_ngrams_match_2": 3.32,
      "eval_wikibio_n_ngrams_match_3": 1.228,
      "eval_wikibio_num_pred_words": 35.218,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 36.76427257958304,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.34979286954505556,
      "eval_wikibio_runtime": 14.1959,
      "eval_wikibio_samples_per_second": 35.221,
      "eval_wikibio_steps_per_second": 0.07,
      "eval_wikibio_token_set_f1": 0.3159993821913835,
      "eval_wikibio_token_set_f1_sem": 0.005814667657070419,
      "eval_wikibio_token_set_precision": 0.3223637468900714,
      "eval_wikibio_token_set_recall": 0.32918955818752543,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 227500
    },
    {
      "epoch": 43.68,
      "eval_nq_accuracy": 0.537375,
      "eval_nq_bleu_score": 12.204728865234786,
      "eval_nq_bleu_score_sem": 0.48995800363840586,
      "eval_nq_emb_cos_sim": 0.8363226056098938,
      "eval_nq_emb_cos_sim_sem": 0.006792588279179632,
      "eval_nq_emb_top1_equal": 0.328125,
      "eval_nq_emb_top1_equal_sem": 0.041664103776406315,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1125874519348145,
      "eval_nq_n_ngrams_match_1": 23.714,
      "eval_nq_n_ngrams_match_2": 8.848,
      "eval_nq_n_ngrams_match_3": 4.156,
      "eval_nq_num_pred_words": 48.752,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.269610846932784,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.46150773248573296,
      "eval_nq_runtime": 14.5262,
      "eval_nq_samples_per_second": 34.421,
      "eval_nq_steps_per_second": 0.069,
      "eval_nq_token_set_f1": 0.472206477608648,
      "eval_nq_token_set_f1_sem": 0.00487267736465192,
      "eval_nq_token_set_precision": 0.43085782040690107,
      "eval_nq_token_set_recall": 0.529571015474834,
      "eval_nq_true_num_tokens": 64.0,
      "step": 227500
    },
    {
      "epoch": 43.68,
      "learning_rate": 0.001,
      "loss": 2.4968,
      "step": 227508
    },
    {
      "epoch": 43.69,
      "learning_rate": 0.001,
      "loss": 2.4846,
      "step": 227520
    },
    {
      "epoch": 43.69,
      "learning_rate": 0.001,
      "loss": 2.4928,
      "step": 227532
    },
    {
      "epoch": 43.69,
      "learning_rate": 0.001,
      "loss": 2.4865,
      "step": 227544
    },
    {
      "epoch": 43.69,
      "learning_rate": 0.001,
      "loss": 2.4851,
      "step": 227556
    },
    {
      "epoch": 43.7,
      "learning_rate": 0.001,
      "loss": 2.5007,
      "step": 227568
    },
    {
      "epoch": 43.7,
      "learning_rate": 0.001,
      "loss": 2.4921,
      "step": 227580
    },
    {
      "epoch": 43.7,
      "learning_rate": 0.001,
      "loss": 2.4789,
      "step": 227592
    },
    {
      "epoch": 43.7,
      "learning_rate": 0.001,
      "loss": 2.4913,
      "step": 227604
    },
    {
      "epoch": 43.71,
      "learning_rate": 0.001,
      "loss": 2.49,
      "step": 227616
    },
    {
      "epoch": 43.71,
      "learning_rate": 0.001,
      "loss": 2.4911,
      "step": 227628
    },
    {
      "epoch": 43.71,
      "learning_rate": 0.001,
      "loss": 2.4978,
      "step": 227640
    },
    {
      "epoch": 43.71,
      "learning_rate": 0.001,
      "loss": 2.4908,
      "step": 227652
    },
    {
      "epoch": 43.71,
      "learning_rate": 0.001,
      "loss": 2.4929,
      "step": 227664
    },
    {
      "epoch": 43.72,
      "learning_rate": 0.001,
      "loss": 2.4863,
      "step": 227676
    },
    {
      "epoch": 43.72,
      "learning_rate": 0.001,
      "loss": 2.4903,
      "step": 227688
    },
    {
      "epoch": 43.72,
      "learning_rate": 0.001,
      "loss": 2.4911,
      "step": 227700
    },
    {
      "epoch": 43.72,
      "learning_rate": 0.001,
      "loss": 2.4965,
      "step": 227712
    },
    {
      "epoch": 43.73,
      "learning_rate": 0.001,
      "loss": 2.4908,
      "step": 227724
    },
    {
      "epoch": 43.73,
      "learning_rate": 0.001,
      "loss": 2.4874,
      "step": 227736
    },
    {
      "epoch": 43.73,
      "learning_rate": 0.001,
      "loss": 2.4934,
      "step": 227748
    },
    {
      "epoch": 43.73,
      "learning_rate": 0.001,
      "loss": 2.4773,
      "step": 227760
    },
    {
      "epoch": 43.74,
      "learning_rate": 0.001,
      "loss": 2.4879,
      "step": 227772
    },
    {
      "epoch": 43.74,
      "learning_rate": 0.001,
      "loss": 2.495,
      "step": 227784
    },
    {
      "epoch": 43.74,
      "learning_rate": 0.001,
      "loss": 2.4853,
      "step": 227796
    },
    {
      "epoch": 43.74,
      "learning_rate": 0.001,
      "loss": 2.4861,
      "step": 227808
    },
    {
      "epoch": 43.74,
      "learning_rate": 0.001,
      "loss": 2.4913,
      "step": 227820
    },
    {
      "epoch": 43.75,
      "learning_rate": 0.001,
      "loss": 2.4984,
      "step": 227832
    },
    {
      "epoch": 43.75,
      "learning_rate": 0.001,
      "loss": 2.485,
      "step": 227844
    },
    {
      "epoch": 43.75,
      "learning_rate": 0.001,
      "loss": 2.4958,
      "step": 227856
    },
    {
      "epoch": 43.75,
      "learning_rate": 0.001,
      "loss": 2.4875,
      "step": 227868
    },
    {
      "epoch": 43.76,
      "learning_rate": 0.001,
      "loss": 2.4972,
      "step": 227880
    },
    {
      "epoch": 43.76,
      "learning_rate": 0.001,
      "loss": 2.4864,
      "step": 227892
    },
    {
      "epoch": 43.76,
      "learning_rate": 0.001,
      "loss": 2.4881,
      "step": 227904
    },
    {
      "epoch": 43.76,
      "learning_rate": 0.001,
      "loss": 2.4894,
      "step": 227916
    },
    {
      "epoch": 43.76,
      "learning_rate": 0.001,
      "loss": 2.5012,
      "step": 227928
    },
    {
      "epoch": 43.77,
      "learning_rate": 0.001,
      "loss": 2.4894,
      "step": 227940
    },
    {
      "epoch": 43.77,
      "learning_rate": 0.001,
      "loss": 2.5086,
      "step": 227952
    },
    {
      "epoch": 43.77,
      "learning_rate": 0.001,
      "loss": 2.4863,
      "step": 227964
    },
    {
      "epoch": 43.77,
      "learning_rate": 0.001,
      "loss": 2.4847,
      "step": 227976
    },
    {
      "epoch": 43.78,
      "learning_rate": 0.001,
      "loss": 2.4902,
      "step": 227988
    },
    {
      "epoch": 43.78,
      "learning_rate": 0.001,
      "loss": 2.4918,
      "step": 228000
    },
    {
      "epoch": 43.78,
      "learning_rate": 0.001,
      "loss": 2.4971,
      "step": 228012
    },
    {
      "epoch": 43.78,
      "learning_rate": 0.001,
      "loss": 2.4767,
      "step": 228024
    },
    {
      "epoch": 43.79,
      "learning_rate": 0.001,
      "loss": 2.4875,
      "step": 228036
    },
    {
      "epoch": 43.79,
      "learning_rate": 0.001,
      "loss": 2.4923,
      "step": 228048
    },
    {
      "epoch": 43.79,
      "learning_rate": 0.001,
      "loss": 2.4829,
      "step": 228060
    },
    {
      "epoch": 43.79,
      "learning_rate": 0.001,
      "loss": 2.4964,
      "step": 228072
    },
    {
      "epoch": 43.79,
      "learning_rate": 0.001,
      "loss": 2.489,
      "step": 228084
    },
    {
      "epoch": 43.8,
      "learning_rate": 0.001,
      "loss": 2.4972,
      "step": 228096
    },
    {
      "epoch": 43.8,
      "learning_rate": 0.001,
      "loss": 2.4957,
      "step": 228108
    },
    {
      "epoch": 43.8,
      "learning_rate": 0.001,
      "loss": 2.4864,
      "step": 228120
    },
    {
      "epoch": 43.8,
      "eval_ag_news_accuracy": 0.329375,
      "eval_ag_news_bleu_score": 5.218972618216884,
      "eval_ag_news_bleu_score_sem": 0.16391128781218012,
      "eval_ag_news_emb_cos_sim": 0.8281189203262329,
      "eval_ag_news_emb_cos_sim_sem": 0.0063615096540657005,
      "eval_ag_news_emb_top1_equal": 0.296875,
      "eval_ag_news_emb_top1_equal_sem": 0.04054163310179599,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.458378791809082,
      "eval_ag_news_n_ngrams_match_1": 14.552,
      "eval_ag_news_n_ngrams_match_2": 3.332,
      "eval_ag_news_n_ngrams_match_3": 0.986,
      "eval_ag_news_num_pred_words": 46.716,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.765436361679612,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3652443826359567,
      "eval_ag_news_runtime": 14.1764,
      "eval_ag_news_samples_per_second": 35.27,
      "eval_ag_news_steps_per_second": 0.071,
      "eval_ag_news_token_set_f1": 0.36284572280796107,
      "eval_ag_news_token_set_f1_sem": 0.004418511882036516,
      "eval_ag_news_token_set_precision": 0.3495190759914665,
      "eval_ag_news_token_set_recall": 0.39328671755470007,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 228125
    },
    {
      "epoch": 43.8,
      "eval_anthropic_toxic_prompts_accuracy": 0.1163125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2897430739701234,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1206368963141211,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6725733280181885,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009591677051166017,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.1959078311920166,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.312,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.016,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.77,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.248,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.432344071333684,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21941529448734093,
      "eval_anthropic_toxic_prompts_runtime": 13.7867,
      "eval_anthropic_toxic_prompts_samples_per_second": 36.267,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.073,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35693994853124955,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006663661397444088,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44442798369884906,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3271182805600298,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 228125
    },
    {
      "epoch": 43.8,
      "eval_arxiv_accuracy": 0.35428125,
      "eval_arxiv_bleu_score": 4.411160135841091,
      "eval_arxiv_bleu_score_sem": 0.12380017158173169,
      "eval_arxiv_emb_cos_sim": 0.7858262062072754,
      "eval_arxiv_emb_cos_sim_sem": 0.006228739837266796,
      "eval_arxiv_emb_top1_equal": 0.3125,
      "eval_arxiv_emb_top1_equal_sem": 0.041130074229814934,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.311851739883423,
      "eval_arxiv_n_ngrams_match_1": 15.628,
      "eval_arxiv_n_ngrams_match_2": 3.124,
      "eval_arxiv_n_ngrams_match_3": 0.702,
      "eval_arxiv_num_pred_words": 39.984,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.435882580930063,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.37460315893574037,
      "eval_arxiv_runtime": 14.2604,
      "eval_arxiv_samples_per_second": 35.062,
      "eval_arxiv_steps_per_second": 0.07,
      "eval_arxiv_token_set_f1": 0.3687741371131204,
      "eval_arxiv_token_set_f1_sem": 0.0041142125403771344,
      "eval_arxiv_token_set_precision": 0.31997554079365725,
      "eval_arxiv_token_set_recall": 0.45575428448005867,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 228125
    },
    {
      "epoch": 43.8,
      "eval_python_code_alpaca_accuracy": 0.16203125,
      "eval_python_code_alpaca_bleu_score": 4.534021089959997,
      "eval_python_code_alpaca_bleu_score_sem": 0.14310167297927237,
      "eval_python_code_alpaca_emb_cos_sim": 0.762942373752594,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007049628078812907,
      "eval_python_code_alpaca_emb_top1_equal": 0.1328125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.852388858795166,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.664,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.772,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.892,
      "eval_python_code_alpaca_num_pred_words": 42.334,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.329129277197286,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.34003918799971394,
      "eval_python_code_alpaca_runtime": 15.8406,
      "eval_python_code_alpaca_samples_per_second": 31.564,
      "eval_python_code_alpaca_steps_per_second": 0.063,
      "eval_python_code_alpaca_token_set_f1": 0.4756593455806314,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005354015323394554,
      "eval_python_code_alpaca_token_set_precision": 0.5266720470534918,
      "eval_python_code_alpaca_token_set_recall": 0.4557619544848569,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 228125
    },
    {
      "epoch": 43.8,
      "eval_wikibio_accuracy": 0.334125,
      "eval_wikibio_bleu_score": 6.091998676287523,
      "eval_wikibio_bleu_score_sem": 0.21867120931586123,
      "eval_wikibio_emb_cos_sim": 0.7392401099205017,
      "eval_wikibio_emb_cos_sim_sem": 0.00929029613272125,
      "eval_wikibio_emb_top1_equal": 0.203125,
      "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.5713040828704834,
      "eval_wikibio_n_ngrams_match_1": 10.034,
      "eval_wikibio_n_ngrams_match_2": 3.384,
      "eval_wikibio_n_ngrams_match_3": 1.256,
      "eval_wikibio_num_pred_words": 35.45,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 35.56293994583742,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.35867961807613336,
      "eval_wikibio_runtime": 14.1776,
      "eval_wikibio_samples_per_second": 35.267,
      "eval_wikibio_steps_per_second": 0.071,
      "eval_wikibio_token_set_f1": 0.32150157306675037,
      "eval_wikibio_token_set_f1_sem": 0.005495988730047056,
      "eval_wikibio_token_set_precision": 0.3296388393259386,
      "eval_wikibio_token_set_recall": 0.33309311224423366,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 228125
    },
    {
      "epoch": 43.8,
      "eval_nq_accuracy": 0.53803125,
      "eval_nq_bleu_score": 12.22990612075819,
      "eval_nq_bleu_score_sem": 0.486046002316013,
      "eval_nq_emb_cos_sim": 0.8454262018203735,
      "eval_nq_emb_cos_sim_sem": 0.006555047323883218,
      "eval_nq_emb_top1_equal": 0.2890625,
      "eval_nq_emb_top1_equal_sem": 0.04022626667363519,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1076643466949463,
      "eval_nq_n_ngrams_match_1": 23.516,
      "eval_nq_n_ngrams_match_2": 8.832,
      "eval_nq_n_ngrams_match_3": 4.096,
      "eval_nq_num_pred_words": 49.03,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.228998733321404,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45847912262809987,
      "eval_nq_runtime": 15.165,
      "eval_nq_samples_per_second": 32.971,
      "eval_nq_steps_per_second": 0.066,
      "eval_nq_token_set_f1": 0.47190121862554396,
      "eval_nq_token_set_f1_sem": 0.004918196606022275,
      "eval_nq_token_set_precision": 0.4297765646887952,
      "eval_nq_token_set_recall": 0.5326213888032779,
      "eval_nq_true_num_tokens": 64.0,
      "step": 228125
    },
    {
      "epoch": 43.8,
      "learning_rate": 0.001,
      "loss": 2.488,
      "step": 228132
    },
    {
      "epoch": 43.81,
      "learning_rate": 0.001,
      "loss": 2.4964,
      "step": 228144
    },
    {
      "epoch": 43.81,
      "learning_rate": 0.001,
      "loss": 2.4913,
      "step": 228156
    },
    {
      "epoch": 43.81,
      "learning_rate": 0.001,
      "loss": 2.4906,
      "step": 228168
    },
    {
      "epoch": 43.81,
      "learning_rate": 0.001,
      "loss": 2.4798,
      "step": 228180
    },
    {
      "epoch": 43.82,
      "learning_rate": 0.001,
      "loss": 2.4935,
      "step": 228192
    },
    {
      "epoch": 43.82,
      "learning_rate": 0.001,
      "loss": 2.4941,
      "step": 228204
    },
    {
      "epoch": 43.82,
      "learning_rate": 0.001,
      "loss": 2.4916,
      "step": 228216
    },
    {
      "epoch": 43.82,
      "learning_rate": 0.001,
      "loss": 2.4896,
      "step": 228228
    },
    {
      "epoch": 43.82,
      "learning_rate": 0.001,
      "loss": 2.4917,
      "step": 228240
    },
    {
      "epoch": 43.83,
      "learning_rate": 0.001,
      "loss": 2.4952,
      "step": 228252
    },
    {
      "epoch": 43.83,
      "learning_rate": 0.001,
      "loss": 2.4865,
      "step": 228264
    },
    {
      "epoch": 43.83,
      "learning_rate": 0.001,
      "loss": 2.4912,
      "step": 228276
    },
    {
      "epoch": 43.83,
      "learning_rate": 0.001,
      "loss": 2.4981,
      "step": 228288
    },
    {
      "epoch": 43.84,
      "learning_rate": 0.001,
      "loss": 2.4932,
      "step": 228300
    },
    {
      "epoch": 43.84,
      "learning_rate": 0.001,
      "loss": 2.4995,
      "step": 228312
    },
    {
      "epoch": 43.84,
      "learning_rate": 0.001,
      "loss": 2.4906,
      "step": 228324
    },
    {
      "epoch": 43.84,
      "learning_rate": 0.001,
      "loss": 2.4885,
      "step": 228336
    },
    {
      "epoch": 43.85,
      "learning_rate": 0.001,
      "loss": 2.4986,
      "step": 228348
    },
    {
      "epoch": 43.85,
      "learning_rate": 0.001,
      "loss": 2.5008,
      "step": 228360
    },
    {
      "epoch": 43.85,
      "learning_rate": 0.001,
      "loss": 2.4961,
      "step": 228372
    },
    {
      "epoch": 43.85,
      "learning_rate": 0.001,
      "loss": 2.4985,
      "step": 228384
    },
    {
      "epoch": 43.85,
      "learning_rate": 0.001,
      "loss": 2.4949,
      "step": 228396
    },
    {
      "epoch": 43.86,
      "learning_rate": 0.001,
      "loss": 2.4904,
      "step": 228408
    },
    {
      "epoch": 43.86,
      "learning_rate": 0.001,
      "loss": 2.4824,
      "step": 228420
    },
    {
      "epoch": 43.86,
      "learning_rate": 0.001,
      "loss": 2.4962,
      "step": 228432
    },
    {
      "epoch": 43.86,
      "learning_rate": 0.001,
      "loss": 2.4946,
      "step": 228444
    },
    {
      "epoch": 43.87,
      "learning_rate": 0.001,
      "loss": 2.4916,
      "step": 228456
    },
    {
      "epoch": 43.87,
      "learning_rate": 0.001,
      "loss": 2.4953,
      "step": 228468
    },
    {
      "epoch": 43.87,
      "learning_rate": 0.001,
      "loss": 2.4932,
      "step": 228480
    },
    {
      "epoch": 43.87,
      "learning_rate": 0.001,
      "loss": 2.4829,
      "step": 228492
    },
    {
      "epoch": 43.88,
      "learning_rate": 0.001,
      "loss": 2.4964,
      "step": 228504
    },
    {
      "epoch": 43.88,
      "learning_rate": 0.001,
      "loss": 2.4867,
      "step": 228516
    },
    {
      "epoch": 43.88,
      "learning_rate": 0.001,
      "loss": 2.4924,
      "step": 228528
    },
    {
      "epoch": 43.88,
      "learning_rate": 0.001,
      "loss": 2.4875,
      "step": 228540
    },
    {
      "epoch": 43.88,
      "learning_rate": 0.001,
      "loss": 2.4978,
      "step": 228552
    },
    {
      "epoch": 43.89,
      "learning_rate": 0.001,
      "loss": 2.4874,
      "step": 228564
    },
    {
      "epoch": 43.89,
      "learning_rate": 0.001,
      "loss": 2.4885,
      "step": 228576
    },
    {
      "epoch": 43.89,
      "learning_rate": 0.001,
      "loss": 2.4903,
      "step": 228588
    },
    {
      "epoch": 43.89,
      "learning_rate": 0.001,
      "loss": 2.4886,
      "step": 228600
    },
    {
      "epoch": 43.9,
      "learning_rate": 0.001,
      "loss": 2.4924,
      "step": 228612
    },
    {
      "epoch": 43.9,
      "learning_rate": 0.001,
      "loss": 2.4908,
      "step": 228624
    },
    {
      "epoch": 43.9,
      "learning_rate": 0.001,
      "loss": 2.5114,
      "step": 228636
    },
    {
      "epoch": 43.9,
      "learning_rate": 0.001,
      "loss": 2.4893,
      "step": 228648
    },
    {
      "epoch": 43.91,
      "learning_rate": 0.001,
      "loss": 2.4994,
      "step": 228660
    },
    {
      "epoch": 43.91,
      "learning_rate": 0.001,
      "loss": 2.485,
      "step": 228672
    },
    {
      "epoch": 43.91,
      "learning_rate": 0.001,
      "loss": 2.4948,
      "step": 228684
    },
    {
      "epoch": 43.91,
      "learning_rate": 0.001,
      "loss": 2.492,
      "step": 228696
    },
    {
      "epoch": 43.91,
      "learning_rate": 0.001,
      "loss": 2.4995,
      "step": 228708
    },
    {
      "epoch": 43.92,
      "learning_rate": 0.001,
      "loss": 2.4959,
      "step": 228720
    },
    {
      "epoch": 43.92,
      "learning_rate": 0.001,
      "loss": 2.495,
      "step": 228732
    },
    {
      "epoch": 43.92,
      "learning_rate": 0.001,
      "loss": 2.4848,
      "step": 228744
    },
    {
      "epoch": 43.92,
      "eval_ag_news_accuracy": 0.32903125,
      "eval_ag_news_bleu_score": 5.122464930551099,
      "eval_ag_news_bleu_score_sem": 0.15941103150767458,
      "eval_ag_news_emb_cos_sim": 0.826166570186615,
      "eval_ag_news_emb_cos_sim_sem": 0.006659244870114276,
      "eval_ag_news_emb_top1_equal": 0.234375,
      "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.45980167388916,
      "eval_ag_news_n_ngrams_match_1": 14.496,
      "eval_ag_news_n_ngrams_match_2": 3.378,
      "eval_ag_news_n_ngrams_match_3": 0.972,
      "eval_ag_news_num_pred_words": 46.59,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.810667003147977,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3639347275150985,
      "eval_ag_news_runtime": 14.209,
      "eval_ag_news_samples_per_second": 35.189,
      "eval_ag_news_steps_per_second": 0.07,
      "eval_ag_news_token_set_f1": 0.3605539755511987,
      "eval_ag_news_token_set_f1_sem": 0.0043332377796799285,
      "eval_ag_news_token_set_precision": 0.34775428086718624,
      "eval_ag_news_token_set_recall": 0.3895662581363894,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 228750
    },
    {
      "epoch": 43.92,
      "eval_anthropic_toxic_prompts_accuracy": 0.11646875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.279443862478997,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12529580128001774,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6832237243652344,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009008737403966762,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.197946071624756,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.378,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.016,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.774,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.818,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.482193848544117,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2201173881010906,
      "eval_anthropic_toxic_prompts_runtime": 13.5463,
      "eval_anthropic_toxic_prompts_samples_per_second": 36.91,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.074,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3684695633841047,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0067072920874347375,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44838591841292746,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3385878893256619,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 228750
    },
    {
      "epoch": 43.92,
      "eval_arxiv_accuracy": 0.35428125,
      "eval_arxiv_bleu_score": 4.603703466028948,
      "eval_arxiv_bleu_score_sem": 0.13452632685735333,
      "eval_arxiv_emb_cos_sim": 0.7816855907440186,
      "eval_arxiv_emb_cos_sim_sem": 0.007311415984348738,
      "eval_arxiv_emb_top1_equal": 0.2890625,
      "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3170077800750732,
      "eval_arxiv_n_ngrams_match_1": 15.546,
      "eval_arxiv_n_ngrams_match_2": 3.206,
      "eval_arxiv_n_ngrams_match_3": 0.758,
      "eval_arxiv_num_pred_words": 39.946,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.57770840984758,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3739435874807563,
      "eval_arxiv_runtime": 15.0488,
      "eval_arxiv_samples_per_second": 33.225,
      "eval_arxiv_steps_per_second": 0.066,
      "eval_arxiv_token_set_f1": 0.3662341742629446,
      "eval_arxiv_token_set_f1_sem": 0.00450767857689167,
      "eval_arxiv_token_set_precision": 0.31821886913500225,
      "eval_arxiv_token_set_recall": 0.4488378199357054,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 228750
    },
    {
      "epoch": 43.92,
      "eval_python_code_alpaca_accuracy": 0.1638125,
      "eval_python_code_alpaca_bleu_score": 4.74719281991187,
      "eval_python_code_alpaca_bleu_score_sem": 0.15097743269272845,
      "eval_python_code_alpaca_emb_cos_sim": 0.7565107345581055,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008246849481979839,
      "eval_python_code_alpaca_emb_top1_equal": 0.1640625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.812595844268799,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.754,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.99,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.984,
      "eval_python_code_alpaca_num_pred_words": 42.192,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 16.65309099045313,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3398873282204268,
      "eval_python_code_alpaca_runtime": 13.4391,
      "eval_python_code_alpaca_samples_per_second": 37.205,
      "eval_python_code_alpaca_steps_per_second": 0.074,
      "eval_python_code_alpaca_token_set_f1": 0.47850209079121614,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005644283108384785,
      "eval_python_code_alpaca_token_set_precision": 0.5340946357203107,
      "eval_python_code_alpaca_token_set_recall": 0.459099245729098,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 228750
    },
    {
      "epoch": 43.92,
      "eval_wikibio_accuracy": 0.3329375,
      "eval_wikibio_bleu_score": 6.23548243281245,
      "eval_wikibio_bleu_score_sem": 0.22414342021633893,
      "eval_wikibio_emb_cos_sim": 0.7375278472900391,
      "eval_wikibio_emb_cos_sim_sem": 0.009473264821730153,
      "eval_wikibio_emb_top1_equal": 0.15625,
      "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.595562219619751,
      "eval_wikibio_n_ngrams_match_1": 10.052,
      "eval_wikibio_n_ngrams_match_2": 3.432,
      "eval_wikibio_n_ngrams_match_3": 1.314,
      "eval_wikibio_num_pred_words": 35.474,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 36.43617936529365,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.35709053868168017,
      "eval_wikibio_runtime": 13.9262,
      "eval_wikibio_samples_per_second": 35.904,
      "eval_wikibio_steps_per_second": 0.072,
      "eval_wikibio_token_set_f1": 0.3209013043262027,
      "eval_wikibio_token_set_f1_sem": 0.005646028256609949,
      "eval_wikibio_token_set_precision": 0.32596328260921825,
      "eval_wikibio_token_set_recall": 0.3337972872543651,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 228750
    },
    {
      "epoch": 43.92,
      "eval_nq_accuracy": 0.53684375,
      "eval_nq_bleu_score": 12.445183379049432,
      "eval_nq_bleu_score_sem": 0.5044772499170779,
      "eval_nq_emb_cos_sim": 0.8396387100219727,
      "eval_nq_emb_cos_sim_sem": 0.0067118592835795545,
      "eval_nq_emb_top1_equal": 0.28125,
      "eval_nq_emb_top1_equal_sem": 0.039896367485272234,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1118662357330322,
      "eval_nq_n_ngrams_match_1": 23.416,
      "eval_nq_n_ngrams_match_2": 8.888,
      "eval_nq_n_ngrams_match_3": 4.206,
      "eval_nq_num_pred_words": 48.918,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.263648819821245,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45463262098736623,
      "eval_nq_runtime": 14.6789,
      "eval_nq_samples_per_second": 34.062,
      "eval_nq_steps_per_second": 0.068,
      "eval_nq_token_set_f1": 0.4687988198064519,
      "eval_nq_token_set_f1_sem": 0.004966175925347379,
      "eval_nq_token_set_precision": 0.4260689416833156,
      "eval_nq_token_set_recall": 0.5291435665839637,
      "eval_nq_true_num_tokens": 64.0,
      "step": 228750
    },
    {
      "epoch": 43.92,
      "learning_rate": 0.001,
      "loss": 2.496,
      "step": 228756
    },
    {
      "epoch": 43.93,
      "learning_rate": 0.001,
      "loss": 2.4962,
      "step": 228768
    },
    {
      "epoch": 43.93,
      "learning_rate": 0.001,
      "loss": 2.4997,
      "step": 228780
    },
    {
      "epoch": 43.93,
      "learning_rate": 0.001,
      "loss": 2.4858,
      "step": 228792
    },
    {
      "epoch": 43.93,
      "learning_rate": 0.001,
      "loss": 2.4988,
      "step": 228804
    },
    {
      "epoch": 43.94,
      "learning_rate": 0.001,
      "loss": 2.4994,
      "step": 228816
    },
    {
      "epoch": 43.94,
      "learning_rate": 0.001,
      "loss": 2.4931,
      "step": 228828
    },
    {
      "epoch": 43.94,
      "learning_rate": 0.001,
      "loss": 2.5002,
      "step": 228840
    },
    {
      "epoch": 43.94,
      "learning_rate": 0.001,
      "loss": 2.5085,
      "step": 228852
    },
    {
      "epoch": 43.94,
      "learning_rate": 0.001,
      "loss": 2.4924,
      "step": 228864
    },
    {
      "epoch": 43.95,
      "learning_rate": 0.001,
      "loss": 2.4948,
      "step": 228876
    },
    {
      "epoch": 43.95,
      "learning_rate": 0.001,
      "loss": 2.4935,
      "step": 228888
    },
    {
      "epoch": 43.95,
      "learning_rate": 0.001,
      "loss": 2.4944,
      "step": 228900
    },
    {
      "epoch": 43.95,
      "learning_rate": 0.001,
      "loss": 2.488,
      "step": 228912
    },
    {
      "epoch": 43.96,
      "learning_rate": 0.001,
      "loss": 2.4869,
      "step": 228924
    },
    {
      "epoch": 43.96,
      "learning_rate": 0.001,
      "loss": 2.4941,
      "step": 228936
    },
    {
      "epoch": 43.96,
      "learning_rate": 0.001,
      "loss": 2.4929,
      "step": 228948
    },
    {
      "epoch": 43.96,
      "learning_rate": 0.001,
      "loss": 2.4989,
      "step": 228960
    },
    {
      "epoch": 43.97,
      "learning_rate": 0.001,
      "loss": 2.5007,
      "step": 228972
    },
    {
      "epoch": 43.97,
      "learning_rate": 0.001,
      "loss": 2.491,
      "step": 228984
    },
    {
      "epoch": 43.97,
      "learning_rate": 0.001,
      "loss": 2.5008,
      "step": 228996
    },
    {
      "epoch": 43.97,
      "learning_rate": 0.001,
      "loss": 2.4988,
      "step": 229008
    },
    {
      "epoch": 43.97,
      "learning_rate": 0.001,
      "loss": 2.4821,
      "step": 229020
    },
    {
      "epoch": 43.98,
      "learning_rate": 0.001,
      "loss": 2.4989,
      "step": 229032
    },
    {
      "epoch": 43.98,
      "learning_rate": 0.001,
      "loss": 2.4961,
      "step": 229044
    },
    {
      "epoch": 43.98,
      "learning_rate": 0.001,
      "loss": 2.506,
      "step": 229056
    },
    {
      "epoch": 43.98,
      "learning_rate": 0.001,
      "loss": 2.4899,
      "step": 229068
    },
    {
      "epoch": 43.99,
      "learning_rate": 0.001,
      "loss": 2.4999,
      "step": 229080
    },
    {
      "epoch": 43.99,
      "learning_rate": 0.001,
      "loss": 2.4953,
      "step": 229092
    },
    {
      "epoch": 43.99,
      "learning_rate": 0.001,
      "loss": 2.5012,
      "step": 229104
    },
    {
      "epoch": 43.99,
      "learning_rate": 0.001,
      "loss": 2.4966,
      "step": 229116
    },
    {
      "epoch": 44.0,
      "learning_rate": 0.001,
      "loss": 2.5057,
      "step": 229128
    },
    {
      "epoch": 44.0,
      "learning_rate": 0.001,
      "loss": 2.4899,
      "step": 229140
    },
    {
      "epoch": 44.0,
      "learning_rate": 0.001,
      "loss": 2.4947,
      "step": 229152
    },
    {
      "epoch": 44.0,
      "learning_rate": 0.001,
      "loss": 2.4887,
      "step": 229164
    },
    {
      "epoch": 44.0,
      "learning_rate": 0.001,
      "loss": 2.4797,
      "step": 229176
    },
    {
      "epoch": 44.01,
      "learning_rate": 0.001,
      "loss": 2.4715,
      "step": 229188
    },
    {
      "epoch": 44.01,
      "learning_rate": 0.001,
      "loss": 2.4862,
      "step": 229200
    },
    {
      "epoch": 44.01,
      "learning_rate": 0.001,
      "loss": 2.483,
      "step": 229212
    },
    {
      "epoch": 44.01,
      "learning_rate": 0.001,
      "loss": 2.4846,
      "step": 229224
    },
    {
      "epoch": 44.02,
      "learning_rate": 0.001,
      "loss": 2.4829,
      "step": 229236
    },
    {
      "epoch": 44.02,
      "learning_rate": 0.001,
      "loss": 2.4755,
      "step": 229248
    },
    {
      "epoch": 44.02,
      "learning_rate": 0.001,
      "loss": 2.4859,
      "step": 229260
    },
    {
      "epoch": 44.02,
      "learning_rate": 0.001,
      "loss": 2.4794,
      "step": 229272
    },
    {
      "epoch": 44.03,
      "learning_rate": 0.001,
      "loss": 2.4852,
      "step": 229284
    },
    {
      "epoch": 44.03,
      "learning_rate": 0.001,
      "loss": 2.4847,
      "step": 229296
    },
    {
      "epoch": 44.03,
      "learning_rate": 0.001,
      "loss": 2.4606,
      "step": 229308
    },
    {
      "epoch": 44.03,
      "learning_rate": 0.001,
      "loss": 2.4801,
      "step": 229320
    },
    {
      "epoch": 44.03,
      "learning_rate": 0.001,
      "loss": 2.4811,
      "step": 229332
    },
    {
      "epoch": 44.04,
      "learning_rate": 0.001,
      "loss": 2.4746,
      "step": 229344
    },
    {
      "epoch": 44.04,
      "learning_rate": 0.001,
      "loss": 2.4775,
      "step": 229356
    },
    {
      "epoch": 44.04,
      "learning_rate": 0.001,
      "loss": 2.4881,
      "step": 229368
    },
    {
      "epoch": 44.04,
      "eval_ag_news_accuracy": 0.33,
      "eval_ag_news_bleu_score": 5.103983653986498,
      "eval_ag_news_bleu_score_sem": 0.1590122618243574,
      "eval_ag_news_emb_cos_sim": 0.8271461725234985,
      "eval_ag_news_emb_cos_sim_sem": 0.006241688139382752,
      "eval_ag_news_emb_top1_equal": 0.234375,
      "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4644253253936768,
      "eval_ag_news_n_ngrams_match_1": 14.578,
      "eval_ag_news_n_ngrams_match_2": 3.364,
      "eval_ag_news_n_ngrams_match_3": 0.996,
      "eval_ag_news_num_pred_words": 46.808,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.958088992814854,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.36493808296155045,
      "eval_ag_news_runtime": 18.7889,
      "eval_ag_news_samples_per_second": 26.611,
      "eval_ag_news_steps_per_second": 0.053,
      "eval_ag_news_token_set_f1": 0.36322957537705597,
      "eval_ag_news_token_set_f1_sem": 0.004421643630767436,
      "eval_ag_news_token_set_precision": 0.3502102824614946,
      "eval_ag_news_token_set_recall": 0.3909909421278378,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 229375
    },
    {
      "epoch": 44.04,
      "eval_anthropic_toxic_prompts_accuracy": 0.1175625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.3095375669636113,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12708507076971015,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6871490478515625,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009180323809699692,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.1824052333831787,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.398,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.054,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.792,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.796,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.10466121927512,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2212104118596558,
      "eval_anthropic_toxic_prompts_runtime": 18.136,
      "eval_anthropic_toxic_prompts_samples_per_second": 27.57,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.055,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3600745721199604,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006589481405737642,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.452521602022359,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3250802004853819,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 229375
    },
    {
      "epoch": 44.04,
      "eval_arxiv_accuracy": 0.35415625,
      "eval_arxiv_bleu_score": 4.567469995224752,
      "eval_arxiv_bleu_score_sem": 0.13634272388419386,
      "eval_arxiv_emb_cos_sim": 0.7857984304428101,
      "eval_arxiv_emb_cos_sim_sem": 0.006438691728945783,
      "eval_arxiv_emb_top1_equal": 0.3046875,
      "eval_arxiv_emb_top1_equal_sem": 0.04084279867618665,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3286006450653076,
      "eval_arxiv_n_ngrams_match_1": 15.826,
      "eval_arxiv_n_ngrams_match_2": 3.078,
      "eval_arxiv_n_ngrams_match_3": 0.726,
      "eval_arxiv_num_pred_words": 40.838,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.899273388501747,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.37881624225660093,
      "eval_arxiv_runtime": 18.9059,
      "eval_arxiv_samples_per_second": 26.447,
      "eval_arxiv_steps_per_second": 0.053,
      "eval_arxiv_token_set_f1": 0.3689119879339914,
      "eval_arxiv_token_set_f1_sem": 0.0042171397295453685,
      "eval_arxiv_token_set_precision": 0.32291774537253054,
      "eval_arxiv_token_set_recall": 0.44560393164832557,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 229375
    },
    {
      "epoch": 44.04,
      "eval_python_code_alpaca_accuracy": 0.16428125,
      "eval_python_code_alpaca_bleu_score": 4.751828922133397,
      "eval_python_code_alpaca_bleu_score_sem": 0.15112619433172716,
      "eval_python_code_alpaca_emb_cos_sim": 0.7750812768936157,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.006498989381185697,
      "eval_python_code_alpaca_emb_top1_equal": 0.1328125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8149120807647705,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.072,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.962,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.964,
      "eval_python_code_alpaca_num_pred_words": 42.562,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 16.691708193598654,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3500105254019824,
      "eval_python_code_alpaca_runtime": 14.1792,
      "eval_python_code_alpaca_samples_per_second": 35.263,
      "eval_python_code_alpaca_steps_per_second": 0.071,
      "eval_python_code_alpaca_token_set_f1": 0.4865076752927683,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005367249972759482,
      "eval_python_code_alpaca_token_set_precision": 0.5536583859166705,
      "eval_python_code_alpaca_token_set_recall": 0.4552995905336038,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 229375
    },
    {
      "epoch": 44.04,
      "eval_wikibio_accuracy": 0.33190625,
      "eval_wikibio_bleu_score": 6.200101019233036,
      "eval_wikibio_bleu_score_sem": 0.21423849953372273,
      "eval_wikibio_emb_cos_sim": 0.7459069490432739,
      "eval_wikibio_emb_cos_sim_sem": 0.008310609761242727,
      "eval_wikibio_emb_top1_equal": 0.1484375,
      "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.622779607772827,
      "eval_wikibio_n_ngrams_match_1": 10.402,
      "eval_wikibio_n_ngrams_match_2": 3.514,
      "eval_wikibio_n_ngrams_match_3": 1.304,
      "eval_wikibio_num_pred_words": 37.074,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.44149598864582,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3701234283837513,
      "eval_wikibio_runtime": 20.7974,
      "eval_wikibio_samples_per_second": 24.041,
      "eval_wikibio_steps_per_second": 0.048,
      "eval_wikibio_token_set_f1": 0.3283242409291427,
      "eval_wikibio_token_set_f1_sem": 0.004969229621705694,
      "eval_wikibio_token_set_precision": 0.3386983843491412,
      "eval_wikibio_token_set_recall": 0.3341984644878862,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 229375
    },
    {
      "epoch": 44.04,
      "eval_nq_accuracy": 0.53759375,
      "eval_nq_bleu_score": 12.1953972848761,
      "eval_nq_bleu_score_sem": 0.49263520022599105,
      "eval_nq_emb_cos_sim": 0.8387718796730042,
      "eval_nq_emb_cos_sim_sem": 0.007476274040843608,
      "eval_nq_emb_top1_equal": 0.28125,
      "eval_nq_emb_top1_equal_sem": 0.039896367485272234,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1147685050964355,
      "eval_nq_n_ngrams_match_1": 23.472,
      "eval_nq_n_ngrams_match_2": 8.808,
      "eval_nq_n_ngrams_match_3": 4.108,
      "eval_nq_num_pred_words": 48.778,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.287666991363565,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4580419596826657,
      "eval_nq_runtime": 14.01,
      "eval_nq_samples_per_second": 35.689,
      "eval_nq_steps_per_second": 0.071,
      "eval_nq_token_set_f1": 0.47118302928665534,
      "eval_nq_token_set_f1_sem": 0.005037996171470476,
      "eval_nq_token_set_precision": 0.42921038369893383,
      "eval_nq_token_set_recall": 0.5316751312424766,
      "eval_nq_true_num_tokens": 64.0,
      "step": 229375
    },
    {
      "epoch": 44.04,
      "learning_rate": 0.001,
      "loss": 2.4838,
      "step": 229380
    },
    {
      "epoch": 44.05,
      "learning_rate": 0.001,
      "loss": 2.4858,
      "step": 229392
    },
    {
      "epoch": 44.05,
      "learning_rate": 0.001,
      "loss": 2.479,
      "step": 229404
    },
    {
      "epoch": 44.05,
      "learning_rate": 0.001,
      "loss": 2.49,
      "step": 229416
    },
    {
      "epoch": 44.05,
      "learning_rate": 0.001,
      "loss": 2.4854,
      "step": 229428
    },
    {
      "epoch": 44.06,
      "learning_rate": 0.001,
      "loss": 2.4797,
      "step": 229440
    },
    {
      "epoch": 44.06,
      "learning_rate": 0.001,
      "loss": 2.4802,
      "step": 229452
    },
    {
      "epoch": 44.06,
      "learning_rate": 0.001,
      "loss": 2.4842,
      "step": 229464
    },
    {
      "epoch": 44.06,
      "learning_rate": 0.001,
      "loss": 2.4898,
      "step": 229476
    },
    {
      "epoch": 44.06,
      "learning_rate": 0.001,
      "loss": 2.4953,
      "step": 229488
    },
    {
      "epoch": 44.07,
      "learning_rate": 0.001,
      "loss": 2.4806,
      "step": 229500
    },
    {
      "epoch": 44.07,
      "learning_rate": 0.001,
      "loss": 2.4805,
      "step": 229512
    },
    {
      "epoch": 44.07,
      "learning_rate": 0.001,
      "loss": 2.4769,
      "step": 229524
    },
    {
      "epoch": 44.07,
      "learning_rate": 0.001,
      "loss": 2.491,
      "step": 229536
    },
    {
      "epoch": 44.08,
      "learning_rate": 0.001,
      "loss": 2.4787,
      "step": 229548
    },
    {
      "epoch": 44.08,
      "learning_rate": 0.001,
      "loss": 2.4894,
      "step": 229560
    },
    {
      "epoch": 44.08,
      "learning_rate": 0.001,
      "loss": 2.4824,
      "step": 229572
    },
    {
      "epoch": 44.08,
      "learning_rate": 0.001,
      "loss": 2.4893,
      "step": 229584
    },
    {
      "epoch": 44.09,
      "learning_rate": 0.001,
      "loss": 2.4813,
      "step": 229596
    },
    {
      "epoch": 44.09,
      "learning_rate": 0.001,
      "loss": 2.4841,
      "step": 229608
    },
    {
      "epoch": 44.09,
      "learning_rate": 0.001,
      "loss": 2.4793,
      "step": 229620
    },
    {
      "epoch": 44.09,
      "learning_rate": 0.001,
      "loss": 2.495,
      "step": 229632
    },
    {
      "epoch": 44.09,
      "learning_rate": 0.001,
      "loss": 2.48,
      "step": 229644
    },
    {
      "epoch": 44.1,
      "learning_rate": 0.001,
      "loss": 2.4785,
      "step": 229656
    },
    {
      "epoch": 44.1,
      "learning_rate": 0.001,
      "loss": 2.4749,
      "step": 229668
    },
    {
      "epoch": 44.1,
      "learning_rate": 0.001,
      "loss": 2.4793,
      "step": 229680
    },
    {
      "epoch": 44.1,
      "learning_rate": 0.001,
      "loss": 2.4742,
      "step": 229692
    },
    {
      "epoch": 44.11,
      "learning_rate": 0.001,
      "loss": 2.4924,
      "step": 229704
    },
    {
      "epoch": 44.11,
      "learning_rate": 0.001,
      "loss": 2.4865,
      "step": 229716
    },
    {
      "epoch": 44.11,
      "learning_rate": 0.001,
      "loss": 2.4803,
      "step": 229728
    },
    {
      "epoch": 44.11,
      "learning_rate": 0.001,
      "loss": 2.4817,
      "step": 229740
    },
    {
      "epoch": 44.12,
      "learning_rate": 0.001,
      "loss": 2.4779,
      "step": 229752
    },
    {
      "epoch": 44.12,
      "learning_rate": 0.001,
      "loss": 2.4959,
      "step": 229764
    },
    {
      "epoch": 44.12,
      "learning_rate": 0.001,
      "loss": 2.4802,
      "step": 229776
    },
    {
      "epoch": 44.12,
      "learning_rate": 0.001,
      "loss": 2.4892,
      "step": 229788
    },
    {
      "epoch": 44.12,
      "learning_rate": 0.001,
      "loss": 2.4855,
      "step": 229800
    },
    {
      "epoch": 44.13,
      "learning_rate": 0.001,
      "loss": 2.4786,
      "step": 229812
    },
    {
      "epoch": 44.13,
      "learning_rate": 0.001,
      "loss": 2.4892,
      "step": 229824
    },
    {
      "epoch": 44.13,
      "learning_rate": 0.001,
      "loss": 2.4821,
      "step": 229836
    },
    {
      "epoch": 44.13,
      "learning_rate": 0.001,
      "loss": 2.4797,
      "step": 229848
    },
    {
      "epoch": 44.14,
      "learning_rate": 0.001,
      "loss": 2.4747,
      "step": 229860
    },
    {
      "epoch": 44.14,
      "learning_rate": 0.001,
      "loss": 2.4869,
      "step": 229872
    },
    {
      "epoch": 44.14,
      "learning_rate": 0.001,
      "loss": 2.4893,
      "step": 229884
    },
    {
      "epoch": 44.14,
      "learning_rate": 0.001,
      "loss": 2.4879,
      "step": 229896
    },
    {
      "epoch": 44.15,
      "learning_rate": 0.001,
      "loss": 2.4934,
      "step": 229908
    },
    {
      "epoch": 44.15,
      "learning_rate": 0.001,
      "loss": 2.4865,
      "step": 229920
    },
    {
      "epoch": 44.15,
      "learning_rate": 0.001,
      "loss": 2.4746,
      "step": 229932
    },
    {
      "epoch": 44.15,
      "learning_rate": 0.001,
      "loss": 2.4702,
      "step": 229944
    },
    {
      "epoch": 44.15,
      "learning_rate": 0.001,
      "loss": 2.488,
      "step": 229956
    },
    {
      "epoch": 44.16,
      "learning_rate": 0.001,
      "loss": 2.4855,
      "step": 229968
    },
    {
      "epoch": 44.16,
      "learning_rate": 0.001,
      "loss": 2.4828,
      "step": 229980
    },
    {
      "epoch": 44.16,
      "learning_rate": 0.001,
      "loss": 2.4904,
      "step": 229992
    },
    {
      "epoch": 44.16,
      "eval_ag_news_accuracy": 0.3304375,
      "eval_ag_news_bleu_score": 5.227999143175835,
      "eval_ag_news_bleu_score_sem": 0.16745849992881312,
      "eval_ag_news_emb_cos_sim": 0.8245687484741211,
      "eval_ag_news_emb_cos_sim_sem": 0.006772942579417492,
      "eval_ag_news_emb_top1_equal": 0.3125,
      "eval_ag_news_emb_top1_equal_sem": 0.041130074229814934,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.467036247253418,
      "eval_ag_news_n_ngrams_match_1": 14.582,
      "eval_ag_news_n_ngrams_match_2": 3.316,
      "eval_ag_news_n_ngrams_match_3": 0.958,
      "eval_ag_news_num_pred_words": 46.686,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.04163808857976,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.363437163091331,
      "eval_ag_news_runtime": 22.7358,
      "eval_ag_news_samples_per_second": 21.992,
      "eval_ag_news_steps_per_second": 0.044,
      "eval_ag_news_token_set_f1": 0.36366623360661227,
      "eval_ag_news_token_set_f1_sem": 0.004404099270188432,
      "eval_ag_news_token_set_precision": 0.3501714071343526,
      "eval_ag_news_token_set_recall": 0.3909533064624547,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 230000
    },
    {
      "epoch": 44.16,
      "eval_anthropic_toxic_prompts_accuracy": 0.11678125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2214823884686625,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11874571914592888,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6864318251609802,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008864945527737752,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.140625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030847557647994725,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2236599922180176,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.4,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.026,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.758,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.558,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.11989074376225,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21994995218263774,
      "eval_anthropic_toxic_prompts_runtime": 13.4739,
      "eval_anthropic_toxic_prompts_samples_per_second": 37.109,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.074,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36254226339893136,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0065839280307947775,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4506151153465686,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32877529479373396,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 230000
    },
    {
      "epoch": 44.16,
      "eval_arxiv_accuracy": 0.3510625,
      "eval_arxiv_bleu_score": 4.666007287703487,
      "eval_arxiv_bleu_score_sem": 0.14038904396579627,
      "eval_arxiv_emb_cos_sim": 0.7872946858406067,
      "eval_arxiv_emb_cos_sim_sem": 0.006174981531991151,
      "eval_arxiv_emb_top1_equal": 0.3046875,
      "eval_arxiv_emb_top1_equal_sem": 0.04084279867618665,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.332165002822876,
      "eval_arxiv_n_ngrams_match_1": 15.68,
      "eval_arxiv_n_ngrams_match_2": 3.21,
      "eval_arxiv_n_ngrams_match_3": 0.784,
      "eval_arxiv_num_pred_words": 40.328,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.99889381598616,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.37623712352710803,
      "eval_arxiv_runtime": 13.8737,
      "eval_arxiv_samples_per_second": 36.039,
      "eval_arxiv_steps_per_second": 0.072,
      "eval_arxiv_token_set_f1": 0.3670198305060281,
      "eval_arxiv_token_set_f1_sem": 0.0043571585470539385,
      "eval_arxiv_token_set_precision": 0.3207681752154741,
      "eval_arxiv_token_set_recall": 0.44425783673944136,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 230000
    },
    {
      "epoch": 44.16,
      "eval_python_code_alpaca_accuracy": 0.16125,
      "eval_python_code_alpaca_bleu_score": 4.6031537781665355,
      "eval_python_code_alpaca_bleu_score_sem": 0.14470936890987762,
      "eval_python_code_alpaca_emb_cos_sim": 0.7584249377250671,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008684214572966975,
      "eval_python_code_alpaca_emb_top1_equal": 0.1328125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.825847625732422,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.668,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.826,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.932,
      "eval_python_code_alpaca_num_pred_words": 41.94,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 16.875242815680284,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3410002505428078,
      "eval_python_code_alpaca_runtime": 22.5054,
      "eval_python_code_alpaca_samples_per_second": 22.217,
      "eval_python_code_alpaca_steps_per_second": 0.044,
      "eval_python_code_alpaca_token_set_f1": 0.47470388797143237,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005552881437807432,
      "eval_python_code_alpaca_token_set_precision": 0.5300151500823808,
      "eval_python_code_alpaca_token_set_recall": 0.4538208418854002,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 230000
    },
    {
      "epoch": 44.16,
      "eval_wikibio_accuracy": 0.3335,
      "eval_wikibio_bleu_score": 6.479867851690654,
      "eval_wikibio_bleu_score_sem": 0.22298200379478933,
      "eval_wikibio_emb_cos_sim": 0.7464686036109924,
      "eval_wikibio_emb_cos_sim_sem": 0.008253369731153376,
      "eval_wikibio_emb_top1_equal": 0.1953125,
      "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6139187812805176,
      "eval_wikibio_n_ngrams_match_1": 10.244,
      "eval_wikibio_n_ngrams_match_2": 3.55,
      "eval_wikibio_n_ngrams_match_3": 1.356,
      "eval_wikibio_num_pred_words": 35.666,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.1111989027452,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3695829595970824,
      "eval_wikibio_runtime": 13.9807,
      "eval_wikibio_samples_per_second": 35.764,
      "eval_wikibio_steps_per_second": 0.072,
      "eval_wikibio_token_set_f1": 0.32897327881917576,
      "eval_wikibio_token_set_f1_sem": 0.005215906713604864,
      "eval_wikibio_token_set_precision": 0.33438402705040965,
      "eval_wikibio_token_set_recall": 0.3402201625076424,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 230000
    },
    {
      "epoch": 44.16,
      "eval_nq_accuracy": 0.53596875,
      "eval_nq_bleu_score": 12.269807454845486,
      "eval_nq_bleu_score_sem": 0.5018900023960086,
      "eval_nq_emb_cos_sim": 0.8384106755256653,
      "eval_nq_emb_cos_sim_sem": 0.006962078537743653,
      "eval_nq_emb_top1_equal": 0.265625,
      "eval_nq_emb_top1_equal_sem": 0.03919146934646163,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1142802238464355,
      "eval_nq_n_ngrams_match_1": 23.356,
      "eval_nq_n_ngrams_match_2": 8.762,
      "eval_nq_n_ngrams_match_3": 4.11,
      "eval_nq_num_pred_words": 48.806,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.28362126677155,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4551664252869843,
      "eval_nq_runtime": 14.513,
      "eval_nq_samples_per_second": 34.452,
      "eval_nq_steps_per_second": 0.069,
      "eval_nq_token_set_f1": 0.4669954403632132,
      "eval_nq_token_set_f1_sem": 0.004963082345214547,
      "eval_nq_token_set_precision": 0.424774802542662,
      "eval_nq_token_set_recall": 0.52720961576976,
      "eval_nq_true_num_tokens": 64.0,
      "step": 230000
    },
    {
      "epoch": 44.16,
      "learning_rate": 0.001,
      "loss": 2.4836,
      "step": 230004
    },
    {
      "epoch": 44.17,
      "learning_rate": 0.001,
      "loss": 2.4943,
      "step": 230016
    },
    {
      "epoch": 44.17,
      "learning_rate": 0.001,
      "loss": 2.4861,
      "step": 230028
    },
    {
      "epoch": 44.17,
      "learning_rate": 0.001,
      "loss": 2.4832,
      "step": 230040
    },
    {
      "epoch": 44.17,
      "learning_rate": 0.001,
      "loss": 2.4942,
      "step": 230052
    },
    {
      "epoch": 44.18,
      "learning_rate": 0.001,
      "loss": 2.495,
      "step": 230064
    },
    {
      "epoch": 44.18,
      "learning_rate": 0.001,
      "loss": 2.4878,
      "step": 230076
    },
    {
      "epoch": 44.18,
      "learning_rate": 0.001,
      "loss": 2.488,
      "step": 230088
    },
    {
      "epoch": 44.18,
      "learning_rate": 0.001,
      "loss": 2.4965,
      "step": 230100
    },
    {
      "epoch": 44.18,
      "learning_rate": 0.001,
      "loss": 2.4863,
      "step": 230112
    },
    {
      "epoch": 44.19,
      "learning_rate": 0.001,
      "loss": 2.4848,
      "step": 230124
    },
    {
      "epoch": 44.19,
      "learning_rate": 0.001,
      "loss": 2.4865,
      "step": 230136
    },
    {
      "epoch": 44.19,
      "learning_rate": 0.001,
      "loss": 2.4772,
      "step": 230148
    },
    {
      "epoch": 44.19,
      "learning_rate": 0.001,
      "loss": 2.4883,
      "step": 230160
    },
    {
      "epoch": 44.2,
      "learning_rate": 0.001,
      "loss": 2.4858,
      "step": 230172
    },
    {
      "epoch": 44.2,
      "learning_rate": 0.001,
      "loss": 2.4845,
      "step": 230184
    },
    {
      "epoch": 44.2,
      "learning_rate": 0.001,
      "loss": 2.494,
      "step": 230196
    },
    {
      "epoch": 44.2,
      "learning_rate": 0.001,
      "loss": 2.4845,
      "step": 230208
    },
    {
      "epoch": 44.21,
      "learning_rate": 0.001,
      "loss": 2.4921,
      "step": 230220
    },
    {
      "epoch": 44.21,
      "learning_rate": 0.001,
      "loss": 2.4992,
      "step": 230232
    },
    {
      "epoch": 44.21,
      "learning_rate": 0.001,
      "loss": 2.489,
      "step": 230244
    },
    {
      "epoch": 44.21,
      "learning_rate": 0.001,
      "loss": 2.4854,
      "step": 230256
    },
    {
      "epoch": 44.21,
      "learning_rate": 0.001,
      "loss": 2.4881,
      "step": 230268
    },
    {
      "epoch": 44.22,
      "learning_rate": 0.001,
      "loss": 2.4949,
      "step": 230280
    },
    {
      "epoch": 44.22,
      "learning_rate": 0.001,
      "loss": 2.4929,
      "step": 230292
    },
    {
      "epoch": 44.22,
      "learning_rate": 0.001,
      "loss": 2.4845,
      "step": 230304
    },
    {
      "epoch": 44.22,
      "learning_rate": 0.001,
      "loss": 2.4878,
      "step": 230316
    },
    {
      "epoch": 44.23,
      "learning_rate": 0.001,
      "loss": 2.489,
      "step": 230328
    },
    {
      "epoch": 44.23,
      "learning_rate": 0.001,
      "loss": 2.4785,
      "step": 230340
    },
    {
      "epoch": 44.23,
      "learning_rate": 0.001,
      "loss": 2.4805,
      "step": 230352
    },
    {
      "epoch": 44.23,
      "learning_rate": 0.001,
      "loss": 2.4917,
      "step": 230364
    },
    {
      "epoch": 44.24,
      "learning_rate": 0.001,
      "loss": 2.4909,
      "step": 230376
    },
    {
      "epoch": 44.24,
      "learning_rate": 0.001,
      "loss": 2.4934,
      "step": 230388
    },
    {
      "epoch": 44.24,
      "learning_rate": 0.001,
      "loss": 2.4958,
      "step": 230400
    },
    {
      "epoch": 44.24,
      "learning_rate": 0.001,
      "loss": 2.4902,
      "step": 230412
    },
    {
      "epoch": 44.24,
      "learning_rate": 0.001,
      "loss": 2.4888,
      "step": 230424
    },
    {
      "epoch": 44.25,
      "learning_rate": 0.001,
      "loss": 2.4843,
      "step": 230436
    },
    {
      "epoch": 44.25,
      "learning_rate": 0.001,
      "loss": 2.4913,
      "step": 230448
    },
    {
      "epoch": 44.25,
      "learning_rate": 0.001,
      "loss": 2.4842,
      "step": 230460
    },
    {
      "epoch": 44.25,
      "learning_rate": 0.001,
      "loss": 2.4829,
      "step": 230472
    },
    {
      "epoch": 44.26,
      "learning_rate": 0.001,
      "loss": 2.4792,
      "step": 230484
    },
    {
      "epoch": 44.26,
      "learning_rate": 0.001,
      "loss": 2.4895,
      "step": 230496
    },
    {
      "epoch": 44.26,
      "learning_rate": 0.001,
      "loss": 2.4934,
      "step": 230508
    },
    {
      "epoch": 44.26,
      "learning_rate": 0.001,
      "loss": 2.4979,
      "step": 230520
    },
    {
      "epoch": 44.26,
      "learning_rate": 0.001,
      "loss": 2.4842,
      "step": 230532
    },
    {
      "epoch": 44.27,
      "learning_rate": 0.001,
      "loss": 2.4885,
      "step": 230544
    },
    {
      "epoch": 44.27,
      "learning_rate": 0.001,
      "loss": 2.4814,
      "step": 230556
    },
    {
      "epoch": 44.27,
      "learning_rate": 0.001,
      "loss": 2.4938,
      "step": 230568
    },
    {
      "epoch": 44.27,
      "learning_rate": 0.001,
      "loss": 2.4975,
      "step": 230580
    },
    {
      "epoch": 44.28,
      "learning_rate": 0.001,
      "loss": 2.4834,
      "step": 230592
    },
    {
      "epoch": 44.28,
      "learning_rate": 0.001,
      "loss": 2.4883,
      "step": 230604
    },
    {
      "epoch": 44.28,
      "learning_rate": 0.001,
      "loss": 2.4846,
      "step": 230616
    },
    {
      "epoch": 44.28,
      "eval_ag_news_accuracy": 0.32865625,
      "eval_ag_news_bleu_score": 5.151042038723306,
      "eval_ag_news_bleu_score_sem": 0.1679491783122144,
      "eval_ag_news_emb_cos_sim": 0.8254836797714233,
      "eval_ag_news_emb_cos_sim_sem": 0.007335676764022821,
      "eval_ag_news_emb_top1_equal": 0.2734375,
      "eval_ag_news_emb_top1_equal_sem": 0.03955156411760461,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4625508785247803,
      "eval_ag_news_n_ngrams_match_1": 14.63,
      "eval_ag_news_n_ngrams_match_2": 3.39,
      "eval_ag_news_n_ngrams_match_3": 0.978,
      "eval_ag_news_num_pred_words": 46.71,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.898241361092445,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.36539840279800617,
      "eval_ag_news_runtime": 14.1975,
      "eval_ag_news_samples_per_second": 35.218,
      "eval_ag_news_steps_per_second": 0.07,
      "eval_ag_news_token_set_f1": 0.3653697064151061,
      "eval_ag_news_token_set_f1_sem": 0.0045366294122285485,
      "eval_ag_news_token_set_precision": 0.3517690523920131,
      "eval_ag_news_token_set_recall": 0.3963370629402755,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 230625
    },
    {
      "epoch": 44.28,
      "eval_anthropic_toxic_prompts_accuracy": 0.11646875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2309547844401014,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11684793739141351,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6881632804870605,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009371160129945464,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.176680088043213,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.472,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.036,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.78,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.614,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 23.967052820335084,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21984921097908108,
      "eval_anthropic_toxic_prompts_runtime": 13.6023,
      "eval_anthropic_toxic_prompts_samples_per_second": 36.758,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.074,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3666556033599894,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006796563987067125,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.45299614163070895,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3333300874950141,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 230625
    },
    {
      "epoch": 44.28,
      "eval_arxiv_accuracy": 0.35125,
      "eval_arxiv_bleu_score": 4.502675640221707,
      "eval_arxiv_bleu_score_sem": 0.1318155761835477,
      "eval_arxiv_emb_cos_sim": 0.7878255844116211,
      "eval_arxiv_emb_cos_sim_sem": 0.006188094470234103,
      "eval_arxiv_emb_top1_equal": 0.2890625,
      "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.337618589401245,
      "eval_arxiv_n_ngrams_match_1": 15.674,
      "eval_arxiv_n_ngrams_match_2": 3.144,
      "eval_arxiv_n_ngrams_match_3": 0.684,
      "eval_arxiv_num_pred_words": 40.596,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.152005331481472,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3734648833536512,
      "eval_arxiv_runtime": 15.0698,
      "eval_arxiv_samples_per_second": 33.179,
      "eval_arxiv_steps_per_second": 0.066,
      "eval_arxiv_token_set_f1": 0.36629161983045455,
      "eval_arxiv_token_set_f1_sem": 0.0041715945315500625,
      "eval_arxiv_token_set_precision": 0.3203715424232131,
      "eval_arxiv_token_set_recall": 0.4439270992429467,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 230625
    },
    {
      "epoch": 44.28,
      "eval_python_code_alpaca_accuracy": 0.16471875,
      "eval_python_code_alpaca_bleu_score": 4.987518028366514,
      "eval_python_code_alpaca_bleu_score_sem": 0.15173951075981082,
      "eval_python_code_alpaca_emb_cos_sim": 0.776828408241272,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007575471258625553,
      "eval_python_code_alpaca_emb_top1_equal": 0.1953125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.035178457165496856,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8173060417175293,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.264,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.202,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.096,
      "eval_python_code_alpaca_num_pred_words": 43.38,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 16.73171535993908,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.35011357808652255,
      "eval_python_code_alpaca_runtime": 13.252,
      "eval_python_code_alpaca_samples_per_second": 37.73,
      "eval_python_code_alpaca_steps_per_second": 0.075,
      "eval_python_code_alpaca_token_set_f1": 0.4869528704480582,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0053927974542409265,
      "eval_python_code_alpaca_token_set_precision": 0.5611929984632298,
      "eval_python_code_alpaca_token_set_recall": 0.448868216022795,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 230625
    },
    {
      "epoch": 44.28,
      "eval_wikibio_accuracy": 0.3320625,
      "eval_wikibio_bleu_score": 6.361335782866247,
      "eval_wikibio_bleu_score_sem": 0.22093950249641842,
      "eval_wikibio_emb_cos_sim": 0.7376881241798401,
      "eval_wikibio_emb_cos_sim_sem": 0.011141980385010424,
      "eval_wikibio_emb_top1_equal": 0.1796875,
      "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6032822132110596,
      "eval_wikibio_n_ngrams_match_1": 10.324,
      "eval_wikibio_n_ngrams_match_2": 3.592,
      "eval_wikibio_n_ngrams_match_3": 1.374,
      "eval_wikibio_num_pred_words": 36.288,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 36.71855500311837,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3634289849159927,
      "eval_wikibio_runtime": 13.4004,
      "eval_wikibio_samples_per_second": 37.312,
      "eval_wikibio_steps_per_second": 0.075,
      "eval_wikibio_token_set_f1": 0.3273452590282704,
      "eval_wikibio_token_set_f1_sem": 0.0054884067259554185,
      "eval_wikibio_token_set_precision": 0.3363029694057662,
      "eval_wikibio_token_set_recall": 0.3353293070713061,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 230625
    },
    {
      "epoch": 44.28,
      "eval_nq_accuracy": 0.53790625,
      "eval_nq_bleu_score": 12.297011020912475,
      "eval_nq_bleu_score_sem": 0.4863772620800528,
      "eval_nq_emb_cos_sim": 0.842413067817688,
      "eval_nq_emb_cos_sim_sem": 0.007522692209033278,
      "eval_nq_emb_top1_equal": 0.34375,
      "eval_nq_emb_top1_equal_sem": 0.04214578430296913,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1123740673065186,
      "eval_nq_n_ngrams_match_1": 23.608,
      "eval_nq_n_ngrams_match_2": 8.794,
      "eval_nq_n_ngrams_match_3": 4.094,
      "eval_nq_num_pred_words": 48.95,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.267846427352762,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4590508098826642,
      "eval_nq_runtime": 14.9576,
      "eval_nq_samples_per_second": 33.428,
      "eval_nq_steps_per_second": 0.067,
      "eval_nq_token_set_f1": 0.4735377685738816,
      "eval_nq_token_set_f1_sem": 0.004958284323717352,
      "eval_nq_token_set_precision": 0.4317609500410873,
      "eval_nq_token_set_recall": 0.5332976598623803,
      "eval_nq_true_num_tokens": 64.0,
      "step": 230625
    },
    {
      "epoch": 44.28,
      "learning_rate": 0.001,
      "loss": 2.4855,
      "step": 230628
    },
    {
      "epoch": 44.29,
      "learning_rate": 0.001,
      "loss": 2.4918,
      "step": 230640
    },
    {
      "epoch": 44.29,
      "learning_rate": 0.001,
      "loss": 2.485,
      "step": 230652
    },
    {
      "epoch": 44.29,
      "learning_rate": 0.001,
      "loss": 2.4851,
      "step": 230664
    },
    {
      "epoch": 44.29,
      "learning_rate": 0.001,
      "loss": 2.484,
      "step": 230676
    },
    {
      "epoch": 44.29,
      "learning_rate": 0.001,
      "loss": 2.4806,
      "step": 230688
    },
    {
      "epoch": 44.3,
      "learning_rate": 0.001,
      "loss": 2.4916,
      "step": 230700
    },
    {
      "epoch": 44.3,
      "learning_rate": 0.001,
      "loss": 2.4789,
      "step": 230712
    },
    {
      "epoch": 44.3,
      "learning_rate": 0.001,
      "loss": 2.4875,
      "step": 230724
    },
    {
      "epoch": 44.3,
      "learning_rate": 0.001,
      "loss": 2.4885,
      "step": 230736
    },
    {
      "epoch": 44.31,
      "learning_rate": 0.001,
      "loss": 2.4871,
      "step": 230748
    },
    {
      "epoch": 44.31,
      "learning_rate": 0.001,
      "loss": 2.4852,
      "step": 230760
    },
    {
      "epoch": 44.31,
      "learning_rate": 0.001,
      "loss": 2.4845,
      "step": 230772
    },
    {
      "epoch": 44.31,
      "learning_rate": 0.001,
      "loss": 2.4886,
      "step": 230784
    },
    {
      "epoch": 44.32,
      "learning_rate": 0.001,
      "loss": 2.479,
      "step": 230796
    },
    {
      "epoch": 44.32,
      "learning_rate": 0.001,
      "loss": 2.4923,
      "step": 230808
    },
    {
      "epoch": 44.32,
      "learning_rate": 0.001,
      "loss": 2.4863,
      "step": 230820
    },
    {
      "epoch": 44.32,
      "learning_rate": 0.001,
      "loss": 2.4971,
      "step": 230832
    },
    {
      "epoch": 44.32,
      "learning_rate": 0.001,
      "loss": 2.4983,
      "step": 230844
    },
    {
      "epoch": 44.33,
      "learning_rate": 0.001,
      "loss": 2.4914,
      "step": 230856
    },
    {
      "epoch": 44.33,
      "learning_rate": 0.001,
      "loss": 2.4882,
      "step": 230868
    },
    {
      "epoch": 44.33,
      "learning_rate": 0.001,
      "loss": 2.4903,
      "step": 230880
    },
    {
      "epoch": 44.33,
      "learning_rate": 0.001,
      "loss": 2.4846,
      "step": 230892
    },
    {
      "epoch": 44.34,
      "learning_rate": 0.001,
      "loss": 2.4811,
      "step": 230904
    },
    {
      "epoch": 44.34,
      "learning_rate": 0.001,
      "loss": 2.4955,
      "step": 230916
    },
    {
      "epoch": 44.34,
      "learning_rate": 0.001,
      "loss": 2.4858,
      "step": 230928
    },
    {
      "epoch": 44.34,
      "learning_rate": 0.001,
      "loss": 2.49,
      "step": 230940
    },
    {
      "epoch": 44.35,
      "learning_rate": 0.001,
      "loss": 2.4833,
      "step": 230952
    },
    {
      "epoch": 44.35,
      "learning_rate": 0.001,
      "loss": 2.4818,
      "step": 230964
    },
    {
      "epoch": 44.35,
      "learning_rate": 0.001,
      "loss": 2.4866,
      "step": 230976
    },
    {
      "epoch": 44.35,
      "learning_rate": 0.001,
      "loss": 2.492,
      "step": 230988
    },
    {
      "epoch": 44.35,
      "learning_rate": 0.001,
      "loss": 2.4892,
      "step": 231000
    },
    {
      "epoch": 44.36,
      "learning_rate": 0.001,
      "loss": 2.4872,
      "step": 231012
    },
    {
      "epoch": 44.36,
      "learning_rate": 0.001,
      "loss": 2.4952,
      "step": 231024
    },
    {
      "epoch": 44.36,
      "learning_rate": 0.001,
      "loss": 2.4915,
      "step": 231036
    },
    {
      "epoch": 44.36,
      "learning_rate": 0.001,
      "loss": 2.4882,
      "step": 231048
    },
    {
      "epoch": 44.37,
      "learning_rate": 0.001,
      "loss": 2.4878,
      "step": 231060
    },
    {
      "epoch": 44.37,
      "learning_rate": 0.001,
      "loss": 2.4882,
      "step": 231072
    },
    {
      "epoch": 44.37,
      "learning_rate": 0.001,
      "loss": 2.477,
      "step": 231084
    },
    {
      "epoch": 44.37,
      "learning_rate": 0.001,
      "loss": 2.4904,
      "step": 231096
    },
    {
      "epoch": 44.38,
      "learning_rate": 0.001,
      "loss": 2.4841,
      "step": 231108
    },
    {
      "epoch": 44.38,
      "learning_rate": 0.001,
      "loss": 2.4888,
      "step": 231120
    },
    {
      "epoch": 44.38,
      "learning_rate": 0.001,
      "loss": 2.4949,
      "step": 231132
    },
    {
      "epoch": 44.38,
      "learning_rate": 0.001,
      "loss": 2.489,
      "step": 231144
    },
    {
      "epoch": 44.38,
      "learning_rate": 0.001,
      "loss": 2.5006,
      "step": 231156
    },
    {
      "epoch": 44.39,
      "learning_rate": 0.001,
      "loss": 2.5023,
      "step": 231168
    },
    {
      "epoch": 44.39,
      "learning_rate": 0.001,
      "loss": 2.4854,
      "step": 231180
    },
    {
      "epoch": 44.39,
      "learning_rate": 0.001,
      "loss": 2.4963,
      "step": 231192
    },
    {
      "epoch": 44.39,
      "learning_rate": 0.001,
      "loss": 2.492,
      "step": 231204
    },
    {
      "epoch": 44.4,
      "learning_rate": 0.001,
      "loss": 2.497,
      "step": 231216
    },
    {
      "epoch": 44.4,
      "learning_rate": 0.001,
      "loss": 2.4861,
      "step": 231228
    },
    {
      "epoch": 44.4,
      "learning_rate": 0.001,
      "loss": 2.4956,
      "step": 231240
    },
    {
      "epoch": 44.4,
      "eval_ag_news_accuracy": 0.329125,
      "eval_ag_news_bleu_score": 4.951772545080421,
      "eval_ag_news_bleu_score_sem": 0.15895500546693048,
      "eval_ag_news_emb_cos_sim": 0.8184164762496948,
      "eval_ag_news_emb_cos_sim_sem": 0.007088071931555415,
      "eval_ag_news_emb_top1_equal": 0.25,
      "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4644315242767334,
      "eval_ag_news_n_ngrams_match_1": 14.28,
      "eval_ag_news_n_ngrams_match_2": 3.292,
      "eval_ag_news_n_ngrams_match_3": 0.912,
      "eval_ag_news_num_pred_words": 46.56,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.95828709788525,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3584141121283838,
      "eval_ag_news_runtime": 14.7439,
      "eval_ag_news_samples_per_second": 33.912,
      "eval_ag_news_steps_per_second": 0.068,
      "eval_ag_news_token_set_f1": 0.3572673046241237,
      "eval_ag_news_token_set_f1_sem": 0.004575753007493886,
      "eval_ag_news_token_set_precision": 0.34227980742384423,
      "eval_ag_news_token_set_recall": 0.3887265408129691,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 231250
    },
    {
      "epoch": 44.4,
      "eval_anthropic_toxic_prompts_accuracy": 0.1178125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.257911478221794,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12478898503307423,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6780132055282593,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008386806653538867,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1328125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030114394778901498,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.196791410446167,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.318,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.998,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.766,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.754,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.453941523810737,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21806154708980463,
      "eval_anthropic_toxic_prompts_runtime": 13.3856,
      "eval_anthropic_toxic_prompts_samples_per_second": 37.354,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.075,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36287035858831085,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006630035259195912,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4432813115155612,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3343028740591016,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 231250
    },
    {
      "epoch": 44.4,
      "eval_arxiv_accuracy": 0.35334375,
      "eval_arxiv_bleu_score": 4.617478853905073,
      "eval_arxiv_bleu_score_sem": 0.1362489432586617,
      "eval_arxiv_emb_cos_sim": 0.7854287028312683,
      "eval_arxiv_emb_cos_sim_sem": 0.0062751474687443385,
      "eval_arxiv_emb_top1_equal": 0.234375,
      "eval_arxiv_emb_top1_equal_sem": 0.03758909358128201,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.327728509902954,
      "eval_arxiv_n_ngrams_match_1": 15.55,
      "eval_arxiv_n_ngrams_match_2": 3.128,
      "eval_arxiv_n_ngrams_match_3": 0.754,
      "eval_arxiv_num_pred_words": 40.706,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.87495205846069,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3750254383457203,
      "eval_arxiv_runtime": 14.8707,
      "eval_arxiv_samples_per_second": 33.623,
      "eval_arxiv_steps_per_second": 0.067,
      "eval_arxiv_token_set_f1": 0.3648252787311752,
      "eval_arxiv_token_set_f1_sem": 0.00403940420609417,
      "eval_arxiv_token_set_precision": 0.3173998898279539,
      "eval_arxiv_token_set_recall": 0.4444589702833801,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 231250
    },
    {
      "epoch": 44.4,
      "eval_python_code_alpaca_accuracy": 0.16209375,
      "eval_python_code_alpaca_bleu_score": 4.6041812163336795,
      "eval_python_code_alpaca_bleu_score_sem": 0.1483186692461661,
      "eval_python_code_alpaca_emb_cos_sim": 0.7744543552398682,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007144725198488152,
      "eval_python_code_alpaca_emb_top1_equal": 0.1640625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8420767784118652,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.006,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.946,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.98,
      "eval_python_code_alpaca_num_pred_words": 44.422,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.15134812530243,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3393222654256284,
      "eval_python_code_alpaca_runtime": 13.7062,
      "eval_python_code_alpaca_samples_per_second": 36.48,
      "eval_python_code_alpaca_steps_per_second": 0.073,
      "eval_python_code_alpaca_token_set_f1": 0.4868389731976632,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005375971006064705,
      "eval_python_code_alpaca_token_set_precision": 0.5486301922005524,
      "eval_python_code_alpaca_token_set_recall": 0.45746959759716543,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 231250
    },
    {
      "epoch": 44.4,
      "eval_wikibio_accuracy": 0.334125,
      "eval_wikibio_bleu_score": 6.014058014255632,
      "eval_wikibio_bleu_score_sem": 0.21447243333362348,
      "eval_wikibio_emb_cos_sim": 0.7384083271026611,
      "eval_wikibio_emb_cos_sim_sem": 0.010786422326982934,
      "eval_wikibio_emb_top1_equal": 0.203125,
      "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.579582452774048,
      "eval_wikibio_n_ngrams_match_1": 9.94,
      "eval_wikibio_n_ngrams_match_2": 3.372,
      "eval_wikibio_n_ngrams_match_3": 1.26,
      "eval_wikibio_num_pred_words": 35.614,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 35.85856507636032,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.353314695923721,
      "eval_wikibio_runtime": 13.7511,
      "eval_wikibio_samples_per_second": 36.361,
      "eval_wikibio_steps_per_second": 0.073,
      "eval_wikibio_token_set_f1": 0.317845479825668,
      "eval_wikibio_token_set_f1_sem": 0.005635497103008044,
      "eval_wikibio_token_set_precision": 0.3245144502863291,
      "eval_wikibio_token_set_recall": 0.3308307688927712,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 231250
    },
    {
      "epoch": 44.4,
      "eval_nq_accuracy": 0.5359375,
      "eval_nq_bleu_score": 12.087189298239092,
      "eval_nq_bleu_score_sem": 0.4775757830859531,
      "eval_nq_emb_cos_sim": 0.8411356210708618,
      "eval_nq_emb_cos_sim_sem": 0.00651645990843797,
      "eval_nq_emb_top1_equal": 0.2578125,
      "eval_nq_emb_top1_equal_sem": 0.038815656435002115,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1131131649017334,
      "eval_nq_n_ngrams_match_1": 23.498,
      "eval_nq_n_ngrams_match_2": 8.732,
      "eval_nq_n_ngrams_match_3": 4.024,
      "eval_nq_num_pred_words": 48.72,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.273959431539895,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45961685494641735,
      "eval_nq_runtime": 14.3703,
      "eval_nq_samples_per_second": 34.794,
      "eval_nq_steps_per_second": 0.07,
      "eval_nq_token_set_f1": 0.4710749317682711,
      "eval_nq_token_set_f1_sem": 0.0047465441645887325,
      "eval_nq_token_set_precision": 0.42948145801591264,
      "eval_nq_token_set_recall": 0.5303811677895016,
      "eval_nq_true_num_tokens": 64.0,
      "step": 231250
    },
    {
      "epoch": 44.4,
      "learning_rate": 0.001,
      "loss": 2.5001,
      "step": 231252
    },
    {
      "epoch": 44.41,
      "learning_rate": 0.001,
      "loss": 2.4908,
      "step": 231264
    },
    {
      "epoch": 44.41,
      "learning_rate": 0.001,
      "loss": 2.4957,
      "step": 231276
    },
    {
      "epoch": 44.41,
      "learning_rate": 0.001,
      "loss": 2.4911,
      "step": 231288
    },
    {
      "epoch": 44.41,
      "learning_rate": 0.001,
      "loss": 2.4958,
      "step": 231300
    },
    {
      "epoch": 44.41,
      "learning_rate": 0.001,
      "loss": 2.4905,
      "step": 231312
    },
    {
      "epoch": 44.42,
      "learning_rate": 0.001,
      "loss": 2.4916,
      "step": 231324
    },
    {
      "epoch": 44.42,
      "learning_rate": 0.001,
      "loss": 2.4964,
      "step": 231336
    },
    {
      "epoch": 44.42,
      "learning_rate": 0.001,
      "loss": 2.4841,
      "step": 231348
    },
    {
      "epoch": 44.42,
      "learning_rate": 0.001,
      "loss": 2.4938,
      "step": 231360
    },
    {
      "epoch": 44.43,
      "learning_rate": 0.001,
      "loss": 2.4989,
      "step": 231372
    },
    {
      "epoch": 44.43,
      "learning_rate": 0.001,
      "loss": 2.4926,
      "step": 231384
    },
    {
      "epoch": 44.43,
      "learning_rate": 0.001,
      "loss": 2.4973,
      "step": 231396
    },
    {
      "epoch": 44.43,
      "learning_rate": 0.001,
      "loss": 2.4891,
      "step": 231408
    },
    {
      "epoch": 44.44,
      "learning_rate": 0.001,
      "loss": 2.4914,
      "step": 231420
    },
    {
      "epoch": 44.44,
      "learning_rate": 0.001,
      "loss": 2.492,
      "step": 231432
    },
    {
      "epoch": 44.44,
      "learning_rate": 0.001,
      "loss": 2.4894,
      "step": 231444
    },
    {
      "epoch": 44.44,
      "learning_rate": 0.001,
      "loss": 2.4914,
      "step": 231456
    },
    {
      "epoch": 44.44,
      "learning_rate": 0.001,
      "loss": 2.4849,
      "step": 231468
    },
    {
      "epoch": 44.45,
      "learning_rate": 0.001,
      "loss": 2.4924,
      "step": 231480
    },
    {
      "epoch": 44.45,
      "learning_rate": 0.001,
      "loss": 2.4928,
      "step": 231492
    },
    {
      "epoch": 44.45,
      "learning_rate": 0.001,
      "loss": 2.4882,
      "step": 231504
    },
    {
      "epoch": 44.45,
      "learning_rate": 0.001,
      "loss": 2.4934,
      "step": 231516
    },
    {
      "epoch": 44.46,
      "learning_rate": 0.001,
      "loss": 2.4847,
      "step": 231528
    },
    {
      "epoch": 44.46,
      "learning_rate": 0.001,
      "loss": 2.4904,
      "step": 231540
    },
    {
      "epoch": 44.46,
      "learning_rate": 0.001,
      "loss": 2.4957,
      "step": 231552
    },
    {
      "epoch": 44.46,
      "learning_rate": 0.001,
      "loss": 2.4823,
      "step": 231564
    },
    {
      "epoch": 44.47,
      "learning_rate": 0.001,
      "loss": 2.4848,
      "step": 231576
    },
    {
      "epoch": 44.47,
      "learning_rate": 0.001,
      "loss": 2.4855,
      "step": 231588
    },
    {
      "epoch": 44.47,
      "learning_rate": 0.001,
      "loss": 2.4924,
      "step": 231600
    },
    {
      "epoch": 44.47,
      "learning_rate": 0.001,
      "loss": 2.4915,
      "step": 231612
    },
    {
      "epoch": 44.47,
      "learning_rate": 0.001,
      "loss": 2.4781,
      "step": 231624
    },
    {
      "epoch": 44.48,
      "learning_rate": 0.001,
      "loss": 2.4931,
      "step": 231636
    },
    {
      "epoch": 44.48,
      "learning_rate": 0.001,
      "loss": 2.5002,
      "step": 231648
    },
    {
      "epoch": 44.48,
      "learning_rate": 0.001,
      "loss": 2.4839,
      "step": 231660
    },
    {
      "epoch": 44.48,
      "learning_rate": 0.001,
      "loss": 2.4857,
      "step": 231672
    },
    {
      "epoch": 44.49,
      "learning_rate": 0.001,
      "loss": 2.501,
      "step": 231684
    },
    {
      "epoch": 44.49,
      "learning_rate": 0.001,
      "loss": 2.4912,
      "step": 231696
    },
    {
      "epoch": 44.49,
      "learning_rate": 0.001,
      "loss": 2.4922,
      "step": 231708
    },
    {
      "epoch": 44.49,
      "learning_rate": 0.001,
      "loss": 2.4849,
      "step": 231720
    },
    {
      "epoch": 44.5,
      "learning_rate": 0.001,
      "loss": 2.4878,
      "step": 231732
    },
    {
      "epoch": 44.5,
      "learning_rate": 0.001,
      "loss": 2.481,
      "step": 231744
    },
    {
      "epoch": 44.5,
      "learning_rate": 0.001,
      "loss": 2.494,
      "step": 231756
    },
    {
      "epoch": 44.5,
      "learning_rate": 0.001,
      "loss": 2.4853,
      "step": 231768
    },
    {
      "epoch": 44.5,
      "learning_rate": 0.001,
      "loss": 2.492,
      "step": 231780
    },
    {
      "epoch": 44.51,
      "learning_rate": 0.001,
      "loss": 2.489,
      "step": 231792
    },
    {
      "epoch": 44.51,
      "learning_rate": 0.001,
      "loss": 2.4821,
      "step": 231804
    },
    {
      "epoch": 44.51,
      "learning_rate": 0.001,
      "loss": 2.4872,
      "step": 231816
    },
    {
      "epoch": 44.51,
      "learning_rate": 0.001,
      "loss": 2.4877,
      "step": 231828
    },
    {
      "epoch": 44.52,
      "learning_rate": 0.001,
      "loss": 2.4904,
      "step": 231840
    },
    {
      "epoch": 44.52,
      "learning_rate": 0.001,
      "loss": 2.4939,
      "step": 231852
    },
    {
      "epoch": 44.52,
      "learning_rate": 0.001,
      "loss": 2.488,
      "step": 231864
    },
    {
      "epoch": 44.52,
      "eval_ag_news_accuracy": 0.32746875,
      "eval_ag_news_bleu_score": 5.032148010511632,
      "eval_ag_news_bleu_score_sem": 0.15772902792673343,
      "eval_ag_news_emb_cos_sim": 0.8267014026641846,
      "eval_ag_news_emb_cos_sim_sem": 0.006599245538536478,
      "eval_ag_news_emb_top1_equal": 0.2421875,
      "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4720215797424316,
      "eval_ag_news_n_ngrams_match_1": 14.466,
      "eval_ag_news_n_ngrams_match_2": 3.332,
      "eval_ag_news_n_ngrams_match_3": 0.968,
      "eval_ag_news_num_pred_words": 46.846,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.20177514451354,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3605934341228475,
      "eval_ag_news_runtime": 12.4031,
      "eval_ag_news_samples_per_second": 40.313,
      "eval_ag_news_steps_per_second": 0.081,
      "eval_ag_news_token_set_f1": 0.3593330210601828,
      "eval_ag_news_token_set_f1_sem": 0.004519197976533824,
      "eval_ag_news_token_set_precision": 0.3459731448610272,
      "eval_ag_news_token_set_recall": 0.387712768205289,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 231875
    },
    {
      "epoch": 44.52,
      "eval_anthropic_toxic_prompts_accuracy": 0.116625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.3787512868093175,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.14053752954488988,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6918679475784302,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008374250843949669,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2191762924194336,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.416,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.07,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.792,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.642,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.007512817403224,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2248723645176024,
      "eval_anthropic_toxic_prompts_runtime": 11.1823,
      "eval_anthropic_toxic_prompts_samples_per_second": 44.714,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.089,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36663493969040806,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006519405853883953,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4525412478375378,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3326264750523705,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 231875
    },
    {
      "epoch": 44.52,
      "eval_arxiv_accuracy": 0.3543125,
      "eval_arxiv_bleu_score": 4.517749544071756,
      "eval_arxiv_bleu_score_sem": 0.13442668578636985,
      "eval_arxiv_emb_cos_sim": 0.7815765738487244,
      "eval_arxiv_emb_cos_sim_sem": 0.007420977375597315,
      "eval_arxiv_emb_top1_equal": 0.3046875,
      "eval_arxiv_emb_top1_equal_sem": 0.04084279867618665,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3189609050750732,
      "eval_arxiv_n_ngrams_match_1": 15.42,
      "eval_arxiv_n_ngrams_match_2": 3.106,
      "eval_arxiv_n_ngrams_match_3": 0.712,
      "eval_arxiv_num_pred_words": 39.85,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.63162375615171,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.37318881767804524,
      "eval_arxiv_runtime": 11.4412,
      "eval_arxiv_samples_per_second": 43.702,
      "eval_arxiv_steps_per_second": 0.087,
      "eval_arxiv_token_set_f1": 0.36440001264371186,
      "eval_arxiv_token_set_f1_sem": 0.004088611910439568,
      "eval_arxiv_token_set_precision": 0.3144086455781148,
      "eval_arxiv_token_set_recall": 0.45718424629530285,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 231875
    },
    {
      "epoch": 44.52,
      "eval_python_code_alpaca_accuracy": 0.161125,
      "eval_python_code_alpaca_bleu_score": 4.654814475943952,
      "eval_python_code_alpaca_bleu_score_sem": 0.1477308756024803,
      "eval_python_code_alpaca_emb_cos_sim": 0.7707570791244507,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.0081884800480593,
      "eval_python_code_alpaca_emb_top1_equal": 0.1484375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8380470275878906,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.91,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.934,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.944,
      "eval_python_code_alpaca_num_pred_words": 43.174,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.082371538632618,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.34218812762441414,
      "eval_python_code_alpaca_runtime": 11.3825,
      "eval_python_code_alpaca_samples_per_second": 43.927,
      "eval_python_code_alpaca_steps_per_second": 0.088,
      "eval_python_code_alpaca_token_set_f1": 0.49155034642244394,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005474163445380063,
      "eval_python_code_alpaca_token_set_precision": 0.5414658855911652,
      "eval_python_code_alpaca_token_set_recall": 0.47145215086983455,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 231875
    },
    {
      "epoch": 44.52,
      "eval_wikibio_accuracy": 0.33109375,
      "eval_wikibio_bleu_score": 6.068280358273803,
      "eval_wikibio_bleu_score_sem": 0.21390801141292687,
      "eval_wikibio_emb_cos_sim": 0.7426877021789551,
      "eval_wikibio_emb_cos_sim_sem": 0.00934576401953962,
      "eval_wikibio_emb_top1_equal": 0.2421875,
      "eval_wikibio_emb_top1_equal_sem": 0.038014990119662626,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6402812004089355,
      "eval_wikibio_n_ngrams_match_1": 10.294,
      "eval_wikibio_n_ngrams_match_2": 3.416,
      "eval_wikibio_n_ngrams_match_3": 1.23,
      "eval_wikibio_num_pred_words": 36.14,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 38.102549671635146,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36616732745850755,
      "eval_wikibio_runtime": 11.4382,
      "eval_wikibio_samples_per_second": 43.713,
      "eval_wikibio_steps_per_second": 0.087,
      "eval_wikibio_token_set_f1": 0.3231692667263347,
      "eval_wikibio_token_set_f1_sem": 0.0053967739013447685,
      "eval_wikibio_token_set_precision": 0.3315140182844297,
      "eval_wikibio_token_set_recall": 0.3296530079213008,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 231875
    },
    {
      "epoch": 44.52,
      "eval_nq_accuracy": 0.5378125,
      "eval_nq_bleu_score": 12.337385287495966,
      "eval_nq_bleu_score_sem": 0.49999971679454874,
      "eval_nq_emb_cos_sim": 0.8437784910202026,
      "eval_nq_emb_cos_sim_sem": 0.00671438085050062,
      "eval_nq_emb_top1_equal": 0.3125,
      "eval_nq_emb_top1_equal_sem": 0.041130074229814934,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1097679138183594,
      "eval_nq_n_ngrams_match_1": 23.646,
      "eval_nq_n_ngrams_match_2": 8.904,
      "eval_nq_n_ngrams_match_3": 4.178,
      "eval_nq_num_pred_words": 49.086,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.246327203926093,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4599864372969777,
      "eval_nq_runtime": 11.4574,
      "eval_nq_samples_per_second": 43.64,
      "eval_nq_steps_per_second": 0.087,
      "eval_nq_token_set_f1": 0.4727454867206314,
      "eval_nq_token_set_f1_sem": 0.004996086940878568,
      "eval_nq_token_set_precision": 0.43139192327672027,
      "eval_nq_token_set_recall": 0.5297096802434696,
      "eval_nq_true_num_tokens": 64.0,
      "step": 231875
    },
    {
      "epoch": 44.52,
      "learning_rate": 0.001,
      "loss": 2.4922,
      "step": 231876
    },
    {
      "epoch": 44.53,
      "learning_rate": 0.001,
      "loss": 2.4821,
      "step": 231888
    },
    {
      "epoch": 44.53,
      "learning_rate": 0.001,
      "loss": 2.4949,
      "step": 231900
    },
    {
      "epoch": 44.53,
      "learning_rate": 0.001,
      "loss": 2.489,
      "step": 231912
    },
    {
      "epoch": 44.53,
      "learning_rate": 0.001,
      "loss": 2.4918,
      "step": 231924
    },
    {
      "epoch": 44.53,
      "learning_rate": 0.001,
      "loss": 2.4991,
      "step": 231936
    },
    {
      "epoch": 44.54,
      "learning_rate": 0.001,
      "loss": 2.4894,
      "step": 231948
    },
    {
      "epoch": 44.54,
      "learning_rate": 0.001,
      "loss": 2.4919,
      "step": 231960
    },
    {
      "epoch": 44.54,
      "learning_rate": 0.001,
      "loss": 2.4897,
      "step": 231972
    },
    {
      "epoch": 44.54,
      "learning_rate": 0.001,
      "loss": 2.4943,
      "step": 231984
    },
    {
      "epoch": 44.55,
      "learning_rate": 0.001,
      "loss": 2.4947,
      "step": 231996
    },
    {
      "epoch": 44.55,
      "learning_rate": 0.001,
      "loss": 2.4868,
      "step": 232008
    },
    {
      "epoch": 44.55,
      "learning_rate": 0.001,
      "loss": 2.4854,
      "step": 232020
    },
    {
      "epoch": 44.55,
      "learning_rate": 0.001,
      "loss": 2.4944,
      "step": 232032
    },
    {
      "epoch": 44.56,
      "learning_rate": 0.001,
      "loss": 2.4931,
      "step": 232044
    },
    {
      "epoch": 44.56,
      "learning_rate": 0.001,
      "loss": 2.5003,
      "step": 232056
    },
    {
      "epoch": 44.56,
      "learning_rate": 0.001,
      "loss": 2.4947,
      "step": 232068
    },
    {
      "epoch": 44.56,
      "learning_rate": 0.001,
      "loss": 2.4864,
      "step": 232080
    },
    {
      "epoch": 44.56,
      "learning_rate": 0.001,
      "loss": 2.4951,
      "step": 232092
    },
    {
      "epoch": 44.57,
      "learning_rate": 0.001,
      "loss": 2.4849,
      "step": 232104
    },
    {
      "epoch": 44.57,
      "learning_rate": 0.001,
      "loss": 2.4905,
      "step": 232116
    },
    {
      "epoch": 44.57,
      "learning_rate": 0.001,
      "loss": 2.4921,
      "step": 232128
    },
    {
      "epoch": 44.57,
      "learning_rate": 0.001,
      "loss": 2.4855,
      "step": 232140
    },
    {
      "epoch": 44.58,
      "learning_rate": 0.001,
      "loss": 2.4927,
      "step": 232152
    },
    {
      "epoch": 44.58,
      "learning_rate": 0.001,
      "loss": 2.492,
      "step": 232164
    },
    {
      "epoch": 44.58,
      "learning_rate": 0.001,
      "loss": 2.483,
      "step": 232176
    },
    {
      "epoch": 44.58,
      "learning_rate": 0.001,
      "loss": 2.49,
      "step": 232188
    },
    {
      "epoch": 44.59,
      "learning_rate": 0.001,
      "loss": 2.4878,
      "step": 232200
    },
    {
      "epoch": 44.59,
      "learning_rate": 0.001,
      "loss": 2.49,
      "step": 232212
    },
    {
      "epoch": 44.59,
      "learning_rate": 0.001,
      "loss": 2.4951,
      "step": 232224
    },
    {
      "epoch": 44.59,
      "learning_rate": 0.001,
      "loss": 2.4924,
      "step": 232236
    },
    {
      "epoch": 44.59,
      "learning_rate": 0.001,
      "loss": 2.4841,
      "step": 232248
    },
    {
      "epoch": 44.6,
      "learning_rate": 0.001,
      "loss": 2.4931,
      "step": 232260
    },
    {
      "epoch": 44.6,
      "learning_rate": 0.001,
      "loss": 2.4812,
      "step": 232272
    },
    {
      "epoch": 44.6,
      "learning_rate": 0.001,
      "loss": 2.5003,
      "step": 232284
    },
    {
      "epoch": 44.6,
      "learning_rate": 0.001,
      "loss": 2.4977,
      "step": 232296
    },
    {
      "epoch": 44.61,
      "learning_rate": 0.001,
      "loss": 2.4868,
      "step": 232308
    },
    {
      "epoch": 44.61,
      "learning_rate": 0.001,
      "loss": 2.4899,
      "step": 232320
    },
    {
      "epoch": 44.61,
      "learning_rate": 0.001,
      "loss": 2.4935,
      "step": 232332
    },
    {
      "epoch": 44.61,
      "learning_rate": 0.001,
      "loss": 2.496,
      "step": 232344
    },
    {
      "epoch": 44.62,
      "learning_rate": 0.001,
      "loss": 2.4982,
      "step": 232356
    },
    {
      "epoch": 44.62,
      "learning_rate": 0.001,
      "loss": 2.4922,
      "step": 232368
    },
    {
      "epoch": 44.62,
      "learning_rate": 0.001,
      "loss": 2.4838,
      "step": 232380
    },
    {
      "epoch": 44.62,
      "learning_rate": 0.001,
      "loss": 2.491,
      "step": 232392
    },
    {
      "epoch": 44.62,
      "learning_rate": 0.001,
      "loss": 2.4991,
      "step": 232404
    },
    {
      "epoch": 44.63,
      "learning_rate": 0.001,
      "loss": 2.4852,
      "step": 232416
    },
    {
      "epoch": 44.63,
      "learning_rate": 0.001,
      "loss": 2.4855,
      "step": 232428
    },
    {
      "epoch": 44.63,
      "learning_rate": 0.001,
      "loss": 2.492,
      "step": 232440
    },
    {
      "epoch": 44.63,
      "learning_rate": 0.001,
      "loss": 2.4869,
      "step": 232452
    },
    {
      "epoch": 44.64,
      "learning_rate": 0.001,
      "loss": 2.4909,
      "step": 232464
    },
    {
      "epoch": 44.64,
      "learning_rate": 0.001,
      "loss": 2.4806,
      "step": 232476
    },
    {
      "epoch": 44.64,
      "learning_rate": 0.001,
      "loss": 2.4917,
      "step": 232488
    },
    {
      "epoch": 44.64,
      "learning_rate": 0.001,
      "loss": 2.4985,
      "step": 232500
    },
    {
      "epoch": 44.64,
      "eval_ag_news_accuracy": 0.3278125,
      "eval_ag_news_bleu_score": 4.833847274796979,
      "eval_ag_news_bleu_score_sem": 0.15604819210561066,
      "eval_ag_news_emb_cos_sim": 0.8204124569892883,
      "eval_ag_news_emb_cos_sim_sem": 0.0062024290085393625,
      "eval_ag_news_emb_top1_equal": 0.2578125,
      "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4684019088745117,
      "eval_ag_news_n_ngrams_match_1": 14.374,
      "eval_ag_news_n_ngrams_match_2": 3.168,
      "eval_ag_news_n_ngrams_match_3": 0.88,
      "eval_ag_news_num_pred_words": 46.616,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.08542601693543,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3577536070579498,
      "eval_ag_news_runtime": 12.0782,
      "eval_ag_news_samples_per_second": 41.397,
      "eval_ag_news_steps_per_second": 0.083,
      "eval_ag_news_token_set_f1": 0.35668751572364166,
      "eval_ag_news_token_set_f1_sem": 0.004468901815282063,
      "eval_ag_news_token_set_precision": 0.344271097338136,
      "eval_ag_news_token_set_recall": 0.38491437490998465,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 232500
    },
    {
      "epoch": 44.64,
      "eval_anthropic_toxic_prompts_accuracy": 0.1176875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.17252314798791,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11957468490942567,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.681765079498291,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009270485796832114,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.1847341060638428,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.33,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.96,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.752,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.508,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.16086332463754,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21582066676314207,
      "eval_anthropic_toxic_prompts_runtime": 11.438,
      "eval_anthropic_toxic_prompts_samples_per_second": 43.714,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.087,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3632119220779932,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006928794261231546,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4458300876755742,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3312882521265975,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 232500
    },
    {
      "epoch": 44.64,
      "eval_arxiv_accuracy": 0.35475,
      "eval_arxiv_bleu_score": 4.456119312620294,
      "eval_arxiv_bleu_score_sem": 0.1360193654027934,
      "eval_arxiv_emb_cos_sim": 0.7783864736557007,
      "eval_arxiv_emb_cos_sim_sem": 0.00699144698862165,
      "eval_arxiv_emb_top1_equal": 0.265625,
      "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.31887149810791,
      "eval_arxiv_n_ngrams_match_1": 15.348,
      "eval_arxiv_n_ngrams_match_2": 3.044,
      "eval_arxiv_n_ngrams_match_3": 0.706,
      "eval_arxiv_num_pred_words": 40.098,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.629153406908745,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36868529982167786,
      "eval_arxiv_runtime": 11.8737,
      "eval_arxiv_samples_per_second": 42.11,
      "eval_arxiv_steps_per_second": 0.084,
      "eval_arxiv_token_set_f1": 0.36319481193290903,
      "eval_arxiv_token_set_f1_sem": 0.004378341711286714,
      "eval_arxiv_token_set_precision": 0.3110319725875156,
      "eval_arxiv_token_set_recall": 0.4569955362034434,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 232500
    },
    {
      "epoch": 44.64,
      "eval_python_code_alpaca_accuracy": 0.16259375,
      "eval_python_code_alpaca_bleu_score": 4.660162667988266,
      "eval_python_code_alpaca_bleu_score_sem": 0.15117254687198411,
      "eval_python_code_alpaca_emb_cos_sim": 0.768064558506012,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007732874591290058,
      "eval_python_code_alpaca_emb_top1_equal": 0.1328125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.851837396621704,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.874,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.822,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.988,
      "eval_python_code_alpaca_num_pred_words": 43.028,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.319575552402913,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3415741618862518,
      "eval_python_code_alpaca_runtime": 11.8385,
      "eval_python_code_alpaca_samples_per_second": 42.235,
      "eval_python_code_alpaca_steps_per_second": 0.084,
      "eval_python_code_alpaca_token_set_f1": 0.48227694070062255,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005521130966249993,
      "eval_python_code_alpaca_token_set_precision": 0.5358386821741956,
      "eval_python_code_alpaca_token_set_recall": 0.46013190161250833,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 232500
    },
    {
      "epoch": 44.64,
      "eval_wikibio_accuracy": 0.33040625,
      "eval_wikibio_bleu_score": 6.305004954218946,
      "eval_wikibio_bleu_score_sem": 0.2239023075215015,
      "eval_wikibio_emb_cos_sim": 0.7458434104919434,
      "eval_wikibio_emb_cos_sim_sem": 0.008974877020999816,
      "eval_wikibio_emb_top1_equal": 0.203125,
      "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6182138919830322,
      "eval_wikibio_n_ngrams_match_1": 10.018,
      "eval_wikibio_n_ngrams_match_2": 3.4,
      "eval_wikibio_n_ngrams_match_3": 1.324,
      "eval_wikibio_num_pred_words": 35.022,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.270938414205695,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.35632726064714937,
      "eval_wikibio_runtime": 11.2664,
      "eval_wikibio_samples_per_second": 44.38,
      "eval_wikibio_steps_per_second": 0.089,
      "eval_wikibio_token_set_f1": 0.3219415595057358,
      "eval_wikibio_token_set_f1_sem": 0.005475142763128616,
      "eval_wikibio_token_set_precision": 0.32635907410889786,
      "eval_wikibio_token_set_recall": 0.3365921153126336,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 232500
    },
    {
      "epoch": 44.64,
      "eval_nq_accuracy": 0.5371875,
      "eval_nq_bleu_score": 12.446293493753974,
      "eval_nq_bleu_score_sem": 0.5001599804606712,
      "eval_nq_emb_cos_sim": 0.8429065346717834,
      "eval_nq_emb_cos_sim_sem": 0.0067761174224904765,
      "eval_nq_emb_top1_equal": 0.3515625,
      "eval_nq_emb_top1_equal_sem": 0.04236756101983345,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1108951568603516,
      "eval_nq_n_ngrams_match_1": 23.608,
      "eval_nq_n_ngrams_match_2": 8.974,
      "eval_nq_n_ngrams_match_3": 4.198,
      "eval_nq_num_pred_words": 48.92,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.25562806006653,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.46122095192437024,
      "eval_nq_runtime": 11.962,
      "eval_nq_samples_per_second": 41.799,
      "eval_nq_steps_per_second": 0.084,
      "eval_nq_token_set_f1": 0.47409383964277885,
      "eval_nq_token_set_f1_sem": 0.004876826908985697,
      "eval_nq_token_set_precision": 0.43193271966593727,
      "eval_nq_token_set_recall": 0.5332972341018406,
      "eval_nq_true_num_tokens": 64.0,
      "step": 232500
    },
    {
      "epoch": 44.65,
      "learning_rate": 0.001,
      "loss": 2.4978,
      "step": 232512
    },
    {
      "epoch": 44.65,
      "learning_rate": 0.001,
      "loss": 2.4956,
      "step": 232524
    },
    {
      "epoch": 44.65,
      "learning_rate": 0.001,
      "loss": 2.4954,
      "step": 232536
    },
    {
      "epoch": 44.65,
      "learning_rate": 0.001,
      "loss": 2.4879,
      "step": 232548
    },
    {
      "epoch": 44.65,
      "learning_rate": 0.001,
      "loss": 2.4887,
      "step": 232560
    },
    {
      "epoch": 44.66,
      "learning_rate": 0.001,
      "loss": 2.4876,
      "step": 232572
    },
    {
      "epoch": 44.66,
      "learning_rate": 0.001,
      "loss": 2.4919,
      "step": 232584
    },
    {
      "epoch": 44.66,
      "learning_rate": 0.001,
      "loss": 2.491,
      "step": 232596
    },
    {
      "epoch": 44.66,
      "learning_rate": 0.001,
      "loss": 2.4926,
      "step": 232608
    },
    {
      "epoch": 44.67,
      "learning_rate": 0.001,
      "loss": 2.491,
      "step": 232620
    },
    {
      "epoch": 44.67,
      "learning_rate": 0.001,
      "loss": 2.504,
      "step": 232632
    },
    {
      "epoch": 44.67,
      "learning_rate": 0.001,
      "loss": 2.493,
      "step": 232644
    },
    {
      "epoch": 44.67,
      "learning_rate": 0.001,
      "loss": 2.4892,
      "step": 232656
    },
    {
      "epoch": 44.68,
      "learning_rate": 0.001,
      "loss": 2.4941,
      "step": 232668
    },
    {
      "epoch": 44.68,
      "learning_rate": 0.001,
      "loss": 2.4988,
      "step": 232680
    },
    {
      "epoch": 44.68,
      "learning_rate": 0.001,
      "loss": 2.4859,
      "step": 232692
    },
    {
      "epoch": 44.68,
      "learning_rate": 0.001,
      "loss": 2.4912,
      "step": 232704
    },
    {
      "epoch": 44.68,
      "learning_rate": 0.001,
      "loss": 2.484,
      "step": 232716
    },
    {
      "epoch": 44.69,
      "learning_rate": 0.001,
      "loss": 2.5034,
      "step": 232728
    },
    {
      "epoch": 44.69,
      "learning_rate": 0.001,
      "loss": 2.4979,
      "step": 232740
    },
    {
      "epoch": 44.69,
      "learning_rate": 0.001,
      "loss": 2.4966,
      "step": 232752
    },
    {
      "epoch": 44.69,
      "learning_rate": 0.001,
      "loss": 2.4883,
      "step": 232764
    },
    {
      "epoch": 44.7,
      "learning_rate": 0.001,
      "loss": 2.4914,
      "step": 232776
    },
    {
      "epoch": 44.7,
      "learning_rate": 0.001,
      "loss": 2.494,
      "step": 232788
    },
    {
      "epoch": 44.7,
      "learning_rate": 0.001,
      "loss": 2.4871,
      "step": 232800
    },
    {
      "epoch": 44.7,
      "learning_rate": 0.001,
      "loss": 2.4986,
      "step": 232812
    },
    {
      "epoch": 44.71,
      "learning_rate": 0.001,
      "loss": 2.4946,
      "step": 232824
    },
    {
      "epoch": 44.71,
      "learning_rate": 0.001,
      "loss": 2.4859,
      "step": 232836
    },
    {
      "epoch": 44.71,
      "learning_rate": 0.001,
      "loss": 2.4864,
      "step": 232848
    },
    {
      "epoch": 44.71,
      "learning_rate": 0.001,
      "loss": 2.4911,
      "step": 232860
    },
    {
      "epoch": 44.71,
      "learning_rate": 0.001,
      "loss": 2.4868,
      "step": 232872
    },
    {
      "epoch": 44.72,
      "learning_rate": 0.001,
      "loss": 2.4954,
      "step": 232884
    },
    {
      "epoch": 44.72,
      "learning_rate": 0.001,
      "loss": 2.4874,
      "step": 232896
    },
    {
      "epoch": 44.72,
      "learning_rate": 0.001,
      "loss": 2.4935,
      "step": 232908
    },
    {
      "epoch": 44.72,
      "learning_rate": 0.001,
      "loss": 2.4977,
      "step": 232920
    },
    {
      "epoch": 44.73,
      "learning_rate": 0.001,
      "loss": 2.4899,
      "step": 232932
    },
    {
      "epoch": 44.73,
      "learning_rate": 0.001,
      "loss": 2.4925,
      "step": 232944
    },
    {
      "epoch": 44.73,
      "learning_rate": 0.001,
      "loss": 2.4916,
      "step": 232956
    },
    {
      "epoch": 44.73,
      "learning_rate": 0.001,
      "loss": 2.4826,
      "step": 232968
    },
    {
      "epoch": 44.74,
      "learning_rate": 0.001,
      "loss": 2.4876,
      "step": 232980
    },
    {
      "epoch": 44.74,
      "learning_rate": 0.001,
      "loss": 2.4839,
      "step": 232992
    },
    {
      "epoch": 44.74,
      "learning_rate": 0.001,
      "loss": 2.4833,
      "step": 233004
    },
    {
      "epoch": 44.74,
      "learning_rate": 0.001,
      "loss": 2.4929,
      "step": 233016
    },
    {
      "epoch": 44.74,
      "learning_rate": 0.001,
      "loss": 2.4872,
      "step": 233028
    },
    {
      "epoch": 44.75,
      "learning_rate": 0.001,
      "loss": 2.4992,
      "step": 233040
    },
    {
      "epoch": 44.75,
      "learning_rate": 0.001,
      "loss": 2.495,
      "step": 233052
    },
    {
      "epoch": 44.75,
      "learning_rate": 0.001,
      "loss": 2.4779,
      "step": 233064
    },
    {
      "epoch": 44.75,
      "learning_rate": 0.001,
      "loss": 2.4853,
      "step": 233076
    },
    {
      "epoch": 44.76,
      "learning_rate": 0.001,
      "loss": 2.4904,
      "step": 233088
    },
    {
      "epoch": 44.76,
      "learning_rate": 0.001,
      "loss": 2.4922,
      "step": 233100
    },
    {
      "epoch": 44.76,
      "learning_rate": 0.001,
      "loss": 2.4922,
      "step": 233112
    },
    {
      "epoch": 44.76,
      "learning_rate": 0.001,
      "loss": 2.4876,
      "step": 233124
    },
    {
      "epoch": 44.76,
      "eval_ag_news_accuracy": 0.329875,
      "eval_ag_news_bleu_score": 4.911330238378152,
      "eval_ag_news_bleu_score_sem": 0.1583020659705351,
      "eval_ag_news_emb_cos_sim": 0.8199710845947266,
      "eval_ag_news_emb_cos_sim_sem": 0.007175698522332103,
      "eval_ag_news_emb_top1_equal": 0.234375,
      "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.467978000640869,
      "eval_ag_news_n_ngrams_match_1": 14.572,
      "eval_ag_news_n_ngrams_match_2": 3.216,
      "eval_ag_news_n_ngrams_match_3": 0.896,
      "eval_ag_news_num_pred_words": 47.002,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.07182762310611,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3611572503418052,
      "eval_ag_news_runtime": 11.8256,
      "eval_ag_news_samples_per_second": 42.281,
      "eval_ag_news_steps_per_second": 0.085,
      "eval_ag_news_token_set_f1": 0.35761217906127024,
      "eval_ag_news_token_set_f1_sem": 0.00448286920762013,
      "eval_ag_news_token_set_precision": 0.34624476285537886,
      "eval_ag_news_token_set_recall": 0.38470179548590044,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 233125
    },
    {
      "epoch": 44.76,
      "eval_anthropic_toxic_prompts_accuracy": 0.116125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.056392732103471,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10660535293173316,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6825622916221619,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009403752814734545,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.182234525680542,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.49,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.0,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.684,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.746,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.100546719133966,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.22057785534846946,
      "eval_anthropic_toxic_prompts_runtime": 11.4665,
      "eval_anthropic_toxic_prompts_samples_per_second": 43.605,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.087,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3658021762998863,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00675121611005975,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.45522586502478696,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3318392893769312,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 233125
    },
    {
      "epoch": 44.76,
      "eval_arxiv_accuracy": 0.35034375,
      "eval_arxiv_bleu_score": 4.569612237114931,
      "eval_arxiv_bleu_score_sem": 0.12911174905259093,
      "eval_arxiv_emb_cos_sim": 0.7890654802322388,
      "eval_arxiv_emb_cos_sim_sem": 0.006090167584808057,
      "eval_arxiv_emb_top1_equal": 0.2578125,
      "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3350062370300293,
      "eval_arxiv_n_ngrams_match_1": 15.634,
      "eval_arxiv_n_ngrams_match_2": 3.152,
      "eval_arxiv_n_ngrams_match_3": 0.74,
      "eval_arxiv_num_pred_words": 41.216,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 28.078558350066526,
      "eval_arxiv_pred_num_tokens": 62.9921875,
      "eval_arxiv_rouge_score": 0.37022475451948533,
      "eval_arxiv_runtime": 12.2,
      "eval_arxiv_samples_per_second": 40.984,
      "eval_arxiv_steps_per_second": 0.082,
      "eval_arxiv_token_set_f1": 0.36813210363390825,
      "eval_arxiv_token_set_f1_sem": 0.0038958119374668844,
      "eval_arxiv_token_set_precision": 0.3210678923740764,
      "eval_arxiv_token_set_recall": 0.44954983845232976,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 233125
    },
    {
      "epoch": 44.76,
      "eval_python_code_alpaca_accuracy": 0.16184375,
      "eval_python_code_alpaca_bleu_score": 4.804922149779122,
      "eval_python_code_alpaca_bleu_score_sem": 0.15457295137870475,
      "eval_python_code_alpaca_emb_cos_sim": 0.7771536707878113,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.006405953505820438,
      "eval_python_code_alpaca_emb_top1_equal": 0.1484375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8460724353790283,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.156,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.036,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.048,
      "eval_python_code_alpaca_num_pred_words": 43.692,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.22001612446157,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3481268633301894,
      "eval_python_code_alpaca_runtime": 11.4053,
      "eval_python_code_alpaca_samples_per_second": 43.839,
      "eval_python_code_alpaca_steps_per_second": 0.088,
      "eval_python_code_alpaca_token_set_f1": 0.49094832460599663,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005440607741651078,
      "eval_python_code_alpaca_token_set_precision": 0.5537041849175821,
      "eval_python_code_alpaca_token_set_recall": 0.45950289975435893,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 233125
    },
    {
      "epoch": 44.76,
      "eval_wikibio_accuracy": 0.328125,
      "eval_wikibio_bleu_score": 6.379971239059081,
      "eval_wikibio_bleu_score_sem": 0.22358192028378598,
      "eval_wikibio_emb_cos_sim": 0.7499635219573975,
      "eval_wikibio_emb_cos_sim_sem": 0.00844267427687358,
      "eval_wikibio_emb_top1_equal": 0.1796875,
      "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.65299129486084,
      "eval_wikibio_n_ngrams_match_1": 10.352,
      "eval_wikibio_n_ngrams_match_2": 3.552,
      "eval_wikibio_n_ngrams_match_3": 1.382,
      "eval_wikibio_num_pred_words": 36.47,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 38.589927424291794,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36666395165759413,
      "eval_wikibio_runtime": 11.13,
      "eval_wikibio_samples_per_second": 44.924,
      "eval_wikibio_steps_per_second": 0.09,
      "eval_wikibio_token_set_f1": 0.32686702242703836,
      "eval_wikibio_token_set_f1_sem": 0.005317875481153925,
      "eval_wikibio_token_set_precision": 0.33673488188853756,
      "eval_wikibio_token_set_recall": 0.3327432570358703,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 233125
    },
    {
      "epoch": 44.76,
      "eval_nq_accuracy": 0.53709375,
      "eval_nq_bleu_score": 12.168191358668201,
      "eval_nq_bleu_score_sem": 0.4940694869119727,
      "eval_nq_emb_cos_sim": 0.8457506895065308,
      "eval_nq_emb_cos_sim_sem": 0.006445194855962112,
      "eval_nq_emb_top1_equal": 0.3515625,
      "eval_nq_emb_top1_equal_sem": 0.04236756101983345,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1120073795318604,
      "eval_nq_n_ngrams_match_1": 23.566,
      "eval_nq_n_ngrams_match_2": 8.744,
      "eval_nq_n_ngrams_match_3": 4.114,
      "eval_nq_num_pred_words": 49.208,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.264815264924167,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4578527317584956,
      "eval_nq_runtime": 17.739,
      "eval_nq_samples_per_second": 28.187,
      "eval_nq_steps_per_second": 0.056,
      "eval_nq_token_set_f1": 0.4692111460883984,
      "eval_nq_token_set_f1_sem": 0.004946791072729553,
      "eval_nq_token_set_precision": 0.4293232239549887,
      "eval_nq_token_set_recall": 0.5262780931430423,
      "eval_nq_true_num_tokens": 64.0,
      "step": 233125
    },
    {
      "epoch": 44.76,
      "learning_rate": 0.001,
      "loss": 2.496,
      "step": 233136
    },
    {
      "epoch": 44.77,
      "learning_rate": 0.001,
      "loss": 2.4939,
      "step": 233148
    },
    {
      "epoch": 44.77,
      "learning_rate": 0.001,
      "loss": 2.4871,
      "step": 233160
    },
    {
      "epoch": 44.77,
      "learning_rate": 0.001,
      "loss": 2.5019,
      "step": 233172
    },
    {
      "epoch": 44.77,
      "learning_rate": 0.001,
      "loss": 2.4989,
      "step": 233184
    },
    {
      "epoch": 44.78,
      "learning_rate": 0.001,
      "loss": 2.4917,
      "step": 233196
    },
    {
      "epoch": 44.78,
      "learning_rate": 0.001,
      "loss": 2.4912,
      "step": 233208
    },
    {
      "epoch": 44.78,
      "learning_rate": 0.001,
      "loss": 2.4954,
      "step": 233220
    },
    {
      "epoch": 44.78,
      "learning_rate": 0.001,
      "loss": 2.5069,
      "step": 233232
    },
    {
      "epoch": 44.79,
      "learning_rate": 0.001,
      "loss": 2.4995,
      "step": 233244
    },
    {
      "epoch": 44.79,
      "learning_rate": 0.001,
      "loss": 2.4963,
      "step": 233256
    },
    {
      "epoch": 44.79,
      "learning_rate": 0.001,
      "loss": 2.4936,
      "step": 233268
    },
    {
      "epoch": 44.79,
      "learning_rate": 0.001,
      "loss": 2.4811,
      "step": 233280
    },
    {
      "epoch": 44.79,
      "learning_rate": 0.001,
      "loss": 2.4866,
      "step": 233292
    },
    {
      "epoch": 44.8,
      "learning_rate": 0.001,
      "loss": 2.4928,
      "step": 233304
    },
    {
      "epoch": 44.8,
      "learning_rate": 0.001,
      "loss": 2.4913,
      "step": 233316
    },
    {
      "epoch": 44.8,
      "learning_rate": 0.001,
      "loss": 2.4912,
      "step": 233328
    },
    {
      "epoch": 44.8,
      "learning_rate": 0.001,
      "loss": 2.4866,
      "step": 233340
    },
    {
      "epoch": 44.81,
      "learning_rate": 0.001,
      "loss": 2.496,
      "step": 233352
    },
    {
      "epoch": 44.81,
      "learning_rate": 0.001,
      "loss": 2.4946,
      "step": 233364
    },
    {
      "epoch": 44.81,
      "learning_rate": 0.001,
      "loss": 2.4947,
      "step": 233376
    },
    {
      "epoch": 44.81,
      "learning_rate": 0.001,
      "loss": 2.4964,
      "step": 233388
    },
    {
      "epoch": 44.82,
      "learning_rate": 0.001,
      "loss": 2.4905,
      "step": 233400
    },
    {
      "epoch": 44.82,
      "learning_rate": 0.001,
      "loss": 2.4919,
      "step": 233412
    },
    {
      "epoch": 44.82,
      "learning_rate": 0.001,
      "loss": 2.4884,
      "step": 233424
    },
    {
      "epoch": 44.82,
      "learning_rate": 0.001,
      "loss": 2.4922,
      "step": 233436
    },
    {
      "epoch": 44.82,
      "learning_rate": 0.001,
      "loss": 2.4922,
      "step": 233448
    },
    {
      "epoch": 44.83,
      "learning_rate": 0.001,
      "loss": 2.4896,
      "step": 233460
    },
    {
      "epoch": 44.83,
      "learning_rate": 0.001,
      "loss": 2.4943,
      "step": 233472
    },
    {
      "epoch": 44.83,
      "learning_rate": 0.001,
      "loss": 2.4897,
      "step": 233484
    },
    {
      "epoch": 44.83,
      "learning_rate": 0.001,
      "loss": 2.4891,
      "step": 233496
    },
    {
      "epoch": 44.84,
      "learning_rate": 0.001,
      "loss": 2.4918,
      "step": 233508
    },
    {
      "epoch": 44.84,
      "learning_rate": 0.001,
      "loss": 2.4839,
      "step": 233520
    },
    {
      "epoch": 44.84,
      "learning_rate": 0.001,
      "loss": 2.4916,
      "step": 233532
    },
    {
      "epoch": 44.84,
      "learning_rate": 0.001,
      "loss": 2.498,
      "step": 233544
    },
    {
      "epoch": 44.85,
      "learning_rate": 0.001,
      "loss": 2.5056,
      "step": 233556
    },
    {
      "epoch": 44.85,
      "learning_rate": 0.001,
      "loss": 2.5037,
      "step": 233568
    },
    {
      "epoch": 44.85,
      "learning_rate": 0.001,
      "loss": 2.4808,
      "step": 233580
    },
    {
      "epoch": 44.85,
      "learning_rate": 0.001,
      "loss": 2.4852,
      "step": 233592
    },
    {
      "epoch": 44.85,
      "learning_rate": 0.001,
      "loss": 2.4888,
      "step": 233604
    },
    {
      "epoch": 44.86,
      "learning_rate": 0.001,
      "loss": 2.4991,
      "step": 233616
    },
    {
      "epoch": 44.86,
      "learning_rate": 0.001,
      "loss": 2.4974,
      "step": 233628
    },
    {
      "epoch": 44.86,
      "learning_rate": 0.001,
      "loss": 2.4895,
      "step": 233640
    },
    {
      "epoch": 44.86,
      "learning_rate": 0.001,
      "loss": 2.4957,
      "step": 233652
    },
    {
      "epoch": 44.87,
      "learning_rate": 0.001,
      "loss": 2.5009,
      "step": 233664
    },
    {
      "epoch": 44.87,
      "learning_rate": 0.001,
      "loss": 2.4995,
      "step": 233676
    },
    {
      "epoch": 44.87,
      "learning_rate": 0.001,
      "loss": 2.5021,
      "step": 233688
    },
    {
      "epoch": 44.87,
      "learning_rate": 0.001,
      "loss": 2.4988,
      "step": 233700
    },
    {
      "epoch": 44.88,
      "learning_rate": 0.001,
      "loss": 2.4998,
      "step": 233712
    },
    {
      "epoch": 44.88,
      "learning_rate": 0.001,
      "loss": 2.4975,
      "step": 233724
    },
    {
      "epoch": 44.88,
      "learning_rate": 0.001,
      "loss": 2.4963,
      "step": 233736
    },
    {
      "epoch": 44.88,
      "learning_rate": 0.001,
      "loss": 2.4928,
      "step": 233748
    },
    {
      "epoch": 44.88,
      "eval_ag_news_accuracy": 0.329,
      "eval_ag_news_bleu_score": 4.943506182483064,
      "eval_ag_news_bleu_score_sem": 0.1591658295276275,
      "eval_ag_news_emb_cos_sim": 0.8237883448600769,
      "eval_ag_news_emb_cos_sim_sem": 0.006359255456214875,
      "eval_ag_news_emb_top1_equal": 0.203125,
      "eval_ag_news_emb_top1_equal_sem": 0.03570055125142555,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4656832218170166,
      "eval_ag_news_n_ngrams_match_1": 14.462,
      "eval_ag_news_n_ngrams_match_2": 3.232,
      "eval_ag_news_n_ngrams_match_3": 0.884,
      "eval_ag_news_num_pred_words": 47.176,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.998314252957076,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.359527296304229,
      "eval_ag_news_runtime": 11.5777,
      "eval_ag_news_samples_per_second": 43.186,
      "eval_ag_news_steps_per_second": 0.086,
      "eval_ag_news_token_set_f1": 0.3616526399749545,
      "eval_ag_news_token_set_f1_sem": 0.004410987666602623,
      "eval_ag_news_token_set_precision": 0.34805479304943676,
      "eval_ag_news_token_set_recall": 0.39039921147470874,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 233750
    },
    {
      "epoch": 44.88,
      "eval_anthropic_toxic_prompts_accuracy": 0.11665625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.184933979162106,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11634765300963519,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6902061700820923,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008550291644734592,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.194072961807251,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.516,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.012,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.762,
      "eval_anthropic_toxic_prompts_num_pred_words": 48.248,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.38755501479955,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2202184690163958,
      "eval_anthropic_toxic_prompts_runtime": 11.3634,
      "eval_anthropic_toxic_prompts_samples_per_second": 44.001,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.088,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3637381932820986,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006503239882816224,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4596421768650673,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3256649891482098,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 233750
    },
    {
      "epoch": 44.88,
      "eval_arxiv_accuracy": 0.35296875,
      "eval_arxiv_bleu_score": 4.454903348532187,
      "eval_arxiv_bleu_score_sem": 0.12382288765593584,
      "eval_arxiv_emb_cos_sim": 0.7839063405990601,
      "eval_arxiv_emb_cos_sim_sem": 0.006648951819454505,
      "eval_arxiv_emb_top1_equal": 0.3203125,
      "eval_arxiv_emb_top1_equal_sem": 0.041403754790620424,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3281784057617188,
      "eval_arxiv_n_ngrams_match_1": 15.612,
      "eval_arxiv_n_ngrams_match_2": 3.132,
      "eval_arxiv_n_ngrams_match_3": 0.692,
      "eval_arxiv_num_pred_words": 40.982,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.887495705410885,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3710272278631155,
      "eval_arxiv_runtime": 11.9622,
      "eval_arxiv_samples_per_second": 41.798,
      "eval_arxiv_steps_per_second": 0.084,
      "eval_arxiv_token_set_f1": 0.3629057381987348,
      "eval_arxiv_token_set_f1_sem": 0.0039950649701458955,
      "eval_arxiv_token_set_precision": 0.31690083419007586,
      "eval_arxiv_token_set_recall": 0.44308012254810736,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 233750
    },
    {
      "epoch": 44.88,
      "eval_python_code_alpaca_accuracy": 0.1655625,
      "eval_python_code_alpaca_bleu_score": 4.945063139850272,
      "eval_python_code_alpaca_bleu_score_sem": 0.15143180572827783,
      "eval_python_code_alpaca_emb_cos_sim": 0.7870513200759888,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.006154739849449286,
      "eval_python_code_alpaca_emb_top1_equal": 0.15625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8081815242767334,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.462,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.16,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.128,
      "eval_python_code_alpaca_num_pred_words": 44.406,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 16.579740932387736,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.35518891420517174,
      "eval_python_code_alpaca_runtime": 11.8525,
      "eval_python_code_alpaca_samples_per_second": 42.185,
      "eval_python_code_alpaca_steps_per_second": 0.084,
      "eval_python_code_alpaca_token_set_f1": 0.4918032318275741,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0052360135457094606,
      "eval_python_code_alpaca_token_set_precision": 0.576674056168738,
      "eval_python_code_alpaca_token_set_recall": 0.44628443025629405,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 233750
    },
    {
      "epoch": 44.88,
      "eval_wikibio_accuracy": 0.3314375,
      "eval_wikibio_bleu_score": 6.261302634399697,
      "eval_wikibio_bleu_score_sem": 0.2242887877383055,
      "eval_wikibio_emb_cos_sim": 0.7429580688476562,
      "eval_wikibio_emb_cos_sim_sem": 0.009413135098339775,
      "eval_wikibio_emb_top1_equal": 0.1796875,
      "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.590331554412842,
      "eval_wikibio_n_ngrams_match_1": 10.24,
      "eval_wikibio_n_ngrams_match_2": 3.548,
      "eval_wikibio_n_ngrams_match_3": 1.372,
      "eval_wikibio_num_pred_words": 36.704,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 36.24609148604096,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.35995438205822317,
      "eval_wikibio_runtime": 14.7028,
      "eval_wikibio_samples_per_second": 34.007,
      "eval_wikibio_steps_per_second": 0.068,
      "eval_wikibio_token_set_f1": 0.3221930267981259,
      "eval_wikibio_token_set_f1_sem": 0.00556499696088582,
      "eval_wikibio_token_set_precision": 0.3322789642795646,
      "eval_wikibio_token_set_recall": 0.3300855178762037,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 233750
    },
    {
      "epoch": 44.88,
      "eval_nq_accuracy": 0.53653125,
      "eval_nq_bleu_score": 12.311711459786745,
      "eval_nq_bleu_score_sem": 0.48828531255976293,
      "eval_nq_emb_cos_sim": 0.8420907258987427,
      "eval_nq_emb_cos_sim_sem": 0.006898221452438093,
      "eval_nq_emb_top1_equal": 0.3203125,
      "eval_nq_emb_top1_equal_sem": 0.041403754790620424,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.113239288330078,
      "eval_nq_n_ngrams_match_1": 23.698,
      "eval_nq_n_ngrams_match_2": 8.886,
      "eval_nq_n_ngrams_match_3": 4.17,
      "eval_nq_num_pred_words": 49.252,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.275003037479582,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.46208923471860586,
      "eval_nq_runtime": 29.667,
      "eval_nq_samples_per_second": 16.854,
      "eval_nq_steps_per_second": 0.034,
      "eval_nq_token_set_f1": 0.47194581999061097,
      "eval_nq_token_set_f1_sem": 0.004866567798190057,
      "eval_nq_token_set_precision": 0.4309898560778709,
      "eval_nq_token_set_recall": 0.5296490932118251,
      "eval_nq_true_num_tokens": 64.0,
      "step": 233750
    },
    {
      "epoch": 44.88,
      "learning_rate": 0.001,
      "loss": 2.4936,
      "step": 233760
    },
    {
      "epoch": 44.89,
      "learning_rate": 0.001,
      "loss": 2.5009,
      "step": 233772
    },
    {
      "epoch": 44.89,
      "learning_rate": 0.001,
      "loss": 2.4977,
      "step": 233784
    },
    {
      "epoch": 44.89,
      "learning_rate": 0.001,
      "loss": 2.4999,
      "step": 233796
    },
    {
      "epoch": 44.89,
      "learning_rate": 0.001,
      "loss": 2.4979,
      "step": 233808
    },
    {
      "epoch": 44.9,
      "learning_rate": 0.001,
      "loss": 2.4917,
      "step": 233820
    },
    {
      "epoch": 44.9,
      "learning_rate": 0.001,
      "loss": 2.4928,
      "step": 233832
    },
    {
      "epoch": 44.9,
      "learning_rate": 0.001,
      "loss": 2.4927,
      "step": 233844
    },
    {
      "epoch": 44.9,
      "learning_rate": 0.001,
      "loss": 2.4866,
      "step": 233856
    },
    {
      "epoch": 44.91,
      "learning_rate": 0.001,
      "loss": 2.502,
      "step": 233868
    },
    {
      "epoch": 44.91,
      "learning_rate": 0.001,
      "loss": 2.5062,
      "step": 233880
    },
    {
      "epoch": 44.91,
      "learning_rate": 0.001,
      "loss": 2.4851,
      "step": 233892
    },
    {
      "epoch": 44.91,
      "learning_rate": 0.001,
      "loss": 2.4965,
      "step": 233904
    },
    {
      "epoch": 44.91,
      "learning_rate": 0.001,
      "loss": 2.4966,
      "step": 233916
    },
    {
      "epoch": 44.92,
      "learning_rate": 0.001,
      "loss": 2.4893,
      "step": 233928
    },
    {
      "epoch": 44.92,
      "learning_rate": 0.001,
      "loss": 2.5014,
      "step": 233940
    },
    {
      "epoch": 44.92,
      "learning_rate": 0.001,
      "loss": 2.4996,
      "step": 233952
    },
    {
      "epoch": 44.92,
      "learning_rate": 0.001,
      "loss": 2.4869,
      "step": 233964
    },
    {
      "epoch": 44.93,
      "learning_rate": 0.001,
      "loss": 2.499,
      "step": 233976
    },
    {
      "epoch": 44.93,
      "learning_rate": 0.001,
      "loss": 2.4837,
      "step": 233988
    },
    {
      "epoch": 44.93,
      "learning_rate": 0.001,
      "loss": 2.4941,
      "step": 234000
    },
    {
      "epoch": 44.93,
      "learning_rate": 0.001,
      "loss": 2.4946,
      "step": 234012
    },
    {
      "epoch": 44.94,
      "learning_rate": 0.001,
      "loss": 2.5021,
      "step": 234024
    },
    {
      "epoch": 44.94,
      "learning_rate": 0.001,
      "loss": 2.4807,
      "step": 234036
    },
    {
      "epoch": 44.94,
      "learning_rate": 0.001,
      "loss": 2.4962,
      "step": 234048
    },
    {
      "epoch": 44.94,
      "learning_rate": 0.001,
      "loss": 2.488,
      "step": 234060
    },
    {
      "epoch": 44.94,
      "learning_rate": 0.001,
      "loss": 2.4986,
      "step": 234072
    },
    {
      "epoch": 44.95,
      "learning_rate": 0.001,
      "loss": 2.485,
      "step": 234084
    },
    {
      "epoch": 44.95,
      "learning_rate": 0.001,
      "loss": 2.5094,
      "step": 234096
    },
    {
      "epoch": 44.95,
      "learning_rate": 0.001,
      "loss": 2.5016,
      "step": 234108
    },
    {
      "epoch": 44.95,
      "learning_rate": 0.001,
      "loss": 2.4944,
      "step": 234120
    },
    {
      "epoch": 44.96,
      "learning_rate": 0.001,
      "loss": 2.4911,
      "step": 234132
    },
    {
      "epoch": 44.96,
      "learning_rate": 0.001,
      "loss": 2.4909,
      "step": 234144
    },
    {
      "epoch": 44.96,
      "learning_rate": 0.001,
      "loss": 2.4912,
      "step": 234156
    },
    {
      "epoch": 44.96,
      "learning_rate": 0.001,
      "loss": 2.4871,
      "step": 234168
    },
    {
      "epoch": 44.97,
      "learning_rate": 0.001,
      "loss": 2.4906,
      "step": 234180
    },
    {
      "epoch": 44.97,
      "learning_rate": 0.001,
      "loss": 2.485,
      "step": 234192
    },
    {
      "epoch": 44.97,
      "learning_rate": 0.001,
      "loss": 2.4915,
      "step": 234204
    },
    {
      "epoch": 44.97,
      "learning_rate": 0.001,
      "loss": 2.5007,
      "step": 234216
    },
    {
      "epoch": 44.97,
      "learning_rate": 0.001,
      "loss": 2.4984,
      "step": 234228
    },
    {
      "epoch": 44.98,
      "learning_rate": 0.001,
      "loss": 2.4972,
      "step": 234240
    },
    {
      "epoch": 44.98,
      "learning_rate": 0.001,
      "loss": 2.494,
      "step": 234252
    },
    {
      "epoch": 44.98,
      "learning_rate": 0.001,
      "loss": 2.4944,
      "step": 234264
    },
    {
      "epoch": 44.98,
      "learning_rate": 0.001,
      "loss": 2.5054,
      "step": 234276
    },
    {
      "epoch": 44.99,
      "learning_rate": 0.001,
      "loss": 2.4946,
      "step": 234288
    },
    {
      "epoch": 44.99,
      "learning_rate": 0.001,
      "loss": 2.4825,
      "step": 234300
    },
    {
      "epoch": 44.99,
      "learning_rate": 0.001,
      "loss": 2.5043,
      "step": 234312
    },
    {
      "epoch": 44.99,
      "learning_rate": 0.001,
      "loss": 2.4869,
      "step": 234324
    },
    {
      "epoch": 45.0,
      "learning_rate": 0.001,
      "loss": 2.4982,
      "step": 234336
    },
    {
      "epoch": 45.0,
      "learning_rate": 0.001,
      "loss": 2.493,
      "step": 234348
    },
    {
      "epoch": 45.0,
      "learning_rate": 0.001,
      "loss": 2.4946,
      "step": 234360
    },
    {
      "epoch": 45.0,
      "learning_rate": 0.001,
      "loss": 2.4846,
      "step": 234372
    },
    {
      "epoch": 45.0,
      "eval_ag_news_accuracy": 0.32984375,
      "eval_ag_news_bleu_score": 5.050963469957959,
      "eval_ag_news_bleu_score_sem": 0.16074897571484204,
      "eval_ag_news_emb_cos_sim": 0.8206876516342163,
      "eval_ag_news_emb_cos_sim_sem": 0.007737273668603383,
      "eval_ag_news_emb_top1_equal": 0.296875,
      "eval_ag_news_emb_top1_equal_sem": 0.04054163310179599,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4585185050964355,
      "eval_ag_news_n_ngrams_match_1": 14.654,
      "eval_ag_news_n_ngrams_match_2": 3.346,
      "eval_ag_news_n_ngrams_match_3": 0.938,
      "eval_ag_news_num_pred_words": 47.044,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.769874725259882,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3629810314376243,
      "eval_ag_news_runtime": 11.6887,
      "eval_ag_news_samples_per_second": 42.776,
      "eval_ag_news_steps_per_second": 0.086,
      "eval_ag_news_token_set_f1": 0.36353585810266753,
      "eval_ag_news_token_set_f1_sem": 0.0045619068841447575,
      "eval_ag_news_token_set_precision": 0.34873619528640054,
      "eval_ag_news_token_set_recall": 0.39487507795527493,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 234375
    },
    {
      "epoch": 45.0,
      "eval_anthropic_toxic_prompts_accuracy": 0.11584375,
      "eval_anthropic_toxic_prompts_bleu_score": 3.1859433383747104,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12730083966736477,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6858948469161987,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008169146498393943,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.140625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030847557647994725,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2058634757995605,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.31,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.928,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.75,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.494,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.676798638145378,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21730135124581162,
      "eval_anthropic_toxic_prompts_runtime": 11.1916,
      "eval_anthropic_toxic_prompts_samples_per_second": 44.676,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.089,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3574979096810616,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006470218294475449,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4433849828834597,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32354975873003256,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 234375
    },
    {
      "epoch": 45.0,
      "eval_arxiv_accuracy": 0.35515625,
      "eval_arxiv_bleu_score": 4.482566132989462,
      "eval_arxiv_bleu_score_sem": 0.1315923904064372,
      "eval_arxiv_emb_cos_sim": 0.7830665111541748,
      "eval_arxiv_emb_cos_sim_sem": 0.006992334048073903,
      "eval_arxiv_emb_top1_equal": 0.2890625,
      "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3218233585357666,
      "eval_arxiv_n_ngrams_match_1": 15.624,
      "eval_arxiv_n_ngrams_match_2": 3.098,
      "eval_arxiv_n_ngrams_match_3": 0.698,
      "eval_arxiv_num_pred_words": 40.482,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.710831303072315,
      "eval_arxiv_pred_num_tokens": 62.9921875,
      "eval_arxiv_rouge_score": 0.3748865835741546,
      "eval_arxiv_runtime": 11.6649,
      "eval_arxiv_samples_per_second": 42.864,
      "eval_arxiv_steps_per_second": 0.086,
      "eval_arxiv_token_set_f1": 0.366400106839306,
      "eval_arxiv_token_set_f1_sem": 0.0042245727621743655,
      "eval_arxiv_token_set_precision": 0.3189128701744615,
      "eval_arxiv_token_set_recall": 0.4514187899904173,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 234375
    },
    {
      "epoch": 45.0,
      "eval_python_code_alpaca_accuracy": 0.1626875,
      "eval_python_code_alpaca_bleu_score": 4.646057821441524,
      "eval_python_code_alpaca_bleu_score_sem": 0.14654806645109208,
      "eval_python_code_alpaca_emb_cos_sim": 0.7748706936836243,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007797053704860023,
      "eval_python_code_alpaca_emb_top1_equal": 0.1328125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8313002586364746,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.036,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.028,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.974,
      "eval_python_code_alpaca_num_pred_words": 44.142,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 16.967508637150246,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3426050111183082,
      "eval_python_code_alpaca_runtime": 27.0116,
      "eval_python_code_alpaca_samples_per_second": 18.511,
      "eval_python_code_alpaca_steps_per_second": 0.037,
      "eval_python_code_alpaca_token_set_f1": 0.48563724798043634,
      "eval_python_code_alpaca_token_set_f1_sem": 0.00547676951873612,
      "eval_python_code_alpaca_token_set_precision": 0.5504174307459256,
      "eval_python_code_alpaca_token_set_recall": 0.4555004170217813,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 234375
    },
    {
      "epoch": 45.0,
      "eval_wikibio_accuracy": 0.33025,
      "eval_wikibio_bleu_score": 6.323590258839978,
      "eval_wikibio_bleu_score_sem": 0.2148147875379222,
      "eval_wikibio_emb_cos_sim": 0.759397029876709,
      "eval_wikibio_emb_cos_sim_sem": 0.008260989749655244,
      "eval_wikibio_emb_top1_equal": 0.1875,
      "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6340675354003906,
      "eval_wikibio_n_ngrams_match_1": 10.504,
      "eval_wikibio_n_ngrams_match_2": 3.626,
      "eval_wikibio_n_ngrams_match_3": 1.354,
      "eval_wikibio_num_pred_words": 37.208,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.866527233580925,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.37195459576758305,
      "eval_wikibio_runtime": 24.9962,
      "eval_wikibio_samples_per_second": 20.003,
      "eval_wikibio_steps_per_second": 0.04,
      "eval_wikibio_token_set_f1": 0.3275260511089058,
      "eval_wikibio_token_set_f1_sem": 0.005317047030055768,
      "eval_wikibio_token_set_precision": 0.34032125816446246,
      "eval_wikibio_token_set_recall": 0.3283904764364292,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 234375
    },
    {
      "epoch": 45.0,
      "eval_nq_accuracy": 0.53621875,
      "eval_nq_bleu_score": 12.472245564420644,
      "eval_nq_bleu_score_sem": 0.509516496709447,
      "eval_nq_emb_cos_sim": 0.8428362607955933,
      "eval_nq_emb_cos_sim_sem": 0.006838735785576522,
      "eval_nq_emb_top1_equal": 0.3125,
      "eval_nq_emb_top1_equal_sem": 0.041130074229814934,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.113086462020874,
      "eval_nq_n_ngrams_match_1": 23.714,
      "eval_nq_n_ngrams_match_2": 8.988,
      "eval_nq_n_ngrams_match_3": 4.242,
      "eval_nq_num_pred_words": 48.972,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.27373849593678,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.46227421514202416,
      "eval_nq_runtime": 11.9422,
      "eval_nq_samples_per_second": 41.868,
      "eval_nq_steps_per_second": 0.084,
      "eval_nq_token_set_f1": 0.47320994443134756,
      "eval_nq_token_set_f1_sem": 0.004999096680014144,
      "eval_nq_token_set_precision": 0.431642811161382,
      "eval_nq_token_set_recall": 0.5322724941503796,
      "eval_nq_true_num_tokens": 64.0,
      "step": 234375
    },
    {
      "epoch": 45.0,
      "learning_rate": 0.001,
      "loss": 2.4801,
      "step": 234384
    },
    {
      "epoch": 45.01,
      "learning_rate": 0.001,
      "loss": 2.477,
      "step": 234396
    },
    {
      "epoch": 45.01,
      "learning_rate": 0.001,
      "loss": 2.4764,
      "step": 234408
    },
    {
      "epoch": 45.01,
      "learning_rate": 0.001,
      "loss": 2.4792,
      "step": 234420
    },
    {
      "epoch": 45.01,
      "learning_rate": 0.001,
      "loss": 2.4838,
      "step": 234432
    },
    {
      "epoch": 45.02,
      "learning_rate": 0.001,
      "loss": 2.4829,
      "step": 234444
    },
    {
      "epoch": 45.02,
      "learning_rate": 0.001,
      "loss": 2.4887,
      "step": 234456
    },
    {
      "epoch": 45.02,
      "learning_rate": 0.001,
      "loss": 2.4723,
      "step": 234468
    },
    {
      "epoch": 45.02,
      "learning_rate": 0.001,
      "loss": 2.4801,
      "step": 234480
    },
    {
      "epoch": 45.03,
      "learning_rate": 0.001,
      "loss": 2.476,
      "step": 234492
    },
    {
      "epoch": 45.03,
      "learning_rate": 0.001,
      "loss": 2.4773,
      "step": 234504
    },
    {
      "epoch": 45.03,
      "learning_rate": 0.001,
      "loss": 2.4837,
      "step": 234516
    },
    {
      "epoch": 45.03,
      "learning_rate": 0.001,
      "loss": 2.4898,
      "step": 234528
    },
    {
      "epoch": 45.03,
      "learning_rate": 0.001,
      "loss": 2.4707,
      "step": 234540
    },
    {
      "epoch": 45.04,
      "learning_rate": 0.001,
      "loss": 2.483,
      "step": 234552
    },
    {
      "epoch": 45.04,
      "learning_rate": 0.001,
      "loss": 2.481,
      "step": 234564
    },
    {
      "epoch": 45.04,
      "learning_rate": 0.001,
      "loss": 2.4787,
      "step": 234576
    },
    {
      "epoch": 45.04,
      "learning_rate": 0.001,
      "loss": 2.472,
      "step": 234588
    },
    {
      "epoch": 45.05,
      "learning_rate": 0.001,
      "loss": 2.4803,
      "step": 234600
    },
    {
      "epoch": 45.05,
      "learning_rate": 0.001,
      "loss": 2.4873,
      "step": 234612
    },
    {
      "epoch": 45.05,
      "learning_rate": 0.001,
      "loss": 2.4759,
      "step": 234624
    },
    {
      "epoch": 45.05,
      "learning_rate": 0.001,
      "loss": 2.4881,
      "step": 234636
    },
    {
      "epoch": 45.06,
      "learning_rate": 0.001,
      "loss": 2.4757,
      "step": 234648
    },
    {
      "epoch": 45.06,
      "learning_rate": 0.001,
      "loss": 2.4856,
      "step": 234660
    },
    {
      "epoch": 45.06,
      "learning_rate": 0.001,
      "loss": 2.4718,
      "step": 234672
    },
    {
      "epoch": 45.06,
      "learning_rate": 0.001,
      "loss": 2.4865,
      "step": 234684
    },
    {
      "epoch": 45.06,
      "learning_rate": 0.001,
      "loss": 2.4885,
      "step": 234696
    },
    {
      "epoch": 45.07,
      "learning_rate": 0.001,
      "loss": 2.4793,
      "step": 234708
    },
    {
      "epoch": 45.07,
      "learning_rate": 0.001,
      "loss": 2.4752,
      "step": 234720
    },
    {
      "epoch": 45.07,
      "learning_rate": 0.001,
      "loss": 2.4753,
      "step": 234732
    },
    {
      "epoch": 45.07,
      "learning_rate": 0.001,
      "loss": 2.4695,
      "step": 234744
    },
    {
      "epoch": 45.08,
      "learning_rate": 0.001,
      "loss": 2.4742,
      "step": 234756
    },
    {
      "epoch": 45.08,
      "learning_rate": 0.001,
      "loss": 2.493,
      "step": 234768
    },
    {
      "epoch": 45.08,
      "learning_rate": 0.001,
      "loss": 2.4774,
      "step": 234780
    },
    {
      "epoch": 45.08,
      "learning_rate": 0.001,
      "loss": 2.4728,
      "step": 234792
    },
    {
      "epoch": 45.09,
      "learning_rate": 0.001,
      "loss": 2.481,
      "step": 234804
    },
    {
      "epoch": 45.09,
      "learning_rate": 0.001,
      "loss": 2.4787,
      "step": 234816
    },
    {
      "epoch": 45.09,
      "learning_rate": 0.001,
      "loss": 2.4888,
      "step": 234828
    },
    {
      "epoch": 45.09,
      "learning_rate": 0.001,
      "loss": 2.4741,
      "step": 234840
    },
    {
      "epoch": 45.09,
      "learning_rate": 0.001,
      "loss": 2.4903,
      "step": 234852
    },
    {
      "epoch": 45.1,
      "learning_rate": 0.001,
      "loss": 2.4823,
      "step": 234864
    },
    {
      "epoch": 45.1,
      "learning_rate": 0.001,
      "loss": 2.4775,
      "step": 234876
    },
    {
      "epoch": 45.1,
      "learning_rate": 0.001,
      "loss": 2.484,
      "step": 234888
    },
    {
      "epoch": 45.1,
      "learning_rate": 0.001,
      "loss": 2.477,
      "step": 234900
    },
    {
      "epoch": 45.11,
      "learning_rate": 0.001,
      "loss": 2.4758,
      "step": 234912
    },
    {
      "epoch": 45.11,
      "learning_rate": 0.001,
      "loss": 2.4799,
      "step": 234924
    },
    {
      "epoch": 45.11,
      "learning_rate": 0.001,
      "loss": 2.4794,
      "step": 234936
    },
    {
      "epoch": 45.11,
      "learning_rate": 0.001,
      "loss": 2.4803,
      "step": 234948
    },
    {
      "epoch": 45.12,
      "learning_rate": 0.001,
      "loss": 2.4785,
      "step": 234960
    },
    {
      "epoch": 45.12,
      "learning_rate": 0.001,
      "loss": 2.491,
      "step": 234972
    },
    {
      "epoch": 45.12,
      "learning_rate": 0.001,
      "loss": 2.4804,
      "step": 234984
    },
    {
      "epoch": 45.12,
      "learning_rate": 0.001,
      "loss": 2.4834,
      "step": 234996
    },
    {
      "epoch": 45.12,
      "eval_ag_news_accuracy": 0.330125,
      "eval_ag_news_bleu_score": 5.080505909630914,
      "eval_ag_news_bleu_score_sem": 0.15953387709149533,
      "eval_ag_news_emb_cos_sim": 0.8225025534629822,
      "eval_ag_news_emb_cos_sim_sem": 0.006679532650771699,
      "eval_ag_news_emb_top1_equal": 0.234375,
      "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.459632635116577,
      "eval_ag_news_n_ngrams_match_1": 14.422,
      "eval_ag_news_n_ngrams_match_2": 3.298,
      "eval_ag_news_n_ngrams_match_3": 0.964,
      "eval_ag_news_num_pred_words": 46.58,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.805290221497806,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35965960931803664,
      "eval_ag_news_runtime": 13.3014,
      "eval_ag_news_samples_per_second": 37.59,
      "eval_ag_news_steps_per_second": 0.075,
      "eval_ag_news_token_set_f1": 0.36084064419133377,
      "eval_ag_news_token_set_f1_sem": 0.0045625628717824605,
      "eval_ag_news_token_set_precision": 0.3450969952602633,
      "eval_ag_news_token_set_recall": 0.3936605413583107,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 235000
    },
    {
      "epoch": 45.12,
      "eval_anthropic_toxic_prompts_accuracy": 0.11809375,
      "eval_anthropic_toxic_prompts_bleu_score": 3.249449969364471,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12182234326896128,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6863254308700562,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009197239183931736,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.1501572132110596,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.362,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.042,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.788,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.702,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 23.339733606991103,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2175672514017648,
      "eval_anthropic_toxic_prompts_runtime": 11.213,
      "eval_anthropic_toxic_prompts_samples_per_second": 44.591,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.089,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36769985522783755,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006739889351686934,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44911581742473733,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3395485151405642,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 235000
    },
    {
      "epoch": 45.12,
      "eval_arxiv_accuracy": 0.35675,
      "eval_arxiv_bleu_score": 4.498174501227942,
      "eval_arxiv_bleu_score_sem": 0.13092403510269388,
      "eval_arxiv_emb_cos_sim": 0.7803041934967041,
      "eval_arxiv_emb_cos_sim_sem": 0.007107721582584603,
      "eval_arxiv_emb_top1_equal": 0.296875,
      "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.320892095565796,
      "eval_arxiv_n_ngrams_match_1": 15.354,
      "eval_arxiv_n_ngrams_match_2": 3.082,
      "eval_arxiv_n_ngrams_match_3": 0.718,
      "eval_arxiv_num_pred_words": 40.28,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.68503724440266,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.37205666210677635,
      "eval_arxiv_runtime": 15.5522,
      "eval_arxiv_samples_per_second": 32.15,
      "eval_arxiv_steps_per_second": 0.064,
      "eval_arxiv_token_set_f1": 0.3642742211446164,
      "eval_arxiv_token_set_f1_sem": 0.0042574172954471445,
      "eval_arxiv_token_set_precision": 0.31586787043883907,
      "eval_arxiv_token_set_recall": 0.45116233598688793,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 235000
    },
    {
      "epoch": 45.12,
      "eval_python_code_alpaca_accuracy": 0.1645,
      "eval_python_code_alpaca_bleu_score": 4.923737753922552,
      "eval_python_code_alpaca_bleu_score_sem": 0.1553692776982092,
      "eval_python_code_alpaca_emb_cos_sim": 0.7769338488578796,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007851296040349036,
      "eval_python_code_alpaca_emb_top1_equal": 0.171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03347745514062371,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8087265491485596,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.12,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.078,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.06,
      "eval_python_code_alpaca_num_pred_words": 43.334,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 16.588779766535286,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.346360684279525,
      "eval_python_code_alpaca_runtime": 11.3833,
      "eval_python_code_alpaca_samples_per_second": 43.924,
      "eval_python_code_alpaca_steps_per_second": 0.088,
      "eval_python_code_alpaca_token_set_f1": 0.49147143962269535,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005240694562867404,
      "eval_python_code_alpaca_token_set_precision": 0.5548385292059235,
      "eval_python_code_alpaca_token_set_recall": 0.4614492581547742,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 235000
    },
    {
      "epoch": 45.12,
      "eval_wikibio_accuracy": 0.3329375,
      "eval_wikibio_bleu_score": 6.1282826144213765,
      "eval_wikibio_bleu_score_sem": 0.2135608913666631,
      "eval_wikibio_emb_cos_sim": 0.7597454786300659,
      "eval_wikibio_emb_cos_sim_sem": 0.008223369736092223,
      "eval_wikibio_emb_top1_equal": 0.171875,
      "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6424877643585205,
      "eval_wikibio_n_ngrams_match_1": 10.064,
      "eval_wikibio_n_ngrams_match_2": 3.428,
      "eval_wikibio_n_ngrams_match_3": 1.272,
      "eval_wikibio_num_pred_words": 35.324,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 38.186718211609985,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36334704713539007,
      "eval_wikibio_runtime": 11.3461,
      "eval_wikibio_samples_per_second": 44.068,
      "eval_wikibio_steps_per_second": 0.088,
      "eval_wikibio_token_set_f1": 0.32353777314613724,
      "eval_wikibio_token_set_f1_sem": 0.005704797195882945,
      "eval_wikibio_token_set_precision": 0.32894446201880306,
      "eval_wikibio_token_set_recall": 0.3358082802925566,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 235000
    },
    {
      "epoch": 45.12,
      "eval_nq_accuracy": 0.537,
      "eval_nq_bleu_score": 12.37910922158707,
      "eval_nq_bleu_score_sem": 0.4889500774094668,
      "eval_nq_emb_cos_sim": 0.841946005821228,
      "eval_nq_emb_cos_sim_sem": 0.006509863268101507,
      "eval_nq_emb_top1_equal": 0.2890625,
      "eval_nq_emb_top1_equal_sem": 0.04022626667363519,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.110452651977539,
      "eval_nq_n_ngrams_match_1": 23.926,
      "eval_nq_n_ngrams_match_2": 8.974,
      "eval_nq_n_ngrams_match_3": 4.146,
      "eval_nq_num_pred_words": 49.174,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.251975712489681,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4646920390789369,
      "eval_nq_runtime": 11.9196,
      "eval_nq_samples_per_second": 41.948,
      "eval_nq_steps_per_second": 0.084,
      "eval_nq_token_set_f1": 0.47868285087664814,
      "eval_nq_token_set_f1_sem": 0.0046898240743526816,
      "eval_nq_token_set_precision": 0.43710833309811864,
      "eval_nq_token_set_recall": 0.5358058398409047,
      "eval_nq_true_num_tokens": 64.0,
      "step": 235000
    },
    {
      "epoch": 45.12,
      "learning_rate": 0.001,
      "loss": 2.4788,
      "step": 235008
    },
    {
      "epoch": 45.13,
      "learning_rate": 0.001,
      "loss": 2.4859,
      "step": 235020
    },
    {
      "epoch": 45.13,
      "learning_rate": 0.001,
      "loss": 2.4807,
      "step": 235032
    },
    {
      "epoch": 45.13,
      "learning_rate": 0.001,
      "loss": 2.4746,
      "step": 235044
    },
    {
      "epoch": 45.13,
      "learning_rate": 0.001,
      "loss": 2.4748,
      "step": 235056
    },
    {
      "epoch": 45.14,
      "learning_rate": 0.001,
      "loss": 2.4802,
      "step": 235068
    },
    {
      "epoch": 45.14,
      "learning_rate": 0.001,
      "loss": 2.4775,
      "step": 235080
    },
    {
      "epoch": 45.14,
      "learning_rate": 0.001,
      "loss": 2.4815,
      "step": 235092
    },
    {
      "epoch": 45.14,
      "learning_rate": 0.001,
      "loss": 2.4754,
      "step": 235104
    },
    {
      "epoch": 45.15,
      "learning_rate": 0.001,
      "loss": 2.4854,
      "step": 235116
    },
    {
      "epoch": 45.15,
      "learning_rate": 0.001,
      "loss": 2.4821,
      "step": 235128
    },
    {
      "epoch": 45.15,
      "learning_rate": 0.001,
      "loss": 2.4879,
      "step": 235140
    },
    {
      "epoch": 45.15,
      "learning_rate": 0.001,
      "loss": 2.4838,
      "step": 235152
    },
    {
      "epoch": 45.15,
      "learning_rate": 0.001,
      "loss": 2.4854,
      "step": 235164
    },
    {
      "epoch": 45.16,
      "learning_rate": 0.001,
      "loss": 2.4903,
      "step": 235176
    },
    {
      "epoch": 45.16,
      "learning_rate": 0.001,
      "loss": 2.493,
      "step": 235188
    },
    {
      "epoch": 45.16,
      "learning_rate": 0.001,
      "loss": 2.4809,
      "step": 235200
    },
    {
      "epoch": 45.16,
      "learning_rate": 0.001,
      "loss": 2.488,
      "step": 235212
    },
    {
      "epoch": 45.17,
      "learning_rate": 0.001,
      "loss": 2.4888,
      "step": 235224
    },
    {
      "epoch": 45.17,
      "learning_rate": 0.001,
      "loss": 2.4977,
      "step": 235236
    },
    {
      "epoch": 45.17,
      "learning_rate": 0.001,
      "loss": 2.4877,
      "step": 235248
    },
    {
      "epoch": 45.17,
      "learning_rate": 0.001,
      "loss": 2.4819,
      "step": 235260
    },
    {
      "epoch": 45.18,
      "learning_rate": 0.001,
      "loss": 2.4871,
      "step": 235272
    },
    {
      "epoch": 45.18,
      "learning_rate": 0.001,
      "loss": 2.4834,
      "step": 235284
    },
    {
      "epoch": 45.18,
      "learning_rate": 0.001,
      "loss": 2.4889,
      "step": 235296
    },
    {
      "epoch": 45.18,
      "learning_rate": 0.001,
      "loss": 2.474,
      "step": 235308
    },
    {
      "epoch": 45.18,
      "learning_rate": 0.001,
      "loss": 2.4851,
      "step": 235320
    },
    {
      "epoch": 45.19,
      "learning_rate": 0.001,
      "loss": 2.4804,
      "step": 235332
    },
    {
      "epoch": 45.19,
      "learning_rate": 0.001,
      "loss": 2.4821,
      "step": 235344
    },
    {
      "epoch": 45.19,
      "learning_rate": 0.001,
      "loss": 2.4839,
      "step": 235356
    },
    {
      "epoch": 45.19,
      "learning_rate": 0.001,
      "loss": 2.4788,
      "step": 235368
    },
    {
      "epoch": 45.2,
      "learning_rate": 0.001,
      "loss": 2.4817,
      "step": 235380
    },
    {
      "epoch": 45.2,
      "learning_rate": 0.001,
      "loss": 2.4835,
      "step": 235392
    },
    {
      "epoch": 45.2,
      "learning_rate": 0.001,
      "loss": 2.4819,
      "step": 235404
    },
    {
      "epoch": 45.2,
      "learning_rate": 0.001,
      "loss": 2.4852,
      "step": 235416
    },
    {
      "epoch": 45.21,
      "learning_rate": 0.001,
      "loss": 2.4818,
      "step": 235428
    },
    {
      "epoch": 45.21,
      "learning_rate": 0.001,
      "loss": 2.4874,
      "step": 235440
    },
    {
      "epoch": 45.21,
      "learning_rate": 0.001,
      "loss": 2.4866,
      "step": 235452
    },
    {
      "epoch": 45.21,
      "learning_rate": 0.001,
      "loss": 2.4863,
      "step": 235464
    },
    {
      "epoch": 45.21,
      "learning_rate": 0.001,
      "loss": 2.4895,
      "step": 235476
    },
    {
      "epoch": 45.22,
      "learning_rate": 0.001,
      "loss": 2.4828,
      "step": 235488
    },
    {
      "epoch": 45.22,
      "learning_rate": 0.001,
      "loss": 2.4958,
      "step": 235500
    },
    {
      "epoch": 45.22,
      "learning_rate": 0.001,
      "loss": 2.4881,
      "step": 235512
    },
    {
      "epoch": 45.22,
      "learning_rate": 0.001,
      "loss": 2.4933,
      "step": 235524
    },
    {
      "epoch": 45.23,
      "learning_rate": 0.001,
      "loss": 2.4851,
      "step": 235536
    },
    {
      "epoch": 45.23,
      "learning_rate": 0.001,
      "loss": 2.4861,
      "step": 235548
    },
    {
      "epoch": 45.23,
      "learning_rate": 0.001,
      "loss": 2.4716,
      "step": 235560
    },
    {
      "epoch": 45.23,
      "learning_rate": 0.001,
      "loss": 2.499,
      "step": 235572
    },
    {
      "epoch": 45.24,
      "learning_rate": 0.001,
      "loss": 2.4909,
      "step": 235584
    },
    {
      "epoch": 45.24,
      "learning_rate": 0.001,
      "loss": 2.4838,
      "step": 235596
    },
    {
      "epoch": 45.24,
      "learning_rate": 0.001,
      "loss": 2.4943,
      "step": 235608
    },
    {
      "epoch": 45.24,
      "learning_rate": 0.001,
      "loss": 2.4925,
      "step": 235620
    },
    {
      "epoch": 45.24,
      "eval_ag_news_accuracy": 0.3299375,
      "eval_ag_news_bleu_score": 4.866871206895734,
      "eval_ag_news_bleu_score_sem": 0.15012628398864336,
      "eval_ag_news_emb_cos_sim": 0.8280687928199768,
      "eval_ag_news_emb_cos_sim_sem": 0.006476843846471254,
      "eval_ag_news_emb_top1_equal": 0.296875,
      "eval_ag_news_emb_top1_equal_sem": 0.04054163310179599,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4598886966705322,
      "eval_ag_news_n_ngrams_match_1": 14.544,
      "eval_ag_news_n_ngrams_match_2": 3.278,
      "eval_ag_news_n_ngrams_match_3": 0.896,
      "eval_ag_news_num_pred_words": 47.016,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.81343537632191,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3621607706413642,
      "eval_ag_news_runtime": 15.6836,
      "eval_ag_news_samples_per_second": 31.88,
      "eval_ag_news_steps_per_second": 0.064,
      "eval_ag_news_token_set_f1": 0.3610874182248644,
      "eval_ag_news_token_set_f1_sem": 0.004483335555714829,
      "eval_ag_news_token_set_precision": 0.34740491236900384,
      "eval_ag_news_token_set_recall": 0.39075741696767335,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 235625
    },
    {
      "epoch": 45.24,
      "eval_anthropic_toxic_prompts_accuracy": 0.1161875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.215478882411253,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12096969487830865,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.676385760307312,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009648151193992583,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.203469753265381,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.328,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.98,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.744,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.256,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.61779987068487,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21867117862093788,
      "eval_anthropic_toxic_prompts_runtime": 25.1762,
      "eval_anthropic_toxic_prompts_samples_per_second": 19.86,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.04,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36282506843024004,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006601876380974217,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4509802352407291,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3330317478483652,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 235625
    },
    {
      "epoch": 45.24,
      "eval_arxiv_accuracy": 0.35421875,
      "eval_arxiv_bleu_score": 4.614085119744084,
      "eval_arxiv_bleu_score_sem": 0.1369909901083598,
      "eval_arxiv_emb_cos_sim": 0.7892141938209534,
      "eval_arxiv_emb_cos_sim_sem": 0.00709441964206786,
      "eval_arxiv_emb_top1_equal": 0.3046875,
      "eval_arxiv_emb_top1_equal_sem": 0.04084279867618665,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3293392658233643,
      "eval_arxiv_n_ngrams_match_1": 15.962,
      "eval_arxiv_n_ngrams_match_2": 3.146,
      "eval_arxiv_n_ngrams_match_3": 0.728,
      "eval_arxiv_num_pred_words": 41.562,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.919887983207765,
      "eval_arxiv_pred_num_tokens": 62.9921875,
      "eval_arxiv_rouge_score": 0.37875755355305507,
      "eval_arxiv_runtime": 30.732,
      "eval_arxiv_samples_per_second": 16.27,
      "eval_arxiv_steps_per_second": 0.033,
      "eval_arxiv_token_set_f1": 0.3720818651494092,
      "eval_arxiv_token_set_f1_sem": 0.004215388835882899,
      "eval_arxiv_token_set_precision": 0.32606911438785263,
      "eval_arxiv_token_set_recall": 0.44864556790489124,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 235625
    },
    {
      "epoch": 45.24,
      "eval_python_code_alpaca_accuracy": 0.163,
      "eval_python_code_alpaca_bleu_score": 4.8233972354286525,
      "eval_python_code_alpaca_bleu_score_sem": 0.15316598434408954,
      "eval_python_code_alpaca_emb_cos_sim": 0.7797827124595642,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007125558919127823,
      "eval_python_code_alpaca_emb_top1_equal": 0.171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03347745514062371,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8484835624694824,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.106,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.172,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.114,
      "eval_python_code_alpaca_num_pred_words": 44.832,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.261585866664173,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.34161305540878206,
      "eval_python_code_alpaca_runtime": 11.5138,
      "eval_python_code_alpaca_samples_per_second": 43.426,
      "eval_python_code_alpaca_steps_per_second": 0.087,
      "eval_python_code_alpaca_token_set_f1": 0.4881126232561804,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005361239903944323,
      "eval_python_code_alpaca_token_set_precision": 0.555132361923531,
      "eval_python_code_alpaca_token_set_recall": 0.4579293609304612,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 235625
    },
    {
      "epoch": 45.24,
      "eval_wikibio_accuracy": 0.33096875,
      "eval_wikibio_bleu_score": 6.508365084613505,
      "eval_wikibio_bleu_score_sem": 0.24532999105066583,
      "eval_wikibio_emb_cos_sim": 0.7584213614463806,
      "eval_wikibio_emb_cos_sim_sem": 0.007648820604469176,
      "eval_wikibio_emb_top1_equal": 0.1796875,
      "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6209194660186768,
      "eval_wikibio_n_ngrams_match_1": 10.314,
      "eval_wikibio_n_ngrams_match_2": 3.584,
      "eval_wikibio_n_ngrams_match_3": 1.384,
      "eval_wikibio_num_pred_words": 36.462,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.371914234645935,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36373669882380344,
      "eval_wikibio_runtime": 11.4701,
      "eval_wikibio_samples_per_second": 43.592,
      "eval_wikibio_steps_per_second": 0.087,
      "eval_wikibio_token_set_f1": 0.32613851056154686,
      "eval_wikibio_token_set_f1_sem": 0.005264163300303088,
      "eval_wikibio_token_set_precision": 0.33286115807209193,
      "eval_wikibio_token_set_recall": 0.33652744995122025,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 235625
    },
    {
      "epoch": 45.24,
      "eval_nq_accuracy": 0.538,
      "eval_nq_bleu_score": 12.257466558820232,
      "eval_nq_bleu_score_sem": 0.487990812104523,
      "eval_nq_emb_cos_sim": 0.8438543081283569,
      "eval_nq_emb_cos_sim_sem": 0.006167822494644871,
      "eval_nq_emb_top1_equal": 0.296875,
      "eval_nq_emb_top1_equal_sem": 0.04054163310179599,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.108394145965576,
      "eval_nq_n_ngrams_match_1": 23.59,
      "eval_nq_n_ngrams_match_2": 8.822,
      "eval_nq_n_ngrams_match_3": 4.148,
      "eval_nq_num_pred_words": 49.062,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.235006442539254,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4587694171930715,
      "eval_nq_runtime": 11.6939,
      "eval_nq_samples_per_second": 42.757,
      "eval_nq_steps_per_second": 0.086,
      "eval_nq_token_set_f1": 0.47206982833067984,
      "eval_nq_token_set_f1_sem": 0.004924633875529909,
      "eval_nq_token_set_precision": 0.43000664181308274,
      "eval_nq_token_set_recall": 0.5308060014530428,
      "eval_nq_true_num_tokens": 64.0,
      "step": 235625
    },
    {
      "epoch": 45.24,
      "learning_rate": 0.001,
      "loss": 2.4919,
      "step": 235632
    },
    {
      "epoch": 45.25,
      "learning_rate": 0.001,
      "loss": 2.4822,
      "step": 235644
    },
    {
      "epoch": 45.25,
      "learning_rate": 0.001,
      "loss": 2.4856,
      "step": 235656
    },
    {
      "epoch": 45.25,
      "learning_rate": 0.001,
      "loss": 2.4891,
      "step": 235668
    },
    {
      "epoch": 45.25,
      "learning_rate": 0.001,
      "loss": 2.4814,
      "step": 235680
    },
    {
      "epoch": 45.26,
      "learning_rate": 0.001,
      "loss": 2.4845,
      "step": 235692
    },
    {
      "epoch": 45.26,
      "learning_rate": 0.001,
      "loss": 2.4844,
      "step": 235704
    },
    {
      "epoch": 45.26,
      "learning_rate": 0.001,
      "loss": 2.4836,
      "step": 235716
    },
    {
      "epoch": 45.26,
      "learning_rate": 0.001,
      "loss": 2.4827,
      "step": 235728
    },
    {
      "epoch": 45.26,
      "learning_rate": 0.001,
      "loss": 2.4809,
      "step": 235740
    },
    {
      "epoch": 45.27,
      "learning_rate": 0.001,
      "loss": 2.4819,
      "step": 235752
    },
    {
      "epoch": 45.27,
      "learning_rate": 0.001,
      "loss": 2.4901,
      "step": 235764
    },
    {
      "epoch": 45.27,
      "learning_rate": 0.001,
      "loss": 2.4906,
      "step": 235776
    },
    {
      "epoch": 45.27,
      "learning_rate": 0.001,
      "loss": 2.4875,
      "step": 235788
    },
    {
      "epoch": 45.28,
      "learning_rate": 0.001,
      "loss": 2.4869,
      "step": 235800
    },
    {
      "epoch": 45.28,
      "learning_rate": 0.001,
      "loss": 2.4962,
      "step": 235812
    },
    {
      "epoch": 45.28,
      "learning_rate": 0.001,
      "loss": 2.4897,
      "step": 235824
    },
    {
      "epoch": 45.28,
      "learning_rate": 0.001,
      "loss": 2.4947,
      "step": 235836
    },
    {
      "epoch": 45.29,
      "learning_rate": 0.001,
      "loss": 2.4911,
      "step": 235848
    },
    {
      "epoch": 45.29,
      "learning_rate": 0.001,
      "loss": 2.482,
      "step": 235860
    },
    {
      "epoch": 45.29,
      "learning_rate": 0.001,
      "loss": 2.4825,
      "step": 235872
    },
    {
      "epoch": 45.29,
      "learning_rate": 0.001,
      "loss": 2.4876,
      "step": 235884
    },
    {
      "epoch": 45.29,
      "learning_rate": 0.001,
      "loss": 2.4855,
      "step": 235896
    },
    {
      "epoch": 45.3,
      "learning_rate": 0.001,
      "loss": 2.4812,
      "step": 235908
    },
    {
      "epoch": 45.3,
      "learning_rate": 0.001,
      "loss": 2.4899,
      "step": 235920
    },
    {
      "epoch": 45.3,
      "learning_rate": 0.001,
      "loss": 2.4896,
      "step": 235932
    },
    {
      "epoch": 45.3,
      "learning_rate": 0.001,
      "loss": 2.4845,
      "step": 235944
    },
    {
      "epoch": 45.31,
      "learning_rate": 0.001,
      "loss": 2.4791,
      "step": 235956
    },
    {
      "epoch": 45.31,
      "learning_rate": 0.001,
      "loss": 2.4905,
      "step": 235968
    },
    {
      "epoch": 45.31,
      "learning_rate": 0.001,
      "loss": 2.4811,
      "step": 235980
    },
    {
      "epoch": 45.31,
      "learning_rate": 0.001,
      "loss": 2.4925,
      "step": 235992
    },
    {
      "epoch": 45.32,
      "learning_rate": 0.001,
      "loss": 2.4882,
      "step": 236004
    },
    {
      "epoch": 45.32,
      "learning_rate": 0.001,
      "loss": 2.4871,
      "step": 236016
    },
    {
      "epoch": 45.32,
      "learning_rate": 0.001,
      "loss": 2.4867,
      "step": 236028
    },
    {
      "epoch": 45.32,
      "learning_rate": 0.001,
      "loss": 2.4819,
      "step": 236040
    },
    {
      "epoch": 45.32,
      "learning_rate": 0.001,
      "loss": 2.4837,
      "step": 236052
    },
    {
      "epoch": 45.33,
      "learning_rate": 0.001,
      "loss": 2.4751,
      "step": 236064
    },
    {
      "epoch": 45.33,
      "learning_rate": 0.001,
      "loss": 2.4787,
      "step": 236076
    },
    {
      "epoch": 45.33,
      "learning_rate": 0.001,
      "loss": 2.4804,
      "step": 236088
    },
    {
      "epoch": 45.33,
      "learning_rate": 0.001,
      "loss": 2.4863,
      "step": 236100
    },
    {
      "epoch": 45.34,
      "learning_rate": 0.001,
      "loss": 2.4786,
      "step": 236112
    },
    {
      "epoch": 45.34,
      "learning_rate": 0.001,
      "loss": 2.4955,
      "step": 236124
    },
    {
      "epoch": 45.34,
      "learning_rate": 0.001,
      "loss": 2.484,
      "step": 236136
    },
    {
      "epoch": 45.34,
      "learning_rate": 0.001,
      "loss": 2.4922,
      "step": 236148
    },
    {
      "epoch": 45.35,
      "learning_rate": 0.001,
      "loss": 2.4986,
      "step": 236160
    },
    {
      "epoch": 45.35,
      "learning_rate": 0.001,
      "loss": 2.4801,
      "step": 236172
    },
    {
      "epoch": 45.35,
      "learning_rate": 0.001,
      "loss": 2.4868,
      "step": 236184
    },
    {
      "epoch": 45.35,
      "learning_rate": 0.001,
      "loss": 2.487,
      "step": 236196
    },
    {
      "epoch": 45.35,
      "learning_rate": 0.001,
      "loss": 2.4907,
      "step": 236208
    },
    {
      "epoch": 45.36,
      "learning_rate": 0.001,
      "loss": 2.4848,
      "step": 236220
    },
    {
      "epoch": 45.36,
      "learning_rate": 0.001,
      "loss": 2.4931,
      "step": 236232
    },
    {
      "epoch": 45.36,
      "learning_rate": 0.001,
      "loss": 2.4842,
      "step": 236244
    },
    {
      "epoch": 45.36,
      "eval_ag_news_accuracy": 0.32978125,
      "eval_ag_news_bleu_score": 5.068924985048963,
      "eval_ag_news_bleu_score_sem": 0.15804541170942332,
      "eval_ag_news_emb_cos_sim": 0.8181669116020203,
      "eval_ag_news_emb_cos_sim_sem": 0.007024785804338275,
      "eval_ag_news_emb_top1_equal": 0.2421875,
      "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4600605964660645,
      "eval_ag_news_n_ngrams_match_1": 14.576,
      "eval_ag_news_n_ngrams_match_2": 3.296,
      "eval_ag_news_n_ngrams_match_3": 0.94,
      "eval_ag_news_num_pred_words": 46.678,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.8189045694214,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3638274695582324,
      "eval_ag_news_runtime": 31.213,
      "eval_ag_news_samples_per_second": 16.019,
      "eval_ag_news_steps_per_second": 0.032,
      "eval_ag_news_token_set_f1": 0.36142355006688703,
      "eval_ag_news_token_set_f1_sem": 0.004617036902792799,
      "eval_ag_news_token_set_precision": 0.34859251880572,
      "eval_ag_news_token_set_recall": 0.38927184647075064,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 236250
    },
    {
      "epoch": 45.36,
      "eval_anthropic_toxic_prompts_accuracy": 0.1159375,
      "eval_anthropic_toxic_prompts_bleu_score": 3.0716327682616535,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11298120935300243,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6829196214675903,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009644477832727056,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.1737353801727295,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.306,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.892,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.696,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.586,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 23.896580662145972,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2159726355983121,
      "eval_anthropic_toxic_prompts_runtime": 18.0997,
      "eval_anthropic_toxic_prompts_samples_per_second": 27.625,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.055,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36047059793605396,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006363741253291872,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44671505151350177,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3306825404772978,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 236250
    },
    {
      "epoch": 45.36,
      "eval_arxiv_accuracy": 0.35440625,
      "eval_arxiv_bleu_score": 4.3774102065342735,
      "eval_arxiv_bleu_score_sem": 0.12293831713317518,
      "eval_arxiv_emb_cos_sim": 0.7836014032363892,
      "eval_arxiv_emb_cos_sim_sem": 0.007510873310748512,
      "eval_arxiv_emb_top1_equal": 0.2578125,
      "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.312004327774048,
      "eval_arxiv_n_ngrams_match_1": 15.278,
      "eval_arxiv_n_ngrams_match_2": 3.056,
      "eval_arxiv_n_ngrams_match_3": 0.686,
      "eval_arxiv_num_pred_words": 39.608,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.44006928379248,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3714682813779792,
      "eval_arxiv_runtime": 29.7371,
      "eval_arxiv_samples_per_second": 16.814,
      "eval_arxiv_steps_per_second": 0.034,
      "eval_arxiv_token_set_f1": 0.36171949803136166,
      "eval_arxiv_token_set_f1_sem": 0.004384357297562797,
      "eval_arxiv_token_set_precision": 0.3119897852176188,
      "eval_arxiv_token_set_recall": 0.4516782800865952,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 236250
    },
    {
      "epoch": 45.36,
      "eval_python_code_alpaca_accuracy": 0.16546875,
      "eval_python_code_alpaca_bleu_score": 4.833091059079826,
      "eval_python_code_alpaca_bleu_score_sem": 0.1498344748185783,
      "eval_python_code_alpaca_emb_cos_sim": 0.7737959027290344,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007713317795036496,
      "eval_python_code_alpaca_emb_top1_equal": 0.15625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8216986656188965,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.182,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.098,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.072,
      "eval_python_code_alpaca_num_pred_words": 43.948,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 16.805373149787687,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3482552589865845,
      "eval_python_code_alpaca_runtime": 16.6762,
      "eval_python_code_alpaca_samples_per_second": 29.983,
      "eval_python_code_alpaca_steps_per_second": 0.06,
      "eval_python_code_alpaca_token_set_f1": 0.4895694294231196,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005327856918272031,
      "eval_python_code_alpaca_token_set_precision": 0.5576943403315965,
      "eval_python_code_alpaca_token_set_recall": 0.45703971465575227,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 236250
    },
    {
      "epoch": 45.36,
      "eval_wikibio_accuracy": 0.33278125,
      "eval_wikibio_bleu_score": 6.3777153332111896,
      "eval_wikibio_bleu_score_sem": 0.22310046634993044,
      "eval_wikibio_emb_cos_sim": 0.7548523545265198,
      "eval_wikibio_emb_cos_sim_sem": 0.008988898750663783,
      "eval_wikibio_emb_top1_equal": 0.203125,
      "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6209940910339355,
      "eval_wikibio_n_ngrams_match_1": 10.256,
      "eval_wikibio_n_ngrams_match_2": 3.548,
      "eval_wikibio_n_ngrams_match_3": 1.36,
      "eval_wikibio_num_pred_words": 35.766,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.37470321837863,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3653736570756626,
      "eval_wikibio_runtime": 28.4822,
      "eval_wikibio_samples_per_second": 17.555,
      "eval_wikibio_steps_per_second": 0.035,
      "eval_wikibio_token_set_f1": 0.32625843237840907,
      "eval_wikibio_token_set_f1_sem": 0.005399370667335609,
      "eval_wikibio_token_set_precision": 0.3352714334272106,
      "eval_wikibio_token_set_recall": 0.33370784478189963,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 236250
    },
    {
      "epoch": 45.36,
      "eval_nq_accuracy": 0.53703125,
      "eval_nq_bleu_score": 12.334143957771436,
      "eval_nq_bleu_score_sem": 0.49047282629757294,
      "eval_nq_emb_cos_sim": 0.8425197005271912,
      "eval_nq_emb_cos_sim_sem": 0.006781060457567395,
      "eval_nq_emb_top1_equal": 0.3203125,
      "eval_nq_emb_top1_equal_sem": 0.041403754790620424,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1136088371276855,
      "eval_nq_n_ngrams_match_1": 23.728,
      "eval_nq_n_ngrams_match_2": 8.906,
      "eval_nq_n_ngrams_match_3": 4.138,
      "eval_nq_num_pred_words": 48.936,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.27806162001522,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4614856671972397,
      "eval_nq_runtime": 32.675,
      "eval_nq_samples_per_second": 15.302,
      "eval_nq_steps_per_second": 0.031,
      "eval_nq_token_set_f1": 0.4737944349817195,
      "eval_nq_token_set_f1_sem": 0.004902637958479992,
      "eval_nq_token_set_precision": 0.4331997859628718,
      "eval_nq_token_set_recall": 0.5303665878086978,
      "eval_nq_true_num_tokens": 64.0,
      "step": 236250
    },
    {
      "epoch": 45.36,
      "learning_rate": 0.001,
      "loss": 2.4883,
      "step": 236256
    },
    {
      "epoch": 45.37,
      "learning_rate": 0.001,
      "loss": 2.4955,
      "step": 236268
    },
    {
      "epoch": 45.37,
      "learning_rate": 0.001,
      "loss": 2.4886,
      "step": 236280
    },
    {
      "epoch": 45.37,
      "learning_rate": 0.001,
      "loss": 2.4907,
      "step": 236292
    },
    {
      "epoch": 45.37,
      "learning_rate": 0.001,
      "loss": 2.4933,
      "step": 236304
    },
    {
      "epoch": 45.38,
      "learning_rate": 0.001,
      "loss": 2.4869,
      "step": 236316
    },
    {
      "epoch": 45.38,
      "learning_rate": 0.001,
      "loss": 2.4822,
      "step": 236328
    },
    {
      "epoch": 45.38,
      "learning_rate": 0.001,
      "loss": 2.4824,
      "step": 236340
    },
    {
      "epoch": 45.38,
      "learning_rate": 0.001,
      "loss": 2.4777,
      "step": 236352
    },
    {
      "epoch": 45.38,
      "learning_rate": 0.001,
      "loss": 2.4907,
      "step": 236364
    },
    {
      "epoch": 45.39,
      "learning_rate": 0.001,
      "loss": 2.4866,
      "step": 236376
    },
    {
      "epoch": 45.39,
      "learning_rate": 0.001,
      "loss": 2.4931,
      "step": 236388
    },
    {
      "epoch": 45.39,
      "learning_rate": 0.001,
      "loss": 2.4798,
      "step": 236400
    },
    {
      "epoch": 45.39,
      "learning_rate": 0.001,
      "loss": 2.4899,
      "step": 236412
    },
    {
      "epoch": 45.4,
      "learning_rate": 0.001,
      "loss": 2.4784,
      "step": 236424
    },
    {
      "epoch": 45.4,
      "learning_rate": 0.001,
      "loss": 2.4932,
      "step": 236436
    },
    {
      "epoch": 45.4,
      "learning_rate": 0.001,
      "loss": 2.4896,
      "step": 236448
    },
    {
      "epoch": 45.4,
      "learning_rate": 0.001,
      "loss": 2.4953,
      "step": 236460
    },
    {
      "epoch": 45.41,
      "learning_rate": 0.001,
      "loss": 2.495,
      "step": 236472
    },
    {
      "epoch": 45.41,
      "learning_rate": 0.001,
      "loss": 2.4835,
      "step": 236484
    },
    {
      "epoch": 45.41,
      "learning_rate": 0.001,
      "loss": 2.4899,
      "step": 236496
    },
    {
      "epoch": 45.41,
      "learning_rate": 0.001,
      "loss": 2.4799,
      "step": 236508
    },
    {
      "epoch": 45.41,
      "learning_rate": 0.001,
      "loss": 2.485,
      "step": 236520
    },
    {
      "epoch": 45.42,
      "learning_rate": 0.001,
      "loss": 2.4854,
      "step": 236532
    },
    {
      "epoch": 45.42,
      "learning_rate": 0.001,
      "loss": 2.4851,
      "step": 236544
    },
    {
      "epoch": 45.42,
      "learning_rate": 0.001,
      "loss": 2.4802,
      "step": 236556
    },
    {
      "epoch": 45.42,
      "learning_rate": 0.001,
      "loss": 2.487,
      "step": 236568
    },
    {
      "epoch": 45.43,
      "learning_rate": 0.001,
      "loss": 2.4882,
      "step": 236580
    },
    {
      "epoch": 45.43,
      "learning_rate": 0.001,
      "loss": 2.485,
      "step": 236592
    },
    {
      "epoch": 45.43,
      "learning_rate": 0.001,
      "loss": 2.4892,
      "step": 236604
    },
    {
      "epoch": 45.43,
      "learning_rate": 0.001,
      "loss": 2.4808,
      "step": 236616
    },
    {
      "epoch": 45.44,
      "learning_rate": 0.001,
      "loss": 2.4784,
      "step": 236628
    },
    {
      "epoch": 45.44,
      "learning_rate": 0.001,
      "loss": 2.4956,
      "step": 236640
    },
    {
      "epoch": 45.44,
      "learning_rate": 0.001,
      "loss": 2.4813,
      "step": 236652
    },
    {
      "epoch": 45.44,
      "learning_rate": 0.001,
      "loss": 2.4867,
      "step": 236664
    },
    {
      "epoch": 45.44,
      "learning_rate": 0.001,
      "loss": 2.4829,
      "step": 236676
    },
    {
      "epoch": 45.45,
      "learning_rate": 0.001,
      "loss": 2.488,
      "step": 236688
    },
    {
      "epoch": 45.45,
      "learning_rate": 0.001,
      "loss": 2.4833,
      "step": 236700
    },
    {
      "epoch": 45.45,
      "learning_rate": 0.001,
      "loss": 2.4766,
      "step": 236712
    },
    {
      "epoch": 45.45,
      "learning_rate": 0.001,
      "loss": 2.4758,
      "step": 236724
    },
    {
      "epoch": 45.46,
      "learning_rate": 0.001,
      "loss": 2.4931,
      "step": 236736
    },
    {
      "epoch": 45.46,
      "learning_rate": 0.001,
      "loss": 2.4772,
      "step": 236748
    },
    {
      "epoch": 45.46,
      "learning_rate": 0.001,
      "loss": 2.4938,
      "step": 236760
    },
    {
      "epoch": 45.46,
      "learning_rate": 0.001,
      "loss": 2.4748,
      "step": 236772
    },
    {
      "epoch": 45.47,
      "learning_rate": 0.001,
      "loss": 2.4794,
      "step": 236784
    },
    {
      "epoch": 45.47,
      "learning_rate": 0.001,
      "loss": 2.486,
      "step": 236796
    },
    {
      "epoch": 45.47,
      "learning_rate": 0.001,
      "loss": 2.4882,
      "step": 236808
    },
    {
      "epoch": 45.47,
      "learning_rate": 0.001,
      "loss": 2.4885,
      "step": 236820
    },
    {
      "epoch": 45.47,
      "learning_rate": 0.001,
      "loss": 2.4894,
      "step": 236832
    },
    {
      "epoch": 45.48,
      "learning_rate": 0.001,
      "loss": 2.4848,
      "step": 236844
    },
    {
      "epoch": 45.48,
      "learning_rate": 0.001,
      "loss": 2.4822,
      "step": 236856
    },
    {
      "epoch": 45.48,
      "learning_rate": 0.001,
      "loss": 2.4808,
      "step": 236868
    },
    {
      "epoch": 45.48,
      "eval_ag_news_accuracy": 0.328125,
      "eval_ag_news_bleu_score": 5.1097284651362935,
      "eval_ag_news_bleu_score_sem": 0.16805408067037464,
      "eval_ag_news_emb_cos_sim": 0.8161610960960388,
      "eval_ag_news_emb_cos_sim_sem": 0.0076715825368430905,
      "eval_ag_news_emb_top1_equal": 0.265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.468269109725952,
      "eval_ag_news_n_ngrams_match_1": 14.522,
      "eval_ag_news_n_ngrams_match_2": 3.378,
      "eval_ag_news_n_ngrams_match_3": 0.988,
      "eval_ag_news_num_pred_words": 47.396,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.08116538258978,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3563040187253297,
      "eval_ag_news_runtime": 31.0775,
      "eval_ag_news_samples_per_second": 16.089,
      "eval_ag_news_steps_per_second": 0.032,
      "eval_ag_news_token_set_f1": 0.3605941653128626,
      "eval_ag_news_token_set_f1_sem": 0.00455381093278579,
      "eval_ag_news_token_set_precision": 0.34833227459101823,
      "eval_ag_news_token_set_recall": 0.3896397253423483,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 236875
    },
    {
      "epoch": 45.48,
      "eval_anthropic_toxic_prompts_accuracy": 0.11646875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2666462887798327,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.16144987480701473,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.682634711265564,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009303319508911117,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.199036121368408,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.432,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.994,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.764,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.832,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.50889520793385,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21733901350708934,
      "eval_anthropic_toxic_prompts_runtime": 30.7501,
      "eval_anthropic_toxic_prompts_samples_per_second": 16.26,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.033,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3660504748382527,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006690572454197111,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.45446579938371107,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3332701357190309,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 236875
    },
    {
      "epoch": 45.48,
      "eval_arxiv_accuracy": 0.35603125,
      "eval_arxiv_bleu_score": 4.561718540765802,
      "eval_arxiv_bleu_score_sem": 0.12746920151424626,
      "eval_arxiv_emb_cos_sim": 0.7865849733352661,
      "eval_arxiv_emb_cos_sim_sem": 0.006162806361127566,
      "eval_arxiv_emb_top1_equal": 0.3046875,
      "eval_arxiv_emb_top1_equal_sem": 0.04084279867618665,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3262879848480225,
      "eval_arxiv_n_ngrams_match_1": 15.576,
      "eval_arxiv_n_ngrams_match_2": 3.254,
      "eval_arxiv_n_ngrams_match_3": 0.726,
      "eval_arxiv_num_pred_words": 41.548,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.834826399562722,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.369173905150433,
      "eval_arxiv_runtime": 34.3059,
      "eval_arxiv_samples_per_second": 14.575,
      "eval_arxiv_steps_per_second": 0.029,
      "eval_arxiv_token_set_f1": 0.3618765912173447,
      "eval_arxiv_token_set_f1_sem": 0.004027786625053291,
      "eval_arxiv_token_set_precision": 0.3170028173870082,
      "eval_arxiv_token_set_recall": 0.4401272965914524,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 236875
    },
    {
      "epoch": 45.48,
      "eval_python_code_alpaca_accuracy": 0.16403125,
      "eval_python_code_alpaca_bleu_score": 4.625687913046255,
      "eval_python_code_alpaca_bleu_score_sem": 0.14890315462892045,
      "eval_python_code_alpaca_emb_cos_sim": 0.7672336101531982,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.00910012823515108,
      "eval_python_code_alpaca_emb_top1_equal": 0.1484375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8469605445861816,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.018,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.016,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.036,
      "eval_python_code_alpaca_num_pred_words": 44.498,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.2353161723801,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3376399817632487,
      "eval_python_code_alpaca_runtime": 29.577,
      "eval_python_code_alpaca_samples_per_second": 16.905,
      "eval_python_code_alpaca_steps_per_second": 0.034,
      "eval_python_code_alpaca_token_set_f1": 0.48058938152673236,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005517767124463283,
      "eval_python_code_alpaca_token_set_precision": 0.5518982416220253,
      "eval_python_code_alpaca_token_set_recall": 0.450023858065497,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 236875
    },
    {
      "epoch": 45.48,
      "eval_wikibio_accuracy": 0.3306875,
      "eval_wikibio_bleu_score": 6.324013838507522,
      "eval_wikibio_bleu_score_sem": 0.22019240291969452,
      "eval_wikibio_emb_cos_sim": 0.7524803876876831,
      "eval_wikibio_emb_cos_sim_sem": 0.008564025590448573,
      "eval_wikibio_emb_top1_equal": 0.25,
      "eval_wikibio_emb_top1_equal_sem": 0.03842366440207048,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.645080804824829,
      "eval_wikibio_n_ngrams_match_1": 10.508,
      "eval_wikibio_n_ngrams_match_2": 3.61,
      "eval_wikibio_n_ngrams_match_3": 1.368,
      "eval_wikibio_num_pred_words": 37.264,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 38.28586640929773,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3723037163215847,
      "eval_wikibio_runtime": 34.7075,
      "eval_wikibio_samples_per_second": 14.406,
      "eval_wikibio_steps_per_second": 0.029,
      "eval_wikibio_token_set_f1": 0.3284325087793525,
      "eval_wikibio_token_set_f1_sem": 0.0051377816417530935,
      "eval_wikibio_token_set_precision": 0.34115068068346904,
      "eval_wikibio_token_set_recall": 0.3300704913409777,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 236875
    },
    {
      "epoch": 45.48,
      "eval_nq_accuracy": 0.535875,
      "eval_nq_bleu_score": 12.032815460481608,
      "eval_nq_bleu_score_sem": 0.4761279300902094,
      "eval_nq_emb_cos_sim": 0.8346362113952637,
      "eval_nq_emb_cos_sim_sem": 0.006997409450596926,
      "eval_nq_emb_top1_equal": 0.25,
      "eval_nq_emb_top1_equal_sem": 0.03842366440207048,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1153182983398438,
      "eval_nq_n_ngrams_match_1": 23.618,
      "eval_nq_n_ngrams_match_2": 8.798,
      "eval_nq_n_ngrams_match_3": 4.058,
      "eval_nq_num_pred_words": 49.488,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.292224747475986,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4562308616159445,
      "eval_nq_runtime": 31.475,
      "eval_nq_samples_per_second": 15.886,
      "eval_nq_steps_per_second": 0.032,
      "eval_nq_token_set_f1": 0.47124612309718056,
      "eval_nq_token_set_f1_sem": 0.00486947120200252,
      "eval_nq_token_set_precision": 0.4312773553495812,
      "eval_nq_token_set_recall": 0.527375433761542,
      "eval_nq_true_num_tokens": 64.0,
      "step": 236875
    },
    {
      "epoch": 45.48,
      "learning_rate": 0.001,
      "loss": 2.4787,
      "step": 236880
    },
    {
      "epoch": 45.49,
      "learning_rate": 0.001,
      "loss": 2.4916,
      "step": 236892
    },
    {
      "epoch": 45.49,
      "learning_rate": 0.001,
      "loss": 2.4811,
      "step": 236904
    },
    {
      "epoch": 45.49,
      "learning_rate": 0.001,
      "loss": 2.4842,
      "step": 236916
    },
    {
      "epoch": 45.49,
      "learning_rate": 0.001,
      "loss": 2.4793,
      "step": 236928
    },
    {
      "epoch": 45.5,
      "learning_rate": 0.001,
      "loss": 2.4846,
      "step": 236940
    },
    {
      "epoch": 45.5,
      "learning_rate": 0.001,
      "loss": 2.49,
      "step": 236952
    },
    {
      "epoch": 45.5,
      "learning_rate": 0.001,
      "loss": 2.4864,
      "step": 236964
    },
    {
      "epoch": 45.5,
      "learning_rate": 0.001,
      "loss": 2.4921,
      "step": 236976
    },
    {
      "epoch": 45.5,
      "learning_rate": 0.001,
      "loss": 2.484,
      "step": 236988
    },
    {
      "epoch": 45.51,
      "learning_rate": 0.001,
      "loss": 2.4983,
      "step": 237000
    },
    {
      "epoch": 45.51,
      "learning_rate": 0.001,
      "loss": 2.4876,
      "step": 237012
    },
    {
      "epoch": 45.51,
      "learning_rate": 0.001,
      "loss": 2.491,
      "step": 237024
    },
    {
      "epoch": 45.51,
      "learning_rate": 0.001,
      "loss": 2.4896,
      "step": 237036
    },
    {
      "epoch": 45.52,
      "learning_rate": 0.001,
      "loss": 2.4811,
      "step": 237048
    },
    {
      "epoch": 45.52,
      "learning_rate": 0.001,
      "loss": 2.4859,
      "step": 237060
    },
    {
      "epoch": 45.52,
      "learning_rate": 0.001,
      "loss": 2.4926,
      "step": 237072
    },
    {
      "epoch": 45.52,
      "learning_rate": 0.001,
      "loss": 2.4826,
      "step": 237084
    },
    {
      "epoch": 45.53,
      "learning_rate": 0.001,
      "loss": 2.4941,
      "step": 237096
    },
    {
      "epoch": 45.53,
      "learning_rate": 0.001,
      "loss": 2.4891,
      "step": 237108
    },
    {
      "epoch": 45.53,
      "learning_rate": 0.001,
      "loss": 2.4905,
      "step": 237120
    },
    {
      "epoch": 45.53,
      "learning_rate": 0.001,
      "loss": 2.4751,
      "step": 237132
    },
    {
      "epoch": 45.53,
      "learning_rate": 0.001,
      "loss": 2.484,
      "step": 237144
    },
    {
      "epoch": 45.54,
      "learning_rate": 0.001,
      "loss": 2.4866,
      "step": 237156
    },
    {
      "epoch": 45.54,
      "learning_rate": 0.001,
      "loss": 2.4947,
      "step": 237168
    },
    {
      "epoch": 45.54,
      "learning_rate": 0.001,
      "loss": 2.4935,
      "step": 237180
    },
    {
      "epoch": 45.54,
      "learning_rate": 0.001,
      "loss": 2.4823,
      "step": 237192
    },
    {
      "epoch": 45.55,
      "learning_rate": 0.001,
      "loss": 2.4908,
      "step": 237204
    },
    {
      "epoch": 45.55,
      "learning_rate": 0.001,
      "loss": 2.4945,
      "step": 237216
    },
    {
      "epoch": 45.55,
      "learning_rate": 0.001,
      "loss": 2.4985,
      "step": 237228
    },
    {
      "epoch": 45.55,
      "learning_rate": 0.001,
      "loss": 2.4885,
      "step": 237240
    },
    {
      "epoch": 45.56,
      "learning_rate": 0.001,
      "loss": 2.4906,
      "step": 237252
    },
    {
      "epoch": 45.56,
      "learning_rate": 0.001,
      "loss": 2.474,
      "step": 237264
    },
    {
      "epoch": 45.56,
      "learning_rate": 0.001,
      "loss": 2.4884,
      "step": 237276
    },
    {
      "epoch": 45.56,
      "learning_rate": 0.001,
      "loss": 2.4914,
      "step": 237288
    },
    {
      "epoch": 45.56,
      "learning_rate": 0.001,
      "loss": 2.4933,
      "step": 237300
    },
    {
      "epoch": 45.57,
      "learning_rate": 0.001,
      "loss": 2.4891,
      "step": 237312
    },
    {
      "epoch": 45.57,
      "learning_rate": 0.001,
      "loss": 2.4779,
      "step": 237324
    },
    {
      "epoch": 45.57,
      "learning_rate": 0.001,
      "loss": 2.487,
      "step": 237336
    },
    {
      "epoch": 45.57,
      "learning_rate": 0.001,
      "loss": 2.4902,
      "step": 237348
    },
    {
      "epoch": 45.58,
      "learning_rate": 0.001,
      "loss": 2.4914,
      "step": 237360
    },
    {
      "epoch": 45.58,
      "learning_rate": 0.001,
      "loss": 2.4936,
      "step": 237372
    },
    {
      "epoch": 45.58,
      "learning_rate": 0.001,
      "loss": 2.496,
      "step": 237384
    },
    {
      "epoch": 45.58,
      "learning_rate": 0.001,
      "loss": 2.4947,
      "step": 237396
    },
    {
      "epoch": 45.59,
      "learning_rate": 0.001,
      "loss": 2.4909,
      "step": 237408
    },
    {
      "epoch": 45.59,
      "learning_rate": 0.001,
      "loss": 2.492,
      "step": 237420
    },
    {
      "epoch": 45.59,
      "learning_rate": 0.001,
      "loss": 2.4932,
      "step": 237432
    },
    {
      "epoch": 45.59,
      "learning_rate": 0.001,
      "loss": 2.4794,
      "step": 237444
    },
    {
      "epoch": 45.59,
      "learning_rate": 0.001,
      "loss": 2.491,
      "step": 237456
    },
    {
      "epoch": 45.6,
      "learning_rate": 0.001,
      "loss": 2.4889,
      "step": 237468
    },
    {
      "epoch": 45.6,
      "learning_rate": 0.001,
      "loss": 2.4904,
      "step": 237480
    },
    {
      "epoch": 45.6,
      "learning_rate": 0.001,
      "loss": 2.4964,
      "step": 237492
    },
    {
      "epoch": 45.6,
      "eval_ag_news_accuracy": 0.32959375,
      "eval_ag_news_bleu_score": 5.024724755706574,
      "eval_ag_news_bleu_score_sem": 0.16372791908545986,
      "eval_ag_news_emb_cos_sim": 0.8220397233963013,
      "eval_ag_news_emb_cos_sim_sem": 0.007606910173436648,
      "eval_ag_news_emb_top1_equal": 0.3203125,
      "eval_ag_news_emb_top1_equal_sem": 0.041403754790620424,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4633166790008545,
      "eval_ag_news_n_ngrams_match_1": 14.574,
      "eval_ag_news_n_ngrams_match_2": 3.316,
      "eval_ag_news_n_ngrams_match_3": 0.938,
      "eval_ag_news_num_pred_words": 47.258,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.922678405268474,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3594223922503628,
      "eval_ag_news_runtime": 32.0224,
      "eval_ag_news_samples_per_second": 15.614,
      "eval_ag_news_steps_per_second": 0.031,
      "eval_ag_news_token_set_f1": 0.36223142711624384,
      "eval_ag_news_token_set_f1_sem": 0.004453876575610987,
      "eval_ag_news_token_set_precision": 0.3497371965963449,
      "eval_ag_news_token_set_recall": 0.3900248051337052,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 237500
    },
    {
      "epoch": 45.6,
      "eval_anthropic_toxic_prompts_accuracy": 0.11575,
      "eval_anthropic_toxic_prompts_bleu_score": 3.3184463357934466,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12615424755439342,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6881489753723145,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008676794751995533,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.189636707305908,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.494,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.064,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.802,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.118,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.279605237471955,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2218234368893499,
      "eval_anthropic_toxic_prompts_runtime": 34.2193,
      "eval_anthropic_toxic_prompts_samples_per_second": 14.612,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.029,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36779669154581185,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0066226066988801305,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4557761434098778,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3352528239933192,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 237500
    },
    {
      "epoch": 45.6,
      "eval_arxiv_accuracy": 0.354125,
      "eval_arxiv_bleu_score": 4.635956601267533,
      "eval_arxiv_bleu_score_sem": 0.12833729885936038,
      "eval_arxiv_emb_cos_sim": 0.7911975383758545,
      "eval_arxiv_emb_cos_sim_sem": 0.006375492529603698,
      "eval_arxiv_emb_top1_equal": 0.3046875,
      "eval_arxiv_emb_top1_equal_sem": 0.04084279867618665,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3218994140625,
      "eval_arxiv_n_ngrams_match_1": 16.082,
      "eval_arxiv_n_ngrams_match_2": 3.234,
      "eval_arxiv_n_ngrams_match_3": 0.72,
      "eval_arxiv_num_pred_words": 41.972,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.712938945091185,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.38065290013941777,
      "eval_arxiv_runtime": 29.1833,
      "eval_arxiv_samples_per_second": 17.133,
      "eval_arxiv_steps_per_second": 0.034,
      "eval_arxiv_token_set_f1": 0.37468184081270856,
      "eval_arxiv_token_set_f1_sem": 0.004001955486649725,
      "eval_arxiv_token_set_precision": 0.3292246429652243,
      "eval_arxiv_token_set_recall": 0.4488915799943892,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 237500
    },
    {
      "epoch": 45.6,
      "eval_python_code_alpaca_accuracy": 0.163625,
      "eval_python_code_alpaca_bleu_score": 4.727852193410257,
      "eval_python_code_alpaca_bleu_score_sem": 0.14930282466611577,
      "eval_python_code_alpaca_emb_cos_sim": 0.7710949182510376,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008083269001830456,
      "eval_python_code_alpaca_emb_top1_equal": 0.140625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8213469982147217,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.098,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.952,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.01,
      "eval_python_code_alpaca_num_pred_words": 43.544,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 16.79946428687405,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3452332843968612,
      "eval_python_code_alpaca_runtime": 31.4166,
      "eval_python_code_alpaca_samples_per_second": 15.915,
      "eval_python_code_alpaca_steps_per_second": 0.032,
      "eval_python_code_alpaca_token_set_f1": 0.48932212231834404,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005614974685638379,
      "eval_python_code_alpaca_token_set_precision": 0.5537253776682445,
      "eval_python_code_alpaca_token_set_recall": 0.4584430132902336,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 237500
    },
    {
      "epoch": 45.6,
      "eval_wikibio_accuracy": 0.3310625,
      "eval_wikibio_bleu_score": 6.056535468706676,
      "eval_wikibio_bleu_score_sem": 0.21373125210053123,
      "eval_wikibio_emb_cos_sim": 0.7506512999534607,
      "eval_wikibio_emb_cos_sim_sem": 0.00843996765894577,
      "eval_wikibio_emb_top1_equal": 0.1796875,
      "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6376712322235107,
      "eval_wikibio_n_ngrams_match_1": 10.076,
      "eval_wikibio_n_ngrams_match_2": 3.466,
      "eval_wikibio_n_ngrams_match_3": 1.282,
      "eval_wikibio_num_pred_words": 36.29,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 38.003232892403915,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3614314674627419,
      "eval_wikibio_runtime": 31.0212,
      "eval_wikibio_samples_per_second": 16.118,
      "eval_wikibio_steps_per_second": 0.032,
      "eval_wikibio_token_set_f1": 0.3177332711936968,
      "eval_wikibio_token_set_f1_sem": 0.005371096912022222,
      "eval_wikibio_token_set_precision": 0.3286910213631987,
      "eval_wikibio_token_set_recall": 0.32599899004879107,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 237500
    },
    {
      "epoch": 45.6,
      "eval_nq_accuracy": 0.53796875,
      "eval_nq_bleu_score": 12.326095005539692,
      "eval_nq_bleu_score_sem": 0.49644107675178234,
      "eval_nq_emb_cos_sim": 0.8375586271286011,
      "eval_nq_emb_cos_sim_sem": 0.00674963471364646,
      "eval_nq_emb_top1_equal": 0.2734375,
      "eval_nq_emb_top1_equal_sem": 0.03955156411760461,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.111186981201172,
      "eval_nq_n_ngrams_match_1": 23.75,
      "eval_nq_n_ngrams_match_2": 8.904,
      "eval_nq_n_ngrams_match_3": 4.176,
      "eval_nq_num_pred_words": 49.452,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.258037604848024,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4590653808449676,
      "eval_nq_runtime": 31.3621,
      "eval_nq_samples_per_second": 15.943,
      "eval_nq_steps_per_second": 0.032,
      "eval_nq_token_set_f1": 0.47191897728494864,
      "eval_nq_token_set_f1_sem": 0.005061002449395294,
      "eval_nq_token_set_precision": 0.43308139515979027,
      "eval_nq_token_set_recall": 0.5260267250353408,
      "eval_nq_true_num_tokens": 64.0,
      "step": 237500
    },
    {
      "epoch": 45.6,
      "learning_rate": 0.001,
      "loss": 2.4929,
      "step": 237504
    },
    {
      "epoch": 45.61,
      "learning_rate": 0.001,
      "loss": 2.4824,
      "step": 237516
    },
    {
      "epoch": 45.61,
      "learning_rate": 0.001,
      "loss": 2.4932,
      "step": 237528
    },
    {
      "epoch": 45.61,
      "learning_rate": 0.001,
      "loss": 2.4915,
      "step": 237540
    },
    {
      "epoch": 45.61,
      "learning_rate": 0.001,
      "loss": 2.4851,
      "step": 237552
    },
    {
      "epoch": 45.62,
      "learning_rate": 0.001,
      "loss": 2.4905,
      "step": 237564
    },
    {
      "epoch": 45.62,
      "learning_rate": 0.001,
      "loss": 2.4821,
      "step": 237576
    },
    {
      "epoch": 45.62,
      "learning_rate": 0.001,
      "loss": 2.4899,
      "step": 237588
    },
    {
      "epoch": 45.62,
      "learning_rate": 0.001,
      "loss": 2.4991,
      "step": 237600
    },
    {
      "epoch": 45.62,
      "learning_rate": 0.001,
      "loss": 2.4905,
      "step": 237612
    },
    {
      "epoch": 45.63,
      "learning_rate": 0.001,
      "loss": 2.5029,
      "step": 237624
    },
    {
      "epoch": 45.63,
      "learning_rate": 0.001,
      "loss": 2.4858,
      "step": 237636
    },
    {
      "epoch": 45.63,
      "learning_rate": 0.001,
      "loss": 2.4913,
      "step": 237648
    },
    {
      "epoch": 45.63,
      "learning_rate": 0.001,
      "loss": 2.4869,
      "step": 237660
    },
    {
      "epoch": 45.64,
      "learning_rate": 0.001,
      "loss": 2.4956,
      "step": 237672
    },
    {
      "epoch": 45.64,
      "learning_rate": 0.001,
      "loss": 2.4817,
      "step": 237684
    },
    {
      "epoch": 45.64,
      "learning_rate": 0.001,
      "loss": 2.4889,
      "step": 237696
    },
    {
      "epoch": 45.64,
      "learning_rate": 0.001,
      "loss": 2.4832,
      "step": 237708
    },
    {
      "epoch": 45.65,
      "learning_rate": 0.001,
      "loss": 2.4845,
      "step": 237720
    },
    {
      "epoch": 45.65,
      "learning_rate": 0.001,
      "loss": 2.4892,
      "step": 237732
    },
    {
      "epoch": 45.65,
      "learning_rate": 0.001,
      "loss": 2.4899,
      "step": 237744
    },
    {
      "epoch": 45.65,
      "learning_rate": 0.001,
      "loss": 2.479,
      "step": 237756
    },
    {
      "epoch": 45.65,
      "learning_rate": 0.001,
      "loss": 2.4853,
      "step": 237768
    },
    {
      "epoch": 45.66,
      "learning_rate": 0.001,
      "loss": 2.4815,
      "step": 237780
    },
    {
      "epoch": 45.66,
      "learning_rate": 0.001,
      "loss": 2.4762,
      "step": 237792
    },
    {
      "epoch": 45.66,
      "learning_rate": 0.001,
      "loss": 2.4832,
      "step": 237804
    },
    {
      "epoch": 45.66,
      "learning_rate": 0.001,
      "loss": 2.4902,
      "step": 237816
    },
    {
      "epoch": 45.67,
      "learning_rate": 0.001,
      "loss": 2.4893,
      "step": 237828
    },
    {
      "epoch": 45.67,
      "learning_rate": 0.001,
      "loss": 2.4899,
      "step": 237840
    },
    {
      "epoch": 45.67,
      "learning_rate": 0.001,
      "loss": 2.4943,
      "step": 237852
    },
    {
      "epoch": 45.67,
      "learning_rate": 0.001,
      "loss": 2.4892,
      "step": 237864
    },
    {
      "epoch": 45.68,
      "learning_rate": 0.001,
      "loss": 2.489,
      "step": 237876
    },
    {
      "epoch": 45.68,
      "learning_rate": 0.001,
      "loss": 2.4802,
      "step": 237888
    },
    {
      "epoch": 45.68,
      "learning_rate": 0.001,
      "loss": 2.49,
      "step": 237900
    },
    {
      "epoch": 45.68,
      "learning_rate": 0.001,
      "loss": 2.4895,
      "step": 237912
    },
    {
      "epoch": 45.68,
      "learning_rate": 0.001,
      "loss": 2.4958,
      "step": 237924
    },
    {
      "epoch": 45.69,
      "learning_rate": 0.001,
      "loss": 2.4946,
      "step": 237936
    },
    {
      "epoch": 45.69,
      "learning_rate": 0.001,
      "loss": 2.4976,
      "step": 237948
    },
    {
      "epoch": 45.69,
      "learning_rate": 0.001,
      "loss": 2.4858,
      "step": 237960
    },
    {
      "epoch": 45.69,
      "learning_rate": 0.001,
      "loss": 2.4816,
      "step": 237972
    },
    {
      "epoch": 45.7,
      "learning_rate": 0.001,
      "loss": 2.4798,
      "step": 237984
    },
    {
      "epoch": 45.7,
      "learning_rate": 0.001,
      "loss": 2.4954,
      "step": 237996
    },
    {
      "epoch": 45.7,
      "learning_rate": 0.001,
      "loss": 2.4966,
      "step": 238008
    },
    {
      "epoch": 45.7,
      "learning_rate": 0.001,
      "loss": 2.4916,
      "step": 238020
    },
    {
      "epoch": 45.71,
      "learning_rate": 0.001,
      "loss": 2.4789,
      "step": 238032
    },
    {
      "epoch": 45.71,
      "learning_rate": 0.001,
      "loss": 2.4875,
      "step": 238044
    },
    {
      "epoch": 45.71,
      "learning_rate": 0.001,
      "loss": 2.4875,
      "step": 238056
    },
    {
      "epoch": 45.71,
      "learning_rate": 0.001,
      "loss": 2.4952,
      "step": 238068
    },
    {
      "epoch": 45.71,
      "learning_rate": 0.001,
      "loss": 2.4918,
      "step": 238080
    },
    {
      "epoch": 45.72,
      "learning_rate": 0.001,
      "loss": 2.4809,
      "step": 238092
    },
    {
      "epoch": 45.72,
      "learning_rate": 0.001,
      "loss": 2.4882,
      "step": 238104
    },
    {
      "epoch": 45.72,
      "learning_rate": 0.001,
      "loss": 2.4864,
      "step": 238116
    },
    {
      "epoch": 45.72,
      "eval_ag_news_accuracy": 0.329125,
      "eval_ag_news_bleu_score": 5.044727813363567,
      "eval_ag_news_bleu_score_sem": 0.16370559943851612,
      "eval_ag_news_emb_cos_sim": 0.8225343823432922,
      "eval_ag_news_emb_cos_sim_sem": 0.006919853322436309,
      "eval_ag_news_emb_top1_equal": 0.296875,
      "eval_ag_news_emb_top1_equal_sem": 0.04054163310179599,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4637818336486816,
      "eval_ag_news_n_ngrams_match_1": 14.444,
      "eval_ag_news_n_ngrams_match_2": 3.232,
      "eval_ag_news_n_ngrams_match_3": 0.948,
      "eval_ag_news_num_pred_words": 46.48,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.937530841571864,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.36035109910329255,
      "eval_ag_news_runtime": 31.7509,
      "eval_ag_news_samples_per_second": 15.748,
      "eval_ag_news_steps_per_second": 0.031,
      "eval_ag_news_token_set_f1": 0.363937083695072,
      "eval_ag_news_token_set_f1_sem": 0.004495710950073818,
      "eval_ag_news_token_set_precision": 0.348181004304836,
      "eval_ag_news_token_set_recall": 0.3962901292141611,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 238125
    },
    {
      "epoch": 45.72,
      "eval_anthropic_toxic_prompts_accuracy": 0.11725,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2110720711957734,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12444476877967454,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6830450296401978,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008823951132115827,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.046875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01875615101164758,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.176992654800415,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.388,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.026,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.754,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.36,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 23.974545295203114,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21839579920788915,
      "eval_anthropic_toxic_prompts_runtime": 30.2496,
      "eval_anthropic_toxic_prompts_samples_per_second": 16.529,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.033,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3659661144509881,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006548412992509525,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4506092923404888,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.33653116794084625,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 238125
    },
    {
      "epoch": 45.72,
      "eval_arxiv_accuracy": 0.355875,
      "eval_arxiv_bleu_score": 4.533323625214181,
      "eval_arxiv_bleu_score_sem": 0.14178894150305668,
      "eval_arxiv_emb_cos_sim": 0.7814557552337646,
      "eval_arxiv_emb_cos_sim_sem": 0.007334771923868853,
      "eval_arxiv_emb_top1_equal": 0.265625,
      "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3131489753723145,
      "eval_arxiv_n_ngrams_match_1": 15.604,
      "eval_arxiv_n_ngrams_match_2": 3.158,
      "eval_arxiv_n_ngrams_match_3": 0.712,
      "eval_arxiv_num_pred_words": 40.422,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.471496476293275,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3724556854309238,
      "eval_arxiv_runtime": 30.2812,
      "eval_arxiv_samples_per_second": 16.512,
      "eval_arxiv_steps_per_second": 0.033,
      "eval_arxiv_token_set_f1": 0.3681954355977995,
      "eval_arxiv_token_set_f1_sem": 0.004370250898434788,
      "eval_arxiv_token_set_precision": 0.3201643705610055,
      "eval_arxiv_token_set_recall": 0.4541018201444844,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 238125
    },
    {
      "epoch": 45.72,
      "eval_python_code_alpaca_accuracy": 0.1631875,
      "eval_python_code_alpaca_bleu_score": 4.9750378292134005,
      "eval_python_code_alpaca_bleu_score_sem": 0.16032895638455275,
      "eval_python_code_alpaca_emb_cos_sim": 0.7726291418075562,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007056011350728784,
      "eval_python_code_alpaca_emb_top1_equal": 0.1953125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.035178457165496856,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.840923309326172,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.964,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.076,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.094,
      "eval_python_code_alpaca_num_pred_words": 43.224,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.131575980932794,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3452292864192766,
      "eval_python_code_alpaca_runtime": 30.6634,
      "eval_python_code_alpaca_samples_per_second": 16.306,
      "eval_python_code_alpaca_steps_per_second": 0.033,
      "eval_python_code_alpaca_token_set_f1": 0.4896349754728187,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005574803660132695,
      "eval_python_code_alpaca_token_set_precision": 0.5464009977536679,
      "eval_python_code_alpaca_token_set_recall": 0.46678427966126335,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 238125
    },
    {
      "epoch": 45.72,
      "eval_wikibio_accuracy": 0.33603125,
      "eval_wikibio_bleu_score": 6.1314882652978175,
      "eval_wikibio_bleu_score_sem": 0.21713200836686297,
      "eval_wikibio_emb_cos_sim": 0.7277990579605103,
      "eval_wikibio_emb_cos_sim_sem": 0.009776202539391791,
      "eval_wikibio_emb_top1_equal": 0.234375,
      "eval_wikibio_emb_top1_equal_sem": 0.03758909358128201,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6152026653289795,
      "eval_wikibio_n_ngrams_match_1": 10.128,
      "eval_wikibio_n_ngrams_match_2": 3.418,
      "eval_wikibio_n_ngrams_match_3": 1.262,
      "eval_wikibio_num_pred_words": 36.118,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.15887597840511,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3583513249132447,
      "eval_wikibio_runtime": 30.1834,
      "eval_wikibio_samples_per_second": 16.565,
      "eval_wikibio_steps_per_second": 0.033,
      "eval_wikibio_token_set_f1": 0.3216099510643586,
      "eval_wikibio_token_set_f1_sem": 0.005447061873766101,
      "eval_wikibio_token_set_precision": 0.32827551551202994,
      "eval_wikibio_token_set_recall": 0.33427165559753935,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 238125
    },
    {
      "epoch": 45.72,
      "eval_nq_accuracy": 0.538875,
      "eval_nq_bleu_score": 12.250939046918806,
      "eval_nq_bleu_score_sem": 0.49319309581285903,
      "eval_nq_emb_cos_sim": 0.8406163454055786,
      "eval_nq_emb_cos_sim_sem": 0.00763260131178207,
      "eval_nq_emb_top1_equal": 0.2890625,
      "eval_nq_emb_top1_equal_sem": 0.04022626667363519,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1072921752929688,
      "eval_nq_n_ngrams_match_1": 23.52,
      "eval_nq_n_ngrams_match_2": 8.804,
      "eval_nq_n_ngrams_match_3": 4.114,
      "eval_nq_num_pred_words": 49.158,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.225936705160954,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4568604821623774,
      "eval_nq_runtime": 31.2032,
      "eval_nq_samples_per_second": 16.024,
      "eval_nq_steps_per_second": 0.032,
      "eval_nq_token_set_f1": 0.47205520438837756,
      "eval_nq_token_set_f1_sem": 0.005003613810279482,
      "eval_nq_token_set_precision": 0.4303367268046354,
      "eval_nq_token_set_recall": 0.5316028579508614,
      "eval_nq_true_num_tokens": 64.0,
      "step": 238125
    },
    {
      "epoch": 45.72,
      "learning_rate": 0.001,
      "loss": 2.4766,
      "step": 238128
    },
    {
      "epoch": 45.73,
      "learning_rate": 0.001,
      "loss": 2.4767,
      "step": 238140
    },
    {
      "epoch": 45.73,
      "learning_rate": 0.001,
      "loss": 2.4815,
      "step": 238152
    },
    {
      "epoch": 45.73,
      "learning_rate": 0.001,
      "loss": 2.4864,
      "step": 238164
    },
    {
      "epoch": 45.73,
      "learning_rate": 0.001,
      "loss": 2.4903,
      "step": 238176
    },
    {
      "epoch": 45.74,
      "learning_rate": 0.001,
      "loss": 2.4788,
      "step": 238188
    },
    {
      "epoch": 45.74,
      "learning_rate": 0.001,
      "loss": 2.4929,
      "step": 238200
    },
    {
      "epoch": 45.74,
      "learning_rate": 0.001,
      "loss": 2.4944,
      "step": 238212
    },
    {
      "epoch": 45.74,
      "learning_rate": 0.001,
      "loss": 2.4891,
      "step": 238224
    },
    {
      "epoch": 45.74,
      "learning_rate": 0.001,
      "loss": 2.491,
      "step": 238236
    },
    {
      "epoch": 45.75,
      "learning_rate": 0.001,
      "loss": 2.4825,
      "step": 238248
    },
    {
      "epoch": 45.75,
      "learning_rate": 0.001,
      "loss": 2.4758,
      "step": 238260
    },
    {
      "epoch": 45.75,
      "learning_rate": 0.001,
      "loss": 2.4924,
      "step": 238272
    },
    {
      "epoch": 45.75,
      "learning_rate": 0.001,
      "loss": 2.4942,
      "step": 238284
    },
    {
      "epoch": 45.76,
      "learning_rate": 0.001,
      "loss": 2.4873,
      "step": 238296
    },
    {
      "epoch": 45.76,
      "learning_rate": 0.001,
      "loss": 2.4919,
      "step": 238308
    },
    {
      "epoch": 45.76,
      "learning_rate": 0.001,
      "loss": 2.4863,
      "step": 238320
    },
    {
      "epoch": 45.76,
      "learning_rate": 0.001,
      "loss": 2.496,
      "step": 238332
    },
    {
      "epoch": 45.76,
      "learning_rate": 0.001,
      "loss": 2.4794,
      "step": 238344
    },
    {
      "epoch": 45.77,
      "learning_rate": 0.001,
      "loss": 2.4908,
      "step": 238356
    },
    {
      "epoch": 45.77,
      "learning_rate": 0.001,
      "loss": 2.4934,
      "step": 238368
    },
    {
      "epoch": 45.77,
      "learning_rate": 0.001,
      "loss": 2.4839,
      "step": 238380
    },
    {
      "epoch": 45.77,
      "learning_rate": 0.001,
      "loss": 2.4924,
      "step": 238392
    },
    {
      "epoch": 45.78,
      "learning_rate": 0.001,
      "loss": 2.4796,
      "step": 238404
    },
    {
      "epoch": 45.78,
      "learning_rate": 0.001,
      "loss": 2.4865,
      "step": 238416
    },
    {
      "epoch": 45.78,
      "learning_rate": 0.001,
      "loss": 2.4844,
      "step": 238428
    },
    {
      "epoch": 45.78,
      "learning_rate": 0.001,
      "loss": 2.485,
      "step": 238440
    },
    {
      "epoch": 45.79,
      "learning_rate": 0.001,
      "loss": 2.4876,
      "step": 238452
    },
    {
      "epoch": 45.79,
      "learning_rate": 0.001,
      "loss": 2.49,
      "step": 238464
    },
    {
      "epoch": 45.79,
      "learning_rate": 0.001,
      "loss": 2.4941,
      "step": 238476
    },
    {
      "epoch": 45.79,
      "learning_rate": 0.001,
      "loss": 2.4863,
      "step": 238488
    },
    {
      "epoch": 45.79,
      "learning_rate": 0.001,
      "loss": 2.4763,
      "step": 238500
    },
    {
      "epoch": 45.8,
      "learning_rate": 0.001,
      "loss": 2.497,
      "step": 238512
    },
    {
      "epoch": 45.8,
      "learning_rate": 0.001,
      "loss": 2.4874,
      "step": 238524
    },
    {
      "epoch": 45.8,
      "learning_rate": 0.001,
      "loss": 2.4845,
      "step": 238536
    },
    {
      "epoch": 45.8,
      "learning_rate": 0.001,
      "loss": 2.4968,
      "step": 238548
    },
    {
      "epoch": 45.81,
      "learning_rate": 0.001,
      "loss": 2.4809,
      "step": 238560
    },
    {
      "epoch": 45.81,
      "learning_rate": 0.001,
      "loss": 2.4876,
      "step": 238572
    },
    {
      "epoch": 45.81,
      "learning_rate": 0.001,
      "loss": 2.4834,
      "step": 238584
    },
    {
      "epoch": 45.81,
      "learning_rate": 0.001,
      "loss": 2.4874,
      "step": 238596
    },
    {
      "epoch": 45.82,
      "learning_rate": 0.001,
      "loss": 2.4814,
      "step": 238608
    },
    {
      "epoch": 45.82,
      "learning_rate": 0.001,
      "loss": 2.4894,
      "step": 238620
    },
    {
      "epoch": 45.82,
      "learning_rate": 0.001,
      "loss": 2.4761,
      "step": 238632
    },
    {
      "epoch": 45.82,
      "learning_rate": 0.001,
      "loss": 2.4984,
      "step": 238644
    },
    {
      "epoch": 45.82,
      "learning_rate": 0.001,
      "loss": 2.484,
      "step": 238656
    },
    {
      "epoch": 45.83,
      "learning_rate": 0.001,
      "loss": 2.4897,
      "step": 238668
    },
    {
      "epoch": 45.83,
      "learning_rate": 0.001,
      "loss": 2.4896,
      "step": 238680
    },
    {
      "epoch": 45.83,
      "learning_rate": 0.001,
      "loss": 2.4914,
      "step": 238692
    },
    {
      "epoch": 45.83,
      "learning_rate": 0.001,
      "loss": 2.5042,
      "step": 238704
    },
    {
      "epoch": 45.84,
      "learning_rate": 0.001,
      "loss": 2.4966,
      "step": 238716
    },
    {
      "epoch": 45.84,
      "learning_rate": 0.001,
      "loss": 2.498,
      "step": 238728
    },
    {
      "epoch": 45.84,
      "learning_rate": 0.001,
      "loss": 2.4987,
      "step": 238740
    },
    {
      "epoch": 45.84,
      "eval_ag_news_accuracy": 0.32865625,
      "eval_ag_news_bleu_score": 5.144768233041238,
      "eval_ag_news_bleu_score_sem": 0.1689205305001006,
      "eval_ag_news_emb_cos_sim": 0.8266169428825378,
      "eval_ag_news_emb_cos_sim_sem": 0.006205318701840391,
      "eval_ag_news_emb_top1_equal": 0.265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4586448669433594,
      "eval_ag_news_n_ngrams_match_1": 14.38,
      "eval_ag_news_n_ngrams_match_2": 3.27,
      "eval_ag_news_n_ngrams_match_3": 0.998,
      "eval_ag_news_num_pred_words": 47.13,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.77388947895721,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35822425798766766,
      "eval_ag_news_runtime": 31.9965,
      "eval_ag_news_samples_per_second": 15.627,
      "eval_ag_news_steps_per_second": 0.031,
      "eval_ag_news_token_set_f1": 0.35634377796755073,
      "eval_ag_news_token_set_f1_sem": 0.0042882442053454855,
      "eval_ag_news_token_set_precision": 0.3434862089996292,
      "eval_ag_news_token_set_recall": 0.38303931384841483,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 238750
    },
    {
      "epoch": 45.84,
      "eval_anthropic_toxic_prompts_accuracy": 0.11740625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.139026559988348,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11305576393985822,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6804046630859375,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009394784755624837,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.1747403144836426,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.334,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.972,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.718,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.528,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 23.92060722650429,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2172781156802377,
      "eval_anthropic_toxic_prompts_runtime": 30.1829,
      "eval_anthropic_toxic_prompts_samples_per_second": 16.566,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.033,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.364104661709084,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006646789025763911,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4455358200937167,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.33682954824463635,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 238750
    },
    {
      "epoch": 45.84,
      "eval_arxiv_accuracy": 0.35353125,
      "eval_arxiv_bleu_score": 4.306459648463017,
      "eval_arxiv_bleu_score_sem": 0.12515299423561607,
      "eval_arxiv_emb_cos_sim": 0.7886142730712891,
      "eval_arxiv_emb_cos_sim_sem": 0.0062686746347753765,
      "eval_arxiv_emb_top1_equal": 0.2734375,
      "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.324554204940796,
      "eval_arxiv_n_ngrams_match_1": 15.324,
      "eval_arxiv_n_ngrams_match_2": 2.91,
      "eval_arxiv_n_ngrams_match_3": 0.644,
      "eval_arxiv_num_pred_words": 40.634,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.786608748305973,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36529843382010346,
      "eval_arxiv_runtime": 30.9706,
      "eval_arxiv_samples_per_second": 16.144,
      "eval_arxiv_steps_per_second": 0.032,
      "eval_arxiv_token_set_f1": 0.36171720253871614,
      "eval_arxiv_token_set_f1_sem": 0.004325681949219151,
      "eval_arxiv_token_set_precision": 0.31479686368326426,
      "eval_arxiv_token_set_recall": 0.4458654477597768,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 238750
    },
    {
      "epoch": 45.84,
      "eval_python_code_alpaca_accuracy": 0.163,
      "eval_python_code_alpaca_bleu_score": 4.416517719170178,
      "eval_python_code_alpaca_bleu_score_sem": 0.1373566021447754,
      "eval_python_code_alpaca_emb_cos_sim": 0.7562286257743835,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009416064304311283,
      "eval_python_code_alpaca_emb_top1_equal": 0.140625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8365767002105713,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.788,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.744,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.878,
      "eval_python_code_alpaca_num_pred_words": 43.366,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.057273315913395,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3363113499729391,
      "eval_python_code_alpaca_runtime": 29.6055,
      "eval_python_code_alpaca_samples_per_second": 16.889,
      "eval_python_code_alpaca_steps_per_second": 0.034,
      "eval_python_code_alpaca_token_set_f1": 0.4783706046645508,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005271790732172327,
      "eval_python_code_alpaca_token_set_precision": 0.5333043457211538,
      "eval_python_code_alpaca_token_set_recall": 0.45581275469665694,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 238750
    },
    {
      "epoch": 45.84,
      "eval_wikibio_accuracy": 0.32853125,
      "eval_wikibio_bleu_score": 6.255304883834685,
      "eval_wikibio_bleu_score_sem": 0.2265203061591911,
      "eval_wikibio_emb_cos_sim": 0.746715247631073,
      "eval_wikibio_emb_cos_sim_sem": 0.009717119243224598,
      "eval_wikibio_emb_top1_equal": 0.1796875,
      "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6414806842803955,
      "eval_wikibio_n_ngrams_match_1": 10.364,
      "eval_wikibio_n_ngrams_match_2": 3.564,
      "eval_wikibio_n_ngrams_match_3": 1.336,
      "eval_wikibio_num_pred_words": 36.364,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 38.1482804866323,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36748032006631687,
      "eval_wikibio_runtime": 31.2042,
      "eval_wikibio_samples_per_second": 16.023,
      "eval_wikibio_steps_per_second": 0.032,
      "eval_wikibio_token_set_f1": 0.3276415903672124,
      "eval_wikibio_token_set_f1_sem": 0.005572812305989724,
      "eval_wikibio_token_set_precision": 0.3379065389037104,
      "eval_wikibio_token_set_recall": 0.3324337132562616,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 238750
    },
    {
      "epoch": 45.84,
      "eval_nq_accuracy": 0.537,
      "eval_nq_bleu_score": 12.073843240487765,
      "eval_nq_bleu_score_sem": 0.48933742638335503,
      "eval_nq_emb_cos_sim": 0.8400126695632935,
      "eval_nq_emb_cos_sim_sem": 0.007382467668404009,
      "eval_nq_emb_top1_equal": 0.3671875,
      "eval_nq_emb_top1_equal_sem": 0.04277397517748991,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.109062433242798,
      "eval_nq_n_ngrams_match_1": 23.658,
      "eval_nq_n_ngrams_match_2": 8.738,
      "eval_nq_n_ngrams_match_3": 4.064,
      "eval_nq_num_pred_words": 49.174,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.240511631891755,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4597881343163798,
      "eval_nq_runtime": 32.0715,
      "eval_nq_samples_per_second": 15.59,
      "eval_nq_steps_per_second": 0.031,
      "eval_nq_token_set_f1": 0.472616888432545,
      "eval_nq_token_set_f1_sem": 0.004883805672172423,
      "eval_nq_token_set_precision": 0.4310450081246856,
      "eval_nq_token_set_recall": 0.5323319177726178,
      "eval_nq_true_num_tokens": 64.0,
      "step": 238750
    },
    {
      "epoch": 45.84,
      "learning_rate": 0.001,
      "loss": 2.4947,
      "step": 238752
    },
    {
      "epoch": 45.85,
      "learning_rate": 0.001,
      "loss": 2.4935,
      "step": 238764
    },
    {
      "epoch": 45.85,
      "learning_rate": 0.001,
      "loss": 2.4894,
      "step": 238776
    },
    {
      "epoch": 45.85,
      "learning_rate": 0.001,
      "loss": 2.488,
      "step": 238788
    },
    {
      "epoch": 45.85,
      "learning_rate": 0.001,
      "loss": 2.4948,
      "step": 238800
    },
    {
      "epoch": 45.85,
      "learning_rate": 0.001,
      "loss": 2.4979,
      "step": 238812
    },
    {
      "epoch": 45.86,
      "learning_rate": 0.001,
      "loss": 2.4905,
      "step": 238824
    },
    {
      "epoch": 45.86,
      "learning_rate": 0.001,
      "loss": 2.4892,
      "step": 238836
    },
    {
      "epoch": 45.86,
      "learning_rate": 0.001,
      "loss": 2.4985,
      "step": 238848
    },
    {
      "epoch": 45.86,
      "learning_rate": 0.001,
      "loss": 2.4908,
      "step": 238860
    },
    {
      "epoch": 45.87,
      "learning_rate": 0.001,
      "loss": 2.4916,
      "step": 238872
    },
    {
      "epoch": 45.87,
      "learning_rate": 0.001,
      "loss": 2.4783,
      "step": 238884
    },
    {
      "epoch": 45.87,
      "learning_rate": 0.001,
      "loss": 2.4862,
      "step": 238896
    },
    {
      "epoch": 45.87,
      "learning_rate": 0.001,
      "loss": 2.4807,
      "step": 238908
    },
    {
      "epoch": 45.88,
      "learning_rate": 0.001,
      "loss": 2.4913,
      "step": 238920
    },
    {
      "epoch": 45.88,
      "learning_rate": 0.001,
      "loss": 2.4861,
      "step": 238932
    },
    {
      "epoch": 45.88,
      "learning_rate": 0.001,
      "loss": 2.492,
      "step": 238944
    },
    {
      "epoch": 45.88,
      "learning_rate": 0.001,
      "loss": 2.4942,
      "step": 238956
    },
    {
      "epoch": 45.88,
      "learning_rate": 0.001,
      "loss": 2.4919,
      "step": 238968
    },
    {
      "epoch": 45.89,
      "learning_rate": 0.001,
      "loss": 2.4877,
      "step": 238980
    },
    {
      "epoch": 45.89,
      "learning_rate": 0.001,
      "loss": 2.4911,
      "step": 238992
    },
    {
      "epoch": 45.89,
      "learning_rate": 0.001,
      "loss": 2.4865,
      "step": 239004
    },
    {
      "epoch": 45.89,
      "learning_rate": 0.001,
      "loss": 2.493,
      "step": 239016
    },
    {
      "epoch": 45.9,
      "learning_rate": 0.001,
      "loss": 2.487,
      "step": 239028
    },
    {
      "epoch": 45.9,
      "learning_rate": 0.001,
      "loss": 2.4902,
      "step": 239040
    },
    {
      "epoch": 45.9,
      "learning_rate": 0.001,
      "loss": 2.4907,
      "step": 239052
    },
    {
      "epoch": 45.9,
      "learning_rate": 0.001,
      "loss": 2.4936,
      "step": 239064
    },
    {
      "epoch": 45.91,
      "learning_rate": 0.001,
      "loss": 2.493,
      "step": 239076
    },
    {
      "epoch": 45.91,
      "learning_rate": 0.001,
      "loss": 2.489,
      "step": 239088
    },
    {
      "epoch": 45.91,
      "learning_rate": 0.001,
      "loss": 2.5019,
      "step": 239100
    },
    {
      "epoch": 45.91,
      "learning_rate": 0.001,
      "loss": 2.4987,
      "step": 239112
    },
    {
      "epoch": 45.91,
      "learning_rate": 0.001,
      "loss": 2.5033,
      "step": 239124
    },
    {
      "epoch": 45.92,
      "learning_rate": 0.001,
      "loss": 2.5049,
      "step": 239136
    },
    {
      "epoch": 45.92,
      "learning_rate": 0.001,
      "loss": 2.4882,
      "step": 239148
    },
    {
      "epoch": 45.92,
      "learning_rate": 0.001,
      "loss": 2.4871,
      "step": 239160
    },
    {
      "epoch": 45.92,
      "learning_rate": 0.001,
      "loss": 2.492,
      "step": 239172
    },
    {
      "epoch": 45.93,
      "learning_rate": 0.001,
      "loss": 2.478,
      "step": 239184
    },
    {
      "epoch": 45.93,
      "learning_rate": 0.001,
      "loss": 2.4837,
      "step": 239196
    },
    {
      "epoch": 45.93,
      "learning_rate": 0.001,
      "loss": 2.4894,
      "step": 239208
    },
    {
      "epoch": 45.93,
      "learning_rate": 0.001,
      "loss": 2.4907,
      "step": 239220
    },
    {
      "epoch": 45.94,
      "learning_rate": 0.001,
      "loss": 2.4902,
      "step": 239232
    },
    {
      "epoch": 45.94,
      "learning_rate": 0.001,
      "loss": 2.4815,
      "step": 239244
    },
    {
      "epoch": 45.94,
      "learning_rate": 0.001,
      "loss": 2.4972,
      "step": 239256
    },
    {
      "epoch": 45.94,
      "learning_rate": 0.001,
      "loss": 2.4933,
      "step": 239268
    },
    {
      "epoch": 45.94,
      "learning_rate": 0.001,
      "loss": 2.4836,
      "step": 239280
    },
    {
      "epoch": 45.95,
      "learning_rate": 0.001,
      "loss": 2.4921,
      "step": 239292
    },
    {
      "epoch": 45.95,
      "learning_rate": 0.001,
      "loss": 2.4972,
      "step": 239304
    },
    {
      "epoch": 45.95,
      "learning_rate": 0.001,
      "loss": 2.4968,
      "step": 239316
    },
    {
      "epoch": 45.95,
      "learning_rate": 0.001,
      "loss": 2.4947,
      "step": 239328
    },
    {
      "epoch": 45.96,
      "learning_rate": 0.001,
      "loss": 2.4891,
      "step": 239340
    },
    {
      "epoch": 45.96,
      "learning_rate": 0.001,
      "loss": 2.4978,
      "step": 239352
    },
    {
      "epoch": 45.96,
      "learning_rate": 0.001,
      "loss": 2.4896,
      "step": 239364
    },
    {
      "epoch": 45.96,
      "eval_ag_news_accuracy": 0.32871875,
      "eval_ag_news_bleu_score": 4.9399026382671485,
      "eval_ag_news_bleu_score_sem": 0.1510686819332423,
      "eval_ag_news_emb_cos_sim": 0.8162962198257446,
      "eval_ag_news_emb_cos_sim_sem": 0.007395716266815606,
      "eval_ag_news_emb_top1_equal": 0.328125,
      "eval_ag_news_emb_top1_equal_sem": 0.041664103776406315,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4671449661254883,
      "eval_ag_news_n_ngrams_match_1": 14.348,
      "eval_ag_news_n_ngrams_match_2": 3.3,
      "eval_ag_news_n_ngrams_match_3": 0.938,
      "eval_ag_news_num_pred_words": 46.87,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.045121808701666,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3576900450423325,
      "eval_ag_news_runtime": 31.3477,
      "eval_ag_news_samples_per_second": 15.95,
      "eval_ag_news_steps_per_second": 0.032,
      "eval_ag_news_token_set_f1": 0.35768096360388313,
      "eval_ag_news_token_set_f1_sem": 0.004428705720902338,
      "eval_ag_news_token_set_precision": 0.34337172895039436,
      "eval_ag_news_token_set_recall": 0.38905105075725327,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 239375
    },
    {
      "epoch": 45.96,
      "eval_anthropic_toxic_prompts_accuracy": 0.11665625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2676324815211477,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11991438332392085,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6874958276748657,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008322838274211456,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.140625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030847557647994725,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.195629358291626,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.318,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.976,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.748,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.32,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.42554127285827,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.22266523439444974,
      "eval_anthropic_toxic_prompts_runtime": 28.1126,
      "eval_anthropic_toxic_prompts_samples_per_second": 17.786,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.036,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3631631838023226,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006377495720521091,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44728210919074557,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.33138813950909785,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 239375
    },
    {
      "epoch": 45.96,
      "eval_arxiv_accuracy": 0.3506875,
      "eval_arxiv_bleu_score": 4.445403743904117,
      "eval_arxiv_bleu_score_sem": 0.1331901707372557,
      "eval_arxiv_emb_cos_sim": 0.7852582335472107,
      "eval_arxiv_emb_cos_sim_sem": 0.0062486166861512385,
      "eval_arxiv_emb_top1_equal": 0.3203125,
      "eval_arxiv_emb_top1_equal_sem": 0.041403754790620424,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.329124689102173,
      "eval_arxiv_n_ngrams_match_1": 15.5,
      "eval_arxiv_n_ngrams_match_2": 3.076,
      "eval_arxiv_n_ngrams_match_3": 0.676,
      "eval_arxiv_num_pred_words": 40.63,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.91389766790239,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36933122511739414,
      "eval_arxiv_runtime": 24.5123,
      "eval_arxiv_samples_per_second": 20.398,
      "eval_arxiv_steps_per_second": 0.041,
      "eval_arxiv_token_set_f1": 0.364780382769738,
      "eval_arxiv_token_set_f1_sem": 0.004452125811279962,
      "eval_arxiv_token_set_precision": 0.3159489690695461,
      "eval_arxiv_token_set_recall": 0.44893671958103903,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 239375
    },
    {
      "epoch": 45.96,
      "eval_python_code_alpaca_accuracy": 0.1626875,
      "eval_python_code_alpaca_bleu_score": 4.608845837247555,
      "eval_python_code_alpaca_bleu_score_sem": 0.1422032556489837,
      "eval_python_code_alpaca_emb_cos_sim": 0.7490547895431519,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.011754896978213053,
      "eval_python_code_alpaca_emb_top1_equal": 0.1171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.83894419670105,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.772,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.824,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.928,
      "eval_python_code_alpaca_num_pred_words": 42.048,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.097704191719522,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3420207305899004,
      "eval_python_code_alpaca_runtime": 15.6332,
      "eval_python_code_alpaca_samples_per_second": 31.983,
      "eval_python_code_alpaca_steps_per_second": 0.064,
      "eval_python_code_alpaca_token_set_f1": 0.48195669409699216,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0056963555204259594,
      "eval_python_code_alpaca_token_set_precision": 0.5368215408074446,
      "eval_python_code_alpaca_token_set_recall": 0.4551674891904531,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 239375
    },
    {
      "epoch": 45.96,
      "eval_wikibio_accuracy": 0.3286875,
      "eval_wikibio_bleu_score": 6.424721395278257,
      "eval_wikibio_bleu_score_sem": 0.23062238420257883,
      "eval_wikibio_emb_cos_sim": 0.737343430519104,
      "eval_wikibio_emb_cos_sim_sem": 0.010036058933885709,
      "eval_wikibio_emb_top1_equal": 0.203125,
      "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6401028633117676,
      "eval_wikibio_n_ngrams_match_1": 10.264,
      "eval_wikibio_n_ngrams_match_2": 3.546,
      "eval_wikibio_n_ngrams_match_3": 1.348,
      "eval_wikibio_num_pred_words": 35.836,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 38.095755179405025,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36338410457838255,
      "eval_wikibio_runtime": 23.3936,
      "eval_wikibio_samples_per_second": 21.373,
      "eval_wikibio_steps_per_second": 0.043,
      "eval_wikibio_token_set_f1": 0.3260010040556651,
      "eval_wikibio_token_set_f1_sem": 0.0056159329637420615,
      "eval_wikibio_token_set_precision": 0.33366631613476777,
      "eval_wikibio_token_set_recall": 0.33407662882375644,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 239375
    },
    {
      "epoch": 45.96,
      "eval_nq_accuracy": 0.53828125,
      "eval_nq_bleu_score": 12.107406644099097,
      "eval_nq_bleu_score_sem": 0.4878146134150134,
      "eval_nq_emb_cos_sim": 0.8405568599700928,
      "eval_nq_emb_cos_sim_sem": 0.006912360403024481,
      "eval_nq_emb_top1_equal": 0.3125,
      "eval_nq_emb_top1_equal_sem": 0.041130074229814934,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1115565299987793,
      "eval_nq_n_ngrams_match_1": 23.364,
      "eval_nq_n_ngrams_match_2": 8.69,
      "eval_nq_n_ngrams_match_3": 4.044,
      "eval_nq_num_pred_words": 48.828,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.261089916669839,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4556274129056221,
      "eval_nq_runtime": 29.0195,
      "eval_nq_samples_per_second": 17.23,
      "eval_nq_steps_per_second": 0.034,
      "eval_nq_token_set_f1": 0.4671490124111569,
      "eval_nq_token_set_f1_sem": 0.00491704582686669,
      "eval_nq_token_set_precision": 0.42362585087010984,
      "eval_nq_token_set_recall": 0.5289978632145839,
      "eval_nq_true_num_tokens": 64.0,
      "step": 239375
    },
    {
      "epoch": 45.96,
      "learning_rate": 0.001,
      "loss": 2.4825,
      "step": 239376
    },
    {
      "epoch": 45.97,
      "learning_rate": 0.001,
      "loss": 2.4757,
      "step": 239388
    },
    {
      "epoch": 45.97,
      "learning_rate": 0.001,
      "loss": 2.492,
      "step": 239400
    },
    {
      "epoch": 45.97,
      "learning_rate": 0.001,
      "loss": 2.4927,
      "step": 239412
    },
    {
      "epoch": 45.97,
      "learning_rate": 0.001,
      "loss": 2.4928,
      "step": 239424
    },
    {
      "epoch": 45.97,
      "learning_rate": 0.001,
      "loss": 2.4825,
      "step": 239436
    },
    {
      "epoch": 45.98,
      "learning_rate": 0.001,
      "loss": 2.4862,
      "step": 239448
    },
    {
      "epoch": 45.98,
      "learning_rate": 0.001,
      "loss": 2.4944,
      "step": 239460
    },
    {
      "epoch": 45.98,
      "learning_rate": 0.001,
      "loss": 2.4825,
      "step": 239472
    },
    {
      "epoch": 45.98,
      "learning_rate": 0.001,
      "loss": 2.4918,
      "step": 239484
    },
    {
      "epoch": 45.99,
      "learning_rate": 0.001,
      "loss": 2.4906,
      "step": 239496
    },
    {
      "epoch": 45.99,
      "learning_rate": 0.001,
      "loss": 2.4974,
      "step": 239508
    },
    {
      "epoch": 45.99,
      "learning_rate": 0.001,
      "loss": 2.4966,
      "step": 239520
    },
    {
      "epoch": 45.99,
      "learning_rate": 0.001,
      "loss": 2.4885,
      "step": 239532
    },
    {
      "epoch": 46.0,
      "learning_rate": 0.001,
      "loss": 2.4956,
      "step": 239544
    },
    {
      "epoch": 46.0,
      "learning_rate": 0.001,
      "loss": 2.4781,
      "step": 239556
    },
    {
      "epoch": 46.0,
      "learning_rate": 0.001,
      "loss": 2.4887,
      "step": 239568
    },
    {
      "epoch": 46.0,
      "learning_rate": 0.001,
      "loss": 2.4649,
      "step": 239580
    },
    {
      "epoch": 46.0,
      "learning_rate": 0.001,
      "loss": 2.4821,
      "step": 239592
    },
    {
      "epoch": 46.01,
      "learning_rate": 0.001,
      "loss": 2.4719,
      "step": 239604
    },
    {
      "epoch": 46.01,
      "learning_rate": 0.001,
      "loss": 2.4766,
      "step": 239616
    },
    {
      "epoch": 46.01,
      "learning_rate": 0.001,
      "loss": 2.4793,
      "step": 239628
    },
    {
      "epoch": 46.01,
      "learning_rate": 0.001,
      "loss": 2.475,
      "step": 239640
    },
    {
      "epoch": 46.02,
      "learning_rate": 0.001,
      "loss": 2.4732,
      "step": 239652
    },
    {
      "epoch": 46.02,
      "learning_rate": 0.001,
      "loss": 2.4701,
      "step": 239664
    },
    {
      "epoch": 46.02,
      "learning_rate": 0.001,
      "loss": 2.4635,
      "step": 239676
    },
    {
      "epoch": 46.02,
      "learning_rate": 0.001,
      "loss": 2.4661,
      "step": 239688
    },
    {
      "epoch": 46.03,
      "learning_rate": 0.001,
      "loss": 2.4822,
      "step": 239700
    },
    {
      "epoch": 46.03,
      "learning_rate": 0.001,
      "loss": 2.4774,
      "step": 239712
    },
    {
      "epoch": 46.03,
      "learning_rate": 0.001,
      "loss": 2.4766,
      "step": 239724
    },
    {
      "epoch": 46.03,
      "learning_rate": 0.001,
      "loss": 2.4806,
      "step": 239736
    },
    {
      "epoch": 46.03,
      "learning_rate": 0.001,
      "loss": 2.4765,
      "step": 239748
    },
    {
      "epoch": 46.04,
      "learning_rate": 0.001,
      "loss": 2.4727,
      "step": 239760
    },
    {
      "epoch": 46.04,
      "learning_rate": 0.001,
      "loss": 2.4691,
      "step": 239772
    },
    {
      "epoch": 46.04,
      "learning_rate": 0.001,
      "loss": 2.4654,
      "step": 239784
    },
    {
      "epoch": 46.04,
      "learning_rate": 0.001,
      "loss": 2.4711,
      "step": 239796
    },
    {
      "epoch": 46.05,
      "learning_rate": 0.001,
      "loss": 2.4751,
      "step": 239808
    },
    {
      "epoch": 46.05,
      "learning_rate": 0.001,
      "loss": 2.4877,
      "step": 239820
    },
    {
      "epoch": 46.05,
      "learning_rate": 0.001,
      "loss": 2.4794,
      "step": 239832
    },
    {
      "epoch": 46.05,
      "learning_rate": 0.001,
      "loss": 2.4694,
      "step": 239844
    },
    {
      "epoch": 46.06,
      "learning_rate": 0.001,
      "loss": 2.4722,
      "step": 239856
    },
    {
      "epoch": 46.06,
      "learning_rate": 0.001,
      "loss": 2.4858,
      "step": 239868
    },
    {
      "epoch": 46.06,
      "learning_rate": 0.001,
      "loss": 2.4742,
      "step": 239880
    },
    {
      "epoch": 46.06,
      "learning_rate": 0.001,
      "loss": 2.4774,
      "step": 239892
    },
    {
      "epoch": 46.06,
      "learning_rate": 0.001,
      "loss": 2.4725,
      "step": 239904
    },
    {
      "epoch": 46.07,
      "learning_rate": 0.001,
      "loss": 2.4771,
      "step": 239916
    },
    {
      "epoch": 46.07,
      "learning_rate": 0.001,
      "loss": 2.4736,
      "step": 239928
    },
    {
      "epoch": 46.07,
      "learning_rate": 0.001,
      "loss": 2.4808,
      "step": 239940
    },
    {
      "epoch": 46.07,
      "learning_rate": 0.001,
      "loss": 2.4802,
      "step": 239952
    },
    {
      "epoch": 46.08,
      "learning_rate": 0.001,
      "loss": 2.4843,
      "step": 239964
    },
    {
      "epoch": 46.08,
      "learning_rate": 0.001,
      "loss": 2.4763,
      "step": 239976
    },
    {
      "epoch": 46.08,
      "learning_rate": 0.001,
      "loss": 2.4768,
      "step": 239988
    },
    {
      "epoch": 46.08,
      "learning_rate": 0.001,
      "loss": 2.4736,
      "step": 240000
    },
    {
      "epoch": 46.08,
      "eval_ag_news_accuracy": 0.329625,
      "eval_ag_news_bleu_score": 4.809710955415112,
      "eval_ag_news_bleu_score_sem": 0.14954303122333842,
      "eval_ag_news_emb_cos_sim": 0.822744607925415,
      "eval_ag_news_emb_cos_sim_sem": 0.006417004541208896,
      "eval_ag_news_emb_top1_equal": 0.265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.453580141067505,
      "eval_ag_news_n_ngrams_match_1": 14.464,
      "eval_ag_news_n_ngrams_match_2": 3.244,
      "eval_ag_news_n_ngrams_match_3": 0.858,
      "eval_ag_news_num_pred_words": 46.574,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.61337027474918,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3610593843406519,
      "eval_ag_news_runtime": 30.2345,
      "eval_ag_news_samples_per_second": 16.537,
      "eval_ag_news_steps_per_second": 0.033,
      "eval_ag_news_token_set_f1": 0.36150224937473874,
      "eval_ag_news_token_set_f1_sem": 0.004347056964809516,
      "eval_ag_news_token_set_precision": 0.34791071771012694,
      "eval_ag_news_token_set_recall": 0.3912455918213762,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 240000
    },
    {
      "epoch": 46.08,
      "eval_anthropic_toxic_prompts_accuracy": 0.11715625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.19502595225729,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11500448056937196,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6891922950744629,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008879860902299053,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.1659159660339355,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.318,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.02,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.758,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.53,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 23.71045205814094,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21694025716983395,
      "eval_anthropic_toxic_prompts_runtime": 27.7102,
      "eval_anthropic_toxic_prompts_samples_per_second": 18.044,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.036,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35889453499554247,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006317587143558395,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4417790410242109,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3290858249843667,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 240000
    },
    {
      "epoch": 46.08,
      "eval_arxiv_accuracy": 0.35415625,
      "eval_arxiv_bleu_score": 4.537296894632079,
      "eval_arxiv_bleu_score_sem": 0.12948589511221464,
      "eval_arxiv_emb_cos_sim": 0.786674976348877,
      "eval_arxiv_emb_cos_sim_sem": 0.006466060180153037,
      "eval_arxiv_emb_top1_equal": 0.28125,
      "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.330310821533203,
      "eval_arxiv_n_ngrams_match_1": 15.936,
      "eval_arxiv_n_ngrams_match_2": 3.106,
      "eval_arxiv_n_ngrams_match_3": 0.708,
      "eval_arxiv_num_pred_words": 41.652,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.947026891141647,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3767400793242511,
      "eval_arxiv_runtime": 24.7446,
      "eval_arxiv_samples_per_second": 20.206,
      "eval_arxiv_steps_per_second": 0.04,
      "eval_arxiv_token_set_f1": 0.37091683662322,
      "eval_arxiv_token_set_f1_sem": 0.00419620446335893,
      "eval_arxiv_token_set_precision": 0.32662819259736514,
      "eval_arxiv_token_set_recall": 0.4422520115584296,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 240000
    },
    {
      "epoch": 46.08,
      "eval_python_code_alpaca_accuracy": 0.16303125,
      "eval_python_code_alpaca_bleu_score": 4.624069656542272,
      "eval_python_code_alpaca_bleu_score_sem": 0.14400423219514763,
      "eval_python_code_alpaca_emb_cos_sim": 0.7744783163070679,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.006863218494749795,
      "eval_python_code_alpaca_emb_top1_equal": 0.1328125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8359525203704834,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.912,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.934,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.992,
      "eval_python_code_alpaca_num_pred_words": 43.822,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.046629831852382,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3406593161608,
      "eval_python_code_alpaca_runtime": 24.784,
      "eval_python_code_alpaca_samples_per_second": 20.174,
      "eval_python_code_alpaca_steps_per_second": 0.04,
      "eval_python_code_alpaca_token_set_f1": 0.4862114037808474,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005301342399029125,
      "eval_python_code_alpaca_token_set_precision": 0.5399548248272075,
      "eval_python_code_alpaca_token_set_recall": 0.4665642576531167,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 240000
    },
    {
      "epoch": 46.08,
      "eval_wikibio_accuracy": 0.32915625,
      "eval_wikibio_bleu_score": 6.3564638604424015,
      "eval_wikibio_bleu_score_sem": 0.22570622608110016,
      "eval_wikibio_emb_cos_sim": 0.7566898465156555,
      "eval_wikibio_emb_cos_sim_sem": 0.00850022843269324,
      "eval_wikibio_emb_top1_equal": 0.21875,
      "eval_wikibio_emb_top1_equal_sem": 0.03668319712192295,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6335058212280273,
      "eval_wikibio_n_ngrams_match_1": 10.64,
      "eval_wikibio_n_ngrams_match_2": 3.644,
      "eval_wikibio_n_ngrams_match_3": 1.36,
      "eval_wikibio_num_pred_words": 37.316,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.84526304133383,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3729145717259592,
      "eval_wikibio_runtime": 27.4853,
      "eval_wikibio_samples_per_second": 18.192,
      "eval_wikibio_steps_per_second": 0.036,
      "eval_wikibio_token_set_f1": 0.33196457282792086,
      "eval_wikibio_token_set_f1_sem": 0.005130478320420156,
      "eval_wikibio_token_set_precision": 0.34511803631241433,
      "eval_wikibio_token_set_recall": 0.3327441539282741,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 240000
    },
    {
      "epoch": 46.08,
      "eval_nq_accuracy": 0.53975,
      "eval_nq_bleu_score": 12.389035135167148,
      "eval_nq_bleu_score_sem": 0.4980326902113494,
      "eval_nq_emb_cos_sim": 0.8414790630340576,
      "eval_nq_emb_cos_sim_sem": 0.007054549381921063,
      "eval_nq_emb_top1_equal": 0.28125,
      "eval_nq_emb_top1_equal_sem": 0.039896367485272234,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1096136569976807,
      "eval_nq_n_ngrams_match_1": 23.654,
      "eval_nq_n_ngrams_match_2": 8.878,
      "eval_nq_n_ngrams_match_3": 4.132,
      "eval_nq_num_pred_words": 49.342,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.24505524981566,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4582628348318293,
      "eval_nq_runtime": 25.4296,
      "eval_nq_samples_per_second": 19.662,
      "eval_nq_steps_per_second": 0.039,
      "eval_nq_token_set_f1": 0.470483692756976,
      "eval_nq_token_set_f1_sem": 0.004891242595918317,
      "eval_nq_token_set_precision": 0.43139297728319953,
      "eval_nq_token_set_recall": 0.5254170217681848,
      "eval_nq_true_num_tokens": 64.0,
      "step": 240000
    },
    {
      "epoch": 46.09,
      "learning_rate": 0.001,
      "loss": 2.4816,
      "step": 240012
    },
    {
      "epoch": 46.09,
      "learning_rate": 0.001,
      "loss": 2.4861,
      "step": 240024
    },
    {
      "epoch": 46.09,
      "learning_rate": 0.001,
      "loss": 2.4851,
      "step": 240036
    },
    {
      "epoch": 46.09,
      "learning_rate": 0.001,
      "loss": 2.4722,
      "step": 240048
    },
    {
      "epoch": 46.09,
      "learning_rate": 0.001,
      "loss": 2.469,
      "step": 240060
    },
    {
      "epoch": 46.1,
      "learning_rate": 0.001,
      "loss": 2.4772,
      "step": 240072
    },
    {
      "epoch": 46.1,
      "learning_rate": 0.001,
      "loss": 2.4727,
      "step": 240084
    },
    {
      "epoch": 46.1,
      "learning_rate": 0.001,
      "loss": 2.4746,
      "step": 240096
    },
    {
      "epoch": 46.1,
      "learning_rate": 0.001,
      "loss": 2.4833,
      "step": 240108
    },
    {
      "epoch": 46.11,
      "learning_rate": 0.001,
      "loss": 2.4811,
      "step": 240120
    },
    {
      "epoch": 46.11,
      "learning_rate": 0.001,
      "loss": 2.4839,
      "step": 240132
    },
    {
      "epoch": 46.11,
      "learning_rate": 0.001,
      "loss": 2.4885,
      "step": 240144
    },
    {
      "epoch": 46.11,
      "learning_rate": 0.001,
      "loss": 2.4737,
      "step": 240156
    },
    {
      "epoch": 46.12,
      "learning_rate": 0.001,
      "loss": 2.4775,
      "step": 240168
    },
    {
      "epoch": 46.12,
      "learning_rate": 0.001,
      "loss": 2.4817,
      "step": 240180
    },
    {
      "epoch": 46.12,
      "learning_rate": 0.001,
      "loss": 2.4759,
      "step": 240192
    },
    {
      "epoch": 46.12,
      "learning_rate": 0.001,
      "loss": 2.482,
      "step": 240204
    },
    {
      "epoch": 46.12,
      "learning_rate": 0.001,
      "loss": 2.4738,
      "step": 240216
    },
    {
      "epoch": 46.13,
      "learning_rate": 0.001,
      "loss": 2.4774,
      "step": 240228
    },
    {
      "epoch": 46.13,
      "learning_rate": 0.001,
      "loss": 2.4822,
      "step": 240240
    },
    {
      "epoch": 46.13,
      "learning_rate": 0.001,
      "loss": 2.4777,
      "step": 240252
    },
    {
      "epoch": 46.13,
      "learning_rate": 0.001,
      "loss": 2.4784,
      "step": 240264
    },
    {
      "epoch": 46.14,
      "learning_rate": 0.001,
      "loss": 2.4832,
      "step": 240276
    },
    {
      "epoch": 46.14,
      "learning_rate": 0.001,
      "loss": 2.4835,
      "step": 240288
    },
    {
      "epoch": 46.14,
      "learning_rate": 0.001,
      "loss": 2.4782,
      "step": 240300
    },
    {
      "epoch": 46.14,
      "learning_rate": 0.001,
      "loss": 2.4794,
      "step": 240312
    },
    {
      "epoch": 46.15,
      "learning_rate": 0.001,
      "loss": 2.4813,
      "step": 240324
    },
    {
      "epoch": 46.15,
      "learning_rate": 0.001,
      "loss": 2.4772,
      "step": 240336
    },
    {
      "epoch": 46.15,
      "learning_rate": 0.001,
      "loss": 2.4749,
      "step": 240348
    },
    {
      "epoch": 46.15,
      "learning_rate": 0.001,
      "loss": 2.4915,
      "step": 240360
    },
    {
      "epoch": 46.15,
      "learning_rate": 0.001,
      "loss": 2.4798,
      "step": 240372
    },
    {
      "epoch": 46.16,
      "learning_rate": 0.001,
      "loss": 2.4772,
      "step": 240384
    },
    {
      "epoch": 46.16,
      "learning_rate": 0.001,
      "loss": 2.4969,
      "step": 240396
    },
    {
      "epoch": 46.16,
      "learning_rate": 0.001,
      "loss": 2.478,
      "step": 240408
    },
    {
      "epoch": 46.16,
      "learning_rate": 0.001,
      "loss": 2.4791,
      "step": 240420
    },
    {
      "epoch": 46.17,
      "learning_rate": 0.001,
      "loss": 2.485,
      "step": 240432
    },
    {
      "epoch": 46.17,
      "learning_rate": 0.001,
      "loss": 2.4839,
      "step": 240444
    },
    {
      "epoch": 46.17,
      "learning_rate": 0.001,
      "loss": 2.4799,
      "step": 240456
    },
    {
      "epoch": 46.17,
      "learning_rate": 0.001,
      "loss": 2.4781,
      "step": 240468
    },
    {
      "epoch": 46.18,
      "learning_rate": 0.001,
      "loss": 2.4749,
      "step": 240480
    },
    {
      "epoch": 46.18,
      "learning_rate": 0.001,
      "loss": 2.4717,
      "step": 240492
    },
    {
      "epoch": 46.18,
      "learning_rate": 0.001,
      "loss": 2.4972,
      "step": 240504
    },
    {
      "epoch": 46.18,
      "learning_rate": 0.001,
      "loss": 2.4806,
      "step": 240516
    },
    {
      "epoch": 46.18,
      "learning_rate": 0.001,
      "loss": 2.4728,
      "step": 240528
    },
    {
      "epoch": 46.19,
      "learning_rate": 0.001,
      "loss": 2.4824,
      "step": 240540
    },
    {
      "epoch": 46.19,
      "learning_rate": 0.001,
      "loss": 2.4821,
      "step": 240552
    },
    {
      "epoch": 46.19,
      "learning_rate": 0.001,
      "loss": 2.4809,
      "step": 240564
    },
    {
      "epoch": 46.19,
      "learning_rate": 0.001,
      "loss": 2.4747,
      "step": 240576
    },
    {
      "epoch": 46.2,
      "learning_rate": 0.001,
      "loss": 2.4754,
      "step": 240588
    },
    {
      "epoch": 46.2,
      "learning_rate": 0.001,
      "loss": 2.489,
      "step": 240600
    },
    {
      "epoch": 46.2,
      "learning_rate": 0.001,
      "loss": 2.4813,
      "step": 240612
    },
    {
      "epoch": 46.2,
      "learning_rate": 0.001,
      "loss": 2.4796,
      "step": 240624
    },
    {
      "epoch": 46.2,
      "eval_ag_news_accuracy": 0.33040625,
      "eval_ag_news_bleu_score": 5.085781487073833,
      "eval_ag_news_bleu_score_sem": 0.1586855435254607,
      "eval_ag_news_emb_cos_sim": 0.8263951539993286,
      "eval_ag_news_emb_cos_sim_sem": 0.006545015056848609,
      "eval_ag_news_emb_top1_equal": 0.2421875,
      "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4564740657806396,
      "eval_ag_news_n_ngrams_match_1": 14.572,
      "eval_ag_news_n_ngrams_match_2": 3.324,
      "eval_ag_news_n_ngrams_match_3": 0.962,
      "eval_ag_news_num_pred_words": 46.846,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.704989493872922,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3627588435215151,
      "eval_ag_news_runtime": 31.2842,
      "eval_ag_news_samples_per_second": 15.983,
      "eval_ag_news_steps_per_second": 0.032,
      "eval_ag_news_token_set_f1": 0.36367627125920055,
      "eval_ag_news_token_set_f1_sem": 0.004495732384210909,
      "eval_ag_news_token_set_precision": 0.3484682179815899,
      "eval_ag_news_token_set_recall": 0.39595446271032897,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 240625
    },
    {
      "epoch": 46.2,
      "eval_anthropic_toxic_prompts_accuracy": 0.11765625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2539468859656058,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11598988557949785,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6961995363235474,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008616661735882615,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.185462474822998,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.454,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.036,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.772,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.32,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.178467753157012,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.22132556779911564,
      "eval_anthropic_toxic_prompts_runtime": 28.6646,
      "eval_anthropic_toxic_prompts_samples_per_second": 17.443,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.035,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3686151958594369,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006541165470605921,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.45025060429691766,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.34104932451098174,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 240625
    },
    {
      "epoch": 46.2,
      "eval_arxiv_accuracy": 0.35609375,
      "eval_arxiv_bleu_score": 4.506935303199835,
      "eval_arxiv_bleu_score_sem": 0.128245708450738,
      "eval_arxiv_emb_cos_sim": 0.7882487773895264,
      "eval_arxiv_emb_cos_sim_sem": 0.006432708193546252,
      "eval_arxiv_emb_top1_equal": 0.28125,
      "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3089725971221924,
      "eval_arxiv_n_ngrams_match_1": 15.716,
      "eval_arxiv_n_ngrams_match_2": 3.142,
      "eval_arxiv_n_ngrams_match_3": 0.688,
      "eval_arxiv_num_pred_words": 41.036,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.357004363511724,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3750472274439439,
      "eval_arxiv_runtime": 24.8541,
      "eval_arxiv_samples_per_second": 20.117,
      "eval_arxiv_steps_per_second": 0.04,
      "eval_arxiv_token_set_f1": 0.36817400991817184,
      "eval_arxiv_token_set_f1_sem": 0.004325866274308996,
      "eval_arxiv_token_set_precision": 0.32086655016617605,
      "eval_arxiv_token_set_recall": 0.44688507774430586,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 240625
    },
    {
      "epoch": 46.2,
      "eval_python_code_alpaca_accuracy": 0.16146875,
      "eval_python_code_alpaca_bleu_score": 4.659948408359692,
      "eval_python_code_alpaca_bleu_score_sem": 0.14826534153121193,
      "eval_python_code_alpaca_emb_cos_sim": 0.7675008773803711,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007611677377129935,
      "eval_python_code_alpaca_emb_top1_equal": 0.1953125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.035178457165496856,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8577992916107178,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.086,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.938,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.95,
      "eval_python_code_alpaca_num_pred_words": 43.444,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.4231414608723,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.349076057560282,
      "eval_python_code_alpaca_runtime": 14.1274,
      "eval_python_code_alpaca_samples_per_second": 35.392,
      "eval_python_code_alpaca_steps_per_second": 0.071,
      "eval_python_code_alpaca_token_set_f1": 0.48991908772605924,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005347459449312268,
      "eval_python_code_alpaca_token_set_precision": 0.5498679956345386,
      "eval_python_code_alpaca_token_set_recall": 0.4621258316501458,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 240625
    },
    {
      "epoch": 46.2,
      "eval_wikibio_accuracy": 0.33275,
      "eval_wikibio_bleu_score": 6.060103788040271,
      "eval_wikibio_bleu_score_sem": 0.22060552009294668,
      "eval_wikibio_emb_cos_sim": 0.7326762676239014,
      "eval_wikibio_emb_cos_sim_sem": 0.010217672996841691,
      "eval_wikibio_emb_top1_equal": 0.171875,
      "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.607383966445923,
      "eval_wikibio_n_ngrams_match_1": 10.012,
      "eval_wikibio_n_ngrams_match_2": 3.42,
      "eval_wikibio_n_ngrams_match_3": 1.26,
      "eval_wikibio_num_pred_words": 35.81,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 36.86947476109043,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3536812163252703,
      "eval_wikibio_runtime": 17.8396,
      "eval_wikibio_samples_per_second": 28.028,
      "eval_wikibio_steps_per_second": 0.056,
      "eval_wikibio_token_set_f1": 0.3186639345920932,
      "eval_wikibio_token_set_f1_sem": 0.005847377785073911,
      "eval_wikibio_token_set_precision": 0.32633156129899715,
      "eval_wikibio_token_set_recall": 0.3272993497343292,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 240625
    },
    {
      "epoch": 46.2,
      "eval_nq_accuracy": 0.5389375,
      "eval_nq_bleu_score": 12.255174826241957,
      "eval_nq_bleu_score_sem": 0.49766489291864496,
      "eval_nq_emb_cos_sim": 0.8423871994018555,
      "eval_nq_emb_cos_sim_sem": 0.007154197044146467,
      "eval_nq_emb_top1_equal": 0.3046875,
      "eval_nq_emb_top1_equal_sem": 0.04084279867618665,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1075503826141357,
      "eval_nq_n_ngrams_match_1": 23.664,
      "eval_nq_n_ngrams_match_2": 8.878,
      "eval_nq_n_ngrams_match_3": 4.13,
      "eval_nq_num_pred_words": 49.012,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.228060976481084,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4611815942826812,
      "eval_nq_runtime": 20.4638,
      "eval_nq_samples_per_second": 24.433,
      "eval_nq_steps_per_second": 0.049,
      "eval_nq_token_set_f1": 0.47574247881999204,
      "eval_nq_token_set_f1_sem": 0.005002256807773097,
      "eval_nq_token_set_precision": 0.433061993703655,
      "eval_nq_token_set_recall": 0.5362434656844651,
      "eval_nq_true_num_tokens": 64.0,
      "step": 240625
    },
    {
      "epoch": 46.21,
      "learning_rate": 0.001,
      "loss": 2.4792,
      "step": 240636
    },
    {
      "epoch": 46.21,
      "learning_rate": 0.001,
      "loss": 2.4857,
      "step": 240648
    },
    {
      "epoch": 46.21,
      "learning_rate": 0.001,
      "loss": 2.4723,
      "step": 240660
    },
    {
      "epoch": 46.21,
      "learning_rate": 0.001,
      "loss": 2.4809,
      "step": 240672
    },
    {
      "epoch": 46.21,
      "learning_rate": 0.001,
      "loss": 2.485,
      "step": 240684
    },
    {
      "epoch": 46.22,
      "learning_rate": 0.001,
      "loss": 2.4881,
      "step": 240696
    },
    {
      "epoch": 46.22,
      "learning_rate": 0.001,
      "loss": 2.4859,
      "step": 240708
    },
    {
      "epoch": 46.22,
      "learning_rate": 0.001,
      "loss": 2.4805,
      "step": 240720
    },
    {
      "epoch": 46.22,
      "learning_rate": 0.001,
      "loss": 2.4856,
      "step": 240732
    },
    {
      "epoch": 46.23,
      "learning_rate": 0.001,
      "loss": 2.4898,
      "step": 240744
    },
    {
      "epoch": 46.23,
      "learning_rate": 0.001,
      "loss": 2.4714,
      "step": 240756
    },
    {
      "epoch": 46.23,
      "learning_rate": 0.001,
      "loss": 2.478,
      "step": 240768
    },
    {
      "epoch": 46.23,
      "learning_rate": 0.001,
      "loss": 2.483,
      "step": 240780
    },
    {
      "epoch": 46.24,
      "learning_rate": 0.001,
      "loss": 2.4745,
      "step": 240792
    },
    {
      "epoch": 46.24,
      "learning_rate": 0.001,
      "loss": 2.4821,
      "step": 240804
    },
    {
      "epoch": 46.24,
      "learning_rate": 0.001,
      "loss": 2.4792,
      "step": 240816
    },
    {
      "epoch": 46.24,
      "learning_rate": 0.001,
      "loss": 2.4891,
      "step": 240828
    },
    {
      "epoch": 46.24,
      "learning_rate": 0.001,
      "loss": 2.4857,
      "step": 240840
    },
    {
      "epoch": 46.25,
      "learning_rate": 0.001,
      "loss": 2.485,
      "step": 240852
    },
    {
      "epoch": 46.25,
      "learning_rate": 0.001,
      "loss": 2.475,
      "step": 240864
    },
    {
      "epoch": 46.25,
      "learning_rate": 0.001,
      "loss": 2.4888,
      "step": 240876
    },
    {
      "epoch": 46.25,
      "learning_rate": 0.001,
      "loss": 2.4757,
      "step": 240888
    },
    {
      "epoch": 46.26,
      "learning_rate": 0.001,
      "loss": 2.4821,
      "step": 240900
    },
    {
      "epoch": 46.26,
      "learning_rate": 0.001,
      "loss": 2.4887,
      "step": 240912
    },
    {
      "epoch": 46.26,
      "learning_rate": 0.001,
      "loss": 2.4755,
      "step": 240924
    },
    {
      "epoch": 46.26,
      "learning_rate": 0.001,
      "loss": 2.4799,
      "step": 240936
    },
    {
      "epoch": 46.26,
      "learning_rate": 0.001,
      "loss": 2.4949,
      "step": 240948
    },
    {
      "epoch": 46.27,
      "learning_rate": 0.001,
      "loss": 2.4895,
      "step": 240960
    },
    {
      "epoch": 46.27,
      "learning_rate": 0.001,
      "loss": 2.4812,
      "step": 240972
    },
    {
      "epoch": 46.27,
      "learning_rate": 0.001,
      "loss": 2.4761,
      "step": 240984
    },
    {
      "epoch": 46.27,
      "learning_rate": 0.001,
      "loss": 2.4805,
      "step": 240996
    },
    {
      "epoch": 46.28,
      "learning_rate": 0.001,
      "loss": 2.4819,
      "step": 241008
    },
    {
      "epoch": 46.28,
      "learning_rate": 0.001,
      "loss": 2.4885,
      "step": 241020
    },
    {
      "epoch": 46.28,
      "learning_rate": 0.001,
      "loss": 2.4805,
      "step": 241032
    },
    {
      "epoch": 46.28,
      "learning_rate": 0.001,
      "loss": 2.4764,
      "step": 241044
    },
    {
      "epoch": 46.29,
      "learning_rate": 0.001,
      "loss": 2.4801,
      "step": 241056
    },
    {
      "epoch": 46.29,
      "learning_rate": 0.001,
      "loss": 2.4761,
      "step": 241068
    },
    {
      "epoch": 46.29,
      "learning_rate": 0.001,
      "loss": 2.4943,
      "step": 241080
    },
    {
      "epoch": 46.29,
      "learning_rate": 0.001,
      "loss": 2.4812,
      "step": 241092
    },
    {
      "epoch": 46.29,
      "learning_rate": 0.001,
      "loss": 2.4956,
      "step": 241104
    },
    {
      "epoch": 46.3,
      "learning_rate": 0.001,
      "loss": 2.4805,
      "step": 241116
    },
    {
      "epoch": 46.3,
      "learning_rate": 0.001,
      "loss": 2.4837,
      "step": 241128
    },
    {
      "epoch": 46.3,
      "learning_rate": 0.001,
      "loss": 2.4968,
      "step": 241140
    },
    {
      "epoch": 46.3,
      "learning_rate": 0.001,
      "loss": 2.4824,
      "step": 241152
    },
    {
      "epoch": 46.31,
      "learning_rate": 0.001,
      "loss": 2.479,
      "step": 241164
    },
    {
      "epoch": 46.31,
      "learning_rate": 0.001,
      "loss": 2.483,
      "step": 241176
    },
    {
      "epoch": 46.31,
      "learning_rate": 0.001,
      "loss": 2.4789,
      "step": 241188
    },
    {
      "epoch": 46.31,
      "learning_rate": 0.001,
      "loss": 2.4868,
      "step": 241200
    },
    {
      "epoch": 46.32,
      "learning_rate": 0.001,
      "loss": 2.4934,
      "step": 241212
    },
    {
      "epoch": 46.32,
      "learning_rate": 0.001,
      "loss": 2.4854,
      "step": 241224
    },
    {
      "epoch": 46.32,
      "learning_rate": 0.001,
      "loss": 2.4833,
      "step": 241236
    },
    {
      "epoch": 46.32,
      "learning_rate": 0.001,
      "loss": 2.489,
      "step": 241248
    },
    {
      "epoch": 46.32,
      "eval_ag_news_accuracy": 0.32990625,
      "eval_ag_news_bleu_score": 5.064259546913567,
      "eval_ag_news_bleu_score_sem": 0.16276135782846404,
      "eval_ag_news_emb_cos_sim": 0.8233256340026855,
      "eval_ag_news_emb_cos_sim_sem": 0.00726704720666791,
      "eval_ag_news_emb_top1_equal": 0.3203125,
      "eval_ag_news_emb_top1_equal_sem": 0.041403754790620424,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.457628011703491,
      "eval_ag_news_n_ngrams_match_1": 14.404,
      "eval_ag_news_n_ngrams_match_2": 3.306,
      "eval_ag_news_n_ngrams_match_3": 0.972,
      "eval_ag_news_num_pred_words": 46.488,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.741596454397726,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.36044058466982654,
      "eval_ag_news_runtime": 30.2171,
      "eval_ag_news_samples_per_second": 16.547,
      "eval_ag_news_steps_per_second": 0.033,
      "eval_ag_news_token_set_f1": 0.3571753756234547,
      "eval_ag_news_token_set_f1_sem": 0.004507353959461323,
      "eval_ag_news_token_set_precision": 0.3449015915465036,
      "eval_ag_news_token_set_recall": 0.3851819490431336,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 241250
    },
    {
      "epoch": 46.32,
      "eval_anthropic_toxic_prompts_accuracy": 0.11678125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2929250900653,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11955546534354838,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6932997703552246,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008553027897165262,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.176013708114624,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.548,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.05,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.788,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.302,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 23.95108697763756,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.22495408032910458,
      "eval_anthropic_toxic_prompts_runtime": 11.014,
      "eval_anthropic_toxic_prompts_samples_per_second": 45.397,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.091,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3652003227967441,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006446262059315402,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4611348657393043,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3275224839263923,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 241250
    },
    {
      "epoch": 46.32,
      "eval_arxiv_accuracy": 0.355875,
      "eval_arxiv_bleu_score": 4.420447042734797,
      "eval_arxiv_bleu_score_sem": 0.1196629610718991,
      "eval_arxiv_emb_cos_sim": 0.780036211013794,
      "eval_arxiv_emb_cos_sim_sem": 0.006999685380416514,
      "eval_arxiv_emb_top1_equal": 0.28125,
      "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3037800788879395,
      "eval_arxiv_n_ngrams_match_1": 15.67,
      "eval_arxiv_n_ngrams_match_2": 3.066,
      "eval_arxiv_n_ngrams_match_3": 0.67,
      "eval_arxiv_num_pred_words": 40.562,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.21532078514388,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3762368929864656,
      "eval_arxiv_runtime": 11.562,
      "eval_arxiv_samples_per_second": 43.245,
      "eval_arxiv_steps_per_second": 0.086,
      "eval_arxiv_token_set_f1": 0.36768080061744435,
      "eval_arxiv_token_set_f1_sem": 0.0041402226800557025,
      "eval_arxiv_token_set_precision": 0.32126840310119836,
      "eval_arxiv_token_set_recall": 0.44616519755619827,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 241250
    },
    {
      "epoch": 46.32,
      "eval_python_code_alpaca_accuracy": 0.1625625,
      "eval_python_code_alpaca_bleu_score": 4.74742940138566,
      "eval_python_code_alpaca_bleu_score_sem": 0.1499882542581223,
      "eval_python_code_alpaca_emb_cos_sim": 0.7822372317314148,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.006967462139096411,
      "eval_python_code_alpaca_emb_top1_equal": 0.1875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.034634623208270626,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8513922691345215,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.074,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.936,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.986,
      "eval_python_code_alpaca_num_pred_words": 43.14,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.31186784884083,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.35139482803010574,
      "eval_python_code_alpaca_runtime": 11.2267,
      "eval_python_code_alpaca_samples_per_second": 44.537,
      "eval_python_code_alpaca_steps_per_second": 0.089,
      "eval_python_code_alpaca_token_set_f1": 0.4836683448111049,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0052867670675411085,
      "eval_python_code_alpaca_token_set_precision": 0.553907183624986,
      "eval_python_code_alpaca_token_set_recall": 0.4498540024236263,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 241250
    },
    {
      "epoch": 46.32,
      "eval_wikibio_accuracy": 0.332,
      "eval_wikibio_bleu_score": 6.361706537993236,
      "eval_wikibio_bleu_score_sem": 0.22598514869935404,
      "eval_wikibio_emb_cos_sim": 0.7513222098350525,
      "eval_wikibio_emb_cos_sim_sem": 0.008775885285567185,
      "eval_wikibio_emb_top1_equal": 0.1953125,
      "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.631347417831421,
      "eval_wikibio_n_ngrams_match_1": 10.454,
      "eval_wikibio_n_ngrams_match_2": 3.576,
      "eval_wikibio_n_ngrams_match_3": 1.364,
      "eval_wikibio_num_pred_words": 36.522,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.76366578861179,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36665510897863574,
      "eval_wikibio_runtime": 12.4778,
      "eval_wikibio_samples_per_second": 40.071,
      "eval_wikibio_steps_per_second": 0.08,
      "eval_wikibio_token_set_f1": 0.3300539736679318,
      "eval_wikibio_token_set_f1_sem": 0.005207958393225082,
      "eval_wikibio_token_set_precision": 0.3416930416172614,
      "eval_wikibio_token_set_recall": 0.33454915588064005,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 241250
    },
    {
      "epoch": 46.32,
      "eval_nq_accuracy": 0.53828125,
      "eval_nq_bleu_score": 12.240064989747756,
      "eval_nq_bleu_score_sem": 0.48635335223747905,
      "eval_nq_emb_cos_sim": 0.8417385816574097,
      "eval_nq_emb_cos_sim_sem": 0.007237234238247025,
      "eval_nq_emb_top1_equal": 0.359375,
      "eval_nq_emb_top1_equal_sem": 0.04257689651385297,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1077377796173096,
      "eval_nq_n_ngrams_match_1": 23.778,
      "eval_nq_n_ngrams_match_2": 8.82,
      "eval_nq_n_ngrams_match_3": 4.116,
      "eval_nq_num_pred_words": 49.168,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.229603034934062,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.46107012518998153,
      "eval_nq_runtime": 11.9501,
      "eval_nq_samples_per_second": 41.841,
      "eval_nq_steps_per_second": 0.084,
      "eval_nq_token_set_f1": 0.47387866590384453,
      "eval_nq_token_set_f1_sem": 0.004848162849187167,
      "eval_nq_token_set_precision": 0.4327174058177407,
      "eval_nq_token_set_recall": 0.5316999989137778,
      "eval_nq_true_num_tokens": 64.0,
      "step": 241250
    },
    {
      "epoch": 46.32,
      "learning_rate": 0.001,
      "loss": 2.477,
      "step": 241260
    },
    {
      "epoch": 46.33,
      "learning_rate": 0.001,
      "loss": 2.4926,
      "step": 241272
    },
    {
      "epoch": 46.33,
      "learning_rate": 0.001,
      "loss": 2.4701,
      "step": 241284
    },
    {
      "epoch": 46.33,
      "learning_rate": 0.001,
      "loss": 2.4839,
      "step": 241296
    },
    {
      "epoch": 46.33,
      "learning_rate": 0.001,
      "loss": 2.4764,
      "step": 241308
    },
    {
      "epoch": 46.34,
      "learning_rate": 0.001,
      "loss": 2.4872,
      "step": 241320
    },
    {
      "epoch": 46.34,
      "learning_rate": 0.001,
      "loss": 2.4888,
      "step": 241332
    },
    {
      "epoch": 46.34,
      "learning_rate": 0.001,
      "loss": 2.4745,
      "step": 241344
    },
    {
      "epoch": 46.34,
      "learning_rate": 0.001,
      "loss": 2.473,
      "step": 241356
    },
    {
      "epoch": 46.35,
      "learning_rate": 0.001,
      "loss": 2.485,
      "step": 241368
    },
    {
      "epoch": 46.35,
      "learning_rate": 0.001,
      "loss": 2.4851,
      "step": 241380
    },
    {
      "epoch": 46.35,
      "learning_rate": 0.001,
      "loss": 2.4865,
      "step": 241392
    },
    {
      "epoch": 46.35,
      "learning_rate": 0.001,
      "loss": 2.4848,
      "step": 241404
    },
    {
      "epoch": 46.35,
      "learning_rate": 0.001,
      "loss": 2.4911,
      "step": 241416
    },
    {
      "epoch": 46.36,
      "learning_rate": 0.001,
      "loss": 2.4852,
      "step": 241428
    },
    {
      "epoch": 46.36,
      "learning_rate": 0.001,
      "loss": 2.4908,
      "step": 241440
    },
    {
      "epoch": 46.36,
      "learning_rate": 0.001,
      "loss": 2.4807,
      "step": 241452
    },
    {
      "epoch": 46.36,
      "learning_rate": 0.001,
      "loss": 2.4885,
      "step": 241464
    },
    {
      "epoch": 46.37,
      "learning_rate": 0.001,
      "loss": 2.4839,
      "step": 241476
    },
    {
      "epoch": 46.37,
      "learning_rate": 0.001,
      "loss": 2.4878,
      "step": 241488
    },
    {
      "epoch": 46.37,
      "learning_rate": 0.001,
      "loss": 2.4706,
      "step": 241500
    },
    {
      "epoch": 46.37,
      "learning_rate": 0.001,
      "loss": 2.4814,
      "step": 241512
    },
    {
      "epoch": 46.38,
      "learning_rate": 0.001,
      "loss": 2.4883,
      "step": 241524
    },
    {
      "epoch": 46.38,
      "learning_rate": 0.001,
      "loss": 2.4772,
      "step": 241536
    },
    {
      "epoch": 46.38,
      "learning_rate": 0.001,
      "loss": 2.4806,
      "step": 241548
    },
    {
      "epoch": 46.38,
      "learning_rate": 0.001,
      "loss": 2.4855,
      "step": 241560
    },
    {
      "epoch": 46.38,
      "learning_rate": 0.001,
      "loss": 2.4809,
      "step": 241572
    },
    {
      "epoch": 46.39,
      "learning_rate": 0.001,
      "loss": 2.4861,
      "step": 241584
    },
    {
      "epoch": 46.39,
      "learning_rate": 0.001,
      "loss": 2.4793,
      "step": 241596
    },
    {
      "epoch": 46.39,
      "learning_rate": 0.001,
      "loss": 2.4727,
      "step": 241608
    },
    {
      "epoch": 46.39,
      "learning_rate": 0.001,
      "loss": 2.4844,
      "step": 241620
    },
    {
      "epoch": 46.4,
      "learning_rate": 0.001,
      "loss": 2.4898,
      "step": 241632
    },
    {
      "epoch": 46.4,
      "learning_rate": 0.001,
      "loss": 2.4802,
      "step": 241644
    },
    {
      "epoch": 46.4,
      "learning_rate": 0.001,
      "loss": 2.4825,
      "step": 241656
    },
    {
      "epoch": 46.4,
      "learning_rate": 0.001,
      "loss": 2.4816,
      "step": 241668
    },
    {
      "epoch": 46.41,
      "learning_rate": 0.001,
      "loss": 2.4842,
      "step": 241680
    },
    {
      "epoch": 46.41,
      "learning_rate": 0.001,
      "loss": 2.4838,
      "step": 241692
    },
    {
      "epoch": 46.41,
      "learning_rate": 0.001,
      "loss": 2.4842,
      "step": 241704
    },
    {
      "epoch": 46.41,
      "learning_rate": 0.001,
      "loss": 2.4668,
      "step": 241716
    },
    {
      "epoch": 46.41,
      "learning_rate": 0.001,
      "loss": 2.4827,
      "step": 241728
    },
    {
      "epoch": 46.42,
      "learning_rate": 0.001,
      "loss": 2.4768,
      "step": 241740
    },
    {
      "epoch": 46.42,
      "learning_rate": 0.001,
      "loss": 2.4882,
      "step": 241752
    },
    {
      "epoch": 46.42,
      "learning_rate": 0.001,
      "loss": 2.4875,
      "step": 241764
    },
    {
      "epoch": 46.42,
      "learning_rate": 0.001,
      "loss": 2.4724,
      "step": 241776
    },
    {
      "epoch": 46.43,
      "learning_rate": 0.001,
      "loss": 2.4958,
      "step": 241788
    },
    {
      "epoch": 46.43,
      "learning_rate": 0.001,
      "loss": 2.4832,
      "step": 241800
    },
    {
      "epoch": 46.43,
      "learning_rate": 0.001,
      "loss": 2.4827,
      "step": 241812
    },
    {
      "epoch": 46.43,
      "learning_rate": 0.001,
      "loss": 2.4975,
      "step": 241824
    },
    {
      "epoch": 46.44,
      "learning_rate": 0.001,
      "loss": 2.4817,
      "step": 241836
    },
    {
      "epoch": 46.44,
      "learning_rate": 0.001,
      "loss": 2.4834,
      "step": 241848
    },
    {
      "epoch": 46.44,
      "learning_rate": 0.001,
      "loss": 2.491,
      "step": 241860
    },
    {
      "epoch": 46.44,
      "learning_rate": 0.001,
      "loss": 2.4976,
      "step": 241872
    },
    {
      "epoch": 46.44,
      "eval_ag_news_accuracy": 0.3296875,
      "eval_ag_news_bleu_score": 5.084642026614604,
      "eval_ag_news_bleu_score_sem": 0.16016679319617716,
      "eval_ag_news_emb_cos_sim": 0.8224168419837952,
      "eval_ag_news_emb_cos_sim_sem": 0.007245111747651514,
      "eval_ag_news_emb_top1_equal": 0.2890625,
      "eval_ag_news_emb_top1_equal_sem": 0.04022626667363519,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4650425910949707,
      "eval_ag_news_n_ngrams_match_1": 14.488,
      "eval_ag_news_n_ngrams_match_2": 3.264,
      "eval_ag_news_n_ngrams_match_3": 0.952,
      "eval_ag_news_num_pred_words": 46.772,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.977821714568645,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3618038231855376,
      "eval_ag_news_runtime": 30.9142,
      "eval_ag_news_samples_per_second": 16.174,
      "eval_ag_news_steps_per_second": 0.032,
      "eval_ag_news_token_set_f1": 0.3587320573823345,
      "eval_ag_news_token_set_f1_sem": 0.004469019027870232,
      "eval_ag_news_token_set_precision": 0.3454440933379695,
      "eval_ag_news_token_set_recall": 0.38878451831262495,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 241875
    },
    {
      "epoch": 46.44,
      "eval_anthropic_toxic_prompts_accuracy": 0.11746875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.440615900928326,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.13112100901090346,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6902843713760376,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009069573087069698,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.190671443939209,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.562,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.154,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.852,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.618,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.30474123678018,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.22370955471489626,
      "eval_anthropic_toxic_prompts_runtime": 29.1316,
      "eval_anthropic_toxic_prompts_samples_per_second": 17.163,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.034,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3627563001365179,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0065597484170451985,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4602601738777415,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3241935636421385,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 241875
    },
    {
      "epoch": 46.44,
      "eval_arxiv_accuracy": 0.3535625,
      "eval_arxiv_bleu_score": 4.587565821380598,
      "eval_arxiv_bleu_score_sem": 0.1362682273078973,
      "eval_arxiv_emb_cos_sim": 0.7883133292198181,
      "eval_arxiv_emb_cos_sim_sem": 0.006566388777399696,
      "eval_arxiv_emb_top1_equal": 0.3359375,
      "eval_arxiv_emb_top1_equal_sem": 0.04191137143408563,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.322441339492798,
      "eval_arxiv_n_ngrams_match_1": 15.65,
      "eval_arxiv_n_ngrams_match_2": 3.2,
      "eval_arxiv_n_ngrams_match_3": 0.742,
      "eval_arxiv_num_pred_words": 40.994,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.727961361600936,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3711320883415423,
      "eval_arxiv_runtime": 30.6795,
      "eval_arxiv_samples_per_second": 16.298,
      "eval_arxiv_steps_per_second": 0.033,
      "eval_arxiv_token_set_f1": 0.3662087807138172,
      "eval_arxiv_token_set_f1_sem": 0.004321891433179117,
      "eval_arxiv_token_set_precision": 0.31922207319930457,
      "eval_arxiv_token_set_recall": 0.44744889708585134,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 241875
    },
    {
      "epoch": 46.44,
      "eval_python_code_alpaca_accuracy": 0.16246875,
      "eval_python_code_alpaca_bleu_score": 4.687255340246557,
      "eval_python_code_alpaca_bleu_score_sem": 0.15019681958371742,
      "eval_python_code_alpaca_emb_cos_sim": 0.7704471945762634,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009157781831191444,
      "eval_python_code_alpaca_emb_top1_equal": 0.1484375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8582279682159424,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.046,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.006,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.01,
      "eval_python_code_alpaca_num_pred_words": 43.156,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.43061195510475,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3463634712786531,
      "eval_python_code_alpaca_runtime": 29.1269,
      "eval_python_code_alpaca_samples_per_second": 17.166,
      "eval_python_code_alpaca_steps_per_second": 0.034,
      "eval_python_code_alpaca_token_set_f1": 0.4735165123242745,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0056185112210383826,
      "eval_python_code_alpaca_token_set_precision": 0.5495388137409534,
      "eval_python_code_alpaca_token_set_recall": 0.44209822638636487,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 241875
    },
    {
      "epoch": 46.44,
      "eval_wikibio_accuracy": 0.32709375,
      "eval_wikibio_bleu_score": 5.958341045771376,
      "eval_wikibio_bleu_score_sem": 0.2174500917069693,
      "eval_wikibio_emb_cos_sim": 0.7273484468460083,
      "eval_wikibio_emb_cos_sim_sem": 0.010700433536397599,
      "eval_wikibio_emb_top1_equal": 0.1875,
      "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6463050842285156,
      "eval_wikibio_n_ngrams_match_1": 10.022,
      "eval_wikibio_n_ngrams_match_2": 3.414,
      "eval_wikibio_n_ngrams_match_3": 1.254,
      "eval_wikibio_num_pred_words": 35.822,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 38.33276771128574,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3530682279269266,
      "eval_wikibio_runtime": 11.6722,
      "eval_wikibio_samples_per_second": 42.837,
      "eval_wikibio_steps_per_second": 0.086,
      "eval_wikibio_token_set_f1": 0.315965979692545,
      "eval_wikibio_token_set_f1_sem": 0.005772811421930752,
      "eval_wikibio_token_set_precision": 0.3239042816365887,
      "eval_wikibio_token_set_recall": 0.3253844288547698,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 241875
    },
    {
      "epoch": 46.44,
      "eval_nq_accuracy": 0.53840625,
      "eval_nq_bleu_score": 12.358487248963987,
      "eval_nq_bleu_score_sem": 0.4884760344559198,
      "eval_nq_emb_cos_sim": 0.8379926085472107,
      "eval_nq_emb_cos_sim_sem": 0.007062561892949688,
      "eval_nq_emb_top1_equal": 0.2890625,
      "eval_nq_emb_top1_equal_sem": 0.04022626667363519,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.106782913208008,
      "eval_nq_n_ngrams_match_1": 23.658,
      "eval_nq_n_ngrams_match_2": 8.88,
      "eval_nq_n_ngrams_match_3": 4.182,
      "eval_nq_num_pred_words": 48.95,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.221748613992261,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4587327511095819,
      "eval_nq_runtime": 16.9333,
      "eval_nq_samples_per_second": 29.528,
      "eval_nq_steps_per_second": 0.059,
      "eval_nq_token_set_f1": 0.4746710993493959,
      "eval_nq_token_set_f1_sem": 0.004854345292045013,
      "eval_nq_token_set_precision": 0.4326281716859562,
      "eval_nq_token_set_recall": 0.5336436845156283,
      "eval_nq_true_num_tokens": 64.0,
      "step": 241875
    },
    {
      "epoch": 46.44,
      "learning_rate": 0.001,
      "loss": 2.4867,
      "step": 241884
    },
    {
      "epoch": 46.45,
      "learning_rate": 0.001,
      "loss": 2.4897,
      "step": 241896
    },
    {
      "epoch": 46.45,
      "learning_rate": 0.001,
      "loss": 2.49,
      "step": 241908
    },
    {
      "epoch": 46.45,
      "learning_rate": 0.001,
      "loss": 2.4809,
      "step": 241920
    },
    {
      "epoch": 46.45,
      "learning_rate": 0.001,
      "loss": 2.4878,
      "step": 241932
    },
    {
      "epoch": 46.46,
      "learning_rate": 0.001,
      "loss": 2.4774,
      "step": 241944
    },
    {
      "epoch": 46.46,
      "learning_rate": 0.001,
      "loss": 2.49,
      "step": 241956
    },
    {
      "epoch": 46.46,
      "learning_rate": 0.001,
      "loss": 2.4769,
      "step": 241968
    },
    {
      "epoch": 46.46,
      "learning_rate": 0.001,
      "loss": 2.4755,
      "step": 241980
    },
    {
      "epoch": 46.47,
      "learning_rate": 0.001,
      "loss": 2.4801,
      "step": 241992
    },
    {
      "epoch": 46.47,
      "learning_rate": 0.001,
      "loss": 2.4873,
      "step": 242004
    },
    {
      "epoch": 46.47,
      "learning_rate": 0.001,
      "loss": 2.489,
      "step": 242016
    },
    {
      "epoch": 46.47,
      "learning_rate": 0.001,
      "loss": 2.4887,
      "step": 242028
    },
    {
      "epoch": 46.47,
      "learning_rate": 0.001,
      "loss": 2.4853,
      "step": 242040
    },
    {
      "epoch": 46.48,
      "learning_rate": 0.001,
      "loss": 2.479,
      "step": 242052
    },
    {
      "epoch": 46.48,
      "learning_rate": 0.001,
      "loss": 2.4902,
      "step": 242064
    },
    {
      "epoch": 46.48,
      "learning_rate": 0.001,
      "loss": 2.4854,
      "step": 242076
    },
    {
      "epoch": 46.48,
      "learning_rate": 0.001,
      "loss": 2.4807,
      "step": 242088
    },
    {
      "epoch": 46.49,
      "learning_rate": 0.001,
      "loss": 2.4915,
      "step": 242100
    },
    {
      "epoch": 46.49,
      "learning_rate": 0.001,
      "loss": 2.487,
      "step": 242112
    },
    {
      "epoch": 46.49,
      "learning_rate": 0.001,
      "loss": 2.4856,
      "step": 242124
    },
    {
      "epoch": 46.49,
      "learning_rate": 0.001,
      "loss": 2.4745,
      "step": 242136
    },
    {
      "epoch": 46.5,
      "learning_rate": 0.001,
      "loss": 2.479,
      "step": 242148
    },
    {
      "epoch": 46.5,
      "learning_rate": 0.001,
      "loss": 2.4828,
      "step": 242160
    },
    {
      "epoch": 46.5,
      "learning_rate": 0.001,
      "loss": 2.4783,
      "step": 242172
    },
    {
      "epoch": 46.5,
      "learning_rate": 0.001,
      "loss": 2.4827,
      "step": 242184
    },
    {
      "epoch": 46.5,
      "learning_rate": 0.001,
      "loss": 2.48,
      "step": 242196
    },
    {
      "epoch": 46.51,
      "learning_rate": 0.001,
      "loss": 2.481,
      "step": 242208
    },
    {
      "epoch": 46.51,
      "learning_rate": 0.001,
      "loss": 2.4758,
      "step": 242220
    },
    {
      "epoch": 46.51,
      "learning_rate": 0.001,
      "loss": 2.4822,
      "step": 242232
    },
    {
      "epoch": 46.51,
      "learning_rate": 0.001,
      "loss": 2.4742,
      "step": 242244
    },
    {
      "epoch": 46.52,
      "learning_rate": 0.001,
      "loss": 2.4767,
      "step": 242256
    },
    {
      "epoch": 46.52,
      "learning_rate": 0.001,
      "loss": 2.4853,
      "step": 242268
    },
    {
      "epoch": 46.52,
      "learning_rate": 0.001,
      "loss": 2.4828,
      "step": 242280
    },
    {
      "epoch": 46.52,
      "learning_rate": 0.001,
      "loss": 2.471,
      "step": 242292
    },
    {
      "epoch": 46.53,
      "learning_rate": 0.001,
      "loss": 2.4787,
      "step": 242304
    },
    {
      "epoch": 46.53,
      "learning_rate": 0.001,
      "loss": 2.4782,
      "step": 242316
    },
    {
      "epoch": 46.53,
      "learning_rate": 0.001,
      "loss": 2.4825,
      "step": 242328
    },
    {
      "epoch": 46.53,
      "learning_rate": 0.001,
      "loss": 2.4829,
      "step": 242340
    },
    {
      "epoch": 46.53,
      "learning_rate": 0.001,
      "loss": 2.4802,
      "step": 242352
    },
    {
      "epoch": 46.54,
      "learning_rate": 0.001,
      "loss": 2.4834,
      "step": 242364
    },
    {
      "epoch": 46.54,
      "learning_rate": 0.001,
      "loss": 2.4785,
      "step": 242376
    },
    {
      "epoch": 46.54,
      "learning_rate": 0.001,
      "loss": 2.4843,
      "step": 242388
    },
    {
      "epoch": 46.54,
      "learning_rate": 0.001,
      "loss": 2.4901,
      "step": 242400
    },
    {
      "epoch": 46.55,
      "learning_rate": 0.001,
      "loss": 2.4873,
      "step": 242412
    },
    {
      "epoch": 46.55,
      "learning_rate": 0.001,
      "loss": 2.4827,
      "step": 242424
    },
    {
      "epoch": 46.55,
      "learning_rate": 0.001,
      "loss": 2.4876,
      "step": 242436
    },
    {
      "epoch": 46.55,
      "learning_rate": 0.001,
      "loss": 2.4928,
      "step": 242448
    },
    {
      "epoch": 46.56,
      "learning_rate": 0.001,
      "loss": 2.4863,
      "step": 242460
    },
    {
      "epoch": 46.56,
      "learning_rate": 0.001,
      "loss": 2.4921,
      "step": 242472
    },
    {
      "epoch": 46.56,
      "learning_rate": 0.001,
      "loss": 2.4828,
      "step": 242484
    },
    {
      "epoch": 46.56,
      "learning_rate": 0.001,
      "loss": 2.486,
      "step": 242496
    },
    {
      "epoch": 46.56,
      "eval_ag_news_accuracy": 0.3304375,
      "eval_ag_news_bleu_score": 5.0147895358484815,
      "eval_ag_news_bleu_score_sem": 0.15958465491068766,
      "eval_ag_news_emb_cos_sim": 0.8218309283256531,
      "eval_ag_news_emb_cos_sim_sem": 0.0066238639078783955,
      "eval_ag_news_emb_top1_equal": 0.25,
      "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4549052715301514,
      "eval_ag_news_n_ngrams_match_1": 14.36,
      "eval_ag_news_n_ngrams_match_2": 3.216,
      "eval_ag_news_n_ngrams_match_3": 0.944,
      "eval_ag_news_num_pred_words": 46.47,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.655289883068022,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3590430609878206,
      "eval_ag_news_runtime": 15.8727,
      "eval_ag_news_samples_per_second": 31.501,
      "eval_ag_news_steps_per_second": 0.063,
      "eval_ag_news_token_set_f1": 0.3579887445455612,
      "eval_ag_news_token_set_f1_sem": 0.0043650586776531825,
      "eval_ag_news_token_set_precision": 0.34438800093333527,
      "eval_ag_news_token_set_recall": 0.38769083022653245,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 242500
    },
    {
      "epoch": 46.56,
      "eval_anthropic_toxic_prompts_accuracy": 0.11709375,
      "eval_anthropic_toxic_prompts_bleu_score": 3.3395154646772034,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12719804499071863,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6862552165985107,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009476123563439307,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.1812803745269775,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.352,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.048,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.822,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.482,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.077562121814168,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21649841090894642,
      "eval_anthropic_toxic_prompts_runtime": 12.4149,
      "eval_anthropic_toxic_prompts_samples_per_second": 40.274,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.081,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36090650861044277,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006607413680914105,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44784435857669164,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3339832018508541,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 242500
    },
    {
      "epoch": 46.56,
      "eval_arxiv_accuracy": 0.35321875,
      "eval_arxiv_bleu_score": 4.709397685389214,
      "eval_arxiv_bleu_score_sem": 0.13017014297566795,
      "eval_arxiv_emb_cos_sim": 0.7913519144058228,
      "eval_arxiv_emb_cos_sim_sem": 0.0068440535324784816,
      "eval_arxiv_emb_top1_equal": 0.2578125,
      "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3240878582000732,
      "eval_arxiv_n_ngrams_match_1": 15.912,
      "eval_arxiv_n_ngrams_match_2": 3.294,
      "eval_arxiv_n_ngrams_match_3": 0.78,
      "eval_arxiv_num_pred_words": 41.348,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.7736535749167,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.37678167135450635,
      "eval_arxiv_runtime": 16.9108,
      "eval_arxiv_samples_per_second": 29.567,
      "eval_arxiv_steps_per_second": 0.059,
      "eval_arxiv_token_set_f1": 0.3749786536115361,
      "eval_arxiv_token_set_f1_sem": 0.00423406580137907,
      "eval_arxiv_token_set_precision": 0.3266201011569917,
      "eval_arxiv_token_set_recall": 0.4599227426804347,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 242500
    },
    {
      "epoch": 46.56,
      "eval_python_code_alpaca_accuracy": 0.1638125,
      "eval_python_code_alpaca_bleu_score": 4.753542972403131,
      "eval_python_code_alpaca_bleu_score_sem": 0.15269941418383537,
      "eval_python_code_alpaca_emb_cos_sim": 0.7625956535339355,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008567124700903138,
      "eval_python_code_alpaca_emb_top1_equal": 0.1171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.862983465194702,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.02,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.98,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.028,
      "eval_python_code_alpaca_num_pred_words": 44.044,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.513700584873433,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3417390203002659,
      "eval_python_code_alpaca_runtime": 29.3975,
      "eval_python_code_alpaca_samples_per_second": 17.008,
      "eval_python_code_alpaca_steps_per_second": 0.034,
      "eval_python_code_alpaca_token_set_f1": 0.4845210223439105,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005446935440651199,
      "eval_python_code_alpaca_token_set_precision": 0.5474563393900826,
      "eval_python_code_alpaca_token_set_recall": 0.4572541838133401,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 242500
    },
    {
      "epoch": 46.56,
      "eval_wikibio_accuracy": 0.32965625,
      "eval_wikibio_bleu_score": 6.199104057835193,
      "eval_wikibio_bleu_score_sem": 0.21852074990903678,
      "eval_wikibio_emb_cos_sim": 0.7372463941574097,
      "eval_wikibio_emb_cos_sim_sem": 0.010695194814836596,
      "eval_wikibio_emb_top1_equal": 0.171875,
      "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6502938270568848,
      "eval_wikibio_n_ngrams_match_1": 10.102,
      "eval_wikibio_n_ngrams_match_2": 3.51,
      "eval_wikibio_n_ngrams_match_3": 1.304,
      "eval_wikibio_num_pred_words": 35.488,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 38.48597260792705,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3588946318349425,
      "eval_wikibio_runtime": 18.0011,
      "eval_wikibio_samples_per_second": 27.776,
      "eval_wikibio_steps_per_second": 0.056,
      "eval_wikibio_token_set_f1": 0.323072826372439,
      "eval_wikibio_token_set_f1_sem": 0.00563513111265325,
      "eval_wikibio_token_set_precision": 0.32923979947755805,
      "eval_wikibio_token_set_recall": 0.3343350847546233,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 242500
    },
    {
      "epoch": 46.56,
      "eval_nq_accuracy": 0.5393125,
      "eval_nq_bleu_score": 12.401884825808072,
      "eval_nq_bleu_score_sem": 0.4935568006258743,
      "eval_nq_emb_cos_sim": 0.839489221572876,
      "eval_nq_emb_cos_sim_sem": 0.007569988217052089,
      "eval_nq_emb_top1_equal": 0.3359375,
      "eval_nq_emb_top1_equal_sem": 0.04191137143408563,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.104843854904175,
      "eval_nq_n_ngrams_match_1": 23.732,
      "eval_nq_n_ngrams_match_2": 8.902,
      "eval_nq_n_ngrams_match_3": 4.184,
      "eval_nq_num_pred_words": 49.15,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.205821610754604,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4632106348476336,
      "eval_nq_runtime": 11.9895,
      "eval_nq_samples_per_second": 41.703,
      "eval_nq_steps_per_second": 0.083,
      "eval_nq_token_set_f1": 0.4746018846814656,
      "eval_nq_token_set_f1_sem": 0.005044325450182254,
      "eval_nq_token_set_precision": 0.43326331968603143,
      "eval_nq_token_set_recall": 0.5330526004274339,
      "eval_nq_true_num_tokens": 64.0,
      "step": 242500
    },
    {
      "epoch": 46.56,
      "learning_rate": 0.001,
      "loss": 2.4792,
      "step": 242508
    },
    {
      "epoch": 46.57,
      "learning_rate": 0.001,
      "loss": 2.483,
      "step": 242520
    },
    {
      "epoch": 46.57,
      "learning_rate": 0.001,
      "loss": 2.482,
      "step": 242532
    },
    {
      "epoch": 46.57,
      "learning_rate": 0.001,
      "loss": 2.4873,
      "step": 242544
    },
    {
      "epoch": 46.57,
      "learning_rate": 0.001,
      "loss": 2.4888,
      "step": 242556
    },
    {
      "epoch": 46.58,
      "learning_rate": 0.001,
      "loss": 2.4807,
      "step": 242568
    },
    {
      "epoch": 46.58,
      "learning_rate": 0.001,
      "loss": 2.4866,
      "step": 242580
    },
    {
      "epoch": 46.58,
      "learning_rate": 0.001,
      "loss": 2.482,
      "step": 242592
    },
    {
      "epoch": 46.58,
      "learning_rate": 0.001,
      "loss": 2.483,
      "step": 242604
    },
    {
      "epoch": 46.59,
      "learning_rate": 0.001,
      "loss": 2.4836,
      "step": 242616
    },
    {
      "epoch": 46.59,
      "learning_rate": 0.001,
      "loss": 2.4898,
      "step": 242628
    },
    {
      "epoch": 46.59,
      "learning_rate": 0.001,
      "loss": 2.4835,
      "step": 242640
    },
    {
      "epoch": 46.59,
      "learning_rate": 0.001,
      "loss": 2.4845,
      "step": 242652
    },
    {
      "epoch": 46.59,
      "learning_rate": 0.001,
      "loss": 2.4779,
      "step": 242664
    },
    {
      "epoch": 46.6,
      "learning_rate": 0.001,
      "loss": 2.4756,
      "step": 242676
    },
    {
      "epoch": 46.6,
      "learning_rate": 0.001,
      "loss": 2.4767,
      "step": 242688
    },
    {
      "epoch": 46.6,
      "learning_rate": 0.001,
      "loss": 2.4724,
      "step": 242700
    },
    {
      "epoch": 46.6,
      "learning_rate": 0.001,
      "loss": 2.4944,
      "step": 242712
    },
    {
      "epoch": 46.61,
      "learning_rate": 0.001,
      "loss": 2.4861,
      "step": 242724
    },
    {
      "epoch": 46.61,
      "learning_rate": 0.001,
      "loss": 2.4831,
      "step": 242736
    },
    {
      "epoch": 46.61,
      "learning_rate": 0.001,
      "loss": 2.477,
      "step": 242748
    },
    {
      "epoch": 46.61,
      "learning_rate": 0.001,
      "loss": 2.4792,
      "step": 242760
    },
    {
      "epoch": 46.62,
      "learning_rate": 0.001,
      "loss": 2.4751,
      "step": 242772
    },
    {
      "epoch": 46.62,
      "learning_rate": 0.001,
      "loss": 2.4835,
      "step": 242784
    },
    {
      "epoch": 46.62,
      "learning_rate": 0.001,
      "loss": 2.4791,
      "step": 242796
    },
    {
      "epoch": 46.62,
      "learning_rate": 0.001,
      "loss": 2.4745,
      "step": 242808
    },
    {
      "epoch": 46.62,
      "learning_rate": 0.001,
      "loss": 2.4788,
      "step": 242820
    },
    {
      "epoch": 46.63,
      "learning_rate": 0.001,
      "loss": 2.4752,
      "step": 242832
    },
    {
      "epoch": 46.63,
      "learning_rate": 0.001,
      "loss": 2.4912,
      "step": 242844
    },
    {
      "epoch": 46.63,
      "learning_rate": 0.001,
      "loss": 2.4835,
      "step": 242856
    },
    {
      "epoch": 46.63,
      "learning_rate": 0.001,
      "loss": 2.4878,
      "step": 242868
    },
    {
      "epoch": 46.64,
      "learning_rate": 0.001,
      "loss": 2.4795,
      "step": 242880
    },
    {
      "epoch": 46.64,
      "learning_rate": 0.001,
      "loss": 2.4875,
      "step": 242892
    },
    {
      "epoch": 46.64,
      "learning_rate": 0.001,
      "loss": 2.4819,
      "step": 242904
    },
    {
      "epoch": 46.64,
      "learning_rate": 0.001,
      "loss": 2.4888,
      "step": 242916
    },
    {
      "epoch": 46.65,
      "learning_rate": 0.001,
      "loss": 2.4872,
      "step": 242928
    },
    {
      "epoch": 46.65,
      "learning_rate": 0.001,
      "loss": 2.4805,
      "step": 242940
    },
    {
      "epoch": 46.65,
      "learning_rate": 0.001,
      "loss": 2.4854,
      "step": 242952
    },
    {
      "epoch": 46.65,
      "learning_rate": 0.001,
      "loss": 2.4898,
      "step": 242964
    },
    {
      "epoch": 46.65,
      "learning_rate": 0.001,
      "loss": 2.4859,
      "step": 242976
    },
    {
      "epoch": 46.66,
      "learning_rate": 0.001,
      "loss": 2.4861,
      "step": 242988
    },
    {
      "epoch": 46.66,
      "learning_rate": 0.001,
      "loss": 2.4896,
      "step": 243000
    },
    {
      "epoch": 46.66,
      "learning_rate": 0.001,
      "loss": 2.4892,
      "step": 243012
    },
    {
      "epoch": 46.66,
      "learning_rate": 0.001,
      "loss": 2.4827,
      "step": 243024
    },
    {
      "epoch": 46.67,
      "learning_rate": 0.001,
      "loss": 2.4789,
      "step": 243036
    },
    {
      "epoch": 46.67,
      "learning_rate": 0.001,
      "loss": 2.4899,
      "step": 243048
    },
    {
      "epoch": 46.67,
      "learning_rate": 0.001,
      "loss": 2.4946,
      "step": 243060
    },
    {
      "epoch": 46.67,
      "learning_rate": 0.001,
      "loss": 2.4839,
      "step": 243072
    },
    {
      "epoch": 46.68,
      "learning_rate": 0.001,
      "loss": 2.4858,
      "step": 243084
    },
    {
      "epoch": 46.68,
      "learning_rate": 0.001,
      "loss": 2.4767,
      "step": 243096
    },
    {
      "epoch": 46.68,
      "learning_rate": 0.001,
      "loss": 2.4899,
      "step": 243108
    },
    {
      "epoch": 46.68,
      "learning_rate": 0.001,
      "loss": 2.4827,
      "step": 243120
    },
    {
      "epoch": 46.68,
      "eval_ag_news_accuracy": 0.330125,
      "eval_ag_news_bleu_score": 5.154653789908301,
      "eval_ag_news_bleu_score_sem": 0.17062236167367198,
      "eval_ag_news_emb_cos_sim": 0.8238040804862976,
      "eval_ag_news_emb_cos_sim_sem": 0.0062663980464112805,
      "eval_ag_news_emb_top1_equal": 0.28125,
      "eval_ag_news_emb_top1_equal_sem": 0.039896367485272234,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.455888509750366,
      "eval_ag_news_n_ngrams_match_1": 14.568,
      "eval_ag_news_n_ngrams_match_2": 3.362,
      "eval_ag_news_n_ngrams_match_3": 0.984,
      "eval_ag_news_num_pred_words": 46.354,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.686429880462107,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3658949290313125,
      "eval_ag_news_runtime": 12.6369,
      "eval_ag_news_samples_per_second": 39.567,
      "eval_ag_news_steps_per_second": 0.079,
      "eval_ag_news_token_set_f1": 0.3628202448725998,
      "eval_ag_news_token_set_f1_sem": 0.004526394411412497,
      "eval_ag_news_token_set_precision": 0.3480184762274712,
      "eval_ag_news_token_set_recall": 0.393966522650403,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 243125
    },
    {
      "epoch": 46.68,
      "eval_anthropic_toxic_prompts_accuracy": 0.11575,
      "eval_anthropic_toxic_prompts_bleu_score": 3.371620502622668,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1302202195352204,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6879521608352661,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008665812863780414,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.19242262840271,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.512,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.088,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.782,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.196,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.34734061081799,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.22434746598187452,
      "eval_anthropic_toxic_prompts_runtime": 12.0603,
      "eval_anthropic_toxic_prompts_samples_per_second": 41.458,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.083,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3681940245403478,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006311540758298674,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4585773723164939,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.33606220631117817,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 243125
    },
    {
      "epoch": 46.68,
      "eval_arxiv_accuracy": 0.3543125,
      "eval_arxiv_bleu_score": 4.510159697244193,
      "eval_arxiv_bleu_score_sem": 0.13235250084794623,
      "eval_arxiv_emb_cos_sim": 0.7854233384132385,
      "eval_arxiv_emb_cos_sim_sem": 0.006893196757842185,
      "eval_arxiv_emb_top1_equal": 0.2734375,
      "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.31634259223938,
      "eval_arxiv_n_ngrams_match_1": 15.692,
      "eval_arxiv_n_ngrams_match_2": 3.098,
      "eval_arxiv_n_ngrams_match_3": 0.696,
      "eval_arxiv_num_pred_words": 40.282,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.559370153545746,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.37622325720937877,
      "eval_arxiv_runtime": 17.2826,
      "eval_arxiv_samples_per_second": 28.931,
      "eval_arxiv_steps_per_second": 0.058,
      "eval_arxiv_token_set_f1": 0.3681895761925773,
      "eval_arxiv_token_set_f1_sem": 0.004325737464815855,
      "eval_arxiv_token_set_precision": 0.3205020006401268,
      "eval_arxiv_token_set_recall": 0.4510585337220203,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 243125
    },
    {
      "epoch": 46.68,
      "eval_python_code_alpaca_accuracy": 0.16128125,
      "eval_python_code_alpaca_bleu_score": 4.784843518504221,
      "eval_python_code_alpaca_bleu_score_sem": 0.15087232854001884,
      "eval_python_code_alpaca_emb_cos_sim": 0.7689688205718994,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008392794798749954,
      "eval_python_code_alpaca_emb_top1_equal": 0.1796875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.034068008879424266,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.871361255645752,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.99,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.97,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.002,
      "eval_python_code_alpaca_num_pred_words": 43.066,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.66104303869998,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.345981739899315,
      "eval_python_code_alpaca_runtime": 11.2119,
      "eval_python_code_alpaca_samples_per_second": 44.595,
      "eval_python_code_alpaca_steps_per_second": 0.089,
      "eval_python_code_alpaca_token_set_f1": 0.48245358037137476,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005309595032016044,
      "eval_python_code_alpaca_token_set_precision": 0.5453286615089248,
      "eval_python_code_alpaca_token_set_recall": 0.45740623444184725,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 243125
    },
    {
      "epoch": 46.68,
      "eval_wikibio_accuracy": 0.33078125,
      "eval_wikibio_bleu_score": 6.225084517037956,
      "eval_wikibio_bleu_score_sem": 0.22225109490955056,
      "eval_wikibio_emb_cos_sim": 0.7402669191360474,
      "eval_wikibio_emb_cos_sim_sem": 0.009899571633339242,
      "eval_wikibio_emb_top1_equal": 0.234375,
      "eval_wikibio_emb_top1_equal_sem": 0.03758909358128201,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.596177339553833,
      "eval_wikibio_n_ngrams_match_1": 10.168,
      "eval_wikibio_n_ngrams_match_2": 3.43,
      "eval_wikibio_n_ngrams_match_3": 1.29,
      "eval_wikibio_num_pred_words": 35.382,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 36.45859888018138,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36033282229314456,
      "eval_wikibio_runtime": 13.1564,
      "eval_wikibio_samples_per_second": 38.004,
      "eval_wikibio_steps_per_second": 0.076,
      "eval_wikibio_token_set_f1": 0.32314003112975126,
      "eval_wikibio_token_set_f1_sem": 0.0055351492160330236,
      "eval_wikibio_token_set_precision": 0.32881593414703353,
      "eval_wikibio_token_set_recall": 0.3366387826826832,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 243125
    },
    {
      "epoch": 46.68,
      "eval_nq_accuracy": 0.53953125,
      "eval_nq_bleu_score": 12.41028290681622,
      "eval_nq_bleu_score_sem": 0.49611651861096845,
      "eval_nq_emb_cos_sim": 0.8431179523468018,
      "eval_nq_emb_cos_sim_sem": 0.006613498417322748,
      "eval_nq_emb_top1_equal": 0.3125,
      "eval_nq_emb_top1_equal_sem": 0.041130074229814934,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1034491062164307,
      "eval_nq_n_ngrams_match_1": 23.65,
      "eval_nq_n_ngrams_match_2": 8.892,
      "eval_nq_n_ngrams_match_3": 4.186,
      "eval_nq_num_pred_words": 49.044,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.194384529612169,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45980285574893875,
      "eval_nq_runtime": 19.4993,
      "eval_nq_samples_per_second": 25.642,
      "eval_nq_steps_per_second": 0.051,
      "eval_nq_token_set_f1": 0.47188570265152296,
      "eval_nq_token_set_f1_sem": 0.0048763636092038695,
      "eval_nq_token_set_precision": 0.43007271010702747,
      "eval_nq_token_set_recall": 0.5305909207414481,
      "eval_nq_true_num_tokens": 64.0,
      "step": 243125
    },
    {
      "epoch": 46.68,
      "learning_rate": 0.001,
      "loss": 2.4939,
      "step": 243132
    },
    {
      "epoch": 46.69,
      "learning_rate": 0.001,
      "loss": 2.4927,
      "step": 243144
    },
    {
      "epoch": 46.69,
      "learning_rate": 0.001,
      "loss": 2.4889,
      "step": 243156
    },
    {
      "epoch": 46.69,
      "learning_rate": 0.001,
      "loss": 2.4787,
      "step": 243168
    },
    {
      "epoch": 46.69,
      "learning_rate": 0.001,
      "loss": 2.4834,
      "step": 243180
    },
    {
      "epoch": 46.7,
      "learning_rate": 0.001,
      "loss": 2.4825,
      "step": 243192
    },
    {
      "epoch": 46.7,
      "learning_rate": 0.001,
      "loss": 2.4922,
      "step": 243204
    },
    {
      "epoch": 46.7,
      "learning_rate": 0.001,
      "loss": 2.4847,
      "step": 243216
    },
    {
      "epoch": 46.7,
      "learning_rate": 0.001,
      "loss": 2.4896,
      "step": 243228
    },
    {
      "epoch": 46.71,
      "learning_rate": 0.001,
      "loss": 2.4918,
      "step": 243240
    },
    {
      "epoch": 46.71,
      "learning_rate": 0.001,
      "loss": 2.4748,
      "step": 243252
    },
    {
      "epoch": 46.71,
      "learning_rate": 0.001,
      "loss": 2.4825,
      "step": 243264
    },
    {
      "epoch": 46.71,
      "learning_rate": 0.001,
      "loss": 2.4925,
      "step": 243276
    },
    {
      "epoch": 46.71,
      "learning_rate": 0.001,
      "loss": 2.4791,
      "step": 243288
    },
    {
      "epoch": 46.72,
      "learning_rate": 0.001,
      "loss": 2.4813,
      "step": 243300
    },
    {
      "epoch": 46.72,
      "learning_rate": 0.001,
      "loss": 2.4867,
      "step": 243312
    },
    {
      "epoch": 46.72,
      "learning_rate": 0.001,
      "loss": 2.481,
      "step": 243324
    },
    {
      "epoch": 46.72,
      "learning_rate": 0.001,
      "loss": 2.4787,
      "step": 243336
    },
    {
      "epoch": 46.73,
      "learning_rate": 0.001,
      "loss": 2.4921,
      "step": 243348
    },
    {
      "epoch": 46.73,
      "learning_rate": 0.001,
      "loss": 2.4836,
      "step": 243360
    },
    {
      "epoch": 46.73,
      "learning_rate": 0.001,
      "loss": 2.4839,
      "step": 243372
    },
    {
      "epoch": 46.73,
      "learning_rate": 0.001,
      "loss": 2.4786,
      "step": 243384
    },
    {
      "epoch": 46.74,
      "learning_rate": 0.001,
      "loss": 2.4796,
      "step": 243396
    },
    {
      "epoch": 46.74,
      "learning_rate": 0.001,
      "loss": 2.4804,
      "step": 243408
    },
    {
      "epoch": 46.74,
      "learning_rate": 0.001,
      "loss": 2.4856,
      "step": 243420
    },
    {
      "epoch": 46.74,
      "learning_rate": 0.001,
      "loss": 2.4831,
      "step": 243432
    },
    {
      "epoch": 46.74,
      "learning_rate": 0.001,
      "loss": 2.4828,
      "step": 243444
    },
    {
      "epoch": 46.75,
      "learning_rate": 0.001,
      "loss": 2.4974,
      "step": 243456
    },
    {
      "epoch": 46.75,
      "learning_rate": 0.001,
      "loss": 2.4954,
      "step": 243468
    },
    {
      "epoch": 46.75,
      "learning_rate": 0.001,
      "loss": 2.4937,
      "step": 243480
    },
    {
      "epoch": 46.75,
      "learning_rate": 0.001,
      "loss": 2.4861,
      "step": 243492
    },
    {
      "epoch": 46.76,
      "learning_rate": 0.001,
      "loss": 2.4867,
      "step": 243504
    },
    {
      "epoch": 46.76,
      "learning_rate": 0.001,
      "loss": 2.4847,
      "step": 243516
    },
    {
      "epoch": 46.76,
      "learning_rate": 0.001,
      "loss": 2.4838,
      "step": 243528
    },
    {
      "epoch": 46.76,
      "learning_rate": 0.001,
      "loss": 2.4762,
      "step": 243540
    },
    {
      "epoch": 46.76,
      "learning_rate": 0.001,
      "loss": 2.4906,
      "step": 243552
    },
    {
      "epoch": 46.77,
      "learning_rate": 0.001,
      "loss": 2.4871,
      "step": 243564
    },
    {
      "epoch": 46.77,
      "learning_rate": 0.001,
      "loss": 2.4832,
      "step": 243576
    },
    {
      "epoch": 46.77,
      "learning_rate": 0.001,
      "loss": 2.4921,
      "step": 243588
    },
    {
      "epoch": 46.77,
      "learning_rate": 0.001,
      "loss": 2.4901,
      "step": 243600
    },
    {
      "epoch": 46.78,
      "learning_rate": 0.001,
      "loss": 2.4886,
      "step": 243612
    },
    {
      "epoch": 46.78,
      "learning_rate": 0.001,
      "loss": 2.4906,
      "step": 243624
    },
    {
      "epoch": 46.78,
      "learning_rate": 0.001,
      "loss": 2.485,
      "step": 243636
    },
    {
      "epoch": 46.78,
      "learning_rate": 0.001,
      "loss": 2.4867,
      "step": 243648
    },
    {
      "epoch": 46.79,
      "learning_rate": 0.001,
      "loss": 2.4829,
      "step": 243660
    },
    {
      "epoch": 46.79,
      "learning_rate": 0.001,
      "loss": 2.4877,
      "step": 243672
    },
    {
      "epoch": 46.79,
      "learning_rate": 0.001,
      "loss": 2.4855,
      "step": 243684
    },
    {
      "epoch": 46.79,
      "learning_rate": 0.001,
      "loss": 2.495,
      "step": 243696
    },
    {
      "epoch": 46.79,
      "learning_rate": 0.001,
      "loss": 2.4922,
      "step": 243708
    },
    {
      "epoch": 46.8,
      "learning_rate": 0.001,
      "loss": 2.4904,
      "step": 243720
    },
    {
      "epoch": 46.8,
      "learning_rate": 0.001,
      "loss": 2.489,
      "step": 243732
    },
    {
      "epoch": 46.8,
      "learning_rate": 0.001,
      "loss": 2.4894,
      "step": 243744
    },
    {
      "epoch": 46.8,
      "eval_ag_news_accuracy": 0.33053125,
      "eval_ag_news_bleu_score": 5.039099309980454,
      "eval_ag_news_bleu_score_sem": 0.16045699538588595,
      "eval_ag_news_emb_cos_sim": 0.8177430033683777,
      "eval_ag_news_emb_cos_sim_sem": 0.007100475617363269,
      "eval_ag_news_emb_top1_equal": 0.203125,
      "eval_ag_news_emb_top1_equal_sem": 0.03570055125142555,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.455599546432495,
      "eval_ag_news_n_ngrams_match_1": 14.558,
      "eval_ag_news_n_ngrams_match_2": 3.358,
      "eval_ag_news_n_ngrams_match_3": 0.954,
      "eval_ag_news_num_pred_words": 46.75,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.677274987330208,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.36112618782361333,
      "eval_ag_news_runtime": 30.2062,
      "eval_ag_news_samples_per_second": 16.553,
      "eval_ag_news_steps_per_second": 0.033,
      "eval_ag_news_token_set_f1": 0.3601579649836729,
      "eval_ag_news_token_set_f1_sem": 0.004504752513229392,
      "eval_ag_news_token_set_precision": 0.34783631246687213,
      "eval_ag_news_token_set_recall": 0.388737858799268,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 243750
    },
    {
      "epoch": 46.8,
      "eval_anthropic_toxic_prompts_accuracy": 0.1168125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2752443221500096,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12777298422486882,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6866858005523682,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009372066287188448,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.1995556354522705,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.398,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.014,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.778,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.552,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.52163123215981,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2185410184506723,
      "eval_anthropic_toxic_prompts_runtime": 36.0892,
      "eval_anthropic_toxic_prompts_samples_per_second": 13.855,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.028,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36072478194335755,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006357141499689125,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4523530944084032,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3261110783356425,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 243750
    },
    {
      "epoch": 46.8,
      "eval_arxiv_accuracy": 0.35609375,
      "eval_arxiv_bleu_score": 4.447450755461512,
      "eval_arxiv_bleu_score_sem": 0.124638000482428,
      "eval_arxiv_emb_cos_sim": 0.780614972114563,
      "eval_arxiv_emb_cos_sim_sem": 0.00677338709696038,
      "eval_arxiv_emb_top1_equal": 0.3515625,
      "eval_arxiv_emb_top1_equal_sem": 0.04236756101983345,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3115153312683105,
      "eval_arxiv_n_ngrams_match_1": 15.756,
      "eval_arxiv_n_ngrams_match_2": 3.092,
      "eval_arxiv_n_ngrams_match_3": 0.662,
      "eval_arxiv_num_pred_words": 41.334,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.426654465962347,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3740744791341803,
      "eval_arxiv_runtime": 30.6156,
      "eval_arxiv_samples_per_second": 16.332,
      "eval_arxiv_steps_per_second": 0.033,
      "eval_arxiv_token_set_f1": 0.36621881777434234,
      "eval_arxiv_token_set_f1_sem": 0.004211189609961602,
      "eval_arxiv_token_set_precision": 0.3213277691125244,
      "eval_arxiv_token_set_recall": 0.4422843764241028,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 243750
    },
    {
      "epoch": 46.8,
      "eval_python_code_alpaca_accuracy": 0.16175,
      "eval_python_code_alpaca_bleu_score": 4.742927892019021,
      "eval_python_code_alpaca_bleu_score_sem": 0.14746088091806286,
      "eval_python_code_alpaca_emb_cos_sim": 0.7770613431930542,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.0074594903755131605,
      "eval_python_code_alpaca_emb_top1_equal": 0.1640625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8479361534118652,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.962,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.916,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.974,
      "eval_python_code_alpaca_num_pred_words": 42.832,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.252139304014797,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3460431630138763,
      "eval_python_code_alpaca_runtime": 30.1762,
      "eval_python_code_alpaca_samples_per_second": 16.569,
      "eval_python_code_alpaca_steps_per_second": 0.033,
      "eval_python_code_alpaca_token_set_f1": 0.48362512000929325,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005489204647118722,
      "eval_python_code_alpaca_token_set_precision": 0.5414038521108293,
      "eval_python_code_alpaca_token_set_recall": 0.46068559773746326,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 243750
    },
    {
      "epoch": 46.8,
      "eval_wikibio_accuracy": 0.33084375,
      "eval_wikibio_bleu_score": 6.234119576658955,
      "eval_wikibio_bleu_score_sem": 0.2183858152906804,
      "eval_wikibio_emb_cos_sim": 0.7522262334823608,
      "eval_wikibio_emb_cos_sim_sem": 0.008664405554166855,
      "eval_wikibio_emb_top1_equal": 0.2421875,
      "eval_wikibio_emb_top1_equal_sem": 0.038014990119662626,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.605022668838501,
      "eval_wikibio_n_ngrams_match_1": 10.374,
      "eval_wikibio_n_ngrams_match_2": 3.548,
      "eval_wikibio_n_ngrams_match_3": 1.338,
      "eval_wikibio_num_pred_words": 36.764,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 36.78251766474602,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3669585286582726,
      "eval_wikibio_runtime": 29.993,
      "eval_wikibio_samples_per_second": 16.671,
      "eval_wikibio_steps_per_second": 0.033,
      "eval_wikibio_token_set_f1": 0.32738013850868886,
      "eval_wikibio_token_set_f1_sem": 0.005082182857883322,
      "eval_wikibio_token_set_precision": 0.3353376015714099,
      "eval_wikibio_token_set_recall": 0.3359335564814072,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 243750
    },
    {
      "epoch": 46.8,
      "eval_nq_accuracy": 0.537875,
      "eval_nq_bleu_score": 12.316738043884445,
      "eval_nq_bleu_score_sem": 0.49831910958334596,
      "eval_nq_emb_cos_sim": 0.8426940441131592,
      "eval_nq_emb_cos_sim_sem": 0.0066900482894751674,
      "eval_nq_emb_top1_equal": 0.2734375,
      "eval_nq_emb_top1_equal_sem": 0.03955156411760461,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.108543634414673,
      "eval_nq_n_ngrams_match_1": 23.73,
      "eval_nq_n_ngrams_match_2": 8.872,
      "eval_nq_n_ngrams_match_3": 4.14,
      "eval_nq_num_pred_words": 49.178,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.236237572898242,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4599833752389065,
      "eval_nq_runtime": 31.5153,
      "eval_nq_samples_per_second": 15.865,
      "eval_nq_steps_per_second": 0.032,
      "eval_nq_token_set_f1": 0.4748004010561954,
      "eval_nq_token_set_f1_sem": 0.004913855745807401,
      "eval_nq_token_set_precision": 0.43466219557873326,
      "eval_nq_token_set_recall": 0.5296665214234823,
      "eval_nq_true_num_tokens": 64.0,
      "step": 243750
    },
    {
      "epoch": 46.8,
      "learning_rate": 0.001,
      "loss": 2.4834,
      "step": 243756
    },
    {
      "epoch": 46.81,
      "learning_rate": 0.001,
      "loss": 2.4832,
      "step": 243768
    },
    {
      "epoch": 46.81,
      "learning_rate": 0.001,
      "loss": 2.4876,
      "step": 243780
    },
    {
      "epoch": 46.81,
      "learning_rate": 0.001,
      "loss": 2.4906,
      "step": 243792
    },
    {
      "epoch": 46.81,
      "learning_rate": 0.001,
      "loss": 2.4883,
      "step": 243804
    },
    {
      "epoch": 46.82,
      "learning_rate": 0.001,
      "loss": 2.4798,
      "step": 243816
    },
    {
      "epoch": 46.82,
      "learning_rate": 0.001,
      "loss": 2.4865,
      "step": 243828
    },
    {
      "epoch": 46.82,
      "learning_rate": 0.001,
      "loss": 2.4895,
      "step": 243840
    },
    {
      "epoch": 46.82,
      "learning_rate": 0.001,
      "loss": 2.4926,
      "step": 243852
    },
    {
      "epoch": 46.82,
      "learning_rate": 0.001,
      "loss": 2.4904,
      "step": 243864
    },
    {
      "epoch": 46.83,
      "learning_rate": 0.001,
      "loss": 2.4766,
      "step": 243876
    },
    {
      "epoch": 46.83,
      "learning_rate": 0.001,
      "loss": 2.4922,
      "step": 243888
    },
    {
      "epoch": 46.83,
      "learning_rate": 0.001,
      "loss": 2.4868,
      "step": 243900
    },
    {
      "epoch": 46.83,
      "learning_rate": 0.001,
      "loss": 2.4857,
      "step": 243912
    },
    {
      "epoch": 46.84,
      "learning_rate": 0.001,
      "loss": 2.4863,
      "step": 243924
    },
    {
      "epoch": 46.84,
      "learning_rate": 0.001,
      "loss": 2.4835,
      "step": 243936
    },
    {
      "epoch": 46.84,
      "learning_rate": 0.001,
      "loss": 2.4939,
      "step": 243948
    },
    {
      "epoch": 46.84,
      "learning_rate": 0.001,
      "loss": 2.4842,
      "step": 243960
    },
    {
      "epoch": 46.85,
      "learning_rate": 0.001,
      "loss": 2.4925,
      "step": 243972
    },
    {
      "epoch": 46.85,
      "learning_rate": 0.001,
      "loss": 2.4912,
      "step": 243984
    },
    {
      "epoch": 46.85,
      "learning_rate": 0.001,
      "loss": 2.4826,
      "step": 243996
    },
    {
      "epoch": 46.85,
      "learning_rate": 0.001,
      "loss": 2.4827,
      "step": 244008
    },
    {
      "epoch": 46.85,
      "learning_rate": 0.001,
      "loss": 2.488,
      "step": 244020
    },
    {
      "epoch": 46.86,
      "learning_rate": 0.001,
      "loss": 2.4954,
      "step": 244032
    },
    {
      "epoch": 46.86,
      "learning_rate": 0.001,
      "loss": 2.4892,
      "step": 244044
    },
    {
      "epoch": 46.86,
      "learning_rate": 0.001,
      "loss": 2.4888,
      "step": 244056
    },
    {
      "epoch": 46.86,
      "learning_rate": 0.001,
      "loss": 2.4952,
      "step": 244068
    },
    {
      "epoch": 46.87,
      "learning_rate": 0.001,
      "loss": 2.486,
      "step": 244080
    },
    {
      "epoch": 46.87,
      "learning_rate": 0.001,
      "loss": 2.4956,
      "step": 244092
    },
    {
      "epoch": 46.87,
      "learning_rate": 0.001,
      "loss": 2.4867,
      "step": 244104
    },
    {
      "epoch": 46.87,
      "learning_rate": 0.001,
      "loss": 2.4893,
      "step": 244116
    },
    {
      "epoch": 46.88,
      "learning_rate": 0.001,
      "loss": 2.4954,
      "step": 244128
    },
    {
      "epoch": 46.88,
      "learning_rate": 0.001,
      "loss": 2.4833,
      "step": 244140
    },
    {
      "epoch": 46.88,
      "learning_rate": 0.001,
      "loss": 2.4867,
      "step": 244152
    },
    {
      "epoch": 46.88,
      "learning_rate": 0.001,
      "loss": 2.4971,
      "step": 244164
    },
    {
      "epoch": 46.88,
      "learning_rate": 0.001,
      "loss": 2.4939,
      "step": 244176
    },
    {
      "epoch": 46.89,
      "learning_rate": 0.001,
      "loss": 2.4841,
      "step": 244188
    },
    {
      "epoch": 46.89,
      "learning_rate": 0.001,
      "loss": 2.4903,
      "step": 244200
    },
    {
      "epoch": 46.89,
      "learning_rate": 0.001,
      "loss": 2.484,
      "step": 244212
    },
    {
      "epoch": 46.89,
      "learning_rate": 0.001,
      "loss": 2.4953,
      "step": 244224
    },
    {
      "epoch": 46.9,
      "learning_rate": 0.001,
      "loss": 2.4888,
      "step": 244236
    },
    {
      "epoch": 46.9,
      "learning_rate": 0.001,
      "loss": 2.4931,
      "step": 244248
    },
    {
      "epoch": 46.9,
      "learning_rate": 0.001,
      "loss": 2.4889,
      "step": 244260
    },
    {
      "epoch": 46.9,
      "learning_rate": 0.001,
      "loss": 2.4815,
      "step": 244272
    },
    {
      "epoch": 46.91,
      "learning_rate": 0.001,
      "loss": 2.4951,
      "step": 244284
    },
    {
      "epoch": 46.91,
      "learning_rate": 0.001,
      "loss": 2.4974,
      "step": 244296
    },
    {
      "epoch": 46.91,
      "learning_rate": 0.001,
      "loss": 2.4949,
      "step": 244308
    },
    {
      "epoch": 46.91,
      "learning_rate": 0.001,
      "loss": 2.4917,
      "step": 244320
    },
    {
      "epoch": 46.91,
      "learning_rate": 0.001,
      "loss": 2.4793,
      "step": 244332
    },
    {
      "epoch": 46.92,
      "learning_rate": 0.001,
      "loss": 2.4944,
      "step": 244344
    },
    {
      "epoch": 46.92,
      "learning_rate": 0.001,
      "loss": 2.4885,
      "step": 244356
    },
    {
      "epoch": 46.92,
      "learning_rate": 0.001,
      "loss": 2.4919,
      "step": 244368
    },
    {
      "epoch": 46.92,
      "eval_ag_news_accuracy": 0.3298125,
      "eval_ag_news_bleu_score": 4.962390928750171,
      "eval_ag_news_bleu_score_sem": 0.1577795556939404,
      "eval_ag_news_emb_cos_sim": 0.8230937719345093,
      "eval_ag_news_emb_cos_sim_sem": 0.006215335822351316,
      "eval_ag_news_emb_top1_equal": 0.265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.45223331451416,
      "eval_ag_news_n_ngrams_match_1": 14.468,
      "eval_ag_news_n_ngrams_match_2": 3.23,
      "eval_ag_news_n_ngrams_match_3": 0.89,
      "eval_ag_news_num_pred_words": 46.366,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.570821207760844,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.363137736754119,
      "eval_ag_news_runtime": 37.4267,
      "eval_ag_news_samples_per_second": 13.359,
      "eval_ag_news_steps_per_second": 0.027,
      "eval_ag_news_token_set_f1": 0.3607285461914485,
      "eval_ag_news_token_set_f1_sem": 0.004437250615944355,
      "eval_ag_news_token_set_precision": 0.3469920166092059,
      "eval_ag_news_token_set_recall": 0.3902508826310556,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 244375
    },
    {
      "epoch": 46.92,
      "eval_anthropic_toxic_prompts_accuracy": 0.11771875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2925499486339223,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1278967785829679,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6840674877166748,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009150309326659363,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.173318386077881,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.468,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.026,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.788,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.686,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 23.886618006451442,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2183830024605264,
      "eval_anthropic_toxic_prompts_runtime": 30.6044,
      "eval_anthropic_toxic_prompts_samples_per_second": 16.338,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.033,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3618598421796209,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006626758408281386,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.45160102785618533,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32704402421459855,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 244375
    },
    {
      "epoch": 46.92,
      "eval_arxiv_accuracy": 0.35521875,
      "eval_arxiv_bleu_score": 4.485724999979893,
      "eval_arxiv_bleu_score_sem": 0.131572420176096,
      "eval_arxiv_emb_cos_sim": 0.7817816734313965,
      "eval_arxiv_emb_cos_sim_sem": 0.007995185381165228,
      "eval_arxiv_emb_top1_equal": 0.2890625,
      "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.310394287109375,
      "eval_arxiv_n_ngrams_match_1": 15.606,
      "eval_arxiv_n_ngrams_match_2": 3.094,
      "eval_arxiv_n_ngrams_match_3": 0.692,
      "eval_arxiv_num_pred_words": 40.19,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.395925202822838,
      "eval_arxiv_pred_num_tokens": 62.9140625,
      "eval_arxiv_rouge_score": 0.3742979304523928,
      "eval_arxiv_runtime": 29.2873,
      "eval_arxiv_samples_per_second": 17.072,
      "eval_arxiv_steps_per_second": 0.034,
      "eval_arxiv_token_set_f1": 0.36926164088446767,
      "eval_arxiv_token_set_f1_sem": 0.004198561910363197,
      "eval_arxiv_token_set_precision": 0.3211493159963727,
      "eval_arxiv_token_set_recall": 0.4571534011221035,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 244375
    },
    {
      "epoch": 46.92,
      "eval_python_code_alpaca_accuracy": 0.16421875,
      "eval_python_code_alpaca_bleu_score": 4.869395686517465,
      "eval_python_code_alpaca_bleu_score_sem": 0.16228264601249945,
      "eval_python_code_alpaca_emb_cos_sim": 0.7633101940155029,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.00776597633098381,
      "eval_python_code_alpaca_emb_top1_equal": 0.1640625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.843024492263794,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.09,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.028,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.034,
      "eval_python_code_alpaca_num_pred_words": 43.308,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.16761040027448,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3443562091412039,
      "eval_python_code_alpaca_runtime": 30.5049,
      "eval_python_code_alpaca_samples_per_second": 16.391,
      "eval_python_code_alpaca_steps_per_second": 0.033,
      "eval_python_code_alpaca_token_set_f1": 0.48226221929118135,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005672199341244246,
      "eval_python_code_alpaca_token_set_precision": 0.5531533212175661,
      "eval_python_code_alpaca_token_set_recall": 0.45301052435937644,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 244375
    },
    {
      "epoch": 46.92,
      "eval_wikibio_accuracy": 0.33259375,
      "eval_wikibio_bleu_score": 6.365955373794853,
      "eval_wikibio_bleu_score_sem": 0.22052584561828015,
      "eval_wikibio_emb_cos_sim": 0.7567028999328613,
      "eval_wikibio_emb_cos_sim_sem": 0.008532059181486583,
      "eval_wikibio_emb_top1_equal": 0.171875,
      "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.593489408493042,
      "eval_wikibio_n_ngrams_match_1": 10.31,
      "eval_wikibio_n_ngrams_match_2": 3.572,
      "eval_wikibio_n_ngrams_match_3": 1.39,
      "eval_wikibio_num_pred_words": 36.002,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 36.36073226809527,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36272023720448,
      "eval_wikibio_runtime": 21.9374,
      "eval_wikibio_samples_per_second": 22.792,
      "eval_wikibio_steps_per_second": 0.046,
      "eval_wikibio_token_set_f1": 0.328821071063146,
      "eval_wikibio_token_set_f1_sem": 0.005372802226609237,
      "eval_wikibio_token_set_precision": 0.3361162671775071,
      "eval_wikibio_token_set_recall": 0.3371563485334888,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 244375
    },
    {
      "epoch": 46.92,
      "eval_nq_accuracy": 0.53909375,
      "eval_nq_bleu_score": 12.432785528827589,
      "eval_nq_bleu_score_sem": 0.5016220057976496,
      "eval_nq_emb_cos_sim": 0.8387429714202881,
      "eval_nq_emb_cos_sim_sem": 0.0078774692332743,
      "eval_nq_emb_top1_equal": 0.328125,
      "eval_nq_emb_top1_equal_sem": 0.041664103776406315,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1078877449035645,
      "eval_nq_n_ngrams_match_1": 23.53,
      "eval_nq_n_ngrams_match_2": 8.954,
      "eval_nq_n_ngrams_match_3": 4.226,
      "eval_nq_num_pred_words": 48.91,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.230837282253772,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4560999104233717,
      "eval_nq_runtime": 30.6294,
      "eval_nq_samples_per_second": 16.324,
      "eval_nq_steps_per_second": 0.033,
      "eval_nq_token_set_f1": 0.47177922781546106,
      "eval_nq_token_set_f1_sem": 0.005179308555629063,
      "eval_nq_token_set_precision": 0.4288001810334077,
      "eval_nq_token_set_recall": 0.5334257556443286,
      "eval_nq_true_num_tokens": 64.0,
      "step": 244375
    },
    {
      "epoch": 46.92,
      "learning_rate": 0.001,
      "loss": 2.4909,
      "step": 244380
    },
    {
      "epoch": 46.93,
      "learning_rate": 0.001,
      "loss": 2.4854,
      "step": 244392
    },
    {
      "epoch": 46.93,
      "learning_rate": 0.001,
      "loss": 2.4934,
      "step": 244404
    },
    {
      "epoch": 46.93,
      "learning_rate": 0.001,
      "loss": 2.4883,
      "step": 244416
    },
    {
      "epoch": 46.93,
      "learning_rate": 0.001,
      "loss": 2.498,
      "step": 244428
    },
    {
      "epoch": 46.94,
      "learning_rate": 0.001,
      "loss": 2.4932,
      "step": 244440
    },
    {
      "epoch": 46.94,
      "learning_rate": 0.001,
      "loss": 2.4969,
      "step": 244452
    },
    {
      "epoch": 46.94,
      "learning_rate": 0.001,
      "loss": 2.4804,
      "step": 244464
    },
    {
      "epoch": 46.94,
      "learning_rate": 0.001,
      "loss": 2.4866,
      "step": 244476
    },
    {
      "epoch": 46.94,
      "learning_rate": 0.001,
      "loss": 2.5013,
      "step": 244488
    },
    {
      "epoch": 46.95,
      "learning_rate": 0.001,
      "loss": 2.4904,
      "step": 244500
    },
    {
      "epoch": 46.95,
      "learning_rate": 0.001,
      "loss": 2.4934,
      "step": 244512
    },
    {
      "epoch": 46.95,
      "learning_rate": 0.001,
      "loss": 2.4943,
      "step": 244524
    },
    {
      "epoch": 46.95,
      "learning_rate": 0.001,
      "loss": 2.4896,
      "step": 244536
    },
    {
      "epoch": 46.96,
      "learning_rate": 0.001,
      "loss": 2.4854,
      "step": 244548
    },
    {
      "epoch": 46.96,
      "learning_rate": 0.001,
      "loss": 2.4825,
      "step": 244560
    },
    {
      "epoch": 46.96,
      "learning_rate": 0.001,
      "loss": 2.4863,
      "step": 244572
    },
    {
      "epoch": 46.96,
      "learning_rate": 0.001,
      "loss": 2.4822,
      "step": 244584
    },
    {
      "epoch": 46.97,
      "learning_rate": 0.001,
      "loss": 2.4876,
      "step": 244596
    },
    {
      "epoch": 46.97,
      "learning_rate": 0.001,
      "loss": 2.4962,
      "step": 244608
    },
    {
      "epoch": 46.97,
      "learning_rate": 0.001,
      "loss": 2.4872,
      "step": 244620
    },
    {
      "epoch": 46.97,
      "learning_rate": 0.001,
      "loss": 2.4914,
      "step": 244632
    },
    {
      "epoch": 46.97,
      "learning_rate": 0.001,
      "loss": 2.4895,
      "step": 244644
    },
    {
      "epoch": 46.98,
      "learning_rate": 0.001,
      "loss": 2.4863,
      "step": 244656
    },
    {
      "epoch": 46.98,
      "learning_rate": 0.001,
      "loss": 2.4862,
      "step": 244668
    },
    {
      "epoch": 46.98,
      "learning_rate": 0.001,
      "loss": 2.4899,
      "step": 244680
    },
    {
      "epoch": 46.98,
      "learning_rate": 0.001,
      "loss": 2.486,
      "step": 244692
    },
    {
      "epoch": 46.99,
      "learning_rate": 0.001,
      "loss": 2.487,
      "step": 244704
    },
    {
      "epoch": 46.99,
      "learning_rate": 0.001,
      "loss": 2.4824,
      "step": 244716
    },
    {
      "epoch": 46.99,
      "learning_rate": 0.001,
      "loss": 2.4943,
      "step": 244728
    },
    {
      "epoch": 46.99,
      "learning_rate": 0.001,
      "loss": 2.4822,
      "step": 244740
    },
    {
      "epoch": 47.0,
      "learning_rate": 0.001,
      "loss": 2.4839,
      "step": 244752
    },
    {
      "epoch": 47.0,
      "learning_rate": 0.001,
      "loss": 2.4874,
      "step": 244764
    },
    {
      "epoch": 47.0,
      "learning_rate": 0.001,
      "loss": 2.4821,
      "step": 244776
    },
    {
      "epoch": 47.0,
      "learning_rate": 0.001,
      "loss": 2.4794,
      "step": 244788
    },
    {
      "epoch": 47.0,
      "learning_rate": 0.001,
      "loss": 2.4753,
      "step": 244800
    },
    {
      "epoch": 47.01,
      "learning_rate": 0.001,
      "loss": 2.4778,
      "step": 244812
    },
    {
      "epoch": 47.01,
      "learning_rate": 0.001,
      "loss": 2.4731,
      "step": 244824
    },
    {
      "epoch": 47.01,
      "learning_rate": 0.001,
      "loss": 2.4642,
      "step": 244836
    },
    {
      "epoch": 47.01,
      "learning_rate": 0.001,
      "loss": 2.4765,
      "step": 244848
    },
    {
      "epoch": 47.02,
      "learning_rate": 0.001,
      "loss": 2.4793,
      "step": 244860
    },
    {
      "epoch": 47.02,
      "learning_rate": 0.001,
      "loss": 2.4792,
      "step": 244872
    },
    {
      "epoch": 47.02,
      "learning_rate": 0.001,
      "loss": 2.4795,
      "step": 244884
    },
    {
      "epoch": 47.02,
      "learning_rate": 0.001,
      "loss": 2.4733,
      "step": 244896
    },
    {
      "epoch": 47.03,
      "learning_rate": 0.001,
      "loss": 2.4778,
      "step": 244908
    },
    {
      "epoch": 47.03,
      "learning_rate": 0.001,
      "loss": 2.4744,
      "step": 244920
    },
    {
      "epoch": 47.03,
      "learning_rate": 0.001,
      "loss": 2.4844,
      "step": 244932
    },
    {
      "epoch": 47.03,
      "learning_rate": 0.001,
      "loss": 2.4685,
      "step": 244944
    },
    {
      "epoch": 47.03,
      "learning_rate": 0.001,
      "loss": 2.4761,
      "step": 244956
    },
    {
      "epoch": 47.04,
      "learning_rate": 0.001,
      "loss": 2.4747,
      "step": 244968
    },
    {
      "epoch": 47.04,
      "learning_rate": 0.001,
      "loss": 2.4768,
      "step": 244980
    },
    {
      "epoch": 47.04,
      "learning_rate": 0.001,
      "loss": 2.4701,
      "step": 244992
    },
    {
      "epoch": 47.04,
      "eval_ag_news_accuracy": 0.33209375,
      "eval_ag_news_bleu_score": 4.925295757937099,
      "eval_ag_news_bleu_score_sem": 0.15157324942359807,
      "eval_ag_news_emb_cos_sim": 0.8138347864151001,
      "eval_ag_news_emb_cos_sim_sem": 0.00782541985099687,
      "eval_ag_news_emb_top1_equal": 0.2578125,
      "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.44590425491333,
      "eval_ag_news_n_ngrams_match_1": 14.496,
      "eval_ag_news_n_ngrams_match_2": 3.304,
      "eval_ag_news_n_ngrams_match_3": 0.908,
      "eval_ag_news_num_pred_words": 46.722,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.37163858292946,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.35854532559491736,
      "eval_ag_news_runtime": 12.0986,
      "eval_ag_news_samples_per_second": 41.327,
      "eval_ag_news_steps_per_second": 0.083,
      "eval_ag_news_token_set_f1": 0.35947735744225884,
      "eval_ag_news_token_set_f1_sem": 0.004478944389067876,
      "eval_ag_news_token_set_precision": 0.3464737878193696,
      "eval_ag_news_token_set_recall": 0.38848512375453,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 245000
    },
    {
      "epoch": 47.04,
      "eval_anthropic_toxic_prompts_accuracy": 0.11784375,
      "eval_anthropic_toxic_prompts_bleu_score": 3.435449649711825,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12565953244350866,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6939759850502014,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008376048011911953,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.1880574226379395,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.574,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.122,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.816,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.346,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.24129109161105,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.225962911332243,
      "eval_anthropic_toxic_prompts_runtime": 12.0806,
      "eval_anthropic_toxic_prompts_samples_per_second": 41.389,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.083,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3658123598298576,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006262961764184194,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4649568963644189,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32727427145892146,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 245000
    },
    {
      "epoch": 47.04,
      "eval_arxiv_accuracy": 0.35521875,
      "eval_arxiv_bleu_score": 4.5693387377181995,
      "eval_arxiv_bleu_score_sem": 0.13515266858895042,
      "eval_arxiv_emb_cos_sim": 0.7914153933525085,
      "eval_arxiv_emb_cos_sim_sem": 0.006605425320199388,
      "eval_arxiv_emb_top1_equal": 0.28125,
      "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.315833806991577,
      "eval_arxiv_n_ngrams_match_1": 15.798,
      "eval_arxiv_n_ngrams_match_2": 3.154,
      "eval_arxiv_n_ngrams_match_3": 0.706,
      "eval_arxiv_num_pred_words": 41.268,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.545351919010752,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3768625888130388,
      "eval_arxiv_runtime": 11.8295,
      "eval_arxiv_samples_per_second": 42.267,
      "eval_arxiv_steps_per_second": 0.085,
      "eval_arxiv_token_set_f1": 0.37089241852354093,
      "eval_arxiv_token_set_f1_sem": 0.003965028715257392,
      "eval_arxiv_token_set_precision": 0.3237756862481294,
      "eval_arxiv_token_set_recall": 0.4496472154634021,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 245000
    },
    {
      "epoch": 47.04,
      "eval_python_code_alpaca_accuracy": 0.16203125,
      "eval_python_code_alpaca_bleu_score": 4.6709153834448,
      "eval_python_code_alpaca_bleu_score_sem": 0.1489269565286898,
      "eval_python_code_alpaca_emb_cos_sim": 0.7627075910568237,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009097241834572592,
      "eval_python_code_alpaca_emb_top1_equal": 0.1640625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.865194797515869,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.93,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.962,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.996,
      "eval_python_code_alpaca_num_pred_words": 43.102,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.55247204953714,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.34162463797619924,
      "eval_python_code_alpaca_runtime": 11.6621,
      "eval_python_code_alpaca_samples_per_second": 42.874,
      "eval_python_code_alpaca_steps_per_second": 0.086,
      "eval_python_code_alpaca_token_set_f1": 0.478837769366088,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0056724362378326055,
      "eval_python_code_alpaca_token_set_precision": 0.5454503489781254,
      "eval_python_code_alpaca_token_set_recall": 0.4472350972555811,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 245000
    },
    {
      "epoch": 47.04,
      "eval_wikibio_accuracy": 0.33003125,
      "eval_wikibio_bleu_score": 6.144214884836507,
      "eval_wikibio_bleu_score_sem": 0.21037107462738655,
      "eval_wikibio_emb_cos_sim": 0.7528545260429382,
      "eval_wikibio_emb_cos_sim_sem": 0.008942246140629271,
      "eval_wikibio_emb_top1_equal": 0.2421875,
      "eval_wikibio_emb_top1_equal_sem": 0.038014990119662626,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6246695518493652,
      "eval_wikibio_n_ngrams_match_1": 10.168,
      "eval_wikibio_n_ngrams_match_2": 3.412,
      "eval_wikibio_n_ngrams_match_3": 1.272,
      "eval_wikibio_num_pred_words": 36.168,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.512325232778636,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36010652868234394,
      "eval_wikibio_runtime": 14.9713,
      "eval_wikibio_samples_per_second": 33.397,
      "eval_wikibio_steps_per_second": 0.067,
      "eval_wikibio_token_set_f1": 0.3227942991028087,
      "eval_wikibio_token_set_f1_sem": 0.005350308801267859,
      "eval_wikibio_token_set_precision": 0.3317673437737749,
      "eval_wikibio_token_set_recall": 0.3311929048937341,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 245000
    },
    {
      "epoch": 47.04,
      "eval_nq_accuracy": 0.538875,
      "eval_nq_bleu_score": 12.48542827698002,
      "eval_nq_bleu_score_sem": 0.48766691134829954,
      "eval_nq_emb_cos_sim": 0.8438307046890259,
      "eval_nq_emb_cos_sim_sem": 0.0068573436191940815,
      "eval_nq_emb_top1_equal": 0.3203125,
      "eval_nq_emb_top1_equal_sem": 0.041403754790620424,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.105804681777954,
      "eval_nq_n_ngrams_match_1": 23.758,
      "eval_nq_n_ngrams_match_2": 8.998,
      "eval_nq_n_ngrams_match_3": 4.22,
      "eval_nq_num_pred_words": 49.148,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.21370977365225,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.46308293267395306,
      "eval_nq_runtime": 33.8755,
      "eval_nq_samples_per_second": 14.76,
      "eval_nq_steps_per_second": 0.03,
      "eval_nq_token_set_f1": 0.47685870947935743,
      "eval_nq_token_set_f1_sem": 0.004842791199554861,
      "eval_nq_token_set_precision": 0.4353981588254932,
      "eval_nq_token_set_recall": 0.5363340850169116,
      "eval_nq_true_num_tokens": 64.0,
      "step": 245000
    },
    {
      "epoch": 47.04,
      "learning_rate": 0.001,
      "loss": 2.4765,
      "step": 245004
    },
    {
      "epoch": 47.05,
      "learning_rate": 0.001,
      "loss": 2.4742,
      "step": 245016
    },
    {
      "epoch": 47.05,
      "learning_rate": 0.001,
      "loss": 2.4928,
      "step": 245028
    },
    {
      "epoch": 47.05,
      "learning_rate": 0.001,
      "loss": 2.4864,
      "step": 245040
    },
    {
      "epoch": 47.05,
      "learning_rate": 0.001,
      "loss": 2.4788,
      "step": 245052
    },
    {
      "epoch": 47.06,
      "learning_rate": 0.001,
      "loss": 2.484,
      "step": 245064
    },
    {
      "epoch": 47.06,
      "learning_rate": 0.001,
      "loss": 2.472,
      "step": 245076
    },
    {
      "epoch": 47.06,
      "learning_rate": 0.001,
      "loss": 2.4775,
      "step": 245088
    },
    {
      "epoch": 47.06,
      "learning_rate": 0.001,
      "loss": 2.4782,
      "step": 245100
    },
    {
      "epoch": 47.06,
      "learning_rate": 0.001,
      "loss": 2.4788,
      "step": 245112
    },
    {
      "epoch": 47.07,
      "learning_rate": 0.001,
      "loss": 2.492,
      "step": 245124
    },
    {
      "epoch": 47.07,
      "learning_rate": 0.001,
      "loss": 2.4827,
      "step": 245136
    },
    {
      "epoch": 47.07,
      "learning_rate": 0.001,
      "loss": 2.491,
      "step": 245148
    },
    {
      "epoch": 47.07,
      "learning_rate": 0.001,
      "loss": 2.4885,
      "step": 245160
    },
    {
      "epoch": 47.08,
      "learning_rate": 0.001,
      "loss": 2.4849,
      "step": 245172
    },
    {
      "epoch": 47.08,
      "learning_rate": 0.001,
      "loss": 2.4836,
      "step": 245184
    },
    {
      "epoch": 47.08,
      "learning_rate": 0.001,
      "loss": 2.4819,
      "step": 245196
    },
    {
      "epoch": 47.08,
      "learning_rate": 0.001,
      "loss": 2.4778,
      "step": 245208
    },
    {
      "epoch": 47.09,
      "learning_rate": 0.001,
      "loss": 2.472,
      "step": 245220
    },
    {
      "epoch": 47.09,
      "learning_rate": 0.001,
      "loss": 2.4831,
      "step": 245232
    },
    {
      "epoch": 47.09,
      "learning_rate": 0.001,
      "loss": 2.4742,
      "step": 245244
    },
    {
      "epoch": 47.09,
      "learning_rate": 0.001,
      "loss": 2.4772,
      "step": 245256
    },
    {
      "epoch": 47.09,
      "learning_rate": 0.001,
      "loss": 2.4828,
      "step": 245268
    },
    {
      "epoch": 47.1,
      "learning_rate": 0.001,
      "loss": 2.4829,
      "step": 245280
    },
    {
      "epoch": 47.1,
      "learning_rate": 0.001,
      "loss": 2.484,
      "step": 245292
    },
    {
      "epoch": 47.1,
      "learning_rate": 0.001,
      "loss": 2.4862,
      "step": 245304
    },
    {
      "epoch": 47.1,
      "learning_rate": 0.001,
      "loss": 2.4848,
      "step": 245316
    },
    {
      "epoch": 47.11,
      "learning_rate": 0.001,
      "loss": 2.4852,
      "step": 245328
    },
    {
      "epoch": 47.11,
      "learning_rate": 0.001,
      "loss": 2.4808,
      "step": 245340
    },
    {
      "epoch": 47.11,
      "learning_rate": 0.001,
      "loss": 2.4773,
      "step": 245352
    },
    {
      "epoch": 47.11,
      "learning_rate": 0.001,
      "loss": 2.4742,
      "step": 245364
    },
    {
      "epoch": 47.12,
      "learning_rate": 0.001,
      "loss": 2.4823,
      "step": 245376
    },
    {
      "epoch": 47.12,
      "learning_rate": 0.001,
      "loss": 2.4869,
      "step": 245388
    },
    {
      "epoch": 47.12,
      "learning_rate": 0.001,
      "loss": 2.4801,
      "step": 245400
    },
    {
      "epoch": 47.12,
      "learning_rate": 0.001,
      "loss": 2.4886,
      "step": 245412
    },
    {
      "epoch": 47.12,
      "learning_rate": 0.001,
      "loss": 2.4841,
      "step": 245424
    },
    {
      "epoch": 47.13,
      "learning_rate": 0.001,
      "loss": 2.4803,
      "step": 245436
    },
    {
      "epoch": 47.13,
      "learning_rate": 0.001,
      "loss": 2.4855,
      "step": 245448
    },
    {
      "epoch": 47.13,
      "learning_rate": 0.001,
      "loss": 2.4774,
      "step": 245460
    },
    {
      "epoch": 47.13,
      "learning_rate": 0.001,
      "loss": 2.4741,
      "step": 245472
    },
    {
      "epoch": 47.14,
      "learning_rate": 0.001,
      "loss": 2.4874,
      "step": 245484
    },
    {
      "epoch": 47.14,
      "learning_rate": 0.001,
      "loss": 2.4789,
      "step": 245496
    },
    {
      "epoch": 47.14,
      "learning_rate": 0.001,
      "loss": 2.4755,
      "step": 245508
    },
    {
      "epoch": 47.14,
      "learning_rate": 0.001,
      "loss": 2.4747,
      "step": 245520
    },
    {
      "epoch": 47.15,
      "learning_rate": 0.001,
      "loss": 2.4892,
      "step": 245532
    },
    {
      "epoch": 47.15,
      "learning_rate": 0.001,
      "loss": 2.482,
      "step": 245544
    },
    {
      "epoch": 47.15,
      "learning_rate": 0.001,
      "loss": 2.494,
      "step": 245556
    },
    {
      "epoch": 47.15,
      "learning_rate": 0.001,
      "loss": 2.4795,
      "step": 245568
    },
    {
      "epoch": 47.15,
      "learning_rate": 0.001,
      "loss": 2.481,
      "step": 245580
    },
    {
      "epoch": 47.16,
      "learning_rate": 0.001,
      "loss": 2.4733,
      "step": 245592
    },
    {
      "epoch": 47.16,
      "learning_rate": 0.001,
      "loss": 2.4829,
      "step": 245604
    },
    {
      "epoch": 47.16,
      "learning_rate": 0.001,
      "loss": 2.4818,
      "step": 245616
    },
    {
      "epoch": 47.16,
      "eval_ag_news_accuracy": 0.32975,
      "eval_ag_news_bleu_score": 5.095891868333192,
      "eval_ag_news_bleu_score_sem": 0.15943742532720626,
      "eval_ag_news_emb_cos_sim": 0.8234723210334778,
      "eval_ag_news_emb_cos_sim_sem": 0.006856294557792865,
      "eval_ag_news_emb_top1_equal": 0.2265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03714537682851538,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4530296325683594,
      "eval_ag_news_n_ngrams_match_1": 14.62,
      "eval_ag_news_n_ngrams_match_2": 3.358,
      "eval_ag_news_n_ngrams_match_3": 0.938,
      "eval_ag_news_num_pred_words": 46.804,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.595971635215168,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3638075014427663,
      "eval_ag_news_runtime": 34.6683,
      "eval_ag_news_samples_per_second": 14.422,
      "eval_ag_news_steps_per_second": 0.029,
      "eval_ag_news_token_set_f1": 0.361439056316256,
      "eval_ag_news_token_set_f1_sem": 0.00458129653332981,
      "eval_ag_news_token_set_precision": 0.3496636224836855,
      "eval_ag_news_token_set_recall": 0.38659686179655955,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 245625
    },
    {
      "epoch": 47.16,
      "eval_anthropic_toxic_prompts_accuracy": 0.1173125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.4667467996448904,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1355228854603887,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.693714439868927,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00836533480985609,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0546875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.020175758285348722,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2145566940307617,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.556,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.124,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.88,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.834,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.89225457984562,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.22428777102998337,
      "eval_anthropic_toxic_prompts_runtime": 34.9172,
      "eval_anthropic_toxic_prompts_samples_per_second": 14.32,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.029,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36637338820197096,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006580240863976744,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.46019843590764575,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32943045333637133,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 245625
    },
    {
      "epoch": 47.16,
      "eval_arxiv_accuracy": 0.35496875,
      "eval_arxiv_bleu_score": 4.670369731043923,
      "eval_arxiv_bleu_score_sem": 0.13696372933979614,
      "eval_arxiv_emb_cos_sim": 0.7951237559318542,
      "eval_arxiv_emb_cos_sim_sem": 0.005358955983091984,
      "eval_arxiv_emb_top1_equal": 0.265625,
      "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3004679679870605,
      "eval_arxiv_n_ngrams_match_1": 15.94,
      "eval_arxiv_n_ngrams_match_2": 3.212,
      "eval_arxiv_n_ngrams_match_3": 0.73,
      "eval_arxiv_num_pred_words": 41.522,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.12532973693376,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3792798416906389,
      "eval_arxiv_runtime": 32.6239,
      "eval_arxiv_samples_per_second": 15.326,
      "eval_arxiv_steps_per_second": 0.031,
      "eval_arxiv_token_set_f1": 0.36966386096588283,
      "eval_arxiv_token_set_f1_sem": 0.004125212815473106,
      "eval_arxiv_token_set_precision": 0.32719840939950184,
      "eval_arxiv_token_set_recall": 0.44019621005456366,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 245625
    },
    {
      "epoch": 47.16,
      "eval_python_code_alpaca_accuracy": 0.16253125,
      "eval_python_code_alpaca_bleu_score": 4.976036605527539,
      "eval_python_code_alpaca_bleu_score_sem": 0.157347021349905,
      "eval_python_code_alpaca_emb_cos_sim": 0.7751104831695557,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007062985337068321,
      "eval_python_code_alpaca_emb_top1_equal": 0.15625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8574025630950928,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.192,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.09,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.078,
      "eval_python_code_alpaca_num_pred_words": 42.92,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.41623057478644,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.35352815037520247,
      "eval_python_code_alpaca_runtime": 26.9993,
      "eval_python_code_alpaca_samples_per_second": 18.519,
      "eval_python_code_alpaca_steps_per_second": 0.037,
      "eval_python_code_alpaca_token_set_f1": 0.48542807527786097,
      "eval_python_code_alpaca_token_set_f1_sem": 0.00523008801301433,
      "eval_python_code_alpaca_token_set_precision": 0.5576286797307609,
      "eval_python_code_alpaca_token_set_recall": 0.4498511173790471,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 245625
    },
    {
      "epoch": 47.16,
      "eval_wikibio_accuracy": 0.333125,
      "eval_wikibio_bleu_score": 6.44085541546715,
      "eval_wikibio_bleu_score_sem": 0.23410197951135908,
      "eval_wikibio_emb_cos_sim": 0.7461103796958923,
      "eval_wikibio_emb_cos_sim_sem": 0.008843071971905106,
      "eval_wikibio_emb_top1_equal": 0.1640625,
      "eval_wikibio_emb_top1_equal_sem": 0.03286167651298939,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6005210876464844,
      "eval_wikibio_n_ngrams_match_1": 10.222,
      "eval_wikibio_n_ngrams_match_2": 3.568,
      "eval_wikibio_n_ngrams_match_3": 1.416,
      "eval_wikibio_num_pred_words": 35.742,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 36.617310301194934,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36266366691392105,
      "eval_wikibio_runtime": 32.5894,
      "eval_wikibio_samples_per_second": 15.342,
      "eval_wikibio_steps_per_second": 0.031,
      "eval_wikibio_token_set_f1": 0.32505223914894016,
      "eval_wikibio_token_set_f1_sem": 0.005395906597591434,
      "eval_wikibio_token_set_precision": 0.3342663134899181,
      "eval_wikibio_token_set_recall": 0.33298039057251944,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 245625
    },
    {
      "epoch": 47.16,
      "eval_nq_accuracy": 0.53975,
      "eval_nq_bleu_score": 12.431233616869063,
      "eval_nq_bleu_score_sem": 0.5039949780717249,
      "eval_nq_emb_cos_sim": 0.8445185422897339,
      "eval_nq_emb_cos_sim_sem": 0.006614425647989987,
      "eval_nq_emb_top1_equal": 0.2890625,
      "eval_nq_emb_top1_equal_sem": 0.04022626667363519,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.106003522872925,
      "eval_nq_n_ngrams_match_1": 23.414,
      "eval_nq_n_ngrams_match_2": 8.87,
      "eval_nq_n_ngrams_match_3": 4.204,
      "eval_nq_num_pred_words": 48.94,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.215343159084107,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4548226045607402,
      "eval_nq_runtime": 23.735,
      "eval_nq_samples_per_second": 21.066,
      "eval_nq_steps_per_second": 0.042,
      "eval_nq_token_set_f1": 0.47117816099443643,
      "eval_nq_token_set_f1_sem": 0.004860512411022548,
      "eval_nq_token_set_precision": 0.42950036872631,
      "eval_nq_token_set_recall": 0.5296503290431468,
      "eval_nq_true_num_tokens": 64.0,
      "step": 245625
    },
    {
      "epoch": 47.16,
      "learning_rate": 0.001,
      "loss": 2.4759,
      "step": 245628
    },
    {
      "epoch": 47.17,
      "learning_rate": 0.001,
      "loss": 2.4771,
      "step": 245640
    },
    {
      "epoch": 47.17,
      "learning_rate": 0.001,
      "loss": 2.4786,
      "step": 245652
    },
    {
      "epoch": 47.17,
      "learning_rate": 0.001,
      "loss": 2.4818,
      "step": 245664
    },
    {
      "epoch": 47.17,
      "learning_rate": 0.001,
      "loss": 2.4725,
      "step": 245676
    },
    {
      "epoch": 47.18,
      "learning_rate": 0.001,
      "loss": 2.4807,
      "step": 245688
    },
    {
      "epoch": 47.18,
      "learning_rate": 0.001,
      "loss": 2.4836,
      "step": 245700
    },
    {
      "epoch": 47.18,
      "learning_rate": 0.001,
      "loss": 2.4758,
      "step": 245712
    },
    {
      "epoch": 47.18,
      "learning_rate": 0.001,
      "loss": 2.4701,
      "step": 245724
    },
    {
      "epoch": 47.18,
      "learning_rate": 0.001,
      "loss": 2.4818,
      "step": 245736
    },
    {
      "epoch": 47.19,
      "learning_rate": 0.001,
      "loss": 2.4816,
      "step": 245748
    },
    {
      "epoch": 47.19,
      "learning_rate": 0.001,
      "loss": 2.4876,
      "step": 245760
    },
    {
      "epoch": 47.19,
      "learning_rate": 0.001,
      "loss": 2.4817,
      "step": 245772
    },
    {
      "epoch": 47.19,
      "learning_rate": 0.001,
      "loss": 2.4837,
      "step": 245784
    },
    {
      "epoch": 47.2,
      "learning_rate": 0.001,
      "loss": 2.4746,
      "step": 245796
    },
    {
      "epoch": 47.2,
      "learning_rate": 0.001,
      "loss": 2.4868,
      "step": 245808
    },
    {
      "epoch": 47.2,
      "learning_rate": 0.001,
      "loss": 2.4803,
      "step": 245820
    },
    {
      "epoch": 47.2,
      "learning_rate": 0.001,
      "loss": 2.4848,
      "step": 245832
    },
    {
      "epoch": 47.21,
      "learning_rate": 0.001,
      "loss": 2.4929,
      "step": 245844
    },
    {
      "epoch": 47.21,
      "learning_rate": 0.001,
      "loss": 2.4844,
      "step": 245856
    },
    {
      "epoch": 47.21,
      "learning_rate": 0.001,
      "loss": 2.4768,
      "step": 245868
    },
    {
      "epoch": 47.21,
      "learning_rate": 0.001,
      "loss": 2.4871,
      "step": 245880
    },
    {
      "epoch": 47.21,
      "learning_rate": 0.001,
      "loss": 2.4775,
      "step": 245892
    },
    {
      "epoch": 47.22,
      "learning_rate": 0.001,
      "loss": 2.4827,
      "step": 245904
    },
    {
      "epoch": 47.22,
      "learning_rate": 0.001,
      "loss": 2.4789,
      "step": 245916
    },
    {
      "epoch": 47.22,
      "learning_rate": 0.001,
      "loss": 2.4797,
      "step": 245928
    },
    {
      "epoch": 47.22,
      "learning_rate": 0.001,
      "loss": 2.4784,
      "step": 245940
    },
    {
      "epoch": 47.23,
      "learning_rate": 0.001,
      "loss": 2.4873,
      "step": 245952
    },
    {
      "epoch": 47.23,
      "learning_rate": 0.001,
      "loss": 2.478,
      "step": 245964
    },
    {
      "epoch": 47.23,
      "learning_rate": 0.001,
      "loss": 2.4851,
      "step": 245976
    },
    {
      "epoch": 47.23,
      "learning_rate": 0.001,
      "loss": 2.4847,
      "step": 245988
    },
    {
      "epoch": 47.24,
      "learning_rate": 0.001,
      "loss": 2.4698,
      "step": 246000
    },
    {
      "epoch": 47.24,
      "learning_rate": 0.001,
      "loss": 2.4706,
      "step": 246012
    },
    {
      "epoch": 47.24,
      "learning_rate": 0.001,
      "loss": 2.4829,
      "step": 246024
    },
    {
      "epoch": 47.24,
      "learning_rate": 0.001,
      "loss": 2.49,
      "step": 246036
    },
    {
      "epoch": 47.24,
      "learning_rate": 0.001,
      "loss": 2.482,
      "step": 246048
    },
    {
      "epoch": 47.25,
      "learning_rate": 0.001,
      "loss": 2.4956,
      "step": 246060
    },
    {
      "epoch": 47.25,
      "learning_rate": 0.001,
      "loss": 2.4856,
      "step": 246072
    },
    {
      "epoch": 47.25,
      "learning_rate": 0.001,
      "loss": 2.4841,
      "step": 246084
    },
    {
      "epoch": 47.25,
      "learning_rate": 0.001,
      "loss": 2.4864,
      "step": 246096
    },
    {
      "epoch": 47.26,
      "learning_rate": 0.001,
      "loss": 2.4747,
      "step": 246108
    },
    {
      "epoch": 47.26,
      "learning_rate": 0.001,
      "loss": 2.4857,
      "step": 246120
    },
    {
      "epoch": 47.26,
      "learning_rate": 0.001,
      "loss": 2.4857,
      "step": 246132
    },
    {
      "epoch": 47.26,
      "learning_rate": 0.001,
      "loss": 2.4834,
      "step": 246144
    },
    {
      "epoch": 47.26,
      "learning_rate": 0.001,
      "loss": 2.4809,
      "step": 246156
    },
    {
      "epoch": 47.27,
      "learning_rate": 0.001,
      "loss": 2.4759,
      "step": 246168
    },
    {
      "epoch": 47.27,
      "learning_rate": 0.001,
      "loss": 2.4736,
      "step": 246180
    },
    {
      "epoch": 47.27,
      "learning_rate": 0.001,
      "loss": 2.4781,
      "step": 246192
    },
    {
      "epoch": 47.27,
      "learning_rate": 0.001,
      "loss": 2.4769,
      "step": 246204
    },
    {
      "epoch": 47.28,
      "learning_rate": 0.001,
      "loss": 2.4845,
      "step": 246216
    },
    {
      "epoch": 47.28,
      "learning_rate": 0.001,
      "loss": 2.4804,
      "step": 246228
    },
    {
      "epoch": 47.28,
      "learning_rate": 0.001,
      "loss": 2.4837,
      "step": 246240
    },
    {
      "epoch": 47.28,
      "eval_ag_news_accuracy": 0.330875,
      "eval_ag_news_bleu_score": 5.069283979206854,
      "eval_ag_news_bleu_score_sem": 0.15037329134454597,
      "eval_ag_news_emb_cos_sim": 0.8174196481704712,
      "eval_ag_news_emb_cos_sim_sem": 0.006828056827835074,
      "eval_ag_news_emb_top1_equal": 0.2265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03714537682851538,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4608304500579834,
      "eval_ag_news_n_ngrams_match_1": 14.536,
      "eval_ag_news_n_ngrams_match_2": 3.332,
      "eval_ag_news_n_ngrams_match_3": 0.96,
      "eval_ag_news_num_pred_words": 46.82,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.843409898942763,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.36355621736268795,
      "eval_ag_news_runtime": 12.6654,
      "eval_ag_news_samples_per_second": 39.478,
      "eval_ag_news_steps_per_second": 0.079,
      "eval_ag_news_token_set_f1": 0.36138147065303444,
      "eval_ag_news_token_set_f1_sem": 0.004455285735451715,
      "eval_ag_news_token_set_precision": 0.34958395829939504,
      "eval_ag_news_token_set_recall": 0.3883823749164066,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 246250
    },
    {
      "epoch": 47.28,
      "eval_anthropic_toxic_prompts_accuracy": 0.11778125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.3545482265217457,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1358527323889524,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6918302774429321,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009046826959763976,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.177814245223999,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.478,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.04,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.78,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.738,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 23.994250645782653,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.221042587001745,
      "eval_anthropic_toxic_prompts_runtime": 11.2064,
      "eval_anthropic_toxic_prompts_samples_per_second": 44.617,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.089,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3645269389199886,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006546077958475243,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.453796950926804,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3304425966327437,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 246250
    },
    {
      "epoch": 47.28,
      "eval_arxiv_accuracy": 0.354125,
      "eval_arxiv_bleu_score": 4.585063696401905,
      "eval_arxiv_bleu_score_sem": 0.13507661118758754,
      "eval_arxiv_emb_cos_sim": 0.7911890149116516,
      "eval_arxiv_emb_cos_sim_sem": 0.006057483362332366,
      "eval_arxiv_emb_top1_equal": 0.2578125,
      "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3176190853118896,
      "eval_arxiv_n_ngrams_match_1": 15.744,
      "eval_arxiv_n_ngrams_match_2": 3.086,
      "eval_arxiv_n_ngrams_match_3": 0.712,
      "eval_arxiv_num_pred_words": 40.916,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.594571961281417,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3746047285374328,
      "eval_arxiv_runtime": 11.5567,
      "eval_arxiv_samples_per_second": 43.265,
      "eval_arxiv_steps_per_second": 0.087,
      "eval_arxiv_token_set_f1": 0.3682811102187006,
      "eval_arxiv_token_set_f1_sem": 0.0041616267102067925,
      "eval_arxiv_token_set_precision": 0.3207730655900764,
      "eval_arxiv_token_set_recall": 0.4463019354585246,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 246250
    },
    {
      "epoch": 47.28,
      "eval_python_code_alpaca_accuracy": 0.1626875,
      "eval_python_code_alpaca_bleu_score": 4.826769904015877,
      "eval_python_code_alpaca_bleu_score_sem": 0.15539924185609735,
      "eval_python_code_alpaca_emb_cos_sim": 0.7731510400772095,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007862736275541711,
      "eval_python_code_alpaca_emb_top1_equal": 0.1953125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.035178457165496856,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.843956232070923,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.076,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.054,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.048,
      "eval_python_code_alpaca_num_pred_words": 43.328,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.183613600529306,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.34707660506223303,
      "eval_python_code_alpaca_runtime": 11.1597,
      "eval_python_code_alpaca_samples_per_second": 44.804,
      "eval_python_code_alpaca_steps_per_second": 0.09,
      "eval_python_code_alpaca_token_set_f1": 0.48806449492791165,
      "eval_python_code_alpaca_token_set_f1_sem": 0.00546412908273593,
      "eval_python_code_alpaca_token_set_precision": 0.5511415035134234,
      "eval_python_code_alpaca_token_set_recall": 0.46355931959320923,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 246250
    },
    {
      "epoch": 47.28,
      "eval_wikibio_accuracy": 0.332625,
      "eval_wikibio_bleu_score": 6.085197534410365,
      "eval_wikibio_bleu_score_sem": 0.21429132046116717,
      "eval_wikibio_emb_cos_sim": 0.7510096430778503,
      "eval_wikibio_emb_cos_sim_sem": 0.007621918402773574,
      "eval_wikibio_emb_top1_equal": 0.2109375,
      "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.580207586288452,
      "eval_wikibio_n_ngrams_match_1": 9.964,
      "eval_wikibio_n_ngrams_match_2": 3.378,
      "eval_wikibio_n_ngrams_match_3": 1.244,
      "eval_wikibio_num_pred_words": 35.164,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 35.88098847524684,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3560414074782632,
      "eval_wikibio_runtime": 11.6049,
      "eval_wikibio_samples_per_second": 43.085,
      "eval_wikibio_steps_per_second": 0.086,
      "eval_wikibio_token_set_f1": 0.3175300190040947,
      "eval_wikibio_token_set_f1_sem": 0.0057529849220731896,
      "eval_wikibio_token_set_precision": 0.32446516253160845,
      "eval_wikibio_token_set_recall": 0.3277066342158966,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 246250
    },
    {
      "epoch": 47.28,
      "eval_nq_accuracy": 0.53934375,
      "eval_nq_bleu_score": 12.24617707581566,
      "eval_nq_bleu_score_sem": 0.48214842429722665,
      "eval_nq_emb_cos_sim": 0.8433565497398376,
      "eval_nq_emb_cos_sim_sem": 0.007000108165990639,
      "eval_nq_emb_top1_equal": 0.2734375,
      "eval_nq_emb_top1_equal_sem": 0.03955156411760461,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.104877471923828,
      "eval_nq_n_ngrams_match_1": 23.542,
      "eval_nq_n_ngrams_match_2": 8.894,
      "eval_nq_n_ngrams_match_3": 4.116,
      "eval_nq_num_pred_words": 48.858,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.206097470657733,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4590039839001292,
      "eval_nq_runtime": 36.7928,
      "eval_nq_samples_per_second": 13.59,
      "eval_nq_steps_per_second": 0.027,
      "eval_nq_token_set_f1": 0.47020593382796755,
      "eval_nq_token_set_f1_sem": 0.004965353417429101,
      "eval_nq_token_set_precision": 0.4283757020782135,
      "eval_nq_token_set_recall": 0.5287112408349433,
      "eval_nq_true_num_tokens": 64.0,
      "step": 246250
    },
    {
      "epoch": 47.28,
      "learning_rate": 0.001,
      "loss": 2.4755,
      "step": 246252
    },
    {
      "epoch": 47.29,
      "learning_rate": 0.001,
      "loss": 2.4812,
      "step": 246264
    },
    {
      "epoch": 47.29,
      "learning_rate": 0.001,
      "loss": 2.4796,
      "step": 246276
    },
    {
      "epoch": 47.29,
      "learning_rate": 0.001,
      "loss": 2.4771,
      "step": 246288
    },
    {
      "epoch": 47.29,
      "learning_rate": 0.001,
      "loss": 2.4894,
      "step": 246300
    },
    {
      "epoch": 47.29,
      "learning_rate": 0.001,
      "loss": 2.4703,
      "step": 246312
    },
    {
      "epoch": 47.3,
      "learning_rate": 0.001,
      "loss": 2.4815,
      "step": 246324
    },
    {
      "epoch": 47.3,
      "learning_rate": 0.001,
      "loss": 2.4827,
      "step": 246336
    },
    {
      "epoch": 47.3,
      "learning_rate": 0.001,
      "loss": 2.4795,
      "step": 246348
    },
    {
      "epoch": 47.3,
      "learning_rate": 0.001,
      "loss": 2.4779,
      "step": 246360
    },
    {
      "epoch": 47.31,
      "learning_rate": 0.001,
      "loss": 2.4848,
      "step": 246372
    },
    {
      "epoch": 47.31,
      "learning_rate": 0.001,
      "loss": 2.4775,
      "step": 246384
    },
    {
      "epoch": 47.31,
      "learning_rate": 0.001,
      "loss": 2.4823,
      "step": 246396
    },
    {
      "epoch": 47.31,
      "learning_rate": 0.001,
      "loss": 2.4887,
      "step": 246408
    },
    {
      "epoch": 47.32,
      "learning_rate": 0.001,
      "loss": 2.4854,
      "step": 246420
    },
    {
      "epoch": 47.32,
      "learning_rate": 0.001,
      "loss": 2.4818,
      "step": 246432
    },
    {
      "epoch": 47.32,
      "learning_rate": 0.001,
      "loss": 2.4779,
      "step": 246444
    },
    {
      "epoch": 47.32,
      "learning_rate": 0.001,
      "loss": 2.4798,
      "step": 246456
    },
    {
      "epoch": 47.32,
      "learning_rate": 0.001,
      "loss": 2.4799,
      "step": 246468
    },
    {
      "epoch": 47.33,
      "learning_rate": 0.001,
      "loss": 2.4891,
      "step": 246480
    },
    {
      "epoch": 47.33,
      "learning_rate": 0.001,
      "loss": 2.4733,
      "step": 246492
    },
    {
      "epoch": 47.33,
      "learning_rate": 0.001,
      "loss": 2.472,
      "step": 246504
    },
    {
      "epoch": 47.33,
      "learning_rate": 0.001,
      "loss": 2.4761,
      "step": 246516
    },
    {
      "epoch": 47.34,
      "learning_rate": 0.001,
      "loss": 2.4868,
      "step": 246528
    },
    {
      "epoch": 47.34,
      "learning_rate": 0.001,
      "loss": 2.4714,
      "step": 246540
    },
    {
      "epoch": 47.34,
      "learning_rate": 0.001,
      "loss": 2.4769,
      "step": 246552
    },
    {
      "epoch": 47.34,
      "learning_rate": 0.001,
      "loss": 2.4857,
      "step": 246564
    },
    {
      "epoch": 47.35,
      "learning_rate": 0.001,
      "loss": 2.4822,
      "step": 246576
    },
    {
      "epoch": 47.35,
      "learning_rate": 0.001,
      "loss": 2.4808,
      "step": 246588
    },
    {
      "epoch": 47.35,
      "learning_rate": 0.001,
      "loss": 2.4905,
      "step": 246600
    },
    {
      "epoch": 47.35,
      "learning_rate": 0.001,
      "loss": 2.4792,
      "step": 246612
    },
    {
      "epoch": 47.35,
      "learning_rate": 0.001,
      "loss": 2.4827,
      "step": 246624
    },
    {
      "epoch": 47.36,
      "learning_rate": 0.001,
      "loss": 2.4864,
      "step": 246636
    },
    {
      "epoch": 47.36,
      "learning_rate": 0.001,
      "loss": 2.4805,
      "step": 246648
    },
    {
      "epoch": 47.36,
      "learning_rate": 0.001,
      "loss": 2.4888,
      "step": 246660
    },
    {
      "epoch": 47.36,
      "learning_rate": 0.001,
      "loss": 2.4746,
      "step": 246672
    },
    {
      "epoch": 47.37,
      "learning_rate": 0.001,
      "loss": 2.4913,
      "step": 246684
    },
    {
      "epoch": 47.37,
      "learning_rate": 0.001,
      "loss": 2.4847,
      "step": 246696
    },
    {
      "epoch": 47.37,
      "learning_rate": 0.001,
      "loss": 2.4673,
      "step": 246708
    },
    {
      "epoch": 47.37,
      "learning_rate": 0.001,
      "loss": 2.4821,
      "step": 246720
    },
    {
      "epoch": 47.38,
      "learning_rate": 0.001,
      "loss": 2.4818,
      "step": 246732
    },
    {
      "epoch": 47.38,
      "learning_rate": 0.001,
      "loss": 2.479,
      "step": 246744
    },
    {
      "epoch": 47.38,
      "learning_rate": 0.001,
      "loss": 2.4766,
      "step": 246756
    },
    {
      "epoch": 47.38,
      "learning_rate": 0.001,
      "loss": 2.4832,
      "step": 246768
    },
    {
      "epoch": 47.38,
      "learning_rate": 0.001,
      "loss": 2.4891,
      "step": 246780
    },
    {
      "epoch": 47.39,
      "learning_rate": 0.001,
      "loss": 2.4813,
      "step": 246792
    },
    {
      "epoch": 47.39,
      "learning_rate": 0.001,
      "loss": 2.4965,
      "step": 246804
    },
    {
      "epoch": 47.39,
      "learning_rate": 0.001,
      "loss": 2.4798,
      "step": 246816
    },
    {
      "epoch": 47.39,
      "learning_rate": 0.001,
      "loss": 2.477,
      "step": 246828
    },
    {
      "epoch": 47.4,
      "learning_rate": 0.001,
      "loss": 2.4846,
      "step": 246840
    },
    {
      "epoch": 47.4,
      "learning_rate": 0.001,
      "loss": 2.4808,
      "step": 246852
    },
    {
      "epoch": 47.4,
      "learning_rate": 0.001,
      "loss": 2.4806,
      "step": 246864
    },
    {
      "epoch": 47.4,
      "eval_ag_news_accuracy": 0.33125,
      "eval_ag_news_bleu_score": 4.95748454221818,
      "eval_ag_news_bleu_score_sem": 0.15717453753524005,
      "eval_ag_news_emb_cos_sim": 0.8210819363594055,
      "eval_ag_news_emb_cos_sim_sem": 0.006851312010045467,
      "eval_ag_news_emb_top1_equal": 0.25,
      "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4686532020568848,
      "eval_ag_news_n_ngrams_match_1": 14.724,
      "eval_ag_news_n_ngrams_match_2": 3.256,
      "eval_ag_news_n_ngrams_match_3": 0.94,
      "eval_ag_news_num_pred_words": 47.106,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 32.09348987890135,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3636885512137098,
      "eval_ag_news_runtime": 16.3839,
      "eval_ag_news_samples_per_second": 30.518,
      "eval_ag_news_steps_per_second": 0.061,
      "eval_ag_news_token_set_f1": 0.36612103356267095,
      "eval_ag_news_token_set_f1_sem": 0.004392574909400384,
      "eval_ag_news_token_set_precision": 0.35428537255538034,
      "eval_ag_news_token_set_recall": 0.39319283618688394,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 246875
    },
    {
      "epoch": 47.4,
      "eval_anthropic_toxic_prompts_accuracy": 0.1178125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2521426563901223,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12029724585942032,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6833691000938416,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008713470412734107,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.1902172565460205,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.324,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.008,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.77,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.728,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.293704836202455,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21781817418923197,
      "eval_anthropic_toxic_prompts_runtime": 32.4497,
      "eval_anthropic_toxic_prompts_samples_per_second": 15.408,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.031,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35963204052864417,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006591739026702393,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4453172756458448,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3290254931467475,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 246875
    },
    {
      "epoch": 47.4,
      "eval_arxiv_accuracy": 0.3550625,
      "eval_arxiv_bleu_score": 4.568349374104083,
      "eval_arxiv_bleu_score_sem": 0.13337471206948545,
      "eval_arxiv_emb_cos_sim": 0.7892099618911743,
      "eval_arxiv_emb_cos_sim_sem": 0.005877521587546912,
      "eval_arxiv_emb_top1_equal": 0.2421875,
      "eval_arxiv_emb_top1_equal_sem": 0.038014990119662626,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.315105438232422,
      "eval_arxiv_n_ngrams_match_1": 15.916,
      "eval_arxiv_n_ngrams_match_2": 3.196,
      "eval_arxiv_n_ngrams_match_3": 0.7,
      "eval_arxiv_num_pred_words": 41.246,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.525296050133843,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.37910221318098236,
      "eval_arxiv_runtime": 15.0963,
      "eval_arxiv_samples_per_second": 33.121,
      "eval_arxiv_steps_per_second": 0.066,
      "eval_arxiv_token_set_f1": 0.3736075627778935,
      "eval_arxiv_token_set_f1_sem": 0.004002237537676729,
      "eval_arxiv_token_set_precision": 0.32596646971140175,
      "eval_arxiv_token_set_recall": 0.45409043922187126,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 246875
    },
    {
      "epoch": 47.4,
      "eval_python_code_alpaca_accuracy": 0.16396875,
      "eval_python_code_alpaca_bleu_score": 4.7093845402385535,
      "eval_python_code_alpaca_bleu_score_sem": 0.15228736223768255,
      "eval_python_code_alpaca_emb_cos_sim": 0.764851450920105,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.009325005379558987,
      "eval_python_code_alpaca_emb_top1_equal": 0.1640625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8560683727264404,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.982,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.978,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.998,
      "eval_python_code_alpaca_num_pred_words": 43.442,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.393009501805874,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.34056783123403533,
      "eval_python_code_alpaca_runtime": 11.1072,
      "eval_python_code_alpaca_samples_per_second": 45.016,
      "eval_python_code_alpaca_steps_per_second": 0.09,
      "eval_python_code_alpaca_token_set_f1": 0.4801650704960793,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005775790764804429,
      "eval_python_code_alpaca_token_set_precision": 0.5470332982090649,
      "eval_python_code_alpaca_token_set_recall": 0.45306159639444776,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 246875
    },
    {
      "epoch": 47.4,
      "eval_wikibio_accuracy": 0.330625,
      "eval_wikibio_bleu_score": 6.1075811307460315,
      "eval_wikibio_bleu_score_sem": 0.209743417085095,
      "eval_wikibio_emb_cos_sim": 0.7490042448043823,
      "eval_wikibio_emb_cos_sim_sem": 0.00874728930739705,
      "eval_wikibio_emb_top1_equal": 0.25,
      "eval_wikibio_emb_top1_equal_sem": 0.03842366440207048,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.5973165035247803,
      "eval_wikibio_n_ngrams_match_1": 10.404,
      "eval_wikibio_n_ngrams_match_2": 3.508,
      "eval_wikibio_n_ngrams_match_3": 1.292,
      "eval_wikibio_num_pred_words": 36.562,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 36.500154867504754,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3624132932822166,
      "eval_wikibio_runtime": 11.2427,
      "eval_wikibio_samples_per_second": 44.473,
      "eval_wikibio_steps_per_second": 0.089,
      "eval_wikibio_token_set_f1": 0.32757770210092213,
      "eval_wikibio_token_set_f1_sem": 0.005386957918527208,
      "eval_wikibio_token_set_precision": 0.33755201264840085,
      "eval_wikibio_token_set_recall": 0.33488611119247536,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 246875
    },
    {
      "epoch": 47.4,
      "eval_nq_accuracy": 0.5394375,
      "eval_nq_bleu_score": 12.437865973147138,
      "eval_nq_bleu_score_sem": 0.5012949543237201,
      "eval_nq_emb_cos_sim": 0.837842583656311,
      "eval_nq_emb_cos_sim_sem": 0.00682203838957662,
      "eval_nq_emb_top1_equal": 0.28125,
      "eval_nq_emb_top1_equal_sem": 0.039896367485272234,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1043810844421387,
      "eval_nq_n_ngrams_match_1": 23.618,
      "eval_nq_n_ngrams_match_2": 8.926,
      "eval_nq_n_ngrams_match_3": 4.202,
      "eval_nq_num_pred_words": 49.09,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.202025077425905,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4611810921748841,
      "eval_nq_runtime": 11.9935,
      "eval_nq_samples_per_second": 41.689,
      "eval_nq_steps_per_second": 0.083,
      "eval_nq_token_set_f1": 0.472476325106537,
      "eval_nq_token_set_f1_sem": 0.005051576596544739,
      "eval_nq_token_set_precision": 0.43133373470389247,
      "eval_nq_token_set_recall": 0.5310151216208895,
      "eval_nq_true_num_tokens": 64.0,
      "step": 246875
    },
    {
      "epoch": 47.4,
      "learning_rate": 0.001,
      "loss": 2.4819,
      "step": 246876
    },
    {
      "epoch": 47.41,
      "learning_rate": 0.001,
      "loss": 2.4839,
      "step": 246888
    },
    {
      "epoch": 47.41,
      "learning_rate": 0.001,
      "loss": 2.4762,
      "step": 246900
    },
    {
      "epoch": 47.41,
      "learning_rate": 0.001,
      "loss": 2.4828,
      "step": 246912
    },
    {
      "epoch": 47.41,
      "learning_rate": 0.001,
      "loss": 2.4712,
      "step": 246924
    },
    {
      "epoch": 47.41,
      "learning_rate": 0.001,
      "loss": 2.4764,
      "step": 246936
    },
    {
      "epoch": 47.42,
      "learning_rate": 0.001,
      "loss": 2.4904,
      "step": 246948
    },
    {
      "epoch": 47.42,
      "learning_rate": 0.001,
      "loss": 2.4725,
      "step": 246960
    },
    {
      "epoch": 47.42,
      "learning_rate": 0.001,
      "loss": 2.4867,
      "step": 246972
    },
    {
      "epoch": 47.42,
      "learning_rate": 0.001,
      "loss": 2.4714,
      "step": 246984
    },
    {
      "epoch": 47.43,
      "learning_rate": 0.001,
      "loss": 2.473,
      "step": 246996
    },
    {
      "epoch": 47.43,
      "learning_rate": 0.001,
      "loss": 2.4778,
      "step": 247008
    },
    {
      "epoch": 47.43,
      "learning_rate": 0.001,
      "loss": 2.4914,
      "step": 247020
    },
    {
      "epoch": 47.43,
      "learning_rate": 0.001,
      "loss": 2.4722,
      "step": 247032
    },
    {
      "epoch": 47.44,
      "learning_rate": 0.001,
      "loss": 2.4867,
      "step": 247044
    },
    {
      "epoch": 47.44,
      "learning_rate": 0.001,
      "loss": 2.4774,
      "step": 247056
    },
    {
      "epoch": 47.44,
      "learning_rate": 0.001,
      "loss": 2.4834,
      "step": 247068
    },
    {
      "epoch": 47.44,
      "learning_rate": 0.001,
      "loss": 2.4805,
      "step": 247080
    },
    {
      "epoch": 47.44,
      "learning_rate": 0.001,
      "loss": 2.4815,
      "step": 247092
    },
    {
      "epoch": 47.45,
      "learning_rate": 0.001,
      "loss": 2.4946,
      "step": 247104
    },
    {
      "epoch": 47.45,
      "learning_rate": 0.001,
      "loss": 2.4899,
      "step": 247116
    },
    {
      "epoch": 47.45,
      "learning_rate": 0.001,
      "loss": 2.4881,
      "step": 247128
    },
    {
      "epoch": 47.45,
      "learning_rate": 0.001,
      "loss": 2.4779,
      "step": 247140
    },
    {
      "epoch": 47.46,
      "learning_rate": 0.001,
      "loss": 2.4854,
      "step": 247152
    },
    {
      "epoch": 47.46,
      "learning_rate": 0.001,
      "loss": 2.4838,
      "step": 247164
    },
    {
      "epoch": 47.46,
      "learning_rate": 0.001,
      "loss": 2.4776,
      "step": 247176
    },
    {
      "epoch": 47.46,
      "learning_rate": 0.001,
      "loss": 2.475,
      "step": 247188
    },
    {
      "epoch": 47.47,
      "learning_rate": 0.001,
      "loss": 2.4847,
      "step": 247200
    },
    {
      "epoch": 47.47,
      "learning_rate": 0.001,
      "loss": 2.483,
      "step": 247212
    },
    {
      "epoch": 47.47,
      "learning_rate": 0.001,
      "loss": 2.4801,
      "step": 247224
    },
    {
      "epoch": 47.47,
      "learning_rate": 0.001,
      "loss": 2.4935,
      "step": 247236
    },
    {
      "epoch": 47.47,
      "learning_rate": 0.001,
      "loss": 2.4793,
      "step": 247248
    },
    {
      "epoch": 47.48,
      "learning_rate": 0.001,
      "loss": 2.4876,
      "step": 247260
    },
    {
      "epoch": 47.48,
      "learning_rate": 0.001,
      "loss": 2.4937,
      "step": 247272
    },
    {
      "epoch": 47.48,
      "learning_rate": 0.001,
      "loss": 2.4826,
      "step": 247284
    },
    {
      "epoch": 47.48,
      "learning_rate": 0.001,
      "loss": 2.4772,
      "step": 247296
    },
    {
      "epoch": 47.49,
      "learning_rate": 0.001,
      "loss": 2.474,
      "step": 247308
    },
    {
      "epoch": 47.49,
      "learning_rate": 0.001,
      "loss": 2.4781,
      "step": 247320
    },
    {
      "epoch": 47.49,
      "learning_rate": 0.001,
      "loss": 2.4865,
      "step": 247332
    },
    {
      "epoch": 47.49,
      "learning_rate": 0.001,
      "loss": 2.4802,
      "step": 247344
    },
    {
      "epoch": 47.5,
      "learning_rate": 0.001,
      "loss": 2.4853,
      "step": 247356
    },
    {
      "epoch": 47.5,
      "learning_rate": 0.001,
      "loss": 2.4808,
      "step": 247368
    },
    {
      "epoch": 47.5,
      "learning_rate": 0.001,
      "loss": 2.4746,
      "step": 247380
    },
    {
      "epoch": 47.5,
      "learning_rate": 0.001,
      "loss": 2.4784,
      "step": 247392
    },
    {
      "epoch": 47.5,
      "learning_rate": 0.001,
      "loss": 2.4812,
      "step": 247404
    },
    {
      "epoch": 47.51,
      "learning_rate": 0.001,
      "loss": 2.4945,
      "step": 247416
    },
    {
      "epoch": 47.51,
      "learning_rate": 0.001,
      "loss": 2.4845,
      "step": 247428
    },
    {
      "epoch": 47.51,
      "learning_rate": 0.001,
      "loss": 2.4745,
      "step": 247440
    },
    {
      "epoch": 47.51,
      "learning_rate": 0.001,
      "loss": 2.4785,
      "step": 247452
    },
    {
      "epoch": 47.52,
      "learning_rate": 0.001,
      "loss": 2.478,
      "step": 247464
    },
    {
      "epoch": 47.52,
      "learning_rate": 0.001,
      "loss": 2.4712,
      "step": 247476
    },
    {
      "epoch": 47.52,
      "learning_rate": 0.001,
      "loss": 2.4739,
      "step": 247488
    },
    {
      "epoch": 47.52,
      "learning_rate": 0.001,
      "loss": 2.4857,
      "step": 247500
    },
    {
      "epoch": 47.52,
      "eval_ag_news_accuracy": 0.33175,
      "eval_ag_news_bleu_score": 5.029422770897393,
      "eval_ag_news_bleu_score_sem": 0.16380101756335028,
      "eval_ag_news_emb_cos_sim": 0.8288478851318359,
      "eval_ag_news_emb_cos_sim_sem": 0.006212303883436563,
      "eval_ag_news_emb_top1_equal": 0.25,
      "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4597585201263428,
      "eval_ag_news_n_ngrams_match_1": 14.672,
      "eval_ag_news_n_ngrams_match_2": 3.298,
      "eval_ag_news_n_ngrams_match_3": 0.94,
      "eval_ag_news_num_pred_words": 47.014,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.809294282788297,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3642090152251275,
      "eval_ag_news_runtime": 12.0264,
      "eval_ag_news_samples_per_second": 41.575,
      "eval_ag_news_steps_per_second": 0.083,
      "eval_ag_news_token_set_f1": 0.3624695242294932,
      "eval_ag_news_token_set_f1_sem": 0.004342850011884466,
      "eval_ag_news_token_set_precision": 0.35081021556791264,
      "eval_ag_news_token_set_recall": 0.39002082699221696,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 247500
    },
    {
      "epoch": 47.52,
      "eval_anthropic_toxic_prompts_accuracy": 0.1168125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.3159917800262,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1199578645385769,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6887571811676025,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009510374463299488,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.1992640495300293,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.498,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.1,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.81,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.916,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.514482112044163,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21999013113870575,
      "eval_anthropic_toxic_prompts_runtime": 11.4006,
      "eval_anthropic_toxic_prompts_samples_per_second": 43.857,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.088,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36671549708315293,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006487966368412724,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4545445209656313,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.33112234474259844,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 247500
    },
    {
      "epoch": 47.52,
      "eval_arxiv_accuracy": 0.35709375,
      "eval_arxiv_bleu_score": 4.55082629701745,
      "eval_arxiv_bleu_score_sem": 0.12964846411783915,
      "eval_arxiv_emb_cos_sim": 0.7826601266860962,
      "eval_arxiv_emb_cos_sim_sem": 0.006554796418425676,
      "eval_arxiv_emb_top1_equal": 0.2578125,
      "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.308187961578369,
      "eval_arxiv_n_ngrams_match_1": 15.87,
      "eval_arxiv_n_ngrams_match_2": 3.118,
      "eval_arxiv_n_ngrams_match_3": 0.718,
      "eval_arxiv_num_pred_words": 41.796,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.33554750452353,
      "eval_arxiv_pred_num_tokens": 62.9765625,
      "eval_arxiv_rouge_score": 0.3761210040313659,
      "eval_arxiv_runtime": 11.4119,
      "eval_arxiv_samples_per_second": 43.814,
      "eval_arxiv_steps_per_second": 0.088,
      "eval_arxiv_token_set_f1": 0.3695147895228119,
      "eval_arxiv_token_set_f1_sem": 0.004085653139148685,
      "eval_arxiv_token_set_precision": 0.3232452985980358,
      "eval_arxiv_token_set_recall": 0.44458742916011473,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 247500
    },
    {
      "epoch": 47.52,
      "eval_python_code_alpaca_accuracy": 0.16459375,
      "eval_python_code_alpaca_bleu_score": 4.853859947409812,
      "eval_python_code_alpaca_bleu_score_sem": 0.15523373529222304,
      "eval_python_code_alpaca_emb_cos_sim": 0.7744649052619934,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008070760607445831,
      "eval_python_code_alpaca_emb_top1_equal": 0.1171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.820350408554077,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.984,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.058,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.066,
      "eval_python_code_alpaca_num_pred_words": 44.282,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 16.782730454228673,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.33825892757214127,
      "eval_python_code_alpaca_runtime": 11.173,
      "eval_python_code_alpaca_samples_per_second": 44.751,
      "eval_python_code_alpaca_steps_per_second": 0.09,
      "eval_python_code_alpaca_token_set_f1": 0.4827229738097629,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005685745307924299,
      "eval_python_code_alpaca_token_set_precision": 0.5498215139490301,
      "eval_python_code_alpaca_token_set_recall": 0.4569048945331764,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 247500
    },
    {
      "epoch": 47.52,
      "eval_wikibio_accuracy": 0.33390625,
      "eval_wikibio_bleu_score": 6.296963039401893,
      "eval_wikibio_bleu_score_sem": 0.21705758597565802,
      "eval_wikibio_emb_cos_sim": 0.7550753951072693,
      "eval_wikibio_emb_cos_sim_sem": 0.008168087558825105,
      "eval_wikibio_emb_top1_equal": 0.234375,
      "eval_wikibio_emb_top1_equal_sem": 0.03758909358128201,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6192288398742676,
      "eval_wikibio_n_ngrams_match_1": 10.388,
      "eval_wikibio_n_ngrams_match_2": 3.556,
      "eval_wikibio_n_ngrams_match_3": 1.346,
      "eval_wikibio_num_pred_words": 36.542,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.30878567780481,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36517735316905797,
      "eval_wikibio_runtime": 11.0875,
      "eval_wikibio_samples_per_second": 45.096,
      "eval_wikibio_steps_per_second": 0.09,
      "eval_wikibio_token_set_f1": 0.3285122624127821,
      "eval_wikibio_token_set_f1_sem": 0.0053412733078049245,
      "eval_wikibio_token_set_precision": 0.33708939106803176,
      "eval_wikibio_token_set_recall": 0.3357096609863899,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 247500
    },
    {
      "epoch": 47.52,
      "eval_nq_accuracy": 0.54084375,
      "eval_nq_bleu_score": 12.38619399887228,
      "eval_nq_bleu_score_sem": 0.504320471283675,
      "eval_nq_emb_cos_sim": 0.8339306712150574,
      "eval_nq_emb_cos_sim_sem": 0.007504061326357938,
      "eval_nq_emb_top1_equal": 0.34375,
      "eval_nq_emb_top1_equal_sem": 0.04214578430296913,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1038002967834473,
      "eval_nq_n_ngrams_match_1": 23.684,
      "eval_nq_n_ngrams_match_2": 8.902,
      "eval_nq_n_ngrams_match_3": 4.222,
      "eval_nq_num_pred_words": 49.35,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.197262825547082,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45790492008081407,
      "eval_nq_runtime": 12.7857,
      "eval_nq_samples_per_second": 39.106,
      "eval_nq_steps_per_second": 0.078,
      "eval_nq_token_set_f1": 0.4723201313880448,
      "eval_nq_token_set_f1_sem": 0.005060248364909892,
      "eval_nq_token_set_precision": 0.4317872366237719,
      "eval_nq_token_set_recall": 0.5298413138759623,
      "eval_nq_true_num_tokens": 64.0,
      "step": 247500
    },
    {
      "epoch": 47.53,
      "learning_rate": 0.001,
      "loss": 2.4815,
      "step": 247512
    },
    {
      "epoch": 47.53,
      "learning_rate": 0.001,
      "loss": 2.4827,
      "step": 247524
    },
    {
      "epoch": 47.53,
      "learning_rate": 0.001,
      "loss": 2.4861,
      "step": 247536
    },
    {
      "epoch": 47.53,
      "learning_rate": 0.001,
      "loss": 2.4859,
      "step": 247548
    },
    {
      "epoch": 47.53,
      "learning_rate": 0.001,
      "loss": 2.4864,
      "step": 247560
    },
    {
      "epoch": 47.54,
      "learning_rate": 0.001,
      "loss": 2.4809,
      "step": 247572
    },
    {
      "epoch": 47.54,
      "learning_rate": 0.001,
      "loss": 2.4852,
      "step": 247584
    },
    {
      "epoch": 47.54,
      "learning_rate": 0.001,
      "loss": 2.4839,
      "step": 247596
    },
    {
      "epoch": 47.54,
      "learning_rate": 0.001,
      "loss": 2.4755,
      "step": 247608
    },
    {
      "epoch": 47.55,
      "learning_rate": 0.001,
      "loss": 2.4766,
      "step": 247620
    },
    {
      "epoch": 47.55,
      "learning_rate": 0.001,
      "loss": 2.4822,
      "step": 247632
    },
    {
      "epoch": 47.55,
      "learning_rate": 0.001,
      "loss": 2.4787,
      "step": 247644
    },
    {
      "epoch": 47.55,
      "learning_rate": 0.001,
      "loss": 2.4762,
      "step": 247656
    },
    {
      "epoch": 47.56,
      "learning_rate": 0.001,
      "loss": 2.4833,
      "step": 247668
    },
    {
      "epoch": 47.56,
      "learning_rate": 0.001,
      "loss": 2.478,
      "step": 247680
    },
    {
      "epoch": 47.56,
      "learning_rate": 0.001,
      "loss": 2.491,
      "step": 247692
    },
    {
      "epoch": 47.56,
      "learning_rate": 0.001,
      "loss": 2.4805,
      "step": 247704
    },
    {
      "epoch": 47.56,
      "learning_rate": 0.001,
      "loss": 2.479,
      "step": 247716
    },
    {
      "epoch": 47.57,
      "learning_rate": 0.001,
      "loss": 2.4813,
      "step": 247728
    },
    {
      "epoch": 47.57,
      "learning_rate": 0.001,
      "loss": 2.4787,
      "step": 247740
    },
    {
      "epoch": 47.57,
      "learning_rate": 0.001,
      "loss": 2.4919,
      "step": 247752
    },
    {
      "epoch": 47.57,
      "learning_rate": 0.001,
      "loss": 2.4705,
      "step": 247764
    },
    {
      "epoch": 47.58,
      "learning_rate": 0.001,
      "loss": 2.4765,
      "step": 247776
    },
    {
      "epoch": 47.58,
      "learning_rate": 0.001,
      "loss": 2.4793,
      "step": 247788
    },
    {
      "epoch": 47.58,
      "learning_rate": 0.001,
      "loss": 2.4982,
      "step": 247800
    },
    {
      "epoch": 47.58,
      "learning_rate": 0.001,
      "loss": 2.4877,
      "step": 247812
    },
    {
      "epoch": 47.59,
      "learning_rate": 0.001,
      "loss": 2.4852,
      "step": 247824
    },
    {
      "epoch": 47.59,
      "learning_rate": 0.001,
      "loss": 2.4766,
      "step": 247836
    },
    {
      "epoch": 47.59,
      "learning_rate": 0.001,
      "loss": 2.4843,
      "step": 247848
    },
    {
      "epoch": 47.59,
      "learning_rate": 0.001,
      "loss": 2.4887,
      "step": 247860
    },
    {
      "epoch": 47.59,
      "learning_rate": 0.001,
      "loss": 2.4871,
      "step": 247872
    },
    {
      "epoch": 47.6,
      "learning_rate": 0.001,
      "loss": 2.4727,
      "step": 247884
    },
    {
      "epoch": 47.6,
      "learning_rate": 0.001,
      "loss": 2.4774,
      "step": 247896
    },
    {
      "epoch": 47.6,
      "learning_rate": 0.001,
      "loss": 2.4849,
      "step": 247908
    },
    {
      "epoch": 47.6,
      "learning_rate": 0.001,
      "loss": 2.4818,
      "step": 247920
    },
    {
      "epoch": 47.61,
      "learning_rate": 0.001,
      "loss": 2.47,
      "step": 247932
    },
    {
      "epoch": 47.61,
      "learning_rate": 0.001,
      "loss": 2.4845,
      "step": 247944
    },
    {
      "epoch": 47.61,
      "learning_rate": 0.001,
      "loss": 2.4705,
      "step": 247956
    },
    {
      "epoch": 47.61,
      "learning_rate": 0.001,
      "loss": 2.4761,
      "step": 247968
    },
    {
      "epoch": 47.62,
      "learning_rate": 0.001,
      "loss": 2.484,
      "step": 247980
    },
    {
      "epoch": 47.62,
      "learning_rate": 0.001,
      "loss": 2.4843,
      "step": 247992
    },
    {
      "epoch": 47.62,
      "learning_rate": 0.001,
      "loss": 2.475,
      "step": 248004
    },
    {
      "epoch": 47.62,
      "learning_rate": 0.001,
      "loss": 2.4837,
      "step": 248016
    },
    {
      "epoch": 47.62,
      "learning_rate": 0.001,
      "loss": 2.4839,
      "step": 248028
    },
    {
      "epoch": 47.63,
      "learning_rate": 0.001,
      "loss": 2.4808,
      "step": 248040
    },
    {
      "epoch": 47.63,
      "learning_rate": 0.001,
      "loss": 2.4874,
      "step": 248052
    },
    {
      "epoch": 47.63,
      "learning_rate": 0.001,
      "loss": 2.4925,
      "step": 248064
    },
    {
      "epoch": 47.63,
      "learning_rate": 0.001,
      "loss": 2.4883,
      "step": 248076
    },
    {
      "epoch": 47.64,
      "learning_rate": 0.001,
      "loss": 2.4914,
      "step": 248088
    },
    {
      "epoch": 47.64,
      "learning_rate": 0.001,
      "loss": 2.479,
      "step": 248100
    },
    {
      "epoch": 47.64,
      "learning_rate": 0.001,
      "loss": 2.4816,
      "step": 248112
    },
    {
      "epoch": 47.64,
      "learning_rate": 0.001,
      "loss": 2.4875,
      "step": 248124
    },
    {
      "epoch": 47.64,
      "eval_ag_news_accuracy": 0.3326875,
      "eval_ag_news_bleu_score": 4.991878825321206,
      "eval_ag_news_bleu_score_sem": 0.1594841729142032,
      "eval_ag_news_emb_cos_sim": 0.8210830688476562,
      "eval_ag_news_emb_cos_sim_sem": 0.007206565820510269,
      "eval_ag_news_emb_top1_equal": 0.28125,
      "eval_ag_news_emb_top1_equal_sem": 0.039896367485272234,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4462733268737793,
      "eval_ag_news_n_ngrams_match_1": 14.49,
      "eval_ag_news_n_ngrams_match_2": 3.268,
      "eval_ag_news_n_ngrams_match_3": 0.902,
      "eval_ag_news_num_pred_words": 46.324,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.38321911197659,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3623592694074649,
      "eval_ag_news_runtime": 12.0906,
      "eval_ag_news_samples_per_second": 41.354,
      "eval_ag_news_steps_per_second": 0.083,
      "eval_ag_news_token_set_f1": 0.36091635249547704,
      "eval_ag_news_token_set_f1_sem": 0.004384558194972008,
      "eval_ag_news_token_set_precision": 0.3475602129415628,
      "eval_ag_news_token_set_recall": 0.39079082250721275,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 248125
    },
    {
      "epoch": 47.64,
      "eval_anthropic_toxic_prompts_accuracy": 0.1169375,
      "eval_anthropic_toxic_prompts_bleu_score": 3.293148509397367,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1185256629904491,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6882367730140686,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009334609592663407,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.1941885948181152,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.454,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.044,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.81,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.574,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.39037518426273,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.22141263498086622,
      "eval_anthropic_toxic_prompts_runtime": 13.082,
      "eval_anthropic_toxic_prompts_samples_per_second": 38.22,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.076,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36222316663025583,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00650730959606252,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.45246064724649093,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32619813806069026,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 248125
    },
    {
      "epoch": 47.64,
      "eval_arxiv_accuracy": 0.35634375,
      "eval_arxiv_bleu_score": 4.447612161166064,
      "eval_arxiv_bleu_score_sem": 0.1323879350373058,
      "eval_arxiv_emb_cos_sim": 0.7812366485595703,
      "eval_arxiv_emb_cos_sim_sem": 0.007202708066782506,
      "eval_arxiv_emb_top1_equal": 0.2578125,
      "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.310638666152954,
      "eval_arxiv_n_ngrams_match_1": 15.67,
      "eval_arxiv_n_ngrams_match_2": 3.11,
      "eval_arxiv_n_ngrams_match_3": 0.676,
      "eval_arxiv_num_pred_words": 40.206,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.402621010946135,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3756236847446874,
      "eval_arxiv_runtime": 11.9695,
      "eval_arxiv_samples_per_second": 41.773,
      "eval_arxiv_steps_per_second": 0.084,
      "eval_arxiv_token_set_f1": 0.3666380583630598,
      "eval_arxiv_token_set_f1_sem": 0.004279919035351922,
      "eval_arxiv_token_set_precision": 0.32057749850464246,
      "eval_arxiv_token_set_recall": 0.44920929263093656,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 248125
    },
    {
      "epoch": 47.64,
      "eval_python_code_alpaca_accuracy": 0.163875,
      "eval_python_code_alpaca_bleu_score": 4.757590874097533,
      "eval_python_code_alpaca_bleu_score_sem": 0.14959500405046608,
      "eval_python_code_alpaca_emb_cos_sim": 0.7674453854560852,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.008086995046656623,
      "eval_python_code_alpaca_emb_top1_equal": 0.1640625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8286101818084717,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.186,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.002,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.984,
      "eval_python_code_alpaca_num_pred_words": 43.604,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 16.921926073124364,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3477354298339224,
      "eval_python_code_alpaca_runtime": 17.237,
      "eval_python_code_alpaca_samples_per_second": 29.007,
      "eval_python_code_alpaca_steps_per_second": 0.058,
      "eval_python_code_alpaca_token_set_f1": 0.48834096784614445,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005232869025755022,
      "eval_python_code_alpaca_token_set_precision": 0.5592495199402681,
      "eval_python_code_alpaca_token_set_recall": 0.4567530569456588,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 248125
    },
    {
      "epoch": 47.64,
      "eval_wikibio_accuracy": 0.33115625,
      "eval_wikibio_bleu_score": 6.173020548637241,
      "eval_wikibio_bleu_score_sem": 0.2162235697229472,
      "eval_wikibio_emb_cos_sim": 0.7419592142105103,
      "eval_wikibio_emb_cos_sim_sem": 0.008961096977170277,
      "eval_wikibio_emb_top1_equal": 0.21875,
      "eval_wikibio_emb_top1_equal_sem": 0.03668319712192295,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6299922466278076,
      "eval_wikibio_n_ngrams_match_1": 10.252,
      "eval_wikibio_n_ngrams_match_2": 3.46,
      "eval_wikibio_n_ngrams_match_3": 1.29,
      "eval_wikibio_num_pred_words": 36.578,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.71252421681164,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36247058554973255,
      "eval_wikibio_runtime": 12.4009,
      "eval_wikibio_samples_per_second": 40.32,
      "eval_wikibio_steps_per_second": 0.081,
      "eval_wikibio_token_set_f1": 0.32500288959311974,
      "eval_wikibio_token_set_f1_sem": 0.0052746729137608415,
      "eval_wikibio_token_set_precision": 0.33421849558845984,
      "eval_wikibio_token_set_recall": 0.330506573524447,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 248125
    },
    {
      "epoch": 47.64,
      "eval_nq_accuracy": 0.53865625,
      "eval_nq_bleu_score": 12.164957660255928,
      "eval_nq_bleu_score_sem": 0.49219298409302564,
      "eval_nq_emb_cos_sim": 0.8435643911361694,
      "eval_nq_emb_cos_sim_sem": 0.0065411731081890375,
      "eval_nq_emb_top1_equal": 0.359375,
      "eval_nq_emb_top1_equal_sem": 0.04257689651385297,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1049978733062744,
      "eval_nq_n_ngrams_match_1": 23.524,
      "eval_nq_n_ngrams_match_2": 8.772,
      "eval_nq_n_ngrams_match_3": 4.046,
      "eval_nq_num_pred_words": 49.052,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.207085555619893,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4580813632384835,
      "eval_nq_runtime": 21.1875,
      "eval_nq_samples_per_second": 23.599,
      "eval_nq_steps_per_second": 0.047,
      "eval_nq_token_set_f1": 0.4672014668803913,
      "eval_nq_token_set_f1_sem": 0.004979673036775632,
      "eval_nq_token_set_precision": 0.4267685199957736,
      "eval_nq_token_set_recall": 0.5234748105371331,
      "eval_nq_true_num_tokens": 64.0,
      "step": 248125
    },
    {
      "epoch": 47.65,
      "learning_rate": 0.001,
      "loss": 2.4817,
      "step": 248136
    },
    {
      "epoch": 47.65,
      "learning_rate": 0.001,
      "loss": 2.4858,
      "step": 248148
    },
    {
      "epoch": 47.65,
      "learning_rate": 0.001,
      "loss": 2.4745,
      "step": 248160
    },
    {
      "epoch": 47.65,
      "learning_rate": 0.001,
      "loss": 2.4942,
      "step": 248172
    },
    {
      "epoch": 47.65,
      "learning_rate": 0.001,
      "loss": 2.4843,
      "step": 248184
    },
    {
      "epoch": 47.66,
      "learning_rate": 0.001,
      "loss": 2.4902,
      "step": 248196
    },
    {
      "epoch": 47.66,
      "learning_rate": 0.001,
      "loss": 2.4777,
      "step": 248208
    },
    {
      "epoch": 47.66,
      "learning_rate": 0.001,
      "loss": 2.4845,
      "step": 248220
    },
    {
      "epoch": 47.66,
      "learning_rate": 0.001,
      "loss": 2.477,
      "step": 248232
    },
    {
      "epoch": 47.67,
      "learning_rate": 0.001,
      "loss": 2.4823,
      "step": 248244
    },
    {
      "epoch": 47.67,
      "learning_rate": 0.001,
      "loss": 2.4853,
      "step": 248256
    },
    {
      "epoch": 47.67,
      "learning_rate": 0.001,
      "loss": 2.4887,
      "step": 248268
    },
    {
      "epoch": 47.67,
      "learning_rate": 0.001,
      "loss": 2.4888,
      "step": 248280
    },
    {
      "epoch": 47.68,
      "learning_rate": 0.001,
      "loss": 2.4854,
      "step": 248292
    },
    {
      "epoch": 47.68,
      "learning_rate": 0.001,
      "loss": 2.4742,
      "step": 248304
    },
    {
      "epoch": 47.68,
      "learning_rate": 0.001,
      "loss": 2.4896,
      "step": 248316
    },
    {
      "epoch": 47.68,
      "learning_rate": 0.001,
      "loss": 2.4811,
      "step": 248328
    },
    {
      "epoch": 47.68,
      "learning_rate": 0.001,
      "loss": 2.4766,
      "step": 248340
    },
    {
      "epoch": 47.69,
      "learning_rate": 0.001,
      "loss": 2.4873,
      "step": 248352
    },
    {
      "epoch": 47.69,
      "learning_rate": 0.001,
      "loss": 2.4827,
      "step": 248364
    },
    {
      "epoch": 47.69,
      "learning_rate": 0.001,
      "loss": 2.49,
      "step": 248376
    },
    {
      "epoch": 47.69,
      "learning_rate": 0.001,
      "loss": 2.4791,
      "step": 248388
    },
    {
      "epoch": 47.7,
      "learning_rate": 0.001,
      "loss": 2.4775,
      "step": 248400
    },
    {
      "epoch": 47.7,
      "learning_rate": 0.001,
      "loss": 2.488,
      "step": 248412
    },
    {
      "epoch": 47.7,
      "learning_rate": 0.001,
      "loss": 2.4841,
      "step": 248424
    },
    {
      "epoch": 47.7,
      "learning_rate": 0.001,
      "loss": 2.494,
      "step": 248436
    },
    {
      "epoch": 47.71,
      "learning_rate": 0.001,
      "loss": 2.4935,
      "step": 248448
    },
    {
      "epoch": 47.71,
      "learning_rate": 0.001,
      "loss": 2.4938,
      "step": 248460
    },
    {
      "epoch": 47.71,
      "learning_rate": 0.001,
      "loss": 2.4897,
      "step": 248472
    },
    {
      "epoch": 47.71,
      "learning_rate": 0.001,
      "loss": 2.4979,
      "step": 248484
    },
    {
      "epoch": 47.71,
      "learning_rate": 0.001,
      "loss": 2.4933,
      "step": 248496
    },
    {
      "epoch": 47.72,
      "learning_rate": 0.001,
      "loss": 2.481,
      "step": 248508
    },
    {
      "epoch": 47.72,
      "learning_rate": 0.001,
      "loss": 2.494,
      "step": 248520
    },
    {
      "epoch": 47.72,
      "learning_rate": 0.001,
      "loss": 2.4901,
      "step": 248532
    },
    {
      "epoch": 47.72,
      "learning_rate": 0.001,
      "loss": 2.4845,
      "step": 248544
    },
    {
      "epoch": 47.73,
      "learning_rate": 0.001,
      "loss": 2.4925,
      "step": 248556
    },
    {
      "epoch": 47.73,
      "learning_rate": 0.001,
      "loss": 2.4872,
      "step": 248568
    },
    {
      "epoch": 47.73,
      "learning_rate": 0.001,
      "loss": 2.4876,
      "step": 248580
    },
    {
      "epoch": 47.73,
      "learning_rate": 0.001,
      "loss": 2.4715,
      "step": 248592
    },
    {
      "epoch": 47.74,
      "learning_rate": 0.001,
      "loss": 2.4873,
      "step": 248604
    },
    {
      "epoch": 47.74,
      "learning_rate": 0.001,
      "loss": 2.4829,
      "step": 248616
    },
    {
      "epoch": 47.74,
      "learning_rate": 0.001,
      "loss": 2.4952,
      "step": 248628
    },
    {
      "epoch": 47.74,
      "learning_rate": 0.001,
      "loss": 2.4807,
      "step": 248640
    },
    {
      "epoch": 47.74,
      "learning_rate": 0.001,
      "loss": 2.4971,
      "step": 248652
    },
    {
      "epoch": 47.75,
      "learning_rate": 0.001,
      "loss": 2.4779,
      "step": 248664
    },
    {
      "epoch": 47.75,
      "learning_rate": 0.001,
      "loss": 2.4958,
      "step": 248676
    },
    {
      "epoch": 47.75,
      "learning_rate": 0.001,
      "loss": 2.4811,
      "step": 248688
    },
    {
      "epoch": 47.75,
      "learning_rate": 0.001,
      "loss": 2.4801,
      "step": 248700
    },
    {
      "epoch": 47.76,
      "learning_rate": 0.001,
      "loss": 2.4953,
      "step": 248712
    },
    {
      "epoch": 47.76,
      "learning_rate": 0.001,
      "loss": 2.4857,
      "step": 248724
    },
    {
      "epoch": 47.76,
      "learning_rate": 0.001,
      "loss": 2.4883,
      "step": 248736
    },
    {
      "epoch": 47.76,
      "learning_rate": 0.001,
      "loss": 2.4865,
      "step": 248748
    },
    {
      "epoch": 47.76,
      "eval_ag_news_accuracy": 0.33178125,
      "eval_ag_news_bleu_score": 5.1813409721901404,
      "eval_ag_news_bleu_score_sem": 0.16947469492284958,
      "eval_ag_news_emb_cos_sim": 0.8197761178016663,
      "eval_ag_news_emb_cos_sim_sem": 0.0067856189026516515,
      "eval_ag_news_emb_top1_equal": 0.234375,
      "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4435057640075684,
      "eval_ag_news_n_ngrams_match_1": 14.658,
      "eval_ag_news_n_ngrams_match_2": 3.388,
      "eval_ag_news_n_ngrams_match_3": 0.99,
      "eval_ag_news_num_pred_words": 46.894,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.296484157720922,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3650632896430201,
      "eval_ag_news_runtime": 12.5634,
      "eval_ag_news_samples_per_second": 39.798,
      "eval_ag_news_steps_per_second": 0.08,
      "eval_ag_news_token_set_f1": 0.36405006717945027,
      "eval_ag_news_token_set_f1_sem": 0.0045575006687500145,
      "eval_ag_news_token_set_precision": 0.35088080169794217,
      "eval_ag_news_token_set_recall": 0.39225027686730346,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 248750
    },
    {
      "epoch": 47.76,
      "eval_anthropic_toxic_prompts_accuracy": 0.1171875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.338124336445745,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.126176027487603,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6882451772689819,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009843266077906719,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.183750629425049,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.456,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.068,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.816,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.178,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.137113360666334,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21981441704678467,
      "eval_anthropic_toxic_prompts_runtime": 13.3897,
      "eval_anthropic_toxic_prompts_samples_per_second": 37.342,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.075,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35698655829917686,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006383206273064008,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4492739199412488,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3209038862451919,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 248750
    },
    {
      "epoch": 47.76,
      "eval_arxiv_accuracy": 0.35425,
      "eval_arxiv_bleu_score": 4.546258497820296,
      "eval_arxiv_bleu_score_sem": 0.13208640610322758,
      "eval_arxiv_emb_cos_sim": 0.779262900352478,
      "eval_arxiv_emb_cos_sim_sem": 0.008558350912423286,
      "eval_arxiv_emb_top1_equal": 0.28125,
      "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3064181804656982,
      "eval_arxiv_n_ngrams_match_1": 15.654,
      "eval_arxiv_n_ngrams_match_2": 3.148,
      "eval_arxiv_n_ngrams_match_3": 0.722,
      "eval_arxiv_num_pred_words": 40.408,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.2872123527808,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3744534864106186,
      "eval_arxiv_runtime": 12.3596,
      "eval_arxiv_samples_per_second": 40.455,
      "eval_arxiv_steps_per_second": 0.081,
      "eval_arxiv_token_set_f1": 0.3672515792691267,
      "eval_arxiv_token_set_f1_sem": 0.004318107325377595,
      "eval_arxiv_token_set_precision": 0.31965560457117304,
      "eval_arxiv_token_set_recall": 0.44986833436728046,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 248750
    },
    {
      "epoch": 47.76,
      "eval_python_code_alpaca_accuracy": 0.1634375,
      "eval_python_code_alpaca_bleu_score": 4.840313444083068,
      "eval_python_code_alpaca_bleu_score_sem": 0.15701355395283076,
      "eval_python_code_alpaca_emb_cos_sim": 0.7639352083206177,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.00903370546044242,
      "eval_python_code_alpaca_emb_top1_equal": 0.1484375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8244967460632324,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.076,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.062,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.052,
      "eval_python_code_alpaca_num_pred_words": 42.918,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 16.852461783928643,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.34581750969477737,
      "eval_python_code_alpaca_runtime": 18.9461,
      "eval_python_code_alpaca_samples_per_second": 26.391,
      "eval_python_code_alpaca_steps_per_second": 0.053,
      "eval_python_code_alpaca_token_set_f1": 0.47879834858823295,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005547133602944839,
      "eval_python_code_alpaca_token_set_precision": 0.5533665450521097,
      "eval_python_code_alpaca_token_set_recall": 0.4478643519443287,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 248750
    },
    {
      "epoch": 47.76,
      "eval_wikibio_accuracy": 0.33171875,
      "eval_wikibio_bleu_score": 6.291519913828881,
      "eval_wikibio_bleu_score_sem": 0.21300405143483134,
      "eval_wikibio_emb_cos_sim": 0.756807804107666,
      "eval_wikibio_emb_cos_sim_sem": 0.007285925043533743,
      "eval_wikibio_emb_top1_equal": 0.203125,
      "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6334481239318848,
      "eval_wikibio_n_ngrams_match_1": 10.43,
      "eval_wikibio_n_ngrams_match_2": 3.524,
      "eval_wikibio_n_ngrams_match_3": 1.322,
      "eval_wikibio_num_pred_words": 36.418,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.843079534976354,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36813694957930015,
      "eval_wikibio_runtime": 13.3914,
      "eval_wikibio_samples_per_second": 37.337,
      "eval_wikibio_steps_per_second": 0.075,
      "eval_wikibio_token_set_f1": 0.33083042184134814,
      "eval_wikibio_token_set_f1_sem": 0.004997707837593411,
      "eval_wikibio_token_set_precision": 0.3401205010532537,
      "eval_wikibio_token_set_recall": 0.33678032830033583,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 248750
    },
    {
      "epoch": 47.76,
      "eval_nq_accuracy": 0.5383125,
      "eval_nq_bleu_score": 12.350032568777765,
      "eval_nq_bleu_score_sem": 0.4891152951987281,
      "eval_nq_emb_cos_sim": 0.8401965498924255,
      "eval_nq_emb_cos_sim_sem": 0.006955711070895968,
      "eval_nq_emb_top1_equal": 0.3125,
      "eval_nq_emb_top1_equal_sem": 0.041130074229814934,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1046643257141113,
      "eval_nq_n_ngrams_match_1": 23.66,
      "eval_nq_n_ngrams_match_2": 8.838,
      "eval_nq_n_ngrams_match_3": 4.124,
      "eval_nq_num_pred_words": 49.03,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.204348558478918,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4593533050827407,
      "eval_nq_runtime": 12.5346,
      "eval_nq_samples_per_second": 39.89,
      "eval_nq_steps_per_second": 0.08,
      "eval_nq_token_set_f1": 0.4725723252747118,
      "eval_nq_token_set_f1_sem": 0.004943369866499317,
      "eval_nq_token_set_precision": 0.43251952547362293,
      "eval_nq_token_set_recall": 0.5284720460052428,
      "eval_nq_true_num_tokens": 64.0,
      "step": 248750
    },
    {
      "epoch": 47.76,
      "learning_rate": 0.001,
      "loss": 2.4817,
      "step": 248760
    },
    {
      "epoch": 47.77,
      "learning_rate": 0.001,
      "loss": 2.4898,
      "step": 248772
    },
    {
      "epoch": 47.77,
      "learning_rate": 0.001,
      "loss": 2.4987,
      "step": 248784
    },
    {
      "epoch": 47.77,
      "learning_rate": 0.001,
      "loss": 2.4872,
      "step": 248796
    },
    {
      "epoch": 47.77,
      "learning_rate": 0.001,
      "loss": 2.4897,
      "step": 248808
    },
    {
      "epoch": 47.78,
      "learning_rate": 0.001,
      "loss": 2.4786,
      "step": 248820
    },
    {
      "epoch": 47.78,
      "learning_rate": 0.001,
      "loss": 2.477,
      "step": 248832
    },
    {
      "epoch": 47.78,
      "learning_rate": 0.001,
      "loss": 2.482,
      "step": 248844
    },
    {
      "epoch": 47.78,
      "learning_rate": 0.001,
      "loss": 2.4833,
      "step": 248856
    },
    {
      "epoch": 47.79,
      "learning_rate": 0.001,
      "loss": 2.4883,
      "step": 248868
    },
    {
      "epoch": 47.79,
      "learning_rate": 0.001,
      "loss": 2.4899,
      "step": 248880
    },
    {
      "epoch": 47.79,
      "learning_rate": 0.001,
      "loss": 2.4948,
      "step": 248892
    },
    {
      "epoch": 47.79,
      "learning_rate": 0.001,
      "loss": 2.4871,
      "step": 248904
    },
    {
      "epoch": 47.79,
      "learning_rate": 0.001,
      "loss": 2.4915,
      "step": 248916
    },
    {
      "epoch": 47.8,
      "learning_rate": 0.001,
      "loss": 2.4857,
      "step": 248928
    },
    {
      "epoch": 47.8,
      "learning_rate": 0.001,
      "loss": 2.491,
      "step": 248940
    },
    {
      "epoch": 47.8,
      "learning_rate": 0.001,
      "loss": 2.4935,
      "step": 248952
    },
    {
      "epoch": 47.8,
      "learning_rate": 0.001,
      "loss": 2.4784,
      "step": 248964
    },
    {
      "epoch": 47.81,
      "learning_rate": 0.001,
      "loss": 2.4817,
      "step": 248976
    },
    {
      "epoch": 47.81,
      "learning_rate": 0.001,
      "loss": 2.4856,
      "step": 248988
    },
    {
      "epoch": 47.81,
      "learning_rate": 0.001,
      "loss": 2.4872,
      "step": 249000
    },
    {
      "epoch": 47.81,
      "learning_rate": 0.001,
      "loss": 2.4829,
      "step": 249012
    },
    {
      "epoch": 47.82,
      "learning_rate": 0.001,
      "loss": 2.4787,
      "step": 249024
    },
    {
      "epoch": 47.82,
      "learning_rate": 0.001,
      "loss": 2.4835,
      "step": 249036
    },
    {
      "epoch": 47.82,
      "learning_rate": 0.001,
      "loss": 2.4887,
      "step": 249048
    },
    {
      "epoch": 47.82,
      "learning_rate": 0.001,
      "loss": 2.4878,
      "step": 249060
    },
    {
      "epoch": 47.82,
      "learning_rate": 0.001,
      "loss": 2.4921,
      "step": 249072
    },
    {
      "epoch": 47.83,
      "learning_rate": 0.001,
      "loss": 2.4895,
      "step": 249084
    },
    {
      "epoch": 47.83,
      "learning_rate": 0.001,
      "loss": 2.4799,
      "step": 249096
    },
    {
      "epoch": 47.83,
      "learning_rate": 0.001,
      "loss": 2.4861,
      "step": 249108
    },
    {
      "epoch": 47.83,
      "learning_rate": 0.001,
      "loss": 2.4817,
      "step": 249120
    },
    {
      "epoch": 47.84,
      "learning_rate": 0.001,
      "loss": 2.4826,
      "step": 249132
    },
    {
      "epoch": 47.84,
      "learning_rate": 0.001,
      "loss": 2.4783,
      "step": 249144
    },
    {
      "epoch": 47.84,
      "learning_rate": 0.001,
      "loss": 2.4779,
      "step": 249156
    },
    {
      "epoch": 47.84,
      "learning_rate": 0.001,
      "loss": 2.4892,
      "step": 249168
    },
    {
      "epoch": 47.85,
      "learning_rate": 0.001,
      "loss": 2.4802,
      "step": 249180
    },
    {
      "epoch": 47.85,
      "learning_rate": 0.001,
      "loss": 2.4819,
      "step": 249192
    },
    {
      "epoch": 47.85,
      "learning_rate": 0.001,
      "loss": 2.4866,
      "step": 249204
    },
    {
      "epoch": 47.85,
      "learning_rate": 0.001,
      "loss": 2.4819,
      "step": 249216
    },
    {
      "epoch": 47.85,
      "learning_rate": 0.001,
      "loss": 2.4974,
      "step": 249228
    },
    {
      "epoch": 47.86,
      "learning_rate": 0.001,
      "loss": 2.4832,
      "step": 249240
    },
    {
      "epoch": 47.86,
      "learning_rate": 0.001,
      "loss": 2.4809,
      "step": 249252
    },
    {
      "epoch": 47.86,
      "learning_rate": 0.001,
      "loss": 2.4834,
      "step": 249264
    },
    {
      "epoch": 47.86,
      "learning_rate": 0.001,
      "loss": 2.5022,
      "step": 249276
    },
    {
      "epoch": 47.87,
      "learning_rate": 0.001,
      "loss": 2.489,
      "step": 249288
    },
    {
      "epoch": 47.87,
      "learning_rate": 0.001,
      "loss": 2.4845,
      "step": 249300
    },
    {
      "epoch": 47.87,
      "learning_rate": 0.001,
      "loss": 2.49,
      "step": 249312
    },
    {
      "epoch": 47.87,
      "learning_rate": 0.001,
      "loss": 2.4892,
      "step": 249324
    },
    {
      "epoch": 47.88,
      "learning_rate": 0.001,
      "loss": 2.4948,
      "step": 249336
    },
    {
      "epoch": 47.88,
      "learning_rate": 0.001,
      "loss": 2.4893,
      "step": 249348
    },
    {
      "epoch": 47.88,
      "learning_rate": 0.001,
      "loss": 2.4996,
      "step": 249360
    },
    {
      "epoch": 47.88,
      "learning_rate": 0.001,
      "loss": 2.4928,
      "step": 249372
    },
    {
      "epoch": 47.88,
      "eval_ag_news_accuracy": 0.3326875,
      "eval_ag_news_bleu_score": 5.094311246219722,
      "eval_ag_news_bleu_score_sem": 0.16178231435843113,
      "eval_ag_news_emb_cos_sim": 0.8272091746330261,
      "eval_ag_news_emb_cos_sim_sem": 0.0058888149673143085,
      "eval_ag_news_emb_top1_equal": 0.2265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03714537682851538,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4528400897979736,
      "eval_ag_news_n_ngrams_match_1": 14.64,
      "eval_ag_news_n_ngrams_match_2": 3.36,
      "eval_ag_news_n_ngrams_match_3": 0.962,
      "eval_ag_news_num_pred_words": 47.15,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.589983414748275,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3650431527465434,
      "eval_ag_news_runtime": 16.4939,
      "eval_ag_news_samples_per_second": 30.314,
      "eval_ag_news_steps_per_second": 0.061,
      "eval_ag_news_token_set_f1": 0.36372649564581144,
      "eval_ag_news_token_set_f1_sem": 0.004362924790996712,
      "eval_ag_news_token_set_precision": 0.35209632210321207,
      "eval_ag_news_token_set_recall": 0.39088022647759174,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 249375
    },
    {
      "epoch": 47.88,
      "eval_anthropic_toxic_prompts_accuracy": 0.116,
      "eval_anthropic_toxic_prompts_bleu_score": 3.3111473594140794,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12225925352009366,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.691220760345459,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009095238542179309,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.181739330291748,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.524,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.09,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.822,
      "eval_anthropic_toxic_prompts_num_pred_words": 48.108,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.088615193993174,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21980840822097467,
      "eval_anthropic_toxic_prompts_runtime": 11.4759,
      "eval_anthropic_toxic_prompts_samples_per_second": 43.57,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.087,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36636641612646337,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006508005136516142,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.45786588999368416,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3323222626343548,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 249375
    },
    {
      "epoch": 47.88,
      "eval_arxiv_accuracy": 0.3566875,
      "eval_arxiv_bleu_score": 4.627996455625923,
      "eval_arxiv_bleu_score_sem": 0.14153317852089609,
      "eval_arxiv_emb_cos_sim": 0.790118932723999,
      "eval_arxiv_emb_cos_sim_sem": 0.006327755955309046,
      "eval_arxiv_emb_top1_equal": 0.265625,
      "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.311711311340332,
      "eval_arxiv_n_ngrams_match_1": 15.966,
      "eval_arxiv_n_ngrams_match_2": 3.174,
      "eval_arxiv_n_ngrams_match_3": 0.73,
      "eval_arxiv_num_pred_words": 41.262,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.432030070418364,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3781419794927081,
      "eval_arxiv_runtime": 12.1669,
      "eval_arxiv_samples_per_second": 41.095,
      "eval_arxiv_steps_per_second": 0.082,
      "eval_arxiv_token_set_f1": 0.37344940864008863,
      "eval_arxiv_token_set_f1_sem": 0.004335465312531154,
      "eval_arxiv_token_set_precision": 0.3273930618648491,
      "eval_arxiv_token_set_recall": 0.45024986199642736,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 249375
    },
    {
      "epoch": 47.88,
      "eval_python_code_alpaca_accuracy": 0.16425,
      "eval_python_code_alpaca_bleu_score": 4.739159174673058,
      "eval_python_code_alpaca_bleu_score_sem": 0.1465690038225698,
      "eval_python_code_alpaca_emb_cos_sim": 0.7764459848403931,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007967120189868528,
      "eval_python_code_alpaca_emb_top1_equal": 0.125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.845170736312866,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.1,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.054,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.016,
      "eval_python_code_alpaca_num_pred_words": 43.646,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.204495850362754,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.34362570934376124,
      "eval_python_code_alpaca_runtime": 11.1557,
      "eval_python_code_alpaca_samples_per_second": 44.82,
      "eval_python_code_alpaca_steps_per_second": 0.09,
      "eval_python_code_alpaca_token_set_f1": 0.4833799893595455,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0055056761700500775,
      "eval_python_code_alpaca_token_set_precision": 0.5531893356831864,
      "eval_python_code_alpaca_token_set_recall": 0.45541811555591716,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 249375
    },
    {
      "epoch": 47.88,
      "eval_wikibio_accuracy": 0.33440625,
      "eval_wikibio_bleu_score": 6.316261810104175,
      "eval_wikibio_bleu_score_sem": 0.21310147077984917,
      "eval_wikibio_emb_cos_sim": 0.7608048915863037,
      "eval_wikibio_emb_cos_sim_sem": 0.008395486928698587,
      "eval_wikibio_emb_top1_equal": 0.203125,
      "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6134183406829834,
      "eval_wikibio_n_ngrams_match_1": 10.36,
      "eval_wikibio_n_ngrams_match_2": 3.574,
      "eval_wikibio_n_ngrams_match_3": 1.35,
      "eval_wikibio_num_pred_words": 36.58,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.09263159849502,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3675050112083451,
      "eval_wikibio_runtime": 11.2377,
      "eval_wikibio_samples_per_second": 44.493,
      "eval_wikibio_steps_per_second": 0.089,
      "eval_wikibio_token_set_f1": 0.32762593209439095,
      "eval_wikibio_token_set_f1_sem": 0.005229883661863675,
      "eval_wikibio_token_set_precision": 0.33849174256731424,
      "eval_wikibio_token_set_recall": 0.3318670215393933,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 249375
    },
    {
      "epoch": 47.88,
      "eval_nq_accuracy": 0.53959375,
      "eval_nq_bleu_score": 12.404047305223402,
      "eval_nq_bleu_score_sem": 0.4993819882216008,
      "eval_nq_emb_cos_sim": 0.8405977487564087,
      "eval_nq_emb_cos_sim_sem": 0.007364287888716638,
      "eval_nq_emb_top1_equal": 0.28125,
      "eval_nq_emb_top1_equal_sem": 0.039896367485272234,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1072959899902344,
      "eval_nq_n_ngrams_match_1": 23.684,
      "eval_nq_n_ngrams_match_2": 8.95,
      "eval_nq_n_ngrams_match_3": 4.208,
      "eval_nq_num_pred_words": 49.034,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.225968084679062,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4603848110094526,
      "eval_nq_runtime": 12.1299,
      "eval_nq_samples_per_second": 41.22,
      "eval_nq_steps_per_second": 0.082,
      "eval_nq_token_set_f1": 0.47281263431806797,
      "eval_nq_token_set_f1_sem": 0.004971267093609227,
      "eval_nq_token_set_precision": 0.43155680507352223,
      "eval_nq_token_set_recall": 0.530710810333764,
      "eval_nq_true_num_tokens": 64.0,
      "step": 249375
    },
    {
      "epoch": 47.88,
      "learning_rate": 0.001,
      "loss": 2.4966,
      "step": 249384
    },
    {
      "epoch": 47.89,
      "learning_rate": 0.001,
      "loss": 2.4847,
      "step": 249396
    },
    {
      "epoch": 47.89,
      "learning_rate": 0.001,
      "loss": 2.4937,
      "step": 249408
    },
    {
      "epoch": 47.89,
      "learning_rate": 0.001,
      "loss": 2.4839,
      "step": 249420
    },
    {
      "epoch": 47.89,
      "learning_rate": 0.001,
      "loss": 2.498,
      "step": 249432
    },
    {
      "epoch": 47.9,
      "learning_rate": 0.001,
      "loss": 2.4863,
      "step": 249444
    },
    {
      "epoch": 47.9,
      "learning_rate": 0.001,
      "loss": 2.4839,
      "step": 249456
    },
    {
      "epoch": 47.9,
      "learning_rate": 0.001,
      "loss": 2.4898,
      "step": 249468
    },
    {
      "epoch": 47.9,
      "learning_rate": 0.001,
      "loss": 2.4818,
      "step": 249480
    },
    {
      "epoch": 47.91,
      "learning_rate": 0.001,
      "loss": 2.4859,
      "step": 249492
    },
    {
      "epoch": 47.91,
      "learning_rate": 0.001,
      "loss": 2.4892,
      "step": 249504
    },
    {
      "epoch": 47.91,
      "learning_rate": 0.001,
      "loss": 2.4951,
      "step": 249516
    },
    {
      "epoch": 47.91,
      "learning_rate": 0.001,
      "loss": 2.4849,
      "step": 249528
    },
    {
      "epoch": 47.91,
      "learning_rate": 0.001,
      "loss": 2.4946,
      "step": 249540
    },
    {
      "epoch": 47.92,
      "learning_rate": 0.001,
      "loss": 2.4867,
      "step": 249552
    },
    {
      "epoch": 47.92,
      "learning_rate": 0.001,
      "loss": 2.4928,
      "step": 249564
    },
    {
      "epoch": 47.92,
      "learning_rate": 0.001,
      "loss": 2.4903,
      "step": 249576
    },
    {
      "epoch": 47.92,
      "learning_rate": 0.001,
      "loss": 2.4918,
      "step": 249588
    },
    {
      "epoch": 47.93,
      "learning_rate": 0.001,
      "loss": 2.489,
      "step": 249600
    },
    {
      "epoch": 47.93,
      "learning_rate": 0.001,
      "loss": 2.4779,
      "step": 249612
    },
    {
      "epoch": 47.93,
      "learning_rate": 0.001,
      "loss": 2.4896,
      "step": 249624
    },
    {
      "epoch": 47.93,
      "learning_rate": 0.001,
      "loss": 2.4885,
      "step": 249636
    },
    {
      "epoch": 47.94,
      "learning_rate": 0.001,
      "loss": 2.48,
      "step": 249648
    },
    {
      "epoch": 47.94,
      "learning_rate": 0.001,
      "loss": 2.4772,
      "step": 249660
    },
    {
      "epoch": 47.94,
      "learning_rate": 0.001,
      "loss": 2.4834,
      "step": 249672
    },
    {
      "epoch": 47.94,
      "learning_rate": 0.001,
      "loss": 2.4836,
      "step": 249684
    },
    {
      "epoch": 47.94,
      "learning_rate": 0.001,
      "loss": 2.4821,
      "step": 249696
    },
    {
      "epoch": 47.95,
      "learning_rate": 0.001,
      "loss": 2.4818,
      "step": 249708
    },
    {
      "epoch": 47.95,
      "learning_rate": 0.001,
      "loss": 2.484,
      "step": 249720
    },
    {
      "epoch": 47.95,
      "learning_rate": 0.001,
      "loss": 2.4875,
      "step": 249732
    },
    {
      "epoch": 47.95,
      "learning_rate": 0.001,
      "loss": 2.4954,
      "step": 249744
    },
    {
      "epoch": 47.96,
      "learning_rate": 0.001,
      "loss": 2.4974,
      "step": 249756
    },
    {
      "epoch": 47.96,
      "learning_rate": 0.001,
      "loss": 2.4867,
      "step": 249768
    },
    {
      "epoch": 47.96,
      "learning_rate": 0.001,
      "loss": 2.4809,
      "step": 249780
    },
    {
      "epoch": 47.96,
      "learning_rate": 0.001,
      "loss": 2.4928,
      "step": 249792
    },
    {
      "epoch": 47.97,
      "learning_rate": 0.001,
      "loss": 2.4869,
      "step": 249804
    },
    {
      "epoch": 47.97,
      "learning_rate": 0.001,
      "loss": 2.4839,
      "step": 249816
    },
    {
      "epoch": 47.97,
      "learning_rate": 0.001,
      "loss": 2.4883,
      "step": 249828
    },
    {
      "epoch": 47.97,
      "learning_rate": 0.001,
      "loss": 2.5035,
      "step": 249840
    },
    {
      "epoch": 47.97,
      "learning_rate": 0.001,
      "loss": 2.4964,
      "step": 249852
    },
    {
      "epoch": 47.98,
      "learning_rate": 0.001,
      "loss": 2.492,
      "step": 249864
    },
    {
      "epoch": 47.98,
      "learning_rate": 0.001,
      "loss": 2.4935,
      "step": 249876
    },
    {
      "epoch": 47.98,
      "learning_rate": 0.001,
      "loss": 2.4997,
      "step": 249888
    },
    {
      "epoch": 47.98,
      "learning_rate": 0.001,
      "loss": 2.4895,
      "step": 249900
    },
    {
      "epoch": 47.99,
      "learning_rate": 0.001,
      "loss": 2.4954,
      "step": 249912
    },
    {
      "epoch": 47.99,
      "learning_rate": 0.001,
      "loss": 2.481,
      "step": 249924
    },
    {
      "epoch": 47.99,
      "learning_rate": 0.001,
      "loss": 2.4858,
      "step": 249936
    },
    {
      "epoch": 47.99,
      "learning_rate": 0.001,
      "loss": 2.4918,
      "step": 249948
    },
    {
      "epoch": 48.0,
      "learning_rate": 0.001,
      "loss": 2.4877,
      "step": 249960
    },
    {
      "epoch": 48.0,
      "learning_rate": 0.001,
      "loss": 2.4965,
      "step": 249972
    },
    {
      "epoch": 48.0,
      "learning_rate": 0.001,
      "loss": 2.4847,
      "step": 249984
    },
    {
      "epoch": 48.0,
      "learning_rate": 0.001,
      "loss": 2.4746,
      "step": 249996
    },
    {
      "epoch": 48.0,
      "eval_ag_news_accuracy": 0.333,
      "eval_ag_news_bleu_score": 4.976351304144446,
      "eval_ag_news_bleu_score_sem": 0.15219663898429564,
      "eval_ag_news_emb_cos_sim": 0.8216410279273987,
      "eval_ag_news_emb_cos_sim_sem": 0.006388932764467116,
      "eval_ag_news_emb_top1_equal": 0.265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4505717754364014,
      "eval_ag_news_n_ngrams_match_1": 14.562,
      "eval_ag_news_n_ngrams_match_2": 3.272,
      "eval_ag_news_n_ngrams_match_3": 0.94,
      "eval_ag_news_num_pred_words": 46.52,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.518408609455346,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.36547325964035304,
      "eval_ag_news_runtime": 16.7875,
      "eval_ag_news_samples_per_second": 29.784,
      "eval_ag_news_steps_per_second": 0.06,
      "eval_ag_news_token_set_f1": 0.3611641671533246,
      "eval_ag_news_token_set_f1_sem": 0.004320317545460231,
      "eval_ag_news_token_set_precision": 0.3479161112839795,
      "eval_ag_news_token_set_recall": 0.38902752215403175,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 250000
    },
    {
      "epoch": 48.0,
      "eval_anthropic_toxic_prompts_accuracy": 0.11678125,
      "eval_anthropic_toxic_prompts_bleu_score": 3.4926227989688905,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1273502353359989,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6953889727592468,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008645840525942314,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.20438814163208,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.418,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.174,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.87,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.434,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.640418956662735,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21922156664019604,
      "eval_anthropic_toxic_prompts_runtime": 13.6896,
      "eval_anthropic_toxic_prompts_samples_per_second": 36.524,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.073,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3664467303634207,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0066477240187314095,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4538589120389553,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3349302629762433,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 250000
    },
    {
      "epoch": 48.0,
      "eval_arxiv_accuracy": 0.35684375,
      "eval_arxiv_bleu_score": 4.573399632545602,
      "eval_arxiv_bleu_score_sem": 0.14076170848554803,
      "eval_arxiv_emb_cos_sim": 0.7903249263763428,
      "eval_arxiv_emb_cos_sim_sem": 0.006268415826783739,
      "eval_arxiv_emb_top1_equal": 0.28125,
      "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.309251546859741,
      "eval_arxiv_n_ngrams_match_1": 15.69,
      "eval_arxiv_n_ngrams_match_2": 3.082,
      "eval_arxiv_n_ngrams_match_3": 0.724,
      "eval_arxiv_num_pred_words": 40.532,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.364636657162713,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.37800332397791225,
      "eval_arxiv_runtime": 13.4098,
      "eval_arxiv_samples_per_second": 37.286,
      "eval_arxiv_steps_per_second": 0.075,
      "eval_arxiv_token_set_f1": 0.36725584720743554,
      "eval_arxiv_token_set_f1_sem": 0.004144421319963543,
      "eval_arxiv_token_set_precision": 0.3217808379845878,
      "eval_arxiv_token_set_recall": 0.44403633856085034,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 250000
    },
    {
      "epoch": 48.0,
      "eval_python_code_alpaca_accuracy": 0.1654375,
      "eval_python_code_alpaca_bleu_score": 5.1548312159158,
      "eval_python_code_alpaca_bleu_score_sem": 0.16573807504942967,
      "eval_python_code_alpaca_emb_cos_sim": 0.77726149559021,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007279054448681959,
      "eval_python_code_alpaca_emb_top1_equal": 0.140625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.822619915008545,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.27,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.174,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.154,
      "eval_python_code_alpaca_num_pred_words": 43.632,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 16.820862223098946,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.35120400874866275,
      "eval_python_code_alpaca_runtime": 14.2054,
      "eval_python_code_alpaca_samples_per_second": 35.198,
      "eval_python_code_alpaca_steps_per_second": 0.07,
      "eval_python_code_alpaca_token_set_f1": 0.49310856020986243,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005361499865049245,
      "eval_python_code_alpaca_token_set_precision": 0.5608457442122253,
      "eval_python_code_alpaca_token_set_recall": 0.46043463793300393,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 250000
    },
    {
      "epoch": 48.0,
      "eval_wikibio_accuracy": 0.33415625,
      "eval_wikibio_bleu_score": 6.292058713973434,
      "eval_wikibio_bleu_score_sem": 0.21369389203475148,
      "eval_wikibio_emb_cos_sim": 0.7567078471183777,
      "eval_wikibio_emb_cos_sim_sem": 0.008591763485124804,
      "eval_wikibio_emb_top1_equal": 0.1171875,
      "eval_wikibio_emb_top1_equal_sem": 0.02854125312152025,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6028592586517334,
      "eval_wikibio_n_ngrams_match_1": 10.288,
      "eval_wikibio_n_ngrams_match_2": 3.542,
      "eval_wikibio_n_ngrams_match_3": 1.33,
      "eval_wikibio_num_pred_words": 36.186,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 36.70302800670636,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36515826051373507,
      "eval_wikibio_runtime": 17.6021,
      "eval_wikibio_samples_per_second": 28.406,
      "eval_wikibio_steps_per_second": 0.057,
      "eval_wikibio_token_set_f1": 0.3302672128499839,
      "eval_wikibio_token_set_f1_sem": 0.00524216625780752,
      "eval_wikibio_token_set_precision": 0.3359925807969038,
      "eval_wikibio_token_set_recall": 0.3395744931915767,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 250000
    },
    {
      "epoch": 48.0,
      "eval_nq_accuracy": 0.5396875,
      "eval_nq_bleu_score": 12.45586321621732,
      "eval_nq_bleu_score_sem": 0.48988617764236103,
      "eval_nq_emb_cos_sim": 0.836689829826355,
      "eval_nq_emb_cos_sim_sem": 0.006868378190969839,
      "eval_nq_emb_top1_equal": 0.3203125,
      "eval_nq_emb_top1_equal_sem": 0.041403754790620424,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.101829767227173,
      "eval_nq_n_ngrams_match_1": 23.646,
      "eval_nq_n_ngrams_match_2": 8.936,
      "eval_nq_n_ngrams_match_3": 4.186,
      "eval_nq_num_pred_words": 49.212,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.18112578135172,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4586611125271711,
      "eval_nq_runtime": 18.3333,
      "eval_nq_samples_per_second": 27.273,
      "eval_nq_steps_per_second": 0.055,
      "eval_nq_token_set_f1": 0.4707373578555305,
      "eval_nq_token_set_f1_sem": 0.005047285913235144,
      "eval_nq_token_set_precision": 0.4278669815829763,
      "eval_nq_token_set_recall": 0.5322160792486194,
      "eval_nq_true_num_tokens": 64.0,
      "step": 250000
    },
    {
      "epoch": 48.0,
      "learning_rate": 0.001,
      "loss": 2.4747,
      "step": 250008
    },
    {
      "epoch": 48.01,
      "learning_rate": 0.001,
      "loss": 2.4757,
      "step": 250020
    },
    {
      "epoch": 48.01,
      "learning_rate": 0.001,
      "loss": 2.473,
      "step": 250032
    },
    {
      "epoch": 48.01,
      "learning_rate": 0.001,
      "loss": 2.4653,
      "step": 250044
    },
    {
      "epoch": 48.01,
      "learning_rate": 0.001,
      "loss": 2.4691,
      "step": 250056
    },
    {
      "epoch": 48.02,
      "learning_rate": 0.001,
      "loss": 2.4628,
      "step": 250068
    },
    {
      "epoch": 48.02,
      "learning_rate": 0.001,
      "loss": 2.4701,
      "step": 250080
    },
    {
      "epoch": 48.02,
      "learning_rate": 0.001,
      "loss": 2.4738,
      "step": 250092
    },
    {
      "epoch": 48.02,
      "learning_rate": 0.001,
      "loss": 2.4725,
      "step": 250104
    },
    {
      "epoch": 48.03,
      "learning_rate": 0.001,
      "loss": 2.4732,
      "step": 250116
    },
    {
      "epoch": 48.03,
      "learning_rate": 0.001,
      "loss": 2.475,
      "step": 250128
    },
    {
      "epoch": 48.03,
      "learning_rate": 0.001,
      "loss": 2.4787,
      "step": 250140
    },
    {
      "epoch": 48.03,
      "learning_rate": 0.001,
      "loss": 2.4751,
      "step": 250152
    },
    {
      "epoch": 48.03,
      "learning_rate": 0.001,
      "loss": 2.462,
      "step": 250164
    },
    {
      "epoch": 48.04,
      "learning_rate": 0.001,
      "loss": 2.4628,
      "step": 250176
    },
    {
      "epoch": 48.04,
      "learning_rate": 0.001,
      "loss": 2.4812,
      "step": 250188
    },
    {
      "epoch": 48.04,
      "learning_rate": 0.001,
      "loss": 2.4767,
      "step": 250200
    },
    {
      "epoch": 48.04,
      "learning_rate": 0.001,
      "loss": 2.4719,
      "step": 250212
    },
    {
      "epoch": 48.05,
      "learning_rate": 0.001,
      "loss": 2.4751,
      "step": 250224
    },
    {
      "epoch": 48.05,
      "learning_rate": 0.001,
      "loss": 2.4727,
      "step": 250236
    },
    {
      "epoch": 48.05,
      "learning_rate": 0.001,
      "loss": 2.4767,
      "step": 250248
    },
    {
      "epoch": 48.05,
      "learning_rate": 0.001,
      "loss": 2.4605,
      "step": 250260
    },
    {
      "epoch": 48.06,
      "learning_rate": 0.001,
      "loss": 2.4747,
      "step": 250272
    },
    {
      "epoch": 48.06,
      "learning_rate": 0.001,
      "loss": 2.475,
      "step": 250284
    },
    {
      "epoch": 48.06,
      "learning_rate": 0.001,
      "loss": 2.4713,
      "step": 250296
    },
    {
      "epoch": 48.06,
      "learning_rate": 0.001,
      "loss": 2.4784,
      "step": 250308
    },
    {
      "epoch": 48.06,
      "learning_rate": 0.001,
      "loss": 2.4781,
      "step": 250320
    },
    {
      "epoch": 48.07,
      "learning_rate": 0.001,
      "loss": 2.4704,
      "step": 250332
    },
    {
      "epoch": 48.07,
      "learning_rate": 0.001,
      "loss": 2.4697,
      "step": 250344
    },
    {
      "epoch": 48.07,
      "learning_rate": 0.001,
      "loss": 2.4677,
      "step": 250356
    },
    {
      "epoch": 48.07,
      "learning_rate": 0.001,
      "loss": 2.482,
      "step": 250368
    },
    {
      "epoch": 48.08,
      "learning_rate": 0.001,
      "loss": 2.4845,
      "step": 250380
    },
    {
      "epoch": 48.08,
      "learning_rate": 0.001,
      "loss": 2.4765,
      "step": 250392
    },
    {
      "epoch": 48.08,
      "learning_rate": 0.001,
      "loss": 2.4755,
      "step": 250404
    },
    {
      "epoch": 48.08,
      "learning_rate": 0.001,
      "loss": 2.4744,
      "step": 250416
    },
    {
      "epoch": 48.09,
      "learning_rate": 0.001,
      "loss": 2.4873,
      "step": 250428
    },
    {
      "epoch": 48.09,
      "learning_rate": 0.001,
      "loss": 2.4829,
      "step": 250440
    },
    {
      "epoch": 48.09,
      "learning_rate": 0.001,
      "loss": 2.479,
      "step": 250452
    },
    {
      "epoch": 48.09,
      "learning_rate": 0.001,
      "loss": 2.4718,
      "step": 250464
    },
    {
      "epoch": 48.09,
      "learning_rate": 0.001,
      "loss": 2.4817,
      "step": 250476
    },
    {
      "epoch": 48.1,
      "learning_rate": 0.001,
      "loss": 2.4831,
      "step": 250488
    },
    {
      "epoch": 48.1,
      "learning_rate": 0.001,
      "loss": 2.4743,
      "step": 250500
    },
    {
      "epoch": 48.1,
      "learning_rate": 0.001,
      "loss": 2.4766,
      "step": 250512
    },
    {
      "epoch": 48.1,
      "learning_rate": 0.001,
      "loss": 2.4705,
      "step": 250524
    },
    {
      "epoch": 48.11,
      "learning_rate": 0.001,
      "loss": 2.4792,
      "step": 250536
    },
    {
      "epoch": 48.11,
      "learning_rate": 0.001,
      "loss": 2.4727,
      "step": 250548
    },
    {
      "epoch": 48.11,
      "learning_rate": 0.001,
      "loss": 2.4728,
      "step": 250560
    },
    {
      "epoch": 48.11,
      "learning_rate": 0.001,
      "loss": 2.4811,
      "step": 250572
    },
    {
      "epoch": 48.12,
      "learning_rate": 0.001,
      "loss": 2.473,
      "step": 250584
    },
    {
      "epoch": 48.12,
      "learning_rate": 0.001,
      "loss": 2.4801,
      "step": 250596
    },
    {
      "epoch": 48.12,
      "learning_rate": 0.001,
      "loss": 2.4865,
      "step": 250608
    },
    {
      "epoch": 48.12,
      "learning_rate": 0.001,
      "loss": 2.474,
      "step": 250620
    },
    {
      "epoch": 48.12,
      "eval_ag_news_accuracy": 0.3311875,
      "eval_ag_news_bleu_score": 5.139758274458105,
      "eval_ag_news_bleu_score_sem": 0.1594382743298427,
      "eval_ag_news_emb_cos_sim": 0.8248339891433716,
      "eval_ag_news_emb_cos_sim_sem": 0.006527133597823354,
      "eval_ag_news_emb_top1_equal": 0.234375,
      "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.460014581680298,
      "eval_ag_news_n_ngrams_match_1": 14.676,
      "eval_ag_news_n_ngrams_match_2": 3.392,
      "eval_ag_news_n_ngrams_match_3": 0.958,
      "eval_ag_news_num_pred_words": 46.874,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.817440463029836,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.36488812800624415,
      "eval_ag_news_runtime": 11.5577,
      "eval_ag_news_samples_per_second": 43.261,
      "eval_ag_news_steps_per_second": 0.087,
      "eval_ag_news_token_set_f1": 0.36204404038841675,
      "eval_ag_news_token_set_f1_sem": 0.00449969604583042,
      "eval_ag_news_token_set_precision": 0.34946652317310706,
      "eval_ag_news_token_set_recall": 0.3889837193239418,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 250625
    },
    {
      "epoch": 48.12,
      "eval_anthropic_toxic_prompts_accuracy": 0.11665625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.361079890503321,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1280400925058103,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6960893869400024,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008995374218810777,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1328125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030114394778901498,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.190563440322876,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.534,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.11,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.808,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.324,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.302116378582213,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.22300698806984248,
      "eval_anthropic_toxic_prompts_runtime": 11.2219,
      "eval_anthropic_toxic_prompts_samples_per_second": 44.556,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.089,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36094798571661507,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0066335762361913785,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4567114963136891,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32255889335372684,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 250625
    },
    {
      "epoch": 48.12,
      "eval_arxiv_accuracy": 0.3540625,
      "eval_arxiv_bleu_score": 4.731141276595753,
      "eval_arxiv_bleu_score_sem": 0.135710392393234,
      "eval_arxiv_emb_cos_sim": 0.7890267372131348,
      "eval_arxiv_emb_cos_sim_sem": 0.006840106216697633,
      "eval_arxiv_emb_top1_equal": 0.2890625,
      "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.321216344833374,
      "eval_arxiv_n_ngrams_match_1": 15.808,
      "eval_arxiv_n_ngrams_match_2": 3.262,
      "eval_arxiv_n_ngrams_match_3": 0.774,
      "eval_arxiv_num_pred_words": 41.228,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.694015552978357,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.37571313045028343,
      "eval_arxiv_runtime": 11.3363,
      "eval_arxiv_samples_per_second": 44.106,
      "eval_arxiv_steps_per_second": 0.088,
      "eval_arxiv_token_set_f1": 0.36838095837752993,
      "eval_arxiv_token_set_f1_sem": 0.004210803563776229,
      "eval_arxiv_token_set_precision": 0.3233462041974542,
      "eval_arxiv_token_set_recall": 0.442847770455988,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 250625
    },
    {
      "epoch": 48.12,
      "eval_python_code_alpaca_accuracy": 0.1648125,
      "eval_python_code_alpaca_bleu_score": 5.200160592115089,
      "eval_python_code_alpaca_bleu_score_sem": 0.16877561616190814,
      "eval_python_code_alpaca_emb_cos_sim": 0.7871952056884766,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007216122618410114,
      "eval_python_code_alpaca_emb_top1_equal": 0.140625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8048338890075684,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.36,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.334,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.252,
      "eval_python_code_alpaca_num_pred_words": 44.176,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 16.52433080508394,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3483535281781177,
      "eval_python_code_alpaca_runtime": 11.1175,
      "eval_python_code_alpaca_samples_per_second": 44.974,
      "eval_python_code_alpaca_steps_per_second": 0.09,
      "eval_python_code_alpaca_token_set_f1": 0.4857537839483617,
      "eval_python_code_alpaca_token_set_f1_sem": 0.0051787267579279555,
      "eval_python_code_alpaca_token_set_precision": 0.5689601697740975,
      "eval_python_code_alpaca_token_set_recall": 0.444310862304604,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 250625
    },
    {
      "epoch": 48.12,
      "eval_wikibio_accuracy": 0.33190625,
      "eval_wikibio_bleu_score": 6.039174249369688,
      "eval_wikibio_bleu_score_sem": 0.21558128963785914,
      "eval_wikibio_emb_cos_sim": 0.7473443746566772,
      "eval_wikibio_emb_cos_sim_sem": 0.00937715486060163,
      "eval_wikibio_emb_top1_equal": 0.2734375,
      "eval_wikibio_emb_top1_equal_sem": 0.03955156411760461,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6011593341827393,
      "eval_wikibio_n_ngrams_match_1": 9.924,
      "eval_wikibio_n_ngrams_match_2": 3.37,
      "eval_wikibio_n_ngrams_match_3": 1.244,
      "eval_wikibio_num_pred_words": 35.85,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 36.640688632437495,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3570580379457571,
      "eval_wikibio_runtime": 11.9395,
      "eval_wikibio_samples_per_second": 41.878,
      "eval_wikibio_steps_per_second": 0.084,
      "eval_wikibio_token_set_f1": 0.31896550870984725,
      "eval_wikibio_token_set_f1_sem": 0.005661441125922278,
      "eval_wikibio_token_set_precision": 0.3269826723831596,
      "eval_wikibio_token_set_recall": 0.32874119211446584,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 250625
    },
    {
      "epoch": 48.12,
      "eval_nq_accuracy": 0.53815625,
      "eval_nq_bleu_score": 12.594178108198438,
      "eval_nq_bleu_score_sem": 0.5045598870209774,
      "eval_nq_emb_cos_sim": 0.8444705009460449,
      "eval_nq_emb_cos_sim_sem": 0.006603551102529669,
      "eval_nq_emb_top1_equal": 0.3671875,
      "eval_nq_emb_top1_equal_sem": 0.04277397517748991,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.103944778442383,
      "eval_nq_n_ngrams_match_1": 23.724,
      "eval_nq_n_ngrams_match_2": 9.018,
      "eval_nq_n_ngrams_match_3": 4.25,
      "eval_nq_num_pred_words": 49.122,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.198447265241693,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.46023629689377865,
      "eval_nq_runtime": 12.0333,
      "eval_nq_samples_per_second": 41.551,
      "eval_nq_steps_per_second": 0.083,
      "eval_nq_token_set_f1": 0.47348552007413786,
      "eval_nq_token_set_f1_sem": 0.005088084412755815,
      "eval_nq_token_set_precision": 0.4324209050827016,
      "eval_nq_token_set_recall": 0.5312504527279666,
      "eval_nq_true_num_tokens": 64.0,
      "step": 250625
    },
    {
      "epoch": 48.12,
      "learning_rate": 0.001,
      "loss": 2.4834,
      "step": 250632
    },
    {
      "epoch": 48.13,
      "learning_rate": 0.001,
      "loss": 2.4775,
      "step": 250644
    },
    {
      "epoch": 48.13,
      "learning_rate": 0.001,
      "loss": 2.4822,
      "step": 250656
    },
    {
      "epoch": 48.13,
      "learning_rate": 0.001,
      "loss": 2.4769,
      "step": 250668
    },
    {
      "epoch": 48.13,
      "learning_rate": 0.001,
      "loss": 2.471,
      "step": 250680
    },
    {
      "epoch": 48.14,
      "learning_rate": 0.001,
      "loss": 2.4706,
      "step": 250692
    },
    {
      "epoch": 48.14,
      "learning_rate": 0.001,
      "loss": 2.4738,
      "step": 250704
    },
    {
      "epoch": 48.14,
      "learning_rate": 0.001,
      "loss": 2.4788,
      "step": 250716
    },
    {
      "epoch": 48.14,
      "learning_rate": 0.001,
      "loss": 2.4833,
      "step": 250728
    },
    {
      "epoch": 48.15,
      "learning_rate": 0.001,
      "loss": 2.4742,
      "step": 250740
    },
    {
      "epoch": 48.15,
      "learning_rate": 0.001,
      "loss": 2.4895,
      "step": 250752
    },
    {
      "epoch": 48.15,
      "learning_rate": 0.001,
      "loss": 2.4741,
      "step": 250764
    },
    {
      "epoch": 48.15,
      "learning_rate": 0.001,
      "loss": 2.4703,
      "step": 250776
    },
    {
      "epoch": 48.15,
      "learning_rate": 0.001,
      "loss": 2.4748,
      "step": 250788
    },
    {
      "epoch": 48.16,
      "learning_rate": 0.001,
      "loss": 2.479,
      "step": 250800
    },
    {
      "epoch": 48.16,
      "learning_rate": 0.001,
      "loss": 2.4805,
      "step": 250812
    },
    {
      "epoch": 48.16,
      "learning_rate": 0.001,
      "loss": 2.4742,
      "step": 250824
    },
    {
      "epoch": 48.16,
      "learning_rate": 0.001,
      "loss": 2.4839,
      "step": 250836
    },
    {
      "epoch": 48.17,
      "learning_rate": 0.001,
      "loss": 2.4743,
      "step": 250848
    },
    {
      "epoch": 48.17,
      "learning_rate": 0.001,
      "loss": 2.483,
      "step": 250860
    },
    {
      "epoch": 48.17,
      "learning_rate": 0.001,
      "loss": 2.4767,
      "step": 250872
    },
    {
      "epoch": 48.17,
      "learning_rate": 0.001,
      "loss": 2.4821,
      "step": 250884
    },
    {
      "epoch": 48.18,
      "learning_rate": 0.001,
      "loss": 2.4749,
      "step": 250896
    },
    {
      "epoch": 48.18,
      "learning_rate": 0.001,
      "loss": 2.4769,
      "step": 250908
    },
    {
      "epoch": 48.18,
      "learning_rate": 0.001,
      "loss": 2.4842,
      "step": 250920
    },
    {
      "epoch": 48.18,
      "learning_rate": 0.001,
      "loss": 2.482,
      "step": 250932
    },
    {
      "epoch": 48.18,
      "learning_rate": 0.001,
      "loss": 2.4779,
      "step": 250944
    },
    {
      "epoch": 48.19,
      "learning_rate": 0.001,
      "loss": 2.4803,
      "step": 250956
    },
    {
      "epoch": 48.19,
      "learning_rate": 0.001,
      "loss": 2.4759,
      "step": 250968
    },
    {
      "epoch": 48.19,
      "learning_rate": 0.001,
      "loss": 2.4807,
      "step": 250980
    },
    {
      "epoch": 48.19,
      "learning_rate": 0.001,
      "loss": 2.4812,
      "step": 250992
    },
    {
      "epoch": 48.2,
      "learning_rate": 0.001,
      "loss": 2.4768,
      "step": 251004
    },
    {
      "epoch": 48.2,
      "learning_rate": 0.001,
      "loss": 2.4838,
      "step": 251016
    },
    {
      "epoch": 48.2,
      "learning_rate": 0.001,
      "loss": 2.4819,
      "step": 251028
    },
    {
      "epoch": 48.2,
      "learning_rate": 0.001,
      "loss": 2.4795,
      "step": 251040
    },
    {
      "epoch": 48.21,
      "learning_rate": 0.001,
      "loss": 2.4801,
      "step": 251052
    },
    {
      "epoch": 48.21,
      "learning_rate": 0.001,
      "loss": 2.491,
      "step": 251064
    },
    {
      "epoch": 48.21,
      "learning_rate": 0.001,
      "loss": 2.4737,
      "step": 251076
    },
    {
      "epoch": 48.21,
      "learning_rate": 0.001,
      "loss": 2.4685,
      "step": 251088
    },
    {
      "epoch": 48.21,
      "learning_rate": 0.001,
      "loss": 2.4764,
      "step": 251100
    },
    {
      "epoch": 48.22,
      "learning_rate": 0.001,
      "loss": 2.4709,
      "step": 251112
    },
    {
      "epoch": 48.22,
      "learning_rate": 0.001,
      "loss": 2.4863,
      "step": 251124
    },
    {
      "epoch": 48.22,
      "learning_rate": 0.001,
      "loss": 2.4893,
      "step": 251136
    },
    {
      "epoch": 48.22,
      "learning_rate": 0.001,
      "loss": 2.4738,
      "step": 251148
    },
    {
      "epoch": 48.23,
      "learning_rate": 0.001,
      "loss": 2.4803,
      "step": 251160
    },
    {
      "epoch": 48.23,
      "learning_rate": 0.001,
      "loss": 2.4718,
      "step": 251172
    },
    {
      "epoch": 48.23,
      "learning_rate": 0.001,
      "loss": 2.4835,
      "step": 251184
    },
    {
      "epoch": 48.23,
      "learning_rate": 0.001,
      "loss": 2.4798,
      "step": 251196
    },
    {
      "epoch": 48.24,
      "learning_rate": 0.001,
      "loss": 2.4788,
      "step": 251208
    },
    {
      "epoch": 48.24,
      "learning_rate": 0.001,
      "loss": 2.4777,
      "step": 251220
    },
    {
      "epoch": 48.24,
      "learning_rate": 0.001,
      "loss": 2.4738,
      "step": 251232
    },
    {
      "epoch": 48.24,
      "learning_rate": 0.001,
      "loss": 2.4731,
      "step": 251244
    },
    {
      "epoch": 48.24,
      "eval_ag_news_accuracy": 0.33078125,
      "eval_ag_news_bleu_score": 5.09959671108279,
      "eval_ag_news_bleu_score_sem": 0.16014220244775615,
      "eval_ag_news_emb_cos_sim": 0.8223634958267212,
      "eval_ag_news_emb_cos_sim_sem": 0.0062494326227966294,
      "eval_ag_news_emb_top1_equal": 0.25,
      "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4594814777374268,
      "eval_ag_news_n_ngrams_match_1": 14.536,
      "eval_ag_news_n_ngrams_match_2": 3.328,
      "eval_ag_news_n_ngrams_match_3": 0.966,
      "eval_ag_news_num_pred_words": 46.438,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.800482980518932,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3640067607748212,
      "eval_ag_news_runtime": 11.5479,
      "eval_ag_news_samples_per_second": 43.298,
      "eval_ag_news_steps_per_second": 0.087,
      "eval_ag_news_token_set_f1": 0.36098790997584823,
      "eval_ag_news_token_set_f1_sem": 0.0044507064389004685,
      "eval_ag_news_token_set_precision": 0.3476410126022377,
      "eval_ag_news_token_set_recall": 0.39049919153744506,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 251250
    },
    {
      "epoch": 48.24,
      "eval_anthropic_toxic_prompts_accuracy": 0.1165,
      "eval_anthropic_toxic_prompts_bleu_score": 3.4080332414745,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1266418285440086,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6937938928604126,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008794806586370545,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.140625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030847557647994725,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2096006870269775,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.392,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.082,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.836,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.152,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.769193589258837,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.22015547013252357,
      "eval_anthropic_toxic_prompts_runtime": 12.573,
      "eval_anthropic_toxic_prompts_samples_per_second": 39.768,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.08,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3607077909796986,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006354294148384737,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.45063219653479836,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3283815092734517,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 251250
    },
    {
      "epoch": 48.24,
      "eval_arxiv_accuracy": 0.35534375,
      "eval_arxiv_bleu_score": 4.497678610095454,
      "eval_arxiv_bleu_score_sem": 0.13852984143801117,
      "eval_arxiv_emb_cos_sim": 0.7842373847961426,
      "eval_arxiv_emb_cos_sim_sem": 0.007200587553466802,
      "eval_arxiv_emb_top1_equal": 0.3203125,
      "eval_arxiv_emb_top1_equal_sem": 0.041403754790620424,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3105201721191406,
      "eval_arxiv_n_ngrams_match_1": 15.584,
      "eval_arxiv_n_ngrams_match_2": 3.082,
      "eval_arxiv_n_ngrams_match_3": 0.684,
      "eval_arxiv_num_pred_words": 40.456,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.399374156215742,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3733434561234425,
      "eval_arxiv_runtime": 11.4525,
      "eval_arxiv_samples_per_second": 43.659,
      "eval_arxiv_steps_per_second": 0.087,
      "eval_arxiv_token_set_f1": 0.36444946024990826,
      "eval_arxiv_token_set_f1_sem": 0.004467964127470384,
      "eval_arxiv_token_set_precision": 0.31892920578939843,
      "eval_arxiv_token_set_recall": 0.44565416650040224,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 251250
    },
    {
      "epoch": 48.24,
      "eval_python_code_alpaca_accuracy": 0.16434375,
      "eval_python_code_alpaca_bleu_score": 4.965559602453602,
      "eval_python_code_alpaca_bleu_score_sem": 0.16300538877905096,
      "eval_python_code_alpaca_emb_cos_sim": 0.7760825157165527,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.00773737245027958,
      "eval_python_code_alpaca_emb_top1_equal": 0.1796875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.034068008879424266,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.819366216659546,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.06,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.078,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.09,
      "eval_python_code_alpaca_num_pred_words": 43.126,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 16.766221152440643,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3472794596335977,
      "eval_python_code_alpaca_runtime": 11.164,
      "eval_python_code_alpaca_samples_per_second": 44.787,
      "eval_python_code_alpaca_steps_per_second": 0.09,
      "eval_python_code_alpaca_token_set_f1": 0.48566086343735,
      "eval_python_code_alpaca_token_set_f1_sem": 0.00542900190715754,
      "eval_python_code_alpaca_token_set_precision": 0.553812915165647,
      "eval_python_code_alpaca_token_set_recall": 0.45463936680057865,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 251250
    },
    {
      "epoch": 48.24,
      "eval_wikibio_accuracy": 0.33140625,
      "eval_wikibio_bleu_score": 5.866905590101334,
      "eval_wikibio_bleu_score_sem": 0.21141034914752507,
      "eval_wikibio_emb_cos_sim": 0.7367610931396484,
      "eval_wikibio_emb_cos_sim_sem": 0.010105310815800664,
      "eval_wikibio_emb_top1_equal": 0.2421875,
      "eval_wikibio_emb_top1_equal_sem": 0.038014990119662626,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6136386394500732,
      "eval_wikibio_n_ngrams_match_1": 9.726,
      "eval_wikibio_n_ngrams_match_2": 3.224,
      "eval_wikibio_n_ngrams_match_3": 1.172,
      "eval_wikibio_num_pred_words": 34.774,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.10080395965178,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.34991224266827015,
      "eval_wikibio_runtime": 11.995,
      "eval_wikibio_samples_per_second": 41.684,
      "eval_wikibio_steps_per_second": 0.083,
      "eval_wikibio_token_set_f1": 0.31528805844395424,
      "eval_wikibio_token_set_f1_sem": 0.0056001249024216744,
      "eval_wikibio_token_set_precision": 0.3176935238206678,
      "eval_wikibio_token_set_recall": 0.3338562315888643,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 251250
    },
    {
      "epoch": 48.24,
      "eval_nq_accuracy": 0.5401875,
      "eval_nq_bleu_score": 12.269426629372164,
      "eval_nq_bleu_score_sem": 0.49251996946522614,
      "eval_nq_emb_cos_sim": 0.836445689201355,
      "eval_nq_emb_cos_sim_sem": 0.00690769922499698,
      "eval_nq_emb_top1_equal": 0.3359375,
      "eval_nq_emb_top1_equal_sem": 0.04191137143408563,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1021807193756104,
      "eval_nq_n_ngrams_match_1": 23.596,
      "eval_nq_n_ngrams_match_2": 8.796,
      "eval_nq_n_ngrams_match_3": 4.134,
      "eval_nq_num_pred_words": 49.008,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.183997468904307,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45823407640517116,
      "eval_nq_runtime": 11.6865,
      "eval_nq_samples_per_second": 42.784,
      "eval_nq_steps_per_second": 0.086,
      "eval_nq_token_set_f1": 0.4709152746179991,
      "eval_nq_token_set_f1_sem": 0.004818976117621279,
      "eval_nq_token_set_precision": 0.43048724253270526,
      "eval_nq_token_set_recall": 0.5275287415318805,
      "eval_nq_true_num_tokens": 64.0,
      "step": 251250
    },
    {
      "epoch": 48.24,
      "learning_rate": 0.001,
      "loss": 2.4868,
      "step": 251256
    },
    {
      "epoch": 48.25,
      "learning_rate": 0.001,
      "loss": 2.4766,
      "step": 251268
    },
    {
      "epoch": 48.25,
      "learning_rate": 0.001,
      "loss": 2.4829,
      "step": 251280
    },
    {
      "epoch": 48.25,
      "learning_rate": 0.001,
      "loss": 2.4823,
      "step": 251292
    },
    {
      "epoch": 48.25,
      "learning_rate": 0.001,
      "loss": 2.4837,
      "step": 251304
    },
    {
      "epoch": 48.26,
      "learning_rate": 0.001,
      "loss": 2.4787,
      "step": 251316
    },
    {
      "epoch": 48.26,
      "learning_rate": 0.001,
      "loss": 2.474,
      "step": 251328
    },
    {
      "epoch": 48.26,
      "learning_rate": 0.001,
      "loss": 2.4757,
      "step": 251340
    },
    {
      "epoch": 48.26,
      "learning_rate": 0.001,
      "loss": 2.4882,
      "step": 251352
    },
    {
      "epoch": 48.26,
      "learning_rate": 0.001,
      "loss": 2.4782,
      "step": 251364
    },
    {
      "epoch": 48.27,
      "learning_rate": 0.001,
      "loss": 2.4825,
      "step": 251376
    },
    {
      "epoch": 48.27,
      "learning_rate": 0.001,
      "loss": 2.4858,
      "step": 251388
    },
    {
      "epoch": 48.27,
      "learning_rate": 0.001,
      "loss": 2.4826,
      "step": 251400
    },
    {
      "epoch": 48.27,
      "learning_rate": 0.001,
      "loss": 2.4846,
      "step": 251412
    },
    {
      "epoch": 48.28,
      "learning_rate": 0.001,
      "loss": 2.4762,
      "step": 251424
    },
    {
      "epoch": 48.28,
      "learning_rate": 0.001,
      "loss": 2.4778,
      "step": 251436
    },
    {
      "epoch": 48.28,
      "learning_rate": 0.001,
      "loss": 2.4702,
      "step": 251448
    },
    {
      "epoch": 48.28,
      "learning_rate": 0.001,
      "loss": 2.4791,
      "step": 251460
    },
    {
      "epoch": 48.29,
      "learning_rate": 0.001,
      "loss": 2.4871,
      "step": 251472
    },
    {
      "epoch": 48.29,
      "learning_rate": 0.001,
      "loss": 2.4761,
      "step": 251484
    },
    {
      "epoch": 48.29,
      "learning_rate": 0.001,
      "loss": 2.4868,
      "step": 251496
    },
    {
      "epoch": 48.29,
      "learning_rate": 0.001,
      "loss": 2.4759,
      "step": 251508
    },
    {
      "epoch": 48.29,
      "learning_rate": 0.001,
      "loss": 2.4871,
      "step": 251520
    },
    {
      "epoch": 48.3,
      "learning_rate": 0.001,
      "loss": 2.4773,
      "step": 251532
    },
    {
      "epoch": 48.3,
      "learning_rate": 0.001,
      "loss": 2.475,
      "step": 251544
    },
    {
      "epoch": 48.3,
      "learning_rate": 0.001,
      "loss": 2.4857,
      "step": 251556
    },
    {
      "epoch": 48.3,
      "learning_rate": 0.001,
      "loss": 2.4831,
      "step": 251568
    },
    {
      "epoch": 48.31,
      "learning_rate": 0.001,
      "loss": 2.4881,
      "step": 251580
    },
    {
      "epoch": 48.31,
      "learning_rate": 0.001,
      "loss": 2.476,
      "step": 251592
    },
    {
      "epoch": 48.31,
      "learning_rate": 0.001,
      "loss": 2.4761,
      "step": 251604
    },
    {
      "epoch": 48.31,
      "learning_rate": 0.001,
      "loss": 2.4804,
      "step": 251616
    },
    {
      "epoch": 48.32,
      "learning_rate": 0.001,
      "loss": 2.4767,
      "step": 251628
    },
    {
      "epoch": 48.32,
      "learning_rate": 0.001,
      "loss": 2.4832,
      "step": 251640
    },
    {
      "epoch": 48.32,
      "learning_rate": 0.001,
      "loss": 2.4724,
      "step": 251652
    },
    {
      "epoch": 48.32,
      "learning_rate": 0.001,
      "loss": 2.4834,
      "step": 251664
    },
    {
      "epoch": 48.32,
      "learning_rate": 0.001,
      "loss": 2.4847,
      "step": 251676
    },
    {
      "epoch": 48.33,
      "learning_rate": 0.001,
      "loss": 2.4779,
      "step": 251688
    },
    {
      "epoch": 48.33,
      "learning_rate": 0.001,
      "loss": 2.48,
      "step": 251700
    },
    {
      "epoch": 48.33,
      "learning_rate": 0.001,
      "loss": 2.4782,
      "step": 251712
    },
    {
      "epoch": 48.33,
      "learning_rate": 0.001,
      "loss": 2.4813,
      "step": 251724
    },
    {
      "epoch": 48.34,
      "learning_rate": 0.001,
      "loss": 2.4745,
      "step": 251736
    },
    {
      "epoch": 48.34,
      "learning_rate": 0.001,
      "loss": 2.4777,
      "step": 251748
    },
    {
      "epoch": 48.34,
      "learning_rate": 0.001,
      "loss": 2.4784,
      "step": 251760
    },
    {
      "epoch": 48.34,
      "learning_rate": 0.001,
      "loss": 2.4763,
      "step": 251772
    },
    {
      "epoch": 48.35,
      "learning_rate": 0.001,
      "loss": 2.4841,
      "step": 251784
    },
    {
      "epoch": 48.35,
      "learning_rate": 0.001,
      "loss": 2.4699,
      "step": 251796
    },
    {
      "epoch": 48.35,
      "learning_rate": 0.001,
      "loss": 2.4776,
      "step": 251808
    },
    {
      "epoch": 48.35,
      "learning_rate": 0.001,
      "loss": 2.4719,
      "step": 251820
    },
    {
      "epoch": 48.35,
      "learning_rate": 0.001,
      "loss": 2.4845,
      "step": 251832
    },
    {
      "epoch": 48.36,
      "learning_rate": 0.001,
      "loss": 2.4736,
      "step": 251844
    },
    {
      "epoch": 48.36,
      "learning_rate": 0.001,
      "loss": 2.4792,
      "step": 251856
    },
    {
      "epoch": 48.36,
      "learning_rate": 0.001,
      "loss": 2.4814,
      "step": 251868
    },
    {
      "epoch": 48.36,
      "eval_ag_news_accuracy": 0.33090625,
      "eval_ag_news_bleu_score": 5.167693823396464,
      "eval_ag_news_bleu_score_sem": 0.1643706627612869,
      "eval_ag_news_emb_cos_sim": 0.8261070847511292,
      "eval_ag_news_emb_cos_sim_sem": 0.006726918220843595,
      "eval_ag_news_emb_top1_equal": 0.25,
      "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.456059455871582,
      "eval_ag_news_n_ngrams_match_1": 14.776,
      "eval_ag_news_n_ngrams_match_2": 3.5,
      "eval_ag_news_n_ngrams_match_3": 1.004,
      "eval_ag_news_num_pred_words": 46.92,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.69184701575129,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3673643766483693,
      "eval_ag_news_runtime": 11.9375,
      "eval_ag_news_samples_per_second": 41.885,
      "eval_ag_news_steps_per_second": 0.084,
      "eval_ag_news_token_set_f1": 0.3647391261588922,
      "eval_ag_news_token_set_f1_sem": 0.0046172232975958,
      "eval_ag_news_token_set_precision": 0.35199514681047417,
      "eval_ag_news_token_set_recall": 0.39216877659465427,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 251875
    },
    {
      "epoch": 48.36,
      "eval_anthropic_toxic_prompts_accuracy": 0.11690625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2433695670770337,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12217276880675339,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6950536966323853,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008528013742574045,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2095835208892822,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.384,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.02,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.778,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.836,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.768768401520507,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21722775682923107,
      "eval_anthropic_toxic_prompts_runtime": 11.1324,
      "eval_anthropic_toxic_prompts_samples_per_second": 44.914,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.09,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36048937684694327,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006526683291917304,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4514346911599886,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3265819297445124,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 251875
    },
    {
      "epoch": 48.36,
      "eval_arxiv_accuracy": 0.35553125,
      "eval_arxiv_bleu_score": 4.431887569962282,
      "eval_arxiv_bleu_score_sem": 0.12944506366506656,
      "eval_arxiv_emb_cos_sim": 0.7818551063537598,
      "eval_arxiv_emb_cos_sim_sem": 0.007109343577707764,
      "eval_arxiv_emb_top1_equal": 0.25,
      "eval_arxiv_emb_top1_equal_sem": 0.03842366440207048,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3221285343170166,
      "eval_arxiv_n_ngrams_match_1": 15.592,
      "eval_arxiv_n_ngrams_match_2": 3.074,
      "eval_arxiv_n_ngrams_match_3": 0.674,
      "eval_arxiv_num_pred_words": 40.842,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.71928926818173,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3727703280568793,
      "eval_arxiv_runtime": 12.6899,
      "eval_arxiv_samples_per_second": 39.401,
      "eval_arxiv_steps_per_second": 0.079,
      "eval_arxiv_token_set_f1": 0.365927603584925,
      "eval_arxiv_token_set_f1_sem": 0.004416122318664511,
      "eval_arxiv_token_set_precision": 0.31787081954965296,
      "eval_arxiv_token_set_recall": 0.4491077653743621,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 251875
    },
    {
      "epoch": 48.36,
      "eval_python_code_alpaca_accuracy": 0.162875,
      "eval_python_code_alpaca_bleu_score": 4.868752082978907,
      "eval_python_code_alpaca_bleu_score_sem": 0.1440595166960755,
      "eval_python_code_alpaca_emb_cos_sim": 0.7802065014839172,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007584125192004953,
      "eval_python_code_alpaca_emb_top1_equal": 0.1484375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8391098976135254,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.272,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.138,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.102,
      "eval_python_code_alpaca_num_pred_words": 44.376,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.100537531642352,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3472426663093965,
      "eval_python_code_alpaca_runtime": 33.8483,
      "eval_python_code_alpaca_samples_per_second": 14.772,
      "eval_python_code_alpaca_steps_per_second": 0.03,
      "eval_python_code_alpaca_token_set_f1": 0.4900903739555282,
      "eval_python_code_alpaca_token_set_f1_sem": 0.00512187924133402,
      "eval_python_code_alpaca_token_set_precision": 0.565810531328908,
      "eval_python_code_alpaca_token_set_recall": 0.4511917160087212,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 251875
    },
    {
      "epoch": 48.36,
      "eval_wikibio_accuracy": 0.32878125,
      "eval_wikibio_bleu_score": 6.279184295600841,
      "eval_wikibio_bleu_score_sem": 0.2193481791221487,
      "eval_wikibio_emb_cos_sim": 0.7526933550834656,
      "eval_wikibio_emb_cos_sim_sem": 0.009144129544996453,
      "eval_wikibio_emb_top1_equal": 0.2109375,
      "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.632040500640869,
      "eval_wikibio_n_ngrams_match_1": 10.266,
      "eval_wikibio_n_ngrams_match_2": 3.538,
      "eval_wikibio_n_ngrams_match_3": 1.366,
      "eval_wikibio_num_pred_words": 36.494,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.789848208436084,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36355306803507004,
      "eval_wikibio_runtime": 29.2206,
      "eval_wikibio_samples_per_second": 17.111,
      "eval_wikibio_steps_per_second": 0.034,
      "eval_wikibio_token_set_f1": 0.32665042392141497,
      "eval_wikibio_token_set_f1_sem": 0.005293649857900133,
      "eval_wikibio_token_set_precision": 0.3353163458613141,
      "eval_wikibio_token_set_recall": 0.3362724738346447,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 251875
    },
    {
      "epoch": 48.36,
      "eval_nq_accuracy": 0.54034375,
      "eval_nq_bleu_score": 12.420310795175435,
      "eval_nq_bleu_score_sem": 0.5042513808363712,
      "eval_nq_emb_cos_sim": 0.8430113196372986,
      "eval_nq_emb_cos_sim_sem": 0.007010841782926249,
      "eval_nq_emb_top1_equal": 0.3125,
      "eval_nq_emb_top1_equal_sem": 0.041130074229814934,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.104539632797241,
      "eval_nq_n_ngrams_match_1": 23.758,
      "eval_nq_n_ngrams_match_2": 8.79,
      "eval_nq_n_ngrams_match_3": 4.178,
      "eval_nq_num_pred_words": 49.278,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.203325598105424,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45959830713726046,
      "eval_nq_runtime": 23.7373,
      "eval_nq_samples_per_second": 21.064,
      "eval_nq_steps_per_second": 0.042,
      "eval_nq_token_set_f1": 0.47252964948873705,
      "eval_nq_token_set_f1_sem": 0.004844632569834099,
      "eval_nq_token_set_precision": 0.4319054093187382,
      "eval_nq_token_set_recall": 0.5295300013559052,
      "eval_nq_true_num_tokens": 64.0,
      "step": 251875
    },
    {
      "epoch": 48.36,
      "learning_rate": 0.001,
      "loss": 2.4827,
      "step": 251880
    },
    {
      "epoch": 48.37,
      "learning_rate": 0.001,
      "loss": 2.472,
      "step": 251892
    },
    {
      "epoch": 48.37,
      "learning_rate": 0.001,
      "loss": 2.4765,
      "step": 251904
    },
    {
      "epoch": 48.37,
      "learning_rate": 0.001,
      "loss": 2.4779,
      "step": 251916
    },
    {
      "epoch": 48.37,
      "learning_rate": 0.001,
      "loss": 2.4759,
      "step": 251928
    },
    {
      "epoch": 48.38,
      "learning_rate": 0.001,
      "loss": 2.478,
      "step": 251940
    },
    {
      "epoch": 48.38,
      "learning_rate": 0.001,
      "loss": 2.4829,
      "step": 251952
    },
    {
      "epoch": 48.38,
      "learning_rate": 0.001,
      "loss": 2.4752,
      "step": 251964
    },
    {
      "epoch": 48.38,
      "learning_rate": 0.001,
      "loss": 2.4821,
      "step": 251976
    },
    {
      "epoch": 48.38,
      "learning_rate": 0.001,
      "loss": 2.4785,
      "step": 251988
    },
    {
      "epoch": 48.39,
      "learning_rate": 0.001,
      "loss": 2.4756,
      "step": 252000
    },
    {
      "epoch": 48.39,
      "learning_rate": 0.001,
      "loss": 2.4763,
      "step": 252012
    },
    {
      "epoch": 48.39,
      "learning_rate": 0.001,
      "loss": 2.4813,
      "step": 252024
    },
    {
      "epoch": 48.39,
      "learning_rate": 0.001,
      "loss": 2.4866,
      "step": 252036
    },
    {
      "epoch": 48.4,
      "learning_rate": 0.001,
      "loss": 2.4734,
      "step": 252048
    },
    {
      "epoch": 48.4,
      "learning_rate": 0.001,
      "loss": 2.4786,
      "step": 252060
    },
    {
      "epoch": 48.4,
      "learning_rate": 0.001,
      "loss": 2.4739,
      "step": 252072
    },
    {
      "epoch": 48.4,
      "learning_rate": 0.001,
      "loss": 2.4765,
      "step": 252084
    },
    {
      "epoch": 48.41,
      "learning_rate": 0.001,
      "loss": 2.4686,
      "step": 252096
    },
    {
      "epoch": 48.41,
      "learning_rate": 0.001,
      "loss": 2.4835,
      "step": 252108
    },
    {
      "epoch": 48.41,
      "learning_rate": 0.001,
      "loss": 2.4749,
      "step": 252120
    },
    {
      "epoch": 48.41,
      "learning_rate": 0.001,
      "loss": 2.4767,
      "step": 252132
    },
    {
      "epoch": 48.41,
      "learning_rate": 0.001,
      "loss": 2.4696,
      "step": 252144
    },
    {
      "epoch": 48.42,
      "learning_rate": 0.001,
      "loss": 2.4748,
      "step": 252156
    },
    {
      "epoch": 48.42,
      "learning_rate": 0.001,
      "loss": 2.4795,
      "step": 252168
    },
    {
      "epoch": 48.42,
      "learning_rate": 0.001,
      "loss": 2.4862,
      "step": 252180
    },
    {
      "epoch": 48.42,
      "learning_rate": 0.001,
      "loss": 2.4745,
      "step": 252192
    },
    {
      "epoch": 48.43,
      "learning_rate": 0.001,
      "loss": 2.4783,
      "step": 252204
    },
    {
      "epoch": 48.43,
      "learning_rate": 0.001,
      "loss": 2.4711,
      "step": 252216
    },
    {
      "epoch": 48.43,
      "learning_rate": 0.001,
      "loss": 2.4754,
      "step": 252228
    },
    {
      "epoch": 48.43,
      "learning_rate": 0.001,
      "loss": 2.4799,
      "step": 252240
    },
    {
      "epoch": 48.44,
      "learning_rate": 0.001,
      "loss": 2.4768,
      "step": 252252
    },
    {
      "epoch": 48.44,
      "learning_rate": 0.001,
      "loss": 2.4689,
      "step": 252264
    },
    {
      "epoch": 48.44,
      "learning_rate": 0.001,
      "loss": 2.4686,
      "step": 252276
    },
    {
      "epoch": 48.44,
      "learning_rate": 0.001,
      "loss": 2.4749,
      "step": 252288
    },
    {
      "epoch": 48.44,
      "learning_rate": 0.001,
      "loss": 2.4799,
      "step": 252300
    },
    {
      "epoch": 48.45,
      "learning_rate": 0.001,
      "loss": 2.4805,
      "step": 252312
    },
    {
      "epoch": 48.45,
      "learning_rate": 0.001,
      "loss": 2.4792,
      "step": 252324
    },
    {
      "epoch": 48.45,
      "learning_rate": 0.001,
      "loss": 2.4688,
      "step": 252336
    },
    {
      "epoch": 48.45,
      "learning_rate": 0.001,
      "loss": 2.485,
      "step": 252348
    },
    {
      "epoch": 48.46,
      "learning_rate": 0.001,
      "loss": 2.4771,
      "step": 252360
    },
    {
      "epoch": 48.46,
      "learning_rate": 0.001,
      "loss": 2.4874,
      "step": 252372
    },
    {
      "epoch": 48.46,
      "learning_rate": 0.001,
      "loss": 2.4844,
      "step": 252384
    },
    {
      "epoch": 48.46,
      "learning_rate": 0.001,
      "loss": 2.4767,
      "step": 252396
    },
    {
      "epoch": 48.47,
      "learning_rate": 0.001,
      "loss": 2.4855,
      "step": 252408
    },
    {
      "epoch": 48.47,
      "learning_rate": 0.001,
      "loss": 2.4789,
      "step": 252420
    },
    {
      "epoch": 48.47,
      "learning_rate": 0.001,
      "loss": 2.4819,
      "step": 252432
    },
    {
      "epoch": 48.47,
      "learning_rate": 0.001,
      "loss": 2.4837,
      "step": 252444
    },
    {
      "epoch": 48.47,
      "learning_rate": 0.001,
      "loss": 2.4882,
      "step": 252456
    },
    {
      "epoch": 48.48,
      "learning_rate": 0.001,
      "loss": 2.484,
      "step": 252468
    },
    {
      "epoch": 48.48,
      "learning_rate": 0.001,
      "loss": 2.477,
      "step": 252480
    },
    {
      "epoch": 48.48,
      "learning_rate": 0.001,
      "loss": 2.4863,
      "step": 252492
    },
    {
      "epoch": 48.48,
      "eval_ag_news_accuracy": 0.332375,
      "eval_ag_news_bleu_score": 5.138620462052554,
      "eval_ag_news_bleu_score_sem": 0.1663385760860313,
      "eval_ag_news_emb_cos_sim": 0.8251314163208008,
      "eval_ag_news_emb_cos_sim_sem": 0.0064455965681119816,
      "eval_ag_news_emb_top1_equal": 0.203125,
      "eval_ag_news_emb_top1_equal_sem": 0.03570055125142555,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.445012092590332,
      "eval_ag_news_n_ngrams_match_1": 14.558,
      "eval_ag_news_n_ngrams_match_2": 3.372,
      "eval_ag_news_n_ngrams_match_3": 0.916,
      "eval_ag_news_num_pred_words": 46.234,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.34366247044745,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.36562157530401873,
      "eval_ag_news_runtime": 11.5372,
      "eval_ag_news_samples_per_second": 43.338,
      "eval_ag_news_steps_per_second": 0.087,
      "eval_ag_news_token_set_f1": 0.36167660406611235,
      "eval_ag_news_token_set_f1_sem": 0.004403739658015286,
      "eval_ag_news_token_set_precision": 0.3477815205037834,
      "eval_ag_news_token_set_recall": 0.3913349886249815,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 252500
    },
    {
      "epoch": 48.48,
      "eval_anthropic_toxic_prompts_accuracy": 0.11615625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.3158997955492695,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1309423280689943,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6888794302940369,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008748511566003868,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.192425012588501,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.362,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.012,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.782,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.686,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.347398659470723,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2204264884245279,
      "eval_anthropic_toxic_prompts_runtime": 11.2014,
      "eval_anthropic_toxic_prompts_samples_per_second": 44.637,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.089,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35525893389393404,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006543885851610156,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4463478376988283,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32304658677546483,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 252500
    },
    {
      "epoch": 48.48,
      "eval_arxiv_accuracy": 0.35434375,
      "eval_arxiv_bleu_score": 4.55302470016922,
      "eval_arxiv_bleu_score_sem": 0.14336370871717433,
      "eval_arxiv_emb_cos_sim": 0.7756081819534302,
      "eval_arxiv_emb_cos_sim_sem": 0.0082969792070165,
      "eval_arxiv_emb_top1_equal": 0.296875,
      "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3071327209472656,
      "eval_arxiv_n_ngrams_match_1": 15.46,
      "eval_arxiv_n_ngrams_match_2": 3.122,
      "eval_arxiv_n_ngrams_match_3": 0.75,
      "eval_arxiv_num_pred_words": 39.726,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.30671713828552,
      "eval_arxiv_pred_num_tokens": 62.9921875,
      "eval_arxiv_rouge_score": 0.37337024335822466,
      "eval_arxiv_runtime": 15.7946,
      "eval_arxiv_samples_per_second": 31.656,
      "eval_arxiv_steps_per_second": 0.063,
      "eval_arxiv_token_set_f1": 0.36420785366806485,
      "eval_arxiv_token_set_f1_sem": 0.0044603440265640225,
      "eval_arxiv_token_set_precision": 0.31483497585554,
      "eval_arxiv_token_set_recall": 0.45624563278599656,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 252500
    },
    {
      "epoch": 48.48,
      "eval_python_code_alpaca_accuracy": 0.16315625,
      "eval_python_code_alpaca_bleu_score": 4.80265066334131,
      "eval_python_code_alpaca_bleu_score_sem": 0.15329768739829774,
      "eval_python_code_alpaca_emb_cos_sim": 0.7780901193618774,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007707415260611447,
      "eval_python_code_alpaca_emb_top1_equal": 0.1640625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8433306217193604,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.146,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.126,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.08,
      "eval_python_code_alpaca_num_pred_words": 43.854,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.17286671601518,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3441052162234529,
      "eval_python_code_alpaca_runtime": 11.0685,
      "eval_python_code_alpaca_samples_per_second": 45.173,
      "eval_python_code_alpaca_steps_per_second": 0.09,
      "eval_python_code_alpaca_token_set_f1": 0.4788373856917405,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005535633109350059,
      "eval_python_code_alpaca_token_set_precision": 0.5559542974393263,
      "eval_python_code_alpaca_token_set_recall": 0.4426920524993888,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 252500
    },
    {
      "epoch": 48.48,
      "eval_wikibio_accuracy": 0.333,
      "eval_wikibio_bleu_score": 6.275643764195638,
      "eval_wikibio_bleu_score_sem": 0.22007016783032315,
      "eval_wikibio_emb_cos_sim": 0.7408552169799805,
      "eval_wikibio_emb_cos_sim_sem": 0.009536931587672907,
      "eval_wikibio_emb_top1_equal": 0.2265625,
      "eval_wikibio_emb_top1_equal_sem": 0.03714537682851538,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.5935592651367188,
      "eval_wikibio_n_ngrams_match_1": 10.122,
      "eval_wikibio_n_ngrams_match_2": 3.506,
      "eval_wikibio_n_ngrams_match_3": 1.344,
      "eval_wikibio_num_pred_words": 35.854,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 36.36327239553451,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3579663194467216,
      "eval_wikibio_runtime": 11.1135,
      "eval_wikibio_samples_per_second": 44.99,
      "eval_wikibio_steps_per_second": 0.09,
      "eval_wikibio_token_set_f1": 0.32318602179276845,
      "eval_wikibio_token_set_f1_sem": 0.0056420714798284,
      "eval_wikibio_token_set_precision": 0.3318421320096968,
      "eval_wikibio_token_set_recall": 0.3329391706574089,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 252500
    },
    {
      "epoch": 48.48,
      "eval_nq_accuracy": 0.5394375,
      "eval_nq_bleu_score": 12.532029776140453,
      "eval_nq_bleu_score_sem": 0.48641532656614345,
      "eval_nq_emb_cos_sim": 0.8417807221412659,
      "eval_nq_emb_cos_sim_sem": 0.006886138477805626,
      "eval_nq_emb_top1_equal": 0.2890625,
      "eval_nq_emb_top1_equal_sem": 0.04022626667363519,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1042640209198,
      "eval_nq_n_ngrams_match_1": 23.658,
      "eval_nq_n_ngrams_match_2": 9.02,
      "eval_nq_n_ngrams_match_3": 4.224,
      "eval_nq_num_pred_words": 48.74,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.201064975677573,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4632701590166841,
      "eval_nq_runtime": 11.792,
      "eval_nq_samples_per_second": 42.402,
      "eval_nq_steps_per_second": 0.085,
      "eval_nq_token_set_f1": 0.4745589005744199,
      "eval_nq_token_set_f1_sem": 0.004988652119626734,
      "eval_nq_token_set_precision": 0.43380942006969636,
      "eval_nq_token_set_recall": 0.530726400324404,
      "eval_nq_true_num_tokens": 64.0,
      "step": 252500
    },
    {
      "epoch": 48.48,
      "learning_rate": 0.001,
      "loss": 2.4778,
      "step": 252504
    },
    {
      "epoch": 48.49,
      "learning_rate": 0.001,
      "loss": 2.483,
      "step": 252516
    },
    {
      "epoch": 48.49,
      "learning_rate": 0.001,
      "loss": 2.4839,
      "step": 252528
    },
    {
      "epoch": 48.49,
      "learning_rate": 0.001,
      "loss": 2.4856,
      "step": 252540
    },
    {
      "epoch": 48.49,
      "learning_rate": 0.001,
      "loss": 2.477,
      "step": 252552
    },
    {
      "epoch": 48.5,
      "learning_rate": 0.001,
      "loss": 2.4793,
      "step": 252564
    },
    {
      "epoch": 48.5,
      "learning_rate": 0.001,
      "loss": 2.4785,
      "step": 252576
    },
    {
      "epoch": 48.5,
      "learning_rate": 0.001,
      "loss": 2.467,
      "step": 252588
    },
    {
      "epoch": 48.5,
      "learning_rate": 0.001,
      "loss": 2.4788,
      "step": 252600
    },
    {
      "epoch": 48.5,
      "learning_rate": 0.001,
      "loss": 2.4875,
      "step": 252612
    },
    {
      "epoch": 48.51,
      "learning_rate": 0.001,
      "loss": 2.4801,
      "step": 252624
    },
    {
      "epoch": 48.51,
      "learning_rate": 0.001,
      "loss": 2.4824,
      "step": 252636
    },
    {
      "epoch": 48.51,
      "learning_rate": 0.001,
      "loss": 2.4845,
      "step": 252648
    },
    {
      "epoch": 48.51,
      "learning_rate": 0.001,
      "loss": 2.4722,
      "step": 252660
    },
    {
      "epoch": 48.52,
      "learning_rate": 0.001,
      "loss": 2.4729,
      "step": 252672
    },
    {
      "epoch": 48.52,
      "learning_rate": 0.001,
      "loss": 2.491,
      "step": 252684
    },
    {
      "epoch": 48.52,
      "learning_rate": 0.001,
      "loss": 2.4904,
      "step": 252696
    },
    {
      "epoch": 48.52,
      "learning_rate": 0.001,
      "loss": 2.4783,
      "step": 252708
    },
    {
      "epoch": 48.53,
      "learning_rate": 0.001,
      "loss": 2.488,
      "step": 252720
    },
    {
      "epoch": 48.53,
      "learning_rate": 0.001,
      "loss": 2.4817,
      "step": 252732
    },
    {
      "epoch": 48.53,
      "learning_rate": 0.001,
      "loss": 2.4793,
      "step": 252744
    },
    {
      "epoch": 48.53,
      "learning_rate": 0.001,
      "loss": 2.485,
      "step": 252756
    },
    {
      "epoch": 48.53,
      "learning_rate": 0.001,
      "loss": 2.4822,
      "step": 252768
    },
    {
      "epoch": 48.54,
      "learning_rate": 0.001,
      "loss": 2.4844,
      "step": 252780
    },
    {
      "epoch": 48.54,
      "learning_rate": 0.001,
      "loss": 2.4786,
      "step": 252792
    },
    {
      "epoch": 48.54,
      "learning_rate": 0.001,
      "loss": 2.4823,
      "step": 252804
    },
    {
      "epoch": 48.54,
      "learning_rate": 0.001,
      "loss": 2.4807,
      "step": 252816
    },
    {
      "epoch": 48.55,
      "learning_rate": 0.001,
      "loss": 2.4814,
      "step": 252828
    },
    {
      "epoch": 48.55,
      "learning_rate": 0.001,
      "loss": 2.485,
      "step": 252840
    },
    {
      "epoch": 48.55,
      "learning_rate": 0.001,
      "loss": 2.4849,
      "step": 252852
    },
    {
      "epoch": 48.55,
      "learning_rate": 0.001,
      "loss": 2.4838,
      "step": 252864
    },
    {
      "epoch": 48.56,
      "learning_rate": 0.001,
      "loss": 2.4847,
      "step": 252876
    },
    {
      "epoch": 48.56,
      "learning_rate": 0.001,
      "loss": 2.4763,
      "step": 252888
    },
    {
      "epoch": 48.56,
      "learning_rate": 0.001,
      "loss": 2.474,
      "step": 252900
    },
    {
      "epoch": 48.56,
      "learning_rate": 0.001,
      "loss": 2.4885,
      "step": 252912
    },
    {
      "epoch": 48.56,
      "learning_rate": 0.001,
      "loss": 2.4783,
      "step": 252924
    },
    {
      "epoch": 48.57,
      "learning_rate": 0.001,
      "loss": 2.487,
      "step": 252936
    },
    {
      "epoch": 48.57,
      "learning_rate": 0.001,
      "loss": 2.4963,
      "step": 252948
    },
    {
      "epoch": 48.57,
      "learning_rate": 0.001,
      "loss": 2.4779,
      "step": 252960
    },
    {
      "epoch": 48.57,
      "learning_rate": 0.001,
      "loss": 2.482,
      "step": 252972
    },
    {
      "epoch": 48.58,
      "learning_rate": 0.001,
      "loss": 2.4854,
      "step": 252984
    },
    {
      "epoch": 48.58,
      "learning_rate": 0.001,
      "loss": 2.4817,
      "step": 252996
    },
    {
      "epoch": 48.58,
      "learning_rate": 0.001,
      "loss": 2.4782,
      "step": 253008
    },
    {
      "epoch": 48.58,
      "learning_rate": 0.001,
      "loss": 2.4742,
      "step": 253020
    },
    {
      "epoch": 48.59,
      "learning_rate": 0.001,
      "loss": 2.4752,
      "step": 253032
    },
    {
      "epoch": 48.59,
      "learning_rate": 0.001,
      "loss": 2.4775,
      "step": 253044
    },
    {
      "epoch": 48.59,
      "learning_rate": 0.001,
      "loss": 2.4795,
      "step": 253056
    },
    {
      "epoch": 48.59,
      "learning_rate": 0.001,
      "loss": 2.477,
      "step": 253068
    },
    {
      "epoch": 48.59,
      "learning_rate": 0.001,
      "loss": 2.4816,
      "step": 253080
    },
    {
      "epoch": 48.6,
      "learning_rate": 0.001,
      "loss": 2.476,
      "step": 253092
    },
    {
      "epoch": 48.6,
      "learning_rate": 0.001,
      "loss": 2.4896,
      "step": 253104
    },
    {
      "epoch": 48.6,
      "learning_rate": 0.001,
      "loss": 2.4679,
      "step": 253116
    },
    {
      "epoch": 48.6,
      "eval_ag_news_accuracy": 0.33071875,
      "eval_ag_news_bleu_score": 5.029018791774612,
      "eval_ag_news_bleu_score_sem": 0.16025323196700453,
      "eval_ag_news_emb_cos_sim": 0.8215942978858948,
      "eval_ag_news_emb_cos_sim_sem": 0.006803333749916387,
      "eval_ag_news_emb_top1_equal": 0.28125,
      "eval_ag_news_emb_top1_equal_sem": 0.039896367485272234,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.456071138381958,
      "eval_ag_news_n_ngrams_match_1": 14.636,
      "eval_ag_news_n_ngrams_match_2": 3.3,
      "eval_ag_news_n_ngrams_match_3": 0.926,
      "eval_ag_news_num_pred_words": 46.904,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.692217258245563,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3637170365856054,
      "eval_ag_news_runtime": 15.4867,
      "eval_ag_news_samples_per_second": 32.286,
      "eval_ag_news_steps_per_second": 0.065,
      "eval_ag_news_token_set_f1": 0.3642033948761011,
      "eval_ag_news_token_set_f1_sem": 0.0044956921091983135,
      "eval_ag_news_token_set_precision": 0.3522736066559957,
      "eval_ag_news_token_set_recall": 0.39160754827718897,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 253125
    },
    {
      "epoch": 48.6,
      "eval_anthropic_toxic_prompts_accuracy": 0.1160625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2493823455764965,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12203842006298647,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6819679737091064,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010042318399434084,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2215282917022705,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.304,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.978,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.758,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.51,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.066399693414002,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 62.9921875,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2174435310219331,
      "eval_anthropic_toxic_prompts_runtime": 13.5509,
      "eval_anthropic_toxic_prompts_samples_per_second": 36.898,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.074,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3573758256908113,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006767660534730145,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4446148497948846,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3240845534948466,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 253125
    },
    {
      "epoch": 48.6,
      "eval_arxiv_accuracy": 0.3534375,
      "eval_arxiv_bleu_score": 4.550651971693835,
      "eval_arxiv_bleu_score_sem": 0.13280610259382022,
      "eval_arxiv_emb_cos_sim": 0.786454439163208,
      "eval_arxiv_emb_cos_sim_sem": 0.007206302402707076,
      "eval_arxiv_emb_top1_equal": 0.296875,
      "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3219754695892334,
      "eval_arxiv_n_ngrams_match_1": 15.536,
      "eval_arxiv_n_ngrams_match_2": 3.094,
      "eval_arxiv_n_ngrams_match_3": 0.686,
      "eval_arxiv_num_pred_words": 40.352,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.71504674741398,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.37486797961873203,
      "eval_arxiv_runtime": 16.6703,
      "eval_arxiv_samples_per_second": 29.993,
      "eval_arxiv_steps_per_second": 0.06,
      "eval_arxiv_token_set_f1": 0.36440504973071813,
      "eval_arxiv_token_set_f1_sem": 0.004194924982756976,
      "eval_arxiv_token_set_precision": 0.31769162676906054,
      "eval_arxiv_token_set_recall": 0.44514689265712953,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 253125
    },
    {
      "epoch": 48.6,
      "eval_python_code_alpaca_accuracy": 0.1634375,
      "eval_python_code_alpaca_bleu_score": 4.654156693089146,
      "eval_python_code_alpaca_bleu_score_sem": 0.13700916934822985,
      "eval_python_code_alpaca_emb_cos_sim": 0.7764959335327148,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007400505202477656,
      "eval_python_code_alpaca_emb_top1_equal": 0.140625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8254692554473877,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.194,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.044,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.0,
      "eval_python_code_alpaca_num_pred_words": 44.298,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 16.868858933058,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.34306904025356433,
      "eval_python_code_alpaca_runtime": 50.4345,
      "eval_python_code_alpaca_samples_per_second": 9.914,
      "eval_python_code_alpaca_steps_per_second": 0.02,
      "eval_python_code_alpaca_token_set_f1": 0.48697602189721617,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005473215237270407,
      "eval_python_code_alpaca_token_set_precision": 0.5592901350802559,
      "eval_python_code_alpaca_token_set_recall": 0.4544569818030442,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 253125
    },
    {
      "epoch": 48.6,
      "eval_wikibio_accuracy": 0.33134375,
      "eval_wikibio_bleu_score": 6.2896605784173705,
      "eval_wikibio_bleu_score_sem": 0.22623901816344533,
      "eval_wikibio_emb_cos_sim": 0.7492214441299438,
      "eval_wikibio_emb_cos_sim_sem": 0.009473250333750977,
      "eval_wikibio_emb_top1_equal": 0.2109375,
      "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6325645446777344,
      "eval_wikibio_n_ngrams_match_1": 10.512,
      "eval_wikibio_n_ngrams_match_2": 3.582,
      "eval_wikibio_n_ngrams_match_3": 1.37,
      "eval_wikibio_num_pred_words": 36.928,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.80965694291502,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3676459807421379,
      "eval_wikibio_runtime": 40.8156,
      "eval_wikibio_samples_per_second": 12.25,
      "eval_wikibio_steps_per_second": 0.025,
      "eval_wikibio_token_set_f1": 0.3288924977014137,
      "eval_wikibio_token_set_f1_sem": 0.005279235262117307,
      "eval_wikibio_token_set_precision": 0.33927496964631826,
      "eval_wikibio_token_set_recall": 0.33376851739270497,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 253125
    },
    {
      "epoch": 48.6,
      "eval_nq_accuracy": 0.53753125,
      "eval_nq_bleu_score": 12.373496132029944,
      "eval_nq_bleu_score_sem": 0.49336341376410425,
      "eval_nq_emb_cos_sim": 0.8392571210861206,
      "eval_nq_emb_cos_sim_sem": 0.007294463731624247,
      "eval_nq_emb_top1_equal": 0.3125,
      "eval_nq_emb_top1_equal_sem": 0.041130074229814934,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1036689281463623,
      "eval_nq_n_ngrams_match_1": 23.858,
      "eval_nq_n_ngrams_match_2": 8.906,
      "eval_nq_n_ngrams_match_3": 4.15,
      "eval_nq_num_pred_words": 49.086,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.196186033031795,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.46387491758727206,
      "eval_nq_runtime": 13.0131,
      "eval_nq_samples_per_second": 38.423,
      "eval_nq_steps_per_second": 0.077,
      "eval_nq_token_set_f1": 0.4749037839251745,
      "eval_nq_token_set_f1_sem": 0.004927637796246293,
      "eval_nq_token_set_precision": 0.43430281628408124,
      "eval_nq_token_set_recall": 0.5315837021105687,
      "eval_nq_true_num_tokens": 64.0,
      "step": 253125
    },
    {
      "epoch": 48.6,
      "learning_rate": 0.001,
      "loss": 2.4964,
      "step": 253128
    },
    {
      "epoch": 48.61,
      "learning_rate": 0.001,
      "loss": 2.4808,
      "step": 253140
    },
    {
      "epoch": 48.61,
      "learning_rate": 0.001,
      "loss": 2.4695,
      "step": 253152
    },
    {
      "epoch": 48.61,
      "learning_rate": 0.001,
      "loss": 2.4874,
      "step": 253164
    },
    {
      "epoch": 48.61,
      "learning_rate": 0.001,
      "loss": 2.4859,
      "step": 253176
    },
    {
      "epoch": 48.62,
      "learning_rate": 0.001,
      "loss": 2.4806,
      "step": 253188
    },
    {
      "epoch": 48.62,
      "learning_rate": 0.001,
      "loss": 2.4868,
      "step": 253200
    },
    {
      "epoch": 48.62,
      "learning_rate": 0.001,
      "loss": 2.4841,
      "step": 253212
    },
    {
      "epoch": 48.62,
      "learning_rate": 0.001,
      "loss": 2.4859,
      "step": 253224
    },
    {
      "epoch": 48.62,
      "learning_rate": 0.001,
      "loss": 2.4879,
      "step": 253236
    },
    {
      "epoch": 48.63,
      "learning_rate": 0.001,
      "loss": 2.4843,
      "step": 253248
    },
    {
      "epoch": 48.63,
      "learning_rate": 0.001,
      "loss": 2.4762,
      "step": 253260
    },
    {
      "epoch": 48.63,
      "learning_rate": 0.001,
      "loss": 2.48,
      "step": 253272
    },
    {
      "epoch": 48.63,
      "learning_rate": 0.001,
      "loss": 2.4763,
      "step": 253284
    },
    {
      "epoch": 48.64,
      "learning_rate": 0.001,
      "loss": 2.4833,
      "step": 253296
    },
    {
      "epoch": 48.64,
      "learning_rate": 0.001,
      "loss": 2.4818,
      "step": 253308
    },
    {
      "epoch": 48.64,
      "learning_rate": 0.001,
      "loss": 2.4819,
      "step": 253320
    },
    {
      "epoch": 48.64,
      "learning_rate": 0.001,
      "loss": 2.4723,
      "step": 253332
    },
    {
      "epoch": 48.65,
      "learning_rate": 0.001,
      "loss": 2.4805,
      "step": 253344
    },
    {
      "epoch": 48.65,
      "learning_rate": 0.001,
      "loss": 2.4844,
      "step": 253356
    },
    {
      "epoch": 48.65,
      "learning_rate": 0.001,
      "loss": 2.4864,
      "step": 253368
    },
    {
      "epoch": 48.65,
      "learning_rate": 0.001,
      "loss": 2.4826,
      "step": 253380
    },
    {
      "epoch": 48.65,
      "learning_rate": 0.001,
      "loss": 2.4775,
      "step": 253392
    },
    {
      "epoch": 48.66,
      "learning_rate": 0.001,
      "loss": 2.484,
      "step": 253404
    },
    {
      "epoch": 48.66,
      "learning_rate": 0.001,
      "loss": 2.4843,
      "step": 253416
    },
    {
      "epoch": 48.66,
      "learning_rate": 0.001,
      "loss": 2.48,
      "step": 253428
    },
    {
      "epoch": 48.66,
      "learning_rate": 0.001,
      "loss": 2.4908,
      "step": 253440
    },
    {
      "epoch": 48.67,
      "learning_rate": 0.001,
      "loss": 2.4884,
      "step": 253452
    },
    {
      "epoch": 48.67,
      "learning_rate": 0.001,
      "loss": 2.4782,
      "step": 253464
    },
    {
      "epoch": 48.67,
      "learning_rate": 0.001,
      "loss": 2.4821,
      "step": 253476
    },
    {
      "epoch": 48.67,
      "learning_rate": 0.001,
      "loss": 2.4858,
      "step": 253488
    },
    {
      "epoch": 48.68,
      "learning_rate": 0.001,
      "loss": 2.481,
      "step": 253500
    },
    {
      "epoch": 48.68,
      "learning_rate": 0.001,
      "loss": 2.4805,
      "step": 253512
    },
    {
      "epoch": 48.68,
      "learning_rate": 0.001,
      "loss": 2.4915,
      "step": 253524
    },
    {
      "epoch": 48.68,
      "learning_rate": 0.001,
      "loss": 2.4801,
      "step": 253536
    },
    {
      "epoch": 48.68,
      "learning_rate": 0.001,
      "loss": 2.4744,
      "step": 253548
    },
    {
      "epoch": 48.69,
      "learning_rate": 0.001,
      "loss": 2.4844,
      "step": 253560
    },
    {
      "epoch": 48.69,
      "learning_rate": 0.001,
      "loss": 2.4847,
      "step": 253572
    },
    {
      "epoch": 48.69,
      "learning_rate": 0.001,
      "loss": 2.4916,
      "step": 253584
    },
    {
      "epoch": 48.69,
      "learning_rate": 0.001,
      "loss": 2.4872,
      "step": 253596
    },
    {
      "epoch": 48.7,
      "learning_rate": 0.001,
      "loss": 2.4943,
      "step": 253608
    },
    {
      "epoch": 48.7,
      "learning_rate": 0.001,
      "loss": 2.4802,
      "step": 253620
    },
    {
      "epoch": 48.7,
      "learning_rate": 0.001,
      "loss": 2.4829,
      "step": 253632
    },
    {
      "epoch": 48.7,
      "learning_rate": 0.001,
      "loss": 2.4907,
      "step": 253644
    },
    {
      "epoch": 48.71,
      "learning_rate": 0.001,
      "loss": 2.4802,
      "step": 253656
    },
    {
      "epoch": 48.71,
      "learning_rate": 0.001,
      "loss": 2.493,
      "step": 253668
    },
    {
      "epoch": 48.71,
      "learning_rate": 0.001,
      "loss": 2.4806,
      "step": 253680
    },
    {
      "epoch": 48.71,
      "learning_rate": 0.001,
      "loss": 2.4866,
      "step": 253692
    },
    {
      "epoch": 48.71,
      "learning_rate": 0.001,
      "loss": 2.4829,
      "step": 253704
    },
    {
      "epoch": 48.72,
      "learning_rate": 0.001,
      "loss": 2.478,
      "step": 253716
    },
    {
      "epoch": 48.72,
      "learning_rate": 0.001,
      "loss": 2.4891,
      "step": 253728
    },
    {
      "epoch": 48.72,
      "learning_rate": 0.001,
      "loss": 2.4871,
      "step": 253740
    },
    {
      "epoch": 48.72,
      "eval_ag_news_accuracy": 0.3313125,
      "eval_ag_news_bleu_score": 5.128968358415727,
      "eval_ag_news_bleu_score_sem": 0.16366944669656422,
      "eval_ag_news_emb_cos_sim": 0.8214734792709351,
      "eval_ag_news_emb_cos_sim_sem": 0.006868129261145822,
      "eval_ag_news_emb_top1_equal": 0.2265625,
      "eval_ag_news_emb_top1_equal_sem": 0.03714537682851538,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.45040225982666,
      "eval_ag_news_n_ngrams_match_1": 14.59,
      "eval_ag_news_n_ngrams_match_2": 3.376,
      "eval_ag_news_n_ngrams_match_3": 0.95,
      "eval_ag_news_num_pred_words": 46.888,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.51306620002553,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3623553388552514,
      "eval_ag_news_runtime": 12.2011,
      "eval_ag_news_samples_per_second": 40.98,
      "eval_ag_news_steps_per_second": 0.082,
      "eval_ag_news_token_set_f1": 0.36379130812307414,
      "eval_ag_news_token_set_f1_sem": 0.004417966401312443,
      "eval_ag_news_token_set_precision": 0.34925352804666904,
      "eval_ag_news_token_set_recall": 0.3930812574418512,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 253750
    },
    {
      "epoch": 48.72,
      "eval_anthropic_toxic_prompts_accuracy": 0.1166875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.4002281031583914,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12658116029746078,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6897055506706238,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008663798376130493,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1328125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030114394778901498,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.1953988075256348,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.374,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.106,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.836,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.184,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.419910594711585,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21717328956466744,
      "eval_anthropic_toxic_prompts_runtime": 11.4752,
      "eval_anthropic_toxic_prompts_samples_per_second": 43.572,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.087,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36254640208013383,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006471987774614313,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4531194103882615,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32826765874453734,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 253750
    },
    {
      "epoch": 48.72,
      "eval_arxiv_accuracy": 0.3555625,
      "eval_arxiv_bleu_score": 4.508033467346459,
      "eval_arxiv_bleu_score_sem": 0.13802875036011597,
      "eval_arxiv_emb_cos_sim": 0.7842447757720947,
      "eval_arxiv_emb_cos_sim_sem": 0.006670468444163825,
      "eval_arxiv_emb_top1_equal": 0.2734375,
      "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.308542013168335,
      "eval_arxiv_n_ngrams_match_1": 15.66,
      "eval_arxiv_n_ngrams_match_2": 3.112,
      "eval_arxiv_n_ngrams_match_3": 0.696,
      "eval_arxiv_num_pred_words": 40.804,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.3452274120723,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.37419834359180304,
      "eval_arxiv_runtime": 11.3403,
      "eval_arxiv_samples_per_second": 44.091,
      "eval_arxiv_steps_per_second": 0.088,
      "eval_arxiv_token_set_f1": 0.3675089206463676,
      "eval_arxiv_token_set_f1_sem": 0.004260756206432361,
      "eval_arxiv_token_set_precision": 0.320717927771569,
      "eval_arxiv_token_set_recall": 0.4491744764609272,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 253750
    },
    {
      "epoch": 48.72,
      "eval_python_code_alpaca_accuracy": 0.16365625,
      "eval_python_code_alpaca_bleu_score": 4.753768023089755,
      "eval_python_code_alpaca_bleu_score_sem": 0.14112334567919935,
      "eval_python_code_alpaca_emb_cos_sim": 0.778369665145874,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007066850334785672,
      "eval_python_code_alpaca_emb_top1_equal": 0.1484375,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.837441921234131,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.174,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.074,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.004,
      "eval_python_code_alpaca_num_pred_words": 43.51,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.07203801383337,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3494550184851903,
      "eval_python_code_alpaca_runtime": 10.8989,
      "eval_python_code_alpaca_samples_per_second": 45.876,
      "eval_python_code_alpaca_steps_per_second": 0.092,
      "eval_python_code_alpaca_token_set_f1": 0.4892069168098178,
      "eval_python_code_alpaca_token_set_f1_sem": 0.00532658545556864,
      "eval_python_code_alpaca_token_set_precision": 0.5602916041459619,
      "eval_python_code_alpaca_token_set_recall": 0.45590776024631197,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 253750
    },
    {
      "epoch": 48.72,
      "eval_wikibio_accuracy": 0.33384375,
      "eval_wikibio_bleu_score": 6.01514441990093,
      "eval_wikibio_bleu_score_sem": 0.2116492784977639,
      "eval_wikibio_emb_cos_sim": 0.7581247091293335,
      "eval_wikibio_emb_cos_sim_sem": 0.00824277506710895,
      "eval_wikibio_emb_top1_equal": 0.234375,
      "eval_wikibio_emb_top1_equal_sem": 0.03758909358128201,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.57375431060791,
      "eval_wikibio_n_ngrams_match_1": 10.182,
      "eval_wikibio_n_ngrams_match_2": 3.41,
      "eval_wikibio_n_ngrams_match_3": 1.256,
      "eval_wikibio_num_pred_words": 36.986,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 35.65018408807742,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3586691305485913,
      "eval_wikibio_runtime": 11.3856,
      "eval_wikibio_samples_per_second": 43.915,
      "eval_wikibio_steps_per_second": 0.088,
      "eval_wikibio_token_set_f1": 0.3239858067130597,
      "eval_wikibio_token_set_f1_sem": 0.005147906260231054,
      "eval_wikibio_token_set_precision": 0.33242034267173876,
      "eval_wikibio_token_set_recall": 0.3335561853714311,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 253750
    },
    {
      "epoch": 48.72,
      "eval_nq_accuracy": 0.54,
      "eval_nq_bleu_score": 12.526305871999059,
      "eval_nq_bleu_score_sem": 0.49175494701577427,
      "eval_nq_emb_cos_sim": 0.8404859304428101,
      "eval_nq_emb_cos_sim_sem": 0.0073053751555770124,
      "eval_nq_emb_top1_equal": 0.3125,
      "eval_nq_emb_top1_equal_sem": 0.041130074229814934,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1025004386901855,
      "eval_nq_n_ngrams_match_1": 23.64,
      "eval_nq_n_ngrams_match_2": 8.892,
      "eval_nq_n_ngrams_match_3": 4.208,
      "eval_nq_num_pred_words": 49.04,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.186614469296044,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.457992451046238,
      "eval_nq_runtime": 11.6494,
      "eval_nq_samples_per_second": 42.921,
      "eval_nq_steps_per_second": 0.086,
      "eval_nq_token_set_f1": 0.47238334226022505,
      "eval_nq_token_set_f1_sem": 0.005001456740180658,
      "eval_nq_token_set_precision": 0.431945326249031,
      "eval_nq_token_set_recall": 0.5287395962647848,
      "eval_nq_true_num_tokens": 64.0,
      "step": 253750
    },
    {
      "epoch": 48.72,
      "learning_rate": 0.001,
      "loss": 2.4851,
      "step": 253752
    },
    {
      "epoch": 48.73,
      "learning_rate": 0.001,
      "loss": 2.4765,
      "step": 253764
    },
    {
      "epoch": 48.73,
      "learning_rate": 0.001,
      "loss": 2.4831,
      "step": 253776
    },
    {
      "epoch": 48.73,
      "learning_rate": 0.001,
      "loss": 2.4767,
      "step": 253788
    },
    {
      "epoch": 48.73,
      "learning_rate": 0.001,
      "loss": 2.4794,
      "step": 253800
    },
    {
      "epoch": 48.74,
      "learning_rate": 0.001,
      "loss": 2.4867,
      "step": 253812
    },
    {
      "epoch": 48.74,
      "learning_rate": 0.001,
      "loss": 2.4955,
      "step": 253824
    },
    {
      "epoch": 48.74,
      "learning_rate": 0.001,
      "loss": 2.4799,
      "step": 253836
    },
    {
      "epoch": 48.74,
      "learning_rate": 0.001,
      "loss": 2.4698,
      "step": 253848
    },
    {
      "epoch": 48.74,
      "learning_rate": 0.001,
      "loss": 2.486,
      "step": 253860
    },
    {
      "epoch": 48.75,
      "learning_rate": 0.001,
      "loss": 2.4838,
      "step": 253872
    },
    {
      "epoch": 48.75,
      "learning_rate": 0.001,
      "loss": 2.4899,
      "step": 253884
    },
    {
      "epoch": 48.75,
      "learning_rate": 0.001,
      "loss": 2.4834,
      "step": 253896
    },
    {
      "epoch": 48.75,
      "learning_rate": 0.001,
      "loss": 2.4821,
      "step": 253908
    },
    {
      "epoch": 48.76,
      "learning_rate": 0.001,
      "loss": 2.486,
      "step": 253920
    },
    {
      "epoch": 48.76,
      "learning_rate": 0.001,
      "loss": 2.4874,
      "step": 253932
    },
    {
      "epoch": 48.76,
      "learning_rate": 0.001,
      "loss": 2.4777,
      "step": 253944
    },
    {
      "epoch": 48.76,
      "learning_rate": 0.001,
      "loss": 2.4949,
      "step": 253956
    },
    {
      "epoch": 48.76,
      "learning_rate": 0.001,
      "loss": 2.4937,
      "step": 253968
    },
    {
      "epoch": 48.77,
      "learning_rate": 0.001,
      "loss": 2.4861,
      "step": 253980
    },
    {
      "epoch": 48.77,
      "learning_rate": 0.001,
      "loss": 2.4799,
      "step": 253992
    },
    {
      "epoch": 48.77,
      "learning_rate": 0.001,
      "loss": 2.4831,
      "step": 254004
    },
    {
      "epoch": 48.77,
      "learning_rate": 0.001,
      "loss": 2.4849,
      "step": 254016
    },
    {
      "epoch": 48.78,
      "learning_rate": 0.001,
      "loss": 2.4958,
      "step": 254028
    },
    {
      "epoch": 48.78,
      "learning_rate": 0.001,
      "loss": 2.4836,
      "step": 254040
    },
    {
      "epoch": 48.78,
      "learning_rate": 0.001,
      "loss": 2.488,
      "step": 254052
    },
    {
      "epoch": 48.78,
      "learning_rate": 0.001,
      "loss": 2.4834,
      "step": 254064
    },
    {
      "epoch": 48.79,
      "learning_rate": 0.001,
      "loss": 2.4918,
      "step": 254076
    },
    {
      "epoch": 48.79,
      "learning_rate": 0.001,
      "loss": 2.4813,
      "step": 254088
    },
    {
      "epoch": 48.79,
      "learning_rate": 0.001,
      "loss": 2.4834,
      "step": 254100
    },
    {
      "epoch": 48.79,
      "learning_rate": 0.001,
      "loss": 2.488,
      "step": 254112
    },
    {
      "epoch": 48.79,
      "learning_rate": 0.001,
      "loss": 2.4931,
      "step": 254124
    },
    {
      "epoch": 48.8,
      "learning_rate": 0.001,
      "loss": 2.4921,
      "step": 254136
    },
    {
      "epoch": 48.8,
      "learning_rate": 0.001,
      "loss": 2.4871,
      "step": 254148
    },
    {
      "epoch": 48.8,
      "learning_rate": 0.001,
      "loss": 2.4825,
      "step": 254160
    },
    {
      "epoch": 48.8,
      "learning_rate": 0.001,
      "loss": 2.4896,
      "step": 254172
    },
    {
      "epoch": 48.81,
      "learning_rate": 0.001,
      "loss": 2.4878,
      "step": 254184
    },
    {
      "epoch": 48.81,
      "learning_rate": 0.001,
      "loss": 2.4907,
      "step": 254196
    },
    {
      "epoch": 48.81,
      "learning_rate": 0.001,
      "loss": 2.4877,
      "step": 254208
    },
    {
      "epoch": 48.81,
      "learning_rate": 0.001,
      "loss": 2.4837,
      "step": 254220
    },
    {
      "epoch": 48.82,
      "learning_rate": 0.001,
      "loss": 2.4861,
      "step": 254232
    },
    {
      "epoch": 48.82,
      "learning_rate": 0.001,
      "loss": 2.4957,
      "step": 254244
    },
    {
      "epoch": 48.82,
      "learning_rate": 0.001,
      "loss": 2.4841,
      "step": 254256
    },
    {
      "epoch": 48.82,
      "learning_rate": 0.001,
      "loss": 2.489,
      "step": 254268
    },
    {
      "epoch": 48.82,
      "learning_rate": 0.001,
      "loss": 2.4925,
      "step": 254280
    },
    {
      "epoch": 48.83,
      "learning_rate": 0.001,
      "loss": 2.4883,
      "step": 254292
    },
    {
      "epoch": 48.83,
      "learning_rate": 0.001,
      "loss": 2.4845,
      "step": 254304
    },
    {
      "epoch": 48.83,
      "learning_rate": 0.001,
      "loss": 2.4852,
      "step": 254316
    },
    {
      "epoch": 48.83,
      "learning_rate": 0.001,
      "loss": 2.4882,
      "step": 254328
    },
    {
      "epoch": 48.84,
      "learning_rate": 0.001,
      "loss": 2.4945,
      "step": 254340
    },
    {
      "epoch": 48.84,
      "learning_rate": 0.001,
      "loss": 2.4886,
      "step": 254352
    },
    {
      "epoch": 48.84,
      "learning_rate": 0.001,
      "loss": 2.4807,
      "step": 254364
    },
    {
      "epoch": 48.84,
      "eval_ag_news_accuracy": 0.33259375,
      "eval_ag_news_bleu_score": 5.116390845398218,
      "eval_ag_news_bleu_score_sem": 0.16607483030992765,
      "eval_ag_news_emb_cos_sim": 0.8242052793502808,
      "eval_ag_news_emb_cos_sim_sem": 0.006755547126239128,
      "eval_ag_news_emb_top1_equal": 0.2578125,
      "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4539828300476074,
      "eval_ag_news_n_ngrams_match_1": 14.506,
      "eval_ag_news_n_ngrams_match_2": 3.362,
      "eval_ag_news_n_ngrams_match_3": 0.998,
      "eval_ag_news_num_pred_words": 46.708,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.626103194113817,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3621275434508225,
      "eval_ag_news_runtime": 21.7164,
      "eval_ag_news_samples_per_second": 23.024,
      "eval_ag_news_steps_per_second": 0.046,
      "eval_ag_news_token_set_f1": 0.3609943472548123,
      "eval_ag_news_token_set_f1_sem": 0.004383250753844021,
      "eval_ag_news_token_set_precision": 0.3462977448534223,
      "eval_ag_news_token_set_recall": 0.39312052325829416,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 254375
    },
    {
      "epoch": 48.84,
      "eval_anthropic_toxic_prompts_accuracy": 0.11665625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2670446914870928,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12414597670877574,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6821401715278625,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009851487347544376,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.2194252014160156,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.376,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.024,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.764,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.948,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 25.013738187068732,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21795243524092545,
      "eval_anthropic_toxic_prompts_runtime": 11.1921,
      "eval_anthropic_toxic_prompts_samples_per_second": 44.674,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.089,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3576999860582102,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006463571248950621,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4497737653146231,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3212514680656723,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 254375
    },
    {
      "epoch": 48.84,
      "eval_arxiv_accuracy": 0.3536875,
      "eval_arxiv_bleu_score": 4.522572548241414,
      "eval_arxiv_bleu_score_sem": 0.1386361293207542,
      "eval_arxiv_emb_cos_sim": 0.7847319841384888,
      "eval_arxiv_emb_cos_sim_sem": 0.007399415969861453,
      "eval_arxiv_emb_top1_equal": 0.2578125,
      "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3165972232818604,
      "eval_arxiv_n_ngrams_match_1": 15.6,
      "eval_arxiv_n_ngrams_match_2": 3.14,
      "eval_arxiv_n_ngrams_match_3": 0.75,
      "eval_arxiv_num_pred_words": 40.546,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.56638851820688,
      "eval_arxiv_pred_num_tokens": 62.96875,
      "eval_arxiv_rouge_score": 0.37230089354554563,
      "eval_arxiv_runtime": 16.4873,
      "eval_arxiv_samples_per_second": 30.326,
      "eval_arxiv_steps_per_second": 0.061,
      "eval_arxiv_token_set_f1": 0.36531037581461423,
      "eval_arxiv_token_set_f1_sem": 0.004353938343634288,
      "eval_arxiv_token_set_precision": 0.3176507963641678,
      "eval_arxiv_token_set_recall": 0.45086158988561276,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 254375
    },
    {
      "epoch": 48.84,
      "eval_python_code_alpaca_accuracy": 0.16309375,
      "eval_python_code_alpaca_bleu_score": 4.8138126980068465,
      "eval_python_code_alpaca_bleu_score_sem": 0.1538142144557821,
      "eval_python_code_alpaca_emb_cos_sim": 0.77247154712677,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007147478573076028,
      "eval_python_code_alpaca_emb_top1_equal": 0.1875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.034634623208270626,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.809692144393921,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.982,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.006,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.052,
      "eval_python_code_alpaca_num_pred_words": 43.218,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 16.60480554936888,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3493759692055528,
      "eval_python_code_alpaca_runtime": 11.6555,
      "eval_python_code_alpaca_samples_per_second": 42.898,
      "eval_python_code_alpaca_steps_per_second": 0.086,
      "eval_python_code_alpaca_token_set_f1": 0.4826573502048842,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005559391738856014,
      "eval_python_code_alpaca_token_set_precision": 0.5483371128134638,
      "eval_python_code_alpaca_token_set_recall": 0.45333285934503614,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 254375
    },
    {
      "epoch": 48.84,
      "eval_wikibio_accuracy": 0.33096875,
      "eval_wikibio_bleu_score": 6.045653172373738,
      "eval_wikibio_bleu_score_sem": 0.21311411770047786,
      "eval_wikibio_emb_cos_sim": 0.7322331070899963,
      "eval_wikibio_emb_cos_sim_sem": 0.009280145986219712,
      "eval_wikibio_emb_top1_equal": 0.21875,
      "eval_wikibio_emb_top1_equal_sem": 0.03668319712192295,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.5873866081237793,
      "eval_wikibio_n_ngrams_match_1": 9.956,
      "eval_wikibio_n_ngrams_match_2": 3.374,
      "eval_wikibio_n_ngrams_match_3": 1.212,
      "eval_wikibio_num_pred_words": 35.502,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 36.13950571514363,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.35297284421981323,
      "eval_wikibio_runtime": 11.8409,
      "eval_wikibio_samples_per_second": 42.226,
      "eval_wikibio_steps_per_second": 0.084,
      "eval_wikibio_token_set_f1": 0.3200043641537511,
      "eval_wikibio_token_set_f1_sem": 0.005632981453349366,
      "eval_wikibio_token_set_precision": 0.32535151519833405,
      "eval_wikibio_token_set_recall": 0.3329054724100076,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 254375
    },
    {
      "epoch": 48.84,
      "eval_nq_accuracy": 0.53865625,
      "eval_nq_bleu_score": 12.199795292352317,
      "eval_nq_bleu_score_sem": 0.46594130850674187,
      "eval_nq_emb_cos_sim": 0.8400046825408936,
      "eval_nq_emb_cos_sim_sem": 0.007104173344775592,
      "eval_nq_emb_top1_equal": 0.3359375,
      "eval_nq_emb_top1_equal_sem": 0.04191137143408563,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1046581268310547,
      "eval_nq_n_ngrams_match_1": 23.63,
      "eval_nq_n_ngrams_match_2": 8.756,
      "eval_nq_n_ngrams_match_3": 4.078,
      "eval_nq_num_pred_words": 49.026,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.204297700839279,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4593181281504545,
      "eval_nq_runtime": 11.844,
      "eval_nq_samples_per_second": 42.215,
      "eval_nq_steps_per_second": 0.084,
      "eval_nq_token_set_f1": 0.470378723385126,
      "eval_nq_token_set_f1_sem": 0.004948314345017564,
      "eval_nq_token_set_precision": 0.43025516421183024,
      "eval_nq_token_set_recall": 0.526909176280272,
      "eval_nq_true_num_tokens": 64.0,
      "step": 254375
    },
    {
      "epoch": 48.84,
      "learning_rate": 0.001,
      "loss": 2.4857,
      "step": 254376
    },
    {
      "epoch": 48.85,
      "learning_rate": 0.001,
      "loss": 2.491,
      "step": 254388
    },
    {
      "epoch": 48.85,
      "learning_rate": 0.001,
      "loss": 2.4946,
      "step": 254400
    },
    {
      "epoch": 48.85,
      "learning_rate": 0.001,
      "loss": 2.4773,
      "step": 254412
    },
    {
      "epoch": 48.85,
      "learning_rate": 0.001,
      "loss": 2.4957,
      "step": 254424
    },
    {
      "epoch": 48.85,
      "learning_rate": 0.001,
      "loss": 2.4964,
      "step": 254436
    },
    {
      "epoch": 48.86,
      "learning_rate": 0.001,
      "loss": 2.4998,
      "step": 254448
    },
    {
      "epoch": 48.86,
      "learning_rate": 0.001,
      "loss": 2.5007,
      "step": 254460
    },
    {
      "epoch": 48.86,
      "learning_rate": 0.001,
      "loss": 2.4778,
      "step": 254472
    },
    {
      "epoch": 48.86,
      "learning_rate": 0.001,
      "loss": 2.4986,
      "step": 254484
    },
    {
      "epoch": 48.87,
      "learning_rate": 0.001,
      "loss": 2.4862,
      "step": 254496
    },
    {
      "epoch": 48.87,
      "learning_rate": 0.001,
      "loss": 2.4827,
      "step": 254508
    },
    {
      "epoch": 48.87,
      "learning_rate": 0.001,
      "loss": 2.4893,
      "step": 254520
    },
    {
      "epoch": 48.87,
      "learning_rate": 0.001,
      "loss": 2.4823,
      "step": 254532
    },
    {
      "epoch": 48.88,
      "learning_rate": 0.001,
      "loss": 2.4741,
      "step": 254544
    },
    {
      "epoch": 48.88,
      "learning_rate": 0.001,
      "loss": 2.487,
      "step": 254556
    },
    {
      "epoch": 48.88,
      "learning_rate": 0.001,
      "loss": 2.4851,
      "step": 254568
    },
    {
      "epoch": 48.88,
      "learning_rate": 0.001,
      "loss": 2.4778,
      "step": 254580
    },
    {
      "epoch": 48.88,
      "learning_rate": 0.001,
      "loss": 2.492,
      "step": 254592
    },
    {
      "epoch": 48.89,
      "learning_rate": 0.001,
      "loss": 2.481,
      "step": 254604
    },
    {
      "epoch": 48.89,
      "learning_rate": 0.001,
      "loss": 2.4827,
      "step": 254616
    },
    {
      "epoch": 48.89,
      "learning_rate": 0.001,
      "loss": 2.4939,
      "step": 254628
    },
    {
      "epoch": 48.89,
      "learning_rate": 0.001,
      "loss": 2.4848,
      "step": 254640
    },
    {
      "epoch": 48.9,
      "learning_rate": 0.001,
      "loss": 2.4815,
      "step": 254652
    },
    {
      "epoch": 48.9,
      "learning_rate": 0.001,
      "loss": 2.4942,
      "step": 254664
    },
    {
      "epoch": 48.9,
      "learning_rate": 0.001,
      "loss": 2.4928,
      "step": 254676
    },
    {
      "epoch": 48.9,
      "learning_rate": 0.001,
      "loss": 2.4828,
      "step": 254688
    },
    {
      "epoch": 48.91,
      "learning_rate": 0.001,
      "loss": 2.4822,
      "step": 254700
    },
    {
      "epoch": 48.91,
      "learning_rate": 0.001,
      "loss": 2.5007,
      "step": 254712
    },
    {
      "epoch": 48.91,
      "learning_rate": 0.001,
      "loss": 2.4829,
      "step": 254724
    },
    {
      "epoch": 48.91,
      "learning_rate": 0.001,
      "loss": 2.4853,
      "step": 254736
    },
    {
      "epoch": 48.91,
      "learning_rate": 0.001,
      "loss": 2.4911,
      "step": 254748
    },
    {
      "epoch": 48.92,
      "learning_rate": 0.001,
      "loss": 2.4868,
      "step": 254760
    },
    {
      "epoch": 48.92,
      "learning_rate": 0.001,
      "loss": 2.4848,
      "step": 254772
    },
    {
      "epoch": 48.92,
      "learning_rate": 0.001,
      "loss": 2.4805,
      "step": 254784
    },
    {
      "epoch": 48.92,
      "learning_rate": 0.001,
      "loss": 2.4868,
      "step": 254796
    },
    {
      "epoch": 48.93,
      "learning_rate": 0.001,
      "loss": 2.4826,
      "step": 254808
    },
    {
      "epoch": 48.93,
      "learning_rate": 0.001,
      "loss": 2.491,
      "step": 254820
    },
    {
      "epoch": 48.93,
      "learning_rate": 0.001,
      "loss": 2.4825,
      "step": 254832
    },
    {
      "epoch": 48.93,
      "learning_rate": 0.001,
      "loss": 2.4859,
      "step": 254844
    },
    {
      "epoch": 48.94,
      "learning_rate": 0.001,
      "loss": 2.4842,
      "step": 254856
    },
    {
      "epoch": 48.94,
      "learning_rate": 0.001,
      "loss": 2.4881,
      "step": 254868
    },
    {
      "epoch": 48.94,
      "learning_rate": 0.001,
      "loss": 2.4952,
      "step": 254880
    },
    {
      "epoch": 48.94,
      "learning_rate": 0.001,
      "loss": 2.4833,
      "step": 254892
    },
    {
      "epoch": 48.94,
      "learning_rate": 0.001,
      "loss": 2.4788,
      "step": 254904
    },
    {
      "epoch": 48.95,
      "learning_rate": 0.001,
      "loss": 2.4846,
      "step": 254916
    },
    {
      "epoch": 48.95,
      "learning_rate": 0.001,
      "loss": 2.4865,
      "step": 254928
    },
    {
      "epoch": 48.95,
      "learning_rate": 0.001,
      "loss": 2.4859,
      "step": 254940
    },
    {
      "epoch": 48.95,
      "learning_rate": 0.001,
      "loss": 2.4846,
      "step": 254952
    },
    {
      "epoch": 48.96,
      "learning_rate": 0.001,
      "loss": 2.484,
      "step": 254964
    },
    {
      "epoch": 48.96,
      "learning_rate": 0.001,
      "loss": 2.4837,
      "step": 254976
    },
    {
      "epoch": 48.96,
      "learning_rate": 0.001,
      "loss": 2.4886,
      "step": 254988
    },
    {
      "epoch": 48.96,
      "learning_rate": 0.001,
      "loss": 2.4834,
      "step": 255000
    },
    {
      "epoch": 48.96,
      "eval_ag_news_accuracy": 0.33328125,
      "eval_ag_news_bleu_score": 5.085752596456417,
      "eval_ag_news_bleu_score_sem": 0.16053073564002043,
      "eval_ag_news_emb_cos_sim": 0.823728621006012,
      "eval_ag_news_emb_cos_sim_sem": 0.0070037736247220705,
      "eval_ag_news_emb_top1_equal": 0.296875,
      "eval_ag_news_emb_top1_equal_sem": 0.04054163310179599,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.454833507537842,
      "eval_ag_news_n_ngrams_match_1": 14.626,
      "eval_ag_news_n_ngrams_match_2": 3.326,
      "eval_ag_news_n_ngrams_match_3": 0.932,
      "eval_ag_news_num_pred_words": 46.364,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.653018254599836,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3645419732953368,
      "eval_ag_news_runtime": 12.7888,
      "eval_ag_news_samples_per_second": 39.097,
      "eval_ag_news_steps_per_second": 0.078,
      "eval_ag_news_token_set_f1": 0.3638430978439053,
      "eval_ag_news_token_set_f1_sem": 0.004393774169987551,
      "eval_ag_news_token_set_precision": 0.3491185340997753,
      "eval_ag_news_token_set_recall": 0.3948944478224903,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 255000
    },
    {
      "epoch": 48.96,
      "eval_anthropic_toxic_prompts_accuracy": 0.11721875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2375065412788535,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1186138332351007,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6916943788528442,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008727303798668791,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.1852755546569824,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.314,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.046,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.77,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.896,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.173948732311807,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21522095019795162,
      "eval_anthropic_toxic_prompts_runtime": 10.923,
      "eval_anthropic_toxic_prompts_samples_per_second": 45.775,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.092,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36031047607084526,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006670312847901974,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4415682902984979,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3310342872590095,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 255000
    },
    {
      "epoch": 48.96,
      "eval_arxiv_accuracy": 0.35359375,
      "eval_arxiv_bleu_score": 4.6744568097240755,
      "eval_arxiv_bleu_score_sem": 0.13816459704929035,
      "eval_arxiv_emb_cos_sim": 0.7932637929916382,
      "eval_arxiv_emb_cos_sim_sem": 0.005929044134217957,
      "eval_arxiv_emb_top1_equal": 0.2890625,
      "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3140530586242676,
      "eval_arxiv_n_ngrams_match_1": 15.706,
      "eval_arxiv_n_ngrams_match_2": 3.182,
      "eval_arxiv_n_ngrams_match_3": 0.752,
      "eval_arxiv_num_pred_words": 40.334,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.496344226688596,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3779389021059821,
      "eval_arxiv_runtime": 12.3162,
      "eval_arxiv_samples_per_second": 40.597,
      "eval_arxiv_steps_per_second": 0.081,
      "eval_arxiv_token_set_f1": 0.36883365446540173,
      "eval_arxiv_token_set_f1_sem": 0.004326562442193468,
      "eval_arxiv_token_set_precision": 0.3222304183839358,
      "eval_arxiv_token_set_recall": 0.4513081623526788,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 255000
    },
    {
      "epoch": 48.96,
      "eval_python_code_alpaca_accuracy": 0.16196875,
      "eval_python_code_alpaca_bleu_score": 4.733534700409421,
      "eval_python_code_alpaca_bleu_score_sem": 0.14786379444792305,
      "eval_python_code_alpaca_emb_cos_sim": 0.7603433132171631,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007492076474857161,
      "eval_python_code_alpaca_emb_top1_equal": 0.140625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.855957508087158,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.898,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.944,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.006,
      "eval_python_code_alpaca_num_pred_words": 42.93,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.391081338965936,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3439203879910345,
      "eval_python_code_alpaca_runtime": 13.0871,
      "eval_python_code_alpaca_samples_per_second": 38.206,
      "eval_python_code_alpaca_steps_per_second": 0.076,
      "eval_python_code_alpaca_token_set_f1": 0.4798061564333848,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005485128761968899,
      "eval_python_code_alpaca_token_set_precision": 0.5429240085197976,
      "eval_python_code_alpaca_token_set_recall": 0.4544728813680131,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 255000
    },
    {
      "epoch": 48.96,
      "eval_wikibio_accuracy": 0.33103125,
      "eval_wikibio_bleu_score": 6.146821760889383,
      "eval_wikibio_bleu_score_sem": 0.21852813109526867,
      "eval_wikibio_emb_cos_sim": 0.7664906978607178,
      "eval_wikibio_emb_cos_sim_sem": 0.0083355132803566,
      "eval_wikibio_emb_top1_equal": 0.234375,
      "eval_wikibio_emb_top1_equal_sem": 0.03758909358128201,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.584001064300537,
      "eval_wikibio_n_ngrams_match_1": 10.072,
      "eval_wikibio_n_ngrams_match_2": 3.35,
      "eval_wikibio_n_ngrams_match_3": 1.264,
      "eval_wikibio_num_pred_words": 35.88,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 36.017360715087804,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3561500289438334,
      "eval_wikibio_runtime": 11.5179,
      "eval_wikibio_samples_per_second": 43.411,
      "eval_wikibio_steps_per_second": 0.087,
      "eval_wikibio_token_set_f1": 0.3210639942421022,
      "eval_wikibio_token_set_f1_sem": 0.005447391853149522,
      "eval_wikibio_token_set_precision": 0.32915487126781606,
      "eval_wikibio_token_set_recall": 0.3292660788137592,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 255000
    },
    {
      "epoch": 48.96,
      "eval_nq_accuracy": 0.5411875,
      "eval_nq_bleu_score": 12.414931991591992,
      "eval_nq_bleu_score_sem": 0.497635814101261,
      "eval_nq_emb_cos_sim": 0.8404495716094971,
      "eval_nq_emb_cos_sim_sem": 0.006607508296478137,
      "eval_nq_emb_top1_equal": 0.2890625,
      "eval_nq_emb_top1_equal_sem": 0.04022626667363519,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1004555225372314,
      "eval_nq_n_ngrams_match_1": 23.684,
      "eval_nq_n_ngrams_match_2": 8.868,
      "eval_nq_n_ngrams_match_3": 4.182,
      "eval_nq_num_pred_words": 49.044,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.169890634377667,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45974699431206145,
      "eval_nq_runtime": 15.948,
      "eval_nq_samples_per_second": 31.352,
      "eval_nq_steps_per_second": 0.063,
      "eval_nq_token_set_f1": 0.47324117221861284,
      "eval_nq_token_set_f1_sem": 0.005170891797397959,
      "eval_nq_token_set_precision": 0.431889991612914,
      "eval_nq_token_set_recall": 0.5309646132809431,
      "eval_nq_true_num_tokens": 64.0,
      "step": 255000
    },
    {
      "epoch": 48.97,
      "learning_rate": 0.001,
      "loss": 2.4895,
      "step": 255012
    },
    {
      "epoch": 48.97,
      "learning_rate": 0.001,
      "loss": 2.4865,
      "step": 255024
    },
    {
      "epoch": 48.97,
      "learning_rate": 0.001,
      "loss": 2.4895,
      "step": 255036
    },
    {
      "epoch": 48.97,
      "learning_rate": 0.001,
      "loss": 2.4857,
      "step": 255048
    },
    {
      "epoch": 48.97,
      "learning_rate": 0.001,
      "loss": 2.4807,
      "step": 255060
    },
    {
      "epoch": 48.98,
      "learning_rate": 0.001,
      "loss": 2.4985,
      "step": 255072
    },
    {
      "epoch": 48.98,
      "learning_rate": 0.001,
      "loss": 2.4841,
      "step": 255084
    },
    {
      "epoch": 48.98,
      "learning_rate": 0.001,
      "loss": 2.5027,
      "step": 255096
    },
    {
      "epoch": 48.98,
      "learning_rate": 0.001,
      "loss": 2.4875,
      "step": 255108
    },
    {
      "epoch": 48.99,
      "learning_rate": 0.001,
      "loss": 2.4901,
      "step": 255120
    },
    {
      "epoch": 48.99,
      "learning_rate": 0.001,
      "loss": 2.4716,
      "step": 255132
    },
    {
      "epoch": 48.99,
      "learning_rate": 0.001,
      "loss": 2.4967,
      "step": 255144
    },
    {
      "epoch": 48.99,
      "learning_rate": 0.001,
      "loss": 2.4942,
      "step": 255156
    },
    {
      "epoch": 49.0,
      "learning_rate": 0.001,
      "loss": 2.4861,
      "step": 255168
    },
    {
      "epoch": 49.0,
      "learning_rate": 0.001,
      "loss": 2.4828,
      "step": 255180
    },
    {
      "epoch": 49.0,
      "learning_rate": 0.001,
      "loss": 2.4896,
      "step": 255192
    },
    {
      "epoch": 49.0,
      "learning_rate": 0.001,
      "loss": 2.4663,
      "step": 255204
    },
    {
      "epoch": 49.0,
      "learning_rate": 0.001,
      "loss": 2.4718,
      "step": 255216
    },
    {
      "epoch": 49.01,
      "learning_rate": 0.001,
      "loss": 2.4728,
      "step": 255228
    },
    {
      "epoch": 49.01,
      "learning_rate": 0.001,
      "loss": 2.4671,
      "step": 255240
    },
    {
      "epoch": 49.01,
      "learning_rate": 0.001,
      "loss": 2.4731,
      "step": 255252
    },
    {
      "epoch": 49.01,
      "learning_rate": 0.001,
      "loss": 2.4735,
      "step": 255264
    },
    {
      "epoch": 49.02,
      "learning_rate": 0.001,
      "loss": 2.4709,
      "step": 255276
    },
    {
      "epoch": 49.02,
      "learning_rate": 0.001,
      "loss": 2.4797,
      "step": 255288
    },
    {
      "epoch": 49.02,
      "learning_rate": 0.001,
      "loss": 2.4698,
      "step": 255300
    },
    {
      "epoch": 49.02,
      "learning_rate": 0.001,
      "loss": 2.4625,
      "step": 255312
    },
    {
      "epoch": 49.03,
      "learning_rate": 0.001,
      "loss": 2.4648,
      "step": 255324
    },
    {
      "epoch": 49.03,
      "learning_rate": 0.001,
      "loss": 2.474,
      "step": 255336
    },
    {
      "epoch": 49.03,
      "learning_rate": 0.001,
      "loss": 2.4684,
      "step": 255348
    },
    {
      "epoch": 49.03,
      "learning_rate": 0.001,
      "loss": 2.4749,
      "step": 255360
    },
    {
      "epoch": 49.03,
      "learning_rate": 0.001,
      "loss": 2.4724,
      "step": 255372
    },
    {
      "epoch": 49.04,
      "learning_rate": 0.001,
      "loss": 2.4723,
      "step": 255384
    },
    {
      "epoch": 49.04,
      "learning_rate": 0.001,
      "loss": 2.476,
      "step": 255396
    },
    {
      "epoch": 49.04,
      "learning_rate": 0.001,
      "loss": 2.4792,
      "step": 255408
    },
    {
      "epoch": 49.04,
      "learning_rate": 0.001,
      "loss": 2.4766,
      "step": 255420
    },
    {
      "epoch": 49.05,
      "learning_rate": 0.001,
      "loss": 2.4744,
      "step": 255432
    },
    {
      "epoch": 49.05,
      "learning_rate": 0.001,
      "loss": 2.4734,
      "step": 255444
    },
    {
      "epoch": 49.05,
      "learning_rate": 0.001,
      "loss": 2.4715,
      "step": 255456
    },
    {
      "epoch": 49.05,
      "learning_rate": 0.001,
      "loss": 2.4721,
      "step": 255468
    },
    {
      "epoch": 49.06,
      "learning_rate": 0.001,
      "loss": 2.4661,
      "step": 255480
    },
    {
      "epoch": 49.06,
      "learning_rate": 0.001,
      "loss": 2.485,
      "step": 255492
    },
    {
      "epoch": 49.06,
      "learning_rate": 0.001,
      "loss": 2.4758,
      "step": 255504
    },
    {
      "epoch": 49.06,
      "learning_rate": 0.001,
      "loss": 2.4684,
      "step": 255516
    },
    {
      "epoch": 49.06,
      "learning_rate": 0.001,
      "loss": 2.4705,
      "step": 255528
    },
    {
      "epoch": 49.07,
      "learning_rate": 0.001,
      "loss": 2.4683,
      "step": 255540
    },
    {
      "epoch": 49.07,
      "learning_rate": 0.001,
      "loss": 2.4738,
      "step": 255552
    },
    {
      "epoch": 49.07,
      "learning_rate": 0.001,
      "loss": 2.4778,
      "step": 255564
    },
    {
      "epoch": 49.07,
      "learning_rate": 0.001,
      "loss": 2.4771,
      "step": 255576
    },
    {
      "epoch": 49.08,
      "learning_rate": 0.001,
      "loss": 2.4737,
      "step": 255588
    },
    {
      "epoch": 49.08,
      "learning_rate": 0.001,
      "loss": 2.4719,
      "step": 255600
    },
    {
      "epoch": 49.08,
      "learning_rate": 0.001,
      "loss": 2.4778,
      "step": 255612
    },
    {
      "epoch": 49.08,
      "learning_rate": 0.001,
      "loss": 2.4811,
      "step": 255624
    },
    {
      "epoch": 49.08,
      "eval_ag_news_accuracy": 0.3303125,
      "eval_ag_news_bleu_score": 5.100494217804903,
      "eval_ag_news_bleu_score_sem": 0.1571130840546052,
      "eval_ag_news_emb_cos_sim": 0.8211169838905334,
      "eval_ag_news_emb_cos_sim_sem": 0.006613904080739665,
      "eval_ag_news_emb_top1_equal": 0.2734375,
      "eval_ag_news_emb_top1_equal_sem": 0.03955156411760461,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.455061674118042,
      "eval_ag_news_n_ngrams_match_1": 14.588,
      "eval_ag_news_n_ngrams_match_2": 3.402,
      "eval_ag_news_n_ngrams_match_3": 0.996,
      "eval_ag_news_num_pred_words": 46.588,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.66024123951855,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.36327022297672823,
      "eval_ag_news_runtime": 11.8886,
      "eval_ag_news_samples_per_second": 42.057,
      "eval_ag_news_steps_per_second": 0.084,
      "eval_ag_news_token_set_f1": 0.36385899409076766,
      "eval_ag_news_token_set_f1_sem": 0.004306569554235365,
      "eval_ag_news_token_set_precision": 0.3495863481706578,
      "eval_ag_news_token_set_recall": 0.39296495283245336,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 255625
    },
    {
      "epoch": 49.08,
      "eval_anthropic_toxic_prompts_accuracy": 0.11721875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.275562210930755,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12351720417569237,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6904870271682739,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008971926741604052,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.17292857170105,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.488,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.024,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.754,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.21,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 23.87730847395135,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 62.953125,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21946768835170108,
      "eval_anthropic_toxic_prompts_runtime": 12.004,
      "eval_anthropic_toxic_prompts_samples_per_second": 41.653,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.083,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3602042240177149,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006441531191770978,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.455426196114472,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3248023174474306,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 255625
    },
    {
      "epoch": 49.08,
      "eval_arxiv_accuracy": 0.35478125,
      "eval_arxiv_bleu_score": 4.629254039295774,
      "eval_arxiv_bleu_score_sem": 0.13831173033533106,
      "eval_arxiv_emb_cos_sim": 0.7929853200912476,
      "eval_arxiv_emb_cos_sim_sem": 0.0063699120234430515,
      "eval_arxiv_emb_top1_equal": 0.265625,
      "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3138437271118164,
      "eval_arxiv_n_ngrams_match_1": 16.01,
      "eval_arxiv_n_ngrams_match_2": 3.21,
      "eval_arxiv_n_ngrams_match_3": 0.732,
      "eval_arxiv_num_pred_words": 41.314,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.49058897776324,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.38063996866515315,
      "eval_arxiv_runtime": 12.0681,
      "eval_arxiv_samples_per_second": 41.431,
      "eval_arxiv_steps_per_second": 0.083,
      "eval_arxiv_token_set_f1": 0.37258514545758065,
      "eval_arxiv_token_set_f1_sem": 0.004185401543605356,
      "eval_arxiv_token_set_precision": 0.32784967190899983,
      "eval_arxiv_token_set_recall": 0.44341501940445516,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 255625
    },
    {
      "epoch": 49.08,
      "eval_python_code_alpaca_accuracy": 0.16346875,
      "eval_python_code_alpaca_bleu_score": 4.906324059278739,
      "eval_python_code_alpaca_bleu_score_sem": 0.16284075585793573,
      "eval_python_code_alpaca_emb_cos_sim": 0.7793626189231873,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.00683757740578698,
      "eval_python_code_alpaca_emb_top1_equal": 0.15625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8239521980285645,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.236,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.068,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.114,
      "eval_python_code_alpaca_num_pred_words": 44.132,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 16.84328730718324,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.35001021614155914,
      "eval_python_code_alpaca_runtime": 11.173,
      "eval_python_code_alpaca_samples_per_second": 44.751,
      "eval_python_code_alpaca_steps_per_second": 0.09,
      "eval_python_code_alpaca_token_set_f1": 0.4887993173008514,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005267455232967129,
      "eval_python_code_alpaca_token_set_precision": 0.5630063929275726,
      "eval_python_code_alpaca_token_set_recall": 0.44813212032920374,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 255625
    },
    {
      "epoch": 49.08,
      "eval_wikibio_accuracy": 0.334,
      "eval_wikibio_bleu_score": 6.109185382063926,
      "eval_wikibio_bleu_score_sem": 0.20418145808930296,
      "eval_wikibio_emb_cos_sim": 0.7559391260147095,
      "eval_wikibio_emb_cos_sim_sem": 0.008285049673254392,
      "eval_wikibio_emb_top1_equal": 0.171875,
      "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6052134037017822,
      "eval_wikibio_n_ngrams_match_1": 10.216,
      "eval_wikibio_n_ngrams_match_2": 3.42,
      "eval_wikibio_n_ngrams_match_3": 1.262,
      "eval_wikibio_num_pred_words": 35.988,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 36.789534042336584,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3625460417161077,
      "eval_wikibio_runtime": 11.6569,
      "eval_wikibio_samples_per_second": 42.893,
      "eval_wikibio_steps_per_second": 0.086,
      "eval_wikibio_token_set_f1": 0.3287405463006294,
      "eval_wikibio_token_set_f1_sem": 0.005024926415774864,
      "eval_wikibio_token_set_precision": 0.3349647816156045,
      "eval_wikibio_token_set_recall": 0.3411954359495211,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 255625
    },
    {
      "epoch": 49.08,
      "eval_nq_accuracy": 0.539625,
      "eval_nq_bleu_score": 12.596793891130252,
      "eval_nq_bleu_score_sem": 0.5054312577362579,
      "eval_nq_emb_cos_sim": 0.8442134857177734,
      "eval_nq_emb_cos_sim_sem": 0.006530488882091526,
      "eval_nq_emb_top1_equal": 0.2734375,
      "eval_nq_emb_top1_equal_sem": 0.03955156411760461,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1003105640411377,
      "eval_nq_n_ngrams_match_1": 23.644,
      "eval_nq_n_ngrams_match_2": 8.9,
      "eval_nq_n_ngrams_match_3": 4.272,
      "eval_nq_num_pred_words": 48.948,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.168706425150726,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4600487435592311,
      "eval_nq_runtime": 11.601,
      "eval_nq_samples_per_second": 43.1,
      "eval_nq_steps_per_second": 0.086,
      "eval_nq_token_set_f1": 0.4725121750221839,
      "eval_nq_token_set_f1_sem": 0.0051769751972101105,
      "eval_nq_token_set_precision": 0.43366092015912555,
      "eval_nq_token_set_recall": 0.5262507692564834,
      "eval_nq_true_num_tokens": 64.0,
      "step": 255625
    },
    {
      "epoch": 49.09,
      "learning_rate": 0.001,
      "loss": 2.4712,
      "step": 255636
    },
    {
      "epoch": 49.09,
      "learning_rate": 0.001,
      "loss": 2.4722,
      "step": 255648
    },
    {
      "epoch": 49.09,
      "learning_rate": 0.001,
      "loss": 2.479,
      "step": 255660
    },
    {
      "epoch": 49.09,
      "learning_rate": 0.001,
      "loss": 2.4771,
      "step": 255672
    },
    {
      "epoch": 49.09,
      "learning_rate": 0.001,
      "loss": 2.4808,
      "step": 255684
    },
    {
      "epoch": 49.1,
      "learning_rate": 0.001,
      "loss": 2.487,
      "step": 255696
    },
    {
      "epoch": 49.1,
      "learning_rate": 0.001,
      "loss": 2.4834,
      "step": 255708
    },
    {
      "epoch": 49.1,
      "learning_rate": 0.001,
      "loss": 2.4775,
      "step": 255720
    },
    {
      "epoch": 49.1,
      "learning_rate": 0.001,
      "loss": 2.4839,
      "step": 255732
    },
    {
      "epoch": 49.11,
      "learning_rate": 0.001,
      "loss": 2.4811,
      "step": 255744
    },
    {
      "epoch": 49.11,
      "learning_rate": 0.001,
      "loss": 2.4792,
      "step": 255756
    },
    {
      "epoch": 49.11,
      "learning_rate": 0.001,
      "loss": 2.4757,
      "step": 255768
    },
    {
      "epoch": 49.11,
      "learning_rate": 0.001,
      "loss": 2.4749,
      "step": 255780
    },
    {
      "epoch": 49.12,
      "learning_rate": 0.001,
      "loss": 2.4806,
      "step": 255792
    },
    {
      "epoch": 49.12,
      "learning_rate": 0.001,
      "loss": 2.4791,
      "step": 255804
    },
    {
      "epoch": 49.12,
      "learning_rate": 0.001,
      "loss": 2.4719,
      "step": 255816
    },
    {
      "epoch": 49.12,
      "learning_rate": 0.001,
      "loss": 2.4788,
      "step": 255828
    },
    {
      "epoch": 49.12,
      "learning_rate": 0.001,
      "loss": 2.4759,
      "step": 255840
    },
    {
      "epoch": 49.13,
      "learning_rate": 0.001,
      "loss": 2.4758,
      "step": 255852
    },
    {
      "epoch": 49.13,
      "learning_rate": 0.001,
      "loss": 2.4688,
      "step": 255864
    },
    {
      "epoch": 49.13,
      "learning_rate": 0.001,
      "loss": 2.4851,
      "step": 255876
    },
    {
      "epoch": 49.13,
      "learning_rate": 0.001,
      "loss": 2.4794,
      "step": 255888
    },
    {
      "epoch": 49.14,
      "learning_rate": 0.001,
      "loss": 2.4724,
      "step": 255900
    },
    {
      "epoch": 49.14,
      "learning_rate": 0.001,
      "loss": 2.4823,
      "step": 255912
    },
    {
      "epoch": 49.14,
      "learning_rate": 0.001,
      "loss": 2.4724,
      "step": 255924
    },
    {
      "epoch": 49.14,
      "learning_rate": 0.001,
      "loss": 2.4847,
      "step": 255936
    },
    {
      "epoch": 49.15,
      "learning_rate": 0.001,
      "loss": 2.4808,
      "step": 255948
    },
    {
      "epoch": 49.15,
      "learning_rate": 0.001,
      "loss": 2.4783,
      "step": 255960
    },
    {
      "epoch": 49.15,
      "learning_rate": 0.001,
      "loss": 2.4749,
      "step": 255972
    },
    {
      "epoch": 49.15,
      "learning_rate": 0.001,
      "loss": 2.475,
      "step": 255984
    },
    {
      "epoch": 49.15,
      "learning_rate": 0.001,
      "loss": 2.4678,
      "step": 255996
    },
    {
      "epoch": 49.16,
      "learning_rate": 0.001,
      "loss": 2.4643,
      "step": 256008
    },
    {
      "epoch": 49.16,
      "learning_rate": 0.001,
      "loss": 2.4641,
      "step": 256020
    },
    {
      "epoch": 49.16,
      "learning_rate": 0.001,
      "loss": 2.4667,
      "step": 256032
    },
    {
      "epoch": 49.16,
      "learning_rate": 0.001,
      "loss": 2.474,
      "step": 256044
    },
    {
      "epoch": 49.17,
      "learning_rate": 0.001,
      "loss": 2.4766,
      "step": 256056
    },
    {
      "epoch": 49.17,
      "learning_rate": 0.001,
      "loss": 2.4708,
      "step": 256068
    },
    {
      "epoch": 49.17,
      "learning_rate": 0.001,
      "loss": 2.4842,
      "step": 256080
    },
    {
      "epoch": 49.17,
      "learning_rate": 0.001,
      "loss": 2.4834,
      "step": 256092
    },
    {
      "epoch": 49.18,
      "learning_rate": 0.001,
      "loss": 2.4721,
      "step": 256104
    },
    {
      "epoch": 49.18,
      "learning_rate": 0.001,
      "loss": 2.4862,
      "step": 256116
    },
    {
      "epoch": 49.18,
      "learning_rate": 0.001,
      "loss": 2.4772,
      "step": 256128
    },
    {
      "epoch": 49.18,
      "learning_rate": 0.001,
      "loss": 2.4766,
      "step": 256140
    },
    {
      "epoch": 49.18,
      "learning_rate": 0.001,
      "loss": 2.4758,
      "step": 256152
    },
    {
      "epoch": 49.19,
      "learning_rate": 0.001,
      "loss": 2.4791,
      "step": 256164
    },
    {
      "epoch": 49.19,
      "learning_rate": 0.001,
      "loss": 2.486,
      "step": 256176
    },
    {
      "epoch": 49.19,
      "learning_rate": 0.001,
      "loss": 2.4713,
      "step": 256188
    },
    {
      "epoch": 49.19,
      "learning_rate": 0.001,
      "loss": 2.4827,
      "step": 256200
    },
    {
      "epoch": 49.2,
      "learning_rate": 0.001,
      "loss": 2.4864,
      "step": 256212
    },
    {
      "epoch": 49.2,
      "learning_rate": 0.001,
      "loss": 2.4741,
      "step": 256224
    },
    {
      "epoch": 49.2,
      "learning_rate": 0.001,
      "loss": 2.4691,
      "step": 256236
    },
    {
      "epoch": 49.2,
      "learning_rate": 0.001,
      "loss": 2.4699,
      "step": 256248
    },
    {
      "epoch": 49.2,
      "eval_ag_news_accuracy": 0.33046875,
      "eval_ag_news_bleu_score": 5.043217042776179,
      "eval_ag_news_bleu_score_sem": 0.16174832329162295,
      "eval_ag_news_emb_cos_sim": 0.8152117133140564,
      "eval_ag_news_emb_cos_sim_sem": 0.008054775756603568,
      "eval_ag_news_emb_top1_equal": 0.2734375,
      "eval_ag_news_emb_top1_equal_sem": 0.03955156411760461,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.45247745513916,
      "eval_ag_news_n_ngrams_match_1": 14.53,
      "eval_ag_news_n_ngrams_match_2": 3.302,
      "eval_ag_news_n_ngrams_match_3": 0.922,
      "eval_ag_news_num_pred_words": 46.728,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.578529868742635,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3640949688473055,
      "eval_ag_news_runtime": 14.2536,
      "eval_ag_news_samples_per_second": 35.079,
      "eval_ag_news_steps_per_second": 0.07,
      "eval_ag_news_token_set_f1": 0.3626868877968545,
      "eval_ag_news_token_set_f1_sem": 0.004380139390985463,
      "eval_ag_news_token_set_precision": 0.3481912033393453,
      "eval_ag_news_token_set_recall": 0.3915673255987398,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 256250
    },
    {
      "epoch": 49.2,
      "eval_anthropic_toxic_prompts_accuracy": 0.1165625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2949912416774136,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12462337117179827,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.686430811882019,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009695480129236794,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.185387134552002,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.462,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.07,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.808,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.364,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.176646209462877,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.22147314383432543,
      "eval_anthropic_toxic_prompts_runtime": 17.6559,
      "eval_anthropic_toxic_prompts_samples_per_second": 28.319,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.057,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3610745672460277,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006413178292314354,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.45573239268142074,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32517712727149634,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 256250
    },
    {
      "epoch": 49.2,
      "eval_arxiv_accuracy": 0.3531875,
      "eval_arxiv_bleu_score": 4.495392834026501,
      "eval_arxiv_bleu_score_sem": 0.12826398561178315,
      "eval_arxiv_emb_cos_sim": 0.778884768486023,
      "eval_arxiv_emb_cos_sim_sem": 0.007722691517563122,
      "eval_arxiv_emb_top1_equal": 0.2109375,
      "eval_arxiv_emb_top1_equal_sem": 0.03620184850179216,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3233225345611572,
      "eval_arxiv_n_ngrams_match_1": 15.682,
      "eval_arxiv_n_ngrams_match_2": 3.118,
      "eval_arxiv_n_ngrams_match_3": 0.692,
      "eval_arxiv_num_pred_words": 41.268,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.75240587301812,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.37126008651175524,
      "eval_arxiv_runtime": 14.5949,
      "eval_arxiv_samples_per_second": 34.259,
      "eval_arxiv_steps_per_second": 0.069,
      "eval_arxiv_token_set_f1": 0.3653475091302374,
      "eval_arxiv_token_set_f1_sem": 0.004507638298671149,
      "eval_arxiv_token_set_precision": 0.31921645517267927,
      "eval_arxiv_token_set_recall": 0.44357360213561625,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 256250
    },
    {
      "epoch": 49.2,
      "eval_python_code_alpaca_accuracy": 0.16465625,
      "eval_python_code_alpaca_bleu_score": 4.872121200975159,
      "eval_python_code_alpaca_bleu_score_sem": 0.15571547758094378,
      "eval_python_code_alpaca_emb_cos_sim": 0.7787461280822754,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.006888881974225882,
      "eval_python_code_alpaca_emb_top1_equal": 0.140625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8354156017303467,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.244,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.092,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.064,
      "eval_python_code_alpaca_num_pred_words": 43.936,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.037479635219537,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3501267549648855,
      "eval_python_code_alpaca_runtime": 14.0163,
      "eval_python_code_alpaca_samples_per_second": 35.673,
      "eval_python_code_alpaca_steps_per_second": 0.071,
      "eval_python_code_alpaca_token_set_f1": 0.49198952511198535,
      "eval_python_code_alpaca_token_set_f1_sem": 0.00534944324439524,
      "eval_python_code_alpaca_token_set_precision": 0.565114902902067,
      "eval_python_code_alpaca_token_set_recall": 0.45752985917392913,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 256250
    },
    {
      "epoch": 49.2,
      "eval_wikibio_accuracy": 0.33040625,
      "eval_wikibio_bleu_score": 6.092429866180376,
      "eval_wikibio_bleu_score_sem": 0.21055146410116587,
      "eval_wikibio_emb_cos_sim": 0.7502623796463013,
      "eval_wikibio_emb_cos_sim_sem": 0.009124294842960522,
      "eval_wikibio_emb_top1_equal": 0.25,
      "eval_wikibio_emb_top1_equal_sem": 0.03842366440207048,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.584160089492798,
      "eval_wikibio_n_ngrams_match_1": 10.184,
      "eval_wikibio_n_ngrams_match_2": 3.438,
      "eval_wikibio_n_ngrams_match_3": 1.234,
      "eval_wikibio_num_pred_words": 36.03,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 36.02308883824612,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.36075220562791344,
      "eval_wikibio_runtime": 13.6901,
      "eval_wikibio_samples_per_second": 36.523,
      "eval_wikibio_steps_per_second": 0.073,
      "eval_wikibio_token_set_f1": 0.32352363354992836,
      "eval_wikibio_token_set_f1_sem": 0.005260468138923508,
      "eval_wikibio_token_set_precision": 0.329763042866111,
      "eval_wikibio_token_set_recall": 0.33496254819562765,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 256250
    },
    {
      "epoch": 49.2,
      "eval_nq_accuracy": 0.53921875,
      "eval_nq_bleu_score": 12.427637148300843,
      "eval_nq_bleu_score_sem": 0.5037686049432977,
      "eval_nq_emb_cos_sim": 0.8388968706130981,
      "eval_nq_emb_cos_sim_sem": 0.007309439692280282,
      "eval_nq_emb_top1_equal": 0.25,
      "eval_nq_emb_top1_equal_sem": 0.03842366440207048,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1017231941223145,
      "eval_nq_n_ngrams_match_1": 23.72,
      "eval_nq_n_ngrams_match_2": 8.876,
      "eval_nq_n_ngrams_match_3": 4.19,
      "eval_nq_num_pred_words": 49.212,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.18025393983422,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4613722903285064,
      "eval_nq_runtime": 19.4728,
      "eval_nq_samples_per_second": 25.677,
      "eval_nq_steps_per_second": 0.051,
      "eval_nq_token_set_f1": 0.47590787459483735,
      "eval_nq_token_set_f1_sem": 0.00493083699606327,
      "eval_nq_token_set_precision": 0.4348087147899886,
      "eval_nq_token_set_recall": 0.5331275817310658,
      "eval_nq_true_num_tokens": 64.0,
      "step": 256250
    },
    {
      "epoch": 49.21,
      "learning_rate": 0.001,
      "loss": 2.4738,
      "step": 256260
    },
    {
      "epoch": 49.21,
      "learning_rate": 0.001,
      "loss": 2.4732,
      "step": 256272
    },
    {
      "epoch": 49.21,
      "learning_rate": 0.001,
      "loss": 2.4752,
      "step": 256284
    },
    {
      "epoch": 49.21,
      "learning_rate": 0.001,
      "loss": 2.478,
      "step": 256296
    },
    {
      "epoch": 49.21,
      "learning_rate": 0.001,
      "loss": 2.4857,
      "step": 256308
    },
    {
      "epoch": 49.22,
      "learning_rate": 0.001,
      "loss": 2.4839,
      "step": 256320
    },
    {
      "epoch": 49.22,
      "learning_rate": 0.001,
      "loss": 2.4786,
      "step": 256332
    },
    {
      "epoch": 49.22,
      "learning_rate": 0.001,
      "loss": 2.481,
      "step": 256344
    },
    {
      "epoch": 49.22,
      "learning_rate": 0.001,
      "loss": 2.4704,
      "step": 256356
    },
    {
      "epoch": 49.23,
      "learning_rate": 0.001,
      "loss": 2.4768,
      "step": 256368
    },
    {
      "epoch": 49.23,
      "learning_rate": 0.001,
      "loss": 2.4795,
      "step": 256380
    },
    {
      "epoch": 49.23,
      "learning_rate": 0.001,
      "loss": 2.4787,
      "step": 256392
    },
    {
      "epoch": 49.23,
      "learning_rate": 0.001,
      "loss": 2.4707,
      "step": 256404
    },
    {
      "epoch": 49.24,
      "learning_rate": 0.001,
      "loss": 2.4768,
      "step": 256416
    },
    {
      "epoch": 49.24,
      "learning_rate": 0.001,
      "loss": 2.4696,
      "step": 256428
    },
    {
      "epoch": 49.24,
      "learning_rate": 0.001,
      "loss": 2.4713,
      "step": 256440
    },
    {
      "epoch": 49.24,
      "learning_rate": 0.001,
      "loss": 2.4768,
      "step": 256452
    },
    {
      "epoch": 49.24,
      "learning_rate": 0.001,
      "loss": 2.4846,
      "step": 256464
    },
    {
      "epoch": 49.25,
      "learning_rate": 0.001,
      "loss": 2.4789,
      "step": 256476
    },
    {
      "epoch": 49.25,
      "learning_rate": 0.001,
      "loss": 2.4816,
      "step": 256488
    },
    {
      "epoch": 49.25,
      "learning_rate": 0.001,
      "loss": 2.4784,
      "step": 256500
    },
    {
      "epoch": 49.25,
      "learning_rate": 0.001,
      "loss": 2.4687,
      "step": 256512
    },
    {
      "epoch": 49.26,
      "learning_rate": 0.001,
      "loss": 2.4684,
      "step": 256524
    },
    {
      "epoch": 49.26,
      "learning_rate": 0.001,
      "loss": 2.4767,
      "step": 256536
    },
    {
      "epoch": 49.26,
      "learning_rate": 0.001,
      "loss": 2.478,
      "step": 256548
    },
    {
      "epoch": 49.26,
      "learning_rate": 0.001,
      "loss": 2.4889,
      "step": 256560
    },
    {
      "epoch": 49.26,
      "learning_rate": 0.001,
      "loss": 2.4771,
      "step": 256572
    },
    {
      "epoch": 49.27,
      "learning_rate": 0.001,
      "loss": 2.4809,
      "step": 256584
    },
    {
      "epoch": 49.27,
      "learning_rate": 0.001,
      "loss": 2.4767,
      "step": 256596
    },
    {
      "epoch": 49.27,
      "learning_rate": 0.001,
      "loss": 2.4795,
      "step": 256608
    },
    {
      "epoch": 49.27,
      "learning_rate": 0.001,
      "loss": 2.4795,
      "step": 256620
    },
    {
      "epoch": 49.28,
      "learning_rate": 0.001,
      "loss": 2.4853,
      "step": 256632
    },
    {
      "epoch": 49.28,
      "learning_rate": 0.001,
      "loss": 2.475,
      "step": 256644
    },
    {
      "epoch": 49.28,
      "learning_rate": 0.001,
      "loss": 2.4809,
      "step": 256656
    },
    {
      "epoch": 49.28,
      "learning_rate": 0.001,
      "loss": 2.4763,
      "step": 256668
    },
    {
      "epoch": 49.29,
      "learning_rate": 0.001,
      "loss": 2.476,
      "step": 256680
    },
    {
      "epoch": 49.29,
      "learning_rate": 0.001,
      "loss": 2.4855,
      "step": 256692
    },
    {
      "epoch": 49.29,
      "learning_rate": 0.001,
      "loss": 2.4859,
      "step": 256704
    },
    {
      "epoch": 49.29,
      "learning_rate": 0.001,
      "loss": 2.4785,
      "step": 256716
    },
    {
      "epoch": 49.29,
      "learning_rate": 0.001,
      "loss": 2.4794,
      "step": 256728
    },
    {
      "epoch": 49.3,
      "learning_rate": 0.001,
      "loss": 2.4879,
      "step": 256740
    },
    {
      "epoch": 49.3,
      "learning_rate": 0.001,
      "loss": 2.4718,
      "step": 256752
    },
    {
      "epoch": 49.3,
      "learning_rate": 0.001,
      "loss": 2.4833,
      "step": 256764
    },
    {
      "epoch": 49.3,
      "learning_rate": 0.001,
      "loss": 2.4875,
      "step": 256776
    },
    {
      "epoch": 49.31,
      "learning_rate": 0.001,
      "loss": 2.4747,
      "step": 256788
    },
    {
      "epoch": 49.31,
      "learning_rate": 0.001,
      "loss": 2.4863,
      "step": 256800
    },
    {
      "epoch": 49.31,
      "learning_rate": 0.001,
      "loss": 2.4823,
      "step": 256812
    },
    {
      "epoch": 49.31,
      "learning_rate": 0.001,
      "loss": 2.4817,
      "step": 256824
    },
    {
      "epoch": 49.32,
      "learning_rate": 0.001,
      "loss": 2.4792,
      "step": 256836
    },
    {
      "epoch": 49.32,
      "learning_rate": 0.001,
      "loss": 2.4881,
      "step": 256848
    },
    {
      "epoch": 49.32,
      "learning_rate": 0.001,
      "loss": 2.4825,
      "step": 256860
    },
    {
      "epoch": 49.32,
      "learning_rate": 0.001,
      "loss": 2.4781,
      "step": 256872
    },
    {
      "epoch": 49.32,
      "eval_ag_news_accuracy": 0.3318125,
      "eval_ag_news_bleu_score": 5.081396200302893,
      "eval_ag_news_bleu_score_sem": 0.16451155846949,
      "eval_ag_news_emb_cos_sim": 0.8254844546318054,
      "eval_ag_news_emb_cos_sim_sem": 0.007121095962997224,
      "eval_ag_news_emb_top1_equal": 0.2578125,
      "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4433860778808594,
      "eval_ag_news_n_ngrams_match_1": 14.642,
      "eval_ag_news_n_ngrams_match_2": 3.322,
      "eval_ag_news_n_ngrams_match_3": 0.932,
      "eval_ag_news_num_pred_words": 46.838,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.292738626900984,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3649736249923148,
      "eval_ag_news_runtime": 14.5676,
      "eval_ag_news_samples_per_second": 34.323,
      "eval_ag_news_steps_per_second": 0.069,
      "eval_ag_news_token_set_f1": 0.36144229924221694,
      "eval_ag_news_token_set_f1_sem": 0.004469218352070889,
      "eval_ag_news_token_set_precision": 0.35119472441975647,
      "eval_ag_news_token_set_recall": 0.3872363475860789,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 256875
    },
    {
      "epoch": 49.32,
      "eval_anthropic_toxic_prompts_accuracy": 0.1185625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.30944831317759,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12618017653442176,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6850172877311707,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010458037132443828,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.175732135772705,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.408,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.056,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.79,
      "eval_anthropic_toxic_prompts_num_pred_words": 46.322,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 23.944343963353475,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.2200586302775161,
      "eval_anthropic_toxic_prompts_runtime": 13.8099,
      "eval_anthropic_toxic_prompts_samples_per_second": 36.206,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.072,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.35854321758104724,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006503358291102491,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.44649057659363245,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32697149018921445,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 256875
    },
    {
      "epoch": 49.32,
      "eval_arxiv_accuracy": 0.35403125,
      "eval_arxiv_bleu_score": 4.461321904384291,
      "eval_arxiv_bleu_score_sem": 0.12213479557784862,
      "eval_arxiv_emb_cos_sim": 0.7896615266799927,
      "eval_arxiv_emb_cos_sim_sem": 0.007116109463982779,
      "eval_arxiv_emb_top1_equal": 0.3046875,
      "eval_arxiv_emb_top1_equal_sem": 0.04084279867618665,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3053104877471924,
      "eval_arxiv_n_ngrams_match_1": 15.728,
      "eval_arxiv_n_ngrams_match_2": 3.056,
      "eval_arxiv_n_ngrams_match_3": 0.642,
      "eval_arxiv_num_pred_words": 40.716,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.25700324064491,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.37917725386830603,
      "eval_arxiv_runtime": 14.577,
      "eval_arxiv_samples_per_second": 34.301,
      "eval_arxiv_steps_per_second": 0.069,
      "eval_arxiv_token_set_f1": 0.3675406337866314,
      "eval_arxiv_token_set_f1_sem": 0.0041793149078806395,
      "eval_arxiv_token_set_precision": 0.3228675605668348,
      "eval_arxiv_token_set_recall": 0.4407260843846051,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 256875
    },
    {
      "epoch": 49.32,
      "eval_python_code_alpaca_accuracy": 0.162625,
      "eval_python_code_alpaca_bleu_score": 5.005547956255304,
      "eval_python_code_alpaca_bleu_score_sem": 0.16610661730934456,
      "eval_python_code_alpaca_emb_cos_sim": 0.7768363952636719,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007135297475311871,
      "eval_python_code_alpaca_emb_top1_equal": 0.1875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.034634623208270626,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8392891883850098,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.842,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.058,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.09,
      "eval_python_code_alpaca_num_pred_words": 42.426,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.10360377507556,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3457762888173629,
      "eval_python_code_alpaca_runtime": 13.6367,
      "eval_python_code_alpaca_samples_per_second": 36.666,
      "eval_python_code_alpaca_steps_per_second": 0.073,
      "eval_python_code_alpaca_token_set_f1": 0.4778991604897977,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005398598328638675,
      "eval_python_code_alpaca_token_set_precision": 0.5438141225217484,
      "eval_python_code_alpaca_token_set_recall": 0.4527064567341806,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 256875
    },
    {
      "epoch": 49.32,
      "eval_wikibio_accuracy": 0.330125,
      "eval_wikibio_bleu_score": 6.222894586384821,
      "eval_wikibio_bleu_score_sem": 0.211048056856142,
      "eval_wikibio_emb_cos_sim": 0.7671869993209839,
      "eval_wikibio_emb_cos_sim_sem": 0.008162788251169369,
      "eval_wikibio_emb_top1_equal": 0.234375,
      "eval_wikibio_emb_top1_equal_sem": 0.03758909358128201,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.607555389404297,
      "eval_wikibio_n_ngrams_match_1": 10.55,
      "eval_wikibio_n_ngrams_match_2": 3.602,
      "eval_wikibio_n_ngrams_match_3": 1.32,
      "eval_wikibio_num_pred_words": 36.912,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 36.8757955772787,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3698757084550667,
      "eval_wikibio_runtime": 13.7057,
      "eval_wikibio_samples_per_second": 36.481,
      "eval_wikibio_steps_per_second": 0.073,
      "eval_wikibio_token_set_f1": 0.3324199898823237,
      "eval_wikibio_token_set_f1_sem": 0.005267986338693477,
      "eval_wikibio_token_set_precision": 0.3445027595237636,
      "eval_wikibio_token_set_recall": 0.3352381215160292,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 256875
    },
    {
      "epoch": 49.32,
      "eval_nq_accuracy": 0.54053125,
      "eval_nq_bleu_score": 12.30907177544018,
      "eval_nq_bleu_score_sem": 0.4960400321429855,
      "eval_nq_emb_cos_sim": 0.8414657115936279,
      "eval_nq_emb_cos_sim_sem": 0.007097317896447492,
      "eval_nq_emb_top1_equal": 0.3359375,
      "eval_nq_emb_top1_equal_sem": 0.04191137143408563,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.09922456741333,
      "eval_nq_n_ngrams_match_1": 23.626,
      "eval_nq_n_ngrams_match_2": 8.816,
      "eval_nq_n_ngrams_match_3": 4.188,
      "eval_nq_num_pred_words": 48.93,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.159840052816099,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4596799378507059,
      "eval_nq_runtime": 14.1124,
      "eval_nq_samples_per_second": 35.43,
      "eval_nq_steps_per_second": 0.071,
      "eval_nq_token_set_f1": 0.47371893101131696,
      "eval_nq_token_set_f1_sem": 0.005024055977908796,
      "eval_nq_token_set_precision": 0.43442789850436325,
      "eval_nq_token_set_recall": 0.5293189250752252,
      "eval_nq_true_num_tokens": 64.0,
      "step": 256875
    },
    {
      "epoch": 49.32,
      "learning_rate": 0.001,
      "loss": 2.4675,
      "step": 256884
    },
    {
      "epoch": 49.33,
      "learning_rate": 0.001,
      "loss": 2.4829,
      "step": 256896
    },
    {
      "epoch": 49.33,
      "learning_rate": 0.001,
      "loss": 2.4781,
      "step": 256908
    },
    {
      "epoch": 49.33,
      "learning_rate": 0.001,
      "loss": 2.4792,
      "step": 256920
    },
    {
      "epoch": 49.33,
      "learning_rate": 0.001,
      "loss": 2.4773,
      "step": 256932
    },
    {
      "epoch": 49.34,
      "learning_rate": 0.001,
      "loss": 2.4815,
      "step": 256944
    },
    {
      "epoch": 49.34,
      "learning_rate": 0.001,
      "loss": 2.4896,
      "step": 256956
    },
    {
      "epoch": 49.34,
      "learning_rate": 0.001,
      "loss": 2.4732,
      "step": 256968
    },
    {
      "epoch": 49.34,
      "learning_rate": 0.001,
      "loss": 2.4762,
      "step": 256980
    },
    {
      "epoch": 49.35,
      "learning_rate": 0.001,
      "loss": 2.4756,
      "step": 256992
    },
    {
      "epoch": 49.35,
      "learning_rate": 0.001,
      "loss": 2.4712,
      "step": 257004
    },
    {
      "epoch": 49.35,
      "learning_rate": 0.001,
      "loss": 2.4754,
      "step": 257016
    },
    {
      "epoch": 49.35,
      "learning_rate": 0.001,
      "loss": 2.4709,
      "step": 257028
    },
    {
      "epoch": 49.35,
      "learning_rate": 0.001,
      "loss": 2.4749,
      "step": 257040
    },
    {
      "epoch": 49.36,
      "learning_rate": 0.001,
      "loss": 2.4712,
      "step": 257052
    },
    {
      "epoch": 49.36,
      "learning_rate": 0.001,
      "loss": 2.4751,
      "step": 257064
    },
    {
      "epoch": 49.36,
      "learning_rate": 0.001,
      "loss": 2.4763,
      "step": 257076
    },
    {
      "epoch": 49.36,
      "learning_rate": 0.001,
      "loss": 2.4805,
      "step": 257088
    },
    {
      "epoch": 49.37,
      "learning_rate": 0.001,
      "loss": 2.4824,
      "step": 257100
    },
    {
      "epoch": 49.37,
      "learning_rate": 0.001,
      "loss": 2.4828,
      "step": 257112
    },
    {
      "epoch": 49.37,
      "learning_rate": 0.001,
      "loss": 2.4821,
      "step": 257124
    },
    {
      "epoch": 49.37,
      "learning_rate": 0.001,
      "loss": 2.4831,
      "step": 257136
    },
    {
      "epoch": 49.38,
      "learning_rate": 0.001,
      "loss": 2.4755,
      "step": 257148
    },
    {
      "epoch": 49.38,
      "learning_rate": 0.001,
      "loss": 2.4783,
      "step": 257160
    },
    {
      "epoch": 49.38,
      "learning_rate": 0.001,
      "loss": 2.4839,
      "step": 257172
    },
    {
      "epoch": 49.38,
      "learning_rate": 0.001,
      "loss": 2.4757,
      "step": 257184
    },
    {
      "epoch": 49.38,
      "learning_rate": 0.001,
      "loss": 2.4735,
      "step": 257196
    },
    {
      "epoch": 49.39,
      "learning_rate": 0.001,
      "loss": 2.4815,
      "step": 257208
    },
    {
      "epoch": 49.39,
      "learning_rate": 0.001,
      "loss": 2.4839,
      "step": 257220
    },
    {
      "epoch": 49.39,
      "learning_rate": 0.001,
      "loss": 2.4844,
      "step": 257232
    },
    {
      "epoch": 49.39,
      "learning_rate": 0.001,
      "loss": 2.4943,
      "step": 257244
    },
    {
      "epoch": 49.4,
      "learning_rate": 0.001,
      "loss": 2.4818,
      "step": 257256
    },
    {
      "epoch": 49.4,
      "learning_rate": 0.001,
      "loss": 2.4738,
      "step": 257268
    },
    {
      "epoch": 49.4,
      "learning_rate": 0.001,
      "loss": 2.4893,
      "step": 257280
    },
    {
      "epoch": 49.4,
      "learning_rate": 0.001,
      "loss": 2.4751,
      "step": 257292
    },
    {
      "epoch": 49.41,
      "learning_rate": 0.001,
      "loss": 2.4813,
      "step": 257304
    },
    {
      "epoch": 49.41,
      "learning_rate": 0.001,
      "loss": 2.4754,
      "step": 257316
    },
    {
      "epoch": 49.41,
      "learning_rate": 0.001,
      "loss": 2.4848,
      "step": 257328
    },
    {
      "epoch": 49.41,
      "learning_rate": 0.001,
      "loss": 2.4747,
      "step": 257340
    },
    {
      "epoch": 49.41,
      "learning_rate": 0.001,
      "loss": 2.4793,
      "step": 257352
    },
    {
      "epoch": 49.42,
      "learning_rate": 0.001,
      "loss": 2.4735,
      "step": 257364
    },
    {
      "epoch": 49.42,
      "learning_rate": 0.001,
      "loss": 2.485,
      "step": 257376
    },
    {
      "epoch": 49.42,
      "learning_rate": 0.001,
      "loss": 2.4753,
      "step": 257388
    },
    {
      "epoch": 49.42,
      "learning_rate": 0.001,
      "loss": 2.4765,
      "step": 257400
    },
    {
      "epoch": 49.43,
      "learning_rate": 0.001,
      "loss": 2.4747,
      "step": 257412
    },
    {
      "epoch": 49.43,
      "learning_rate": 0.001,
      "loss": 2.4804,
      "step": 257424
    },
    {
      "epoch": 49.43,
      "learning_rate": 0.001,
      "loss": 2.4724,
      "step": 257436
    },
    {
      "epoch": 49.43,
      "learning_rate": 0.001,
      "loss": 2.4747,
      "step": 257448
    },
    {
      "epoch": 49.44,
      "learning_rate": 0.001,
      "loss": 2.4794,
      "step": 257460
    },
    {
      "epoch": 49.44,
      "learning_rate": 0.001,
      "loss": 2.4787,
      "step": 257472
    },
    {
      "epoch": 49.44,
      "learning_rate": 0.001,
      "loss": 2.4795,
      "step": 257484
    },
    {
      "epoch": 49.44,
      "learning_rate": 0.001,
      "loss": 2.4862,
      "step": 257496
    },
    {
      "epoch": 49.44,
      "eval_ag_news_accuracy": 0.33103125,
      "eval_ag_news_bleu_score": 5.012001864411936,
      "eval_ag_news_bleu_score_sem": 0.16031987311286103,
      "eval_ag_news_emb_cos_sim": 0.8189452886581421,
      "eval_ag_news_emb_cos_sim_sem": 0.007686430739864577,
      "eval_ag_news_emb_top1_equal": 0.25,
      "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4618349075317383,
      "eval_ag_news_n_ngrams_match_1": 14.518,
      "eval_ag_news_n_ngrams_match_2": 3.324,
      "eval_ag_news_n_ngrams_match_3": 0.948,
      "eval_ag_news_num_pred_words": 46.752,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.875411319347933,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3610674022987158,
      "eval_ag_news_runtime": 14.4649,
      "eval_ag_news_samples_per_second": 34.566,
      "eval_ag_news_steps_per_second": 0.069,
      "eval_ag_news_token_set_f1": 0.36011668877367287,
      "eval_ag_news_token_set_f1_sem": 0.004720647887973495,
      "eval_ag_news_token_set_precision": 0.3480136119382942,
      "eval_ag_news_token_set_recall": 0.38732049186993767,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 257500
    },
    {
      "epoch": 49.44,
      "eval_anthropic_toxic_prompts_accuracy": 0.11740625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.2862728924611435,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12005320503349222,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6906065940856934,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008941596157199893,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.1902427673339844,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.438,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.026,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.768,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.354,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.29432459566063,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21911415891886715,
      "eval_anthropic_toxic_prompts_runtime": 13.3755,
      "eval_anthropic_toxic_prompts_samples_per_second": 37.382,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.075,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3646844160771978,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006502895282041204,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4555083787849245,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3308919202481369,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 257500
    },
    {
      "epoch": 49.44,
      "eval_arxiv_accuracy": 0.35446875,
      "eval_arxiv_bleu_score": 4.492802702590087,
      "eval_arxiv_bleu_score_sem": 0.1295615135734006,
      "eval_arxiv_emb_cos_sim": 0.7812449932098389,
      "eval_arxiv_emb_cos_sim_sem": 0.0065135906300166895,
      "eval_arxiv_emb_top1_equal": 0.2890625,
      "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3121416568756104,
      "eval_arxiv_n_ngrams_match_1": 15.396,
      "eval_arxiv_n_ngrams_match_2": 3.108,
      "eval_arxiv_n_ngrams_match_3": 0.7,
      "eval_arxiv_num_pred_words": 40.67,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.443837862615883,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3653685230464675,
      "eval_arxiv_runtime": 14.6278,
      "eval_arxiv_samples_per_second": 34.181,
      "eval_arxiv_steps_per_second": 0.068,
      "eval_arxiv_token_set_f1": 0.3605950477836636,
      "eval_arxiv_token_set_f1_sem": 0.004239692448617283,
      "eval_arxiv_token_set_precision": 0.3126675176317515,
      "eval_arxiv_token_set_recall": 0.4469840507072583,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 257500
    },
    {
      "epoch": 49.44,
      "eval_python_code_alpaca_accuracy": 0.1636875,
      "eval_python_code_alpaca_bleu_score": 4.626496892207398,
      "eval_python_code_alpaca_bleu_score_sem": 0.14452358955931308,
      "eval_python_code_alpaca_emb_cos_sim": 0.7660123109817505,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.006986079850881591,
      "eval_python_code_alpaca_emb_top1_equal": 0.1875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.034634623208270626,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.842632293701172,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.07,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.9,
      "eval_python_code_alpaca_n_ngrams_match_3": 0.95,
      "eval_python_code_alpaca_num_pred_words": 43.02,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 17.160878608337686,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.34704052699175136,
      "eval_python_code_alpaca_runtime": 14.2226,
      "eval_python_code_alpaca_samples_per_second": 35.155,
      "eval_python_code_alpaca_steps_per_second": 0.07,
      "eval_python_code_alpaca_token_set_f1": 0.48346910977841,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005342612114974879,
      "eval_python_code_alpaca_token_set_precision": 0.5536433886337334,
      "eval_python_code_alpaca_token_set_recall": 0.4544745018469489,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 257500
    },
    {
      "epoch": 49.44,
      "eval_wikibio_accuracy": 0.3296875,
      "eval_wikibio_bleu_score": 6.220462834287385,
      "eval_wikibio_bleu_score_sem": 0.2210614598981559,
      "eval_wikibio_emb_cos_sim": 0.7541302442550659,
      "eval_wikibio_emb_cos_sim_sem": 0.008750735470807324,
      "eval_wikibio_emb_top1_equal": 0.1640625,
      "eval_wikibio_emb_top1_equal_sem": 0.03286167651298939,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.635528326034546,
      "eval_wikibio_n_ngrams_match_1": 10.426,
      "eval_wikibio_n_ngrams_match_2": 3.488,
      "eval_wikibio_n_ngrams_match_3": 1.32,
      "eval_wikibio_num_pred_words": 36.842,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 37.92188272345868,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3709975040511946,
      "eval_wikibio_runtime": 13.815,
      "eval_wikibio_samples_per_second": 36.192,
      "eval_wikibio_steps_per_second": 0.072,
      "eval_wikibio_token_set_f1": 0.32911650091476735,
      "eval_wikibio_token_set_f1_sem": 0.0053852770162088155,
      "eval_wikibio_token_set_precision": 0.3389797088123273,
      "eval_wikibio_token_set_recall": 0.33577738209664915,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 257500
    },
    {
      "epoch": 49.44,
      "eval_nq_accuracy": 0.53878125,
      "eval_nq_bleu_score": 12.172312054505728,
      "eval_nq_bleu_score_sem": 0.4938659788837319,
      "eval_nq_emb_cos_sim": 0.83785080909729,
      "eval_nq_emb_cos_sim_sem": 0.007079568804868342,
      "eval_nq_emb_top1_equal": 0.296875,
      "eval_nq_emb_top1_equal_sem": 0.04054163310179599,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1025428771972656,
      "eval_nq_n_ngrams_match_1": 23.614,
      "eval_nq_n_ngrams_match_2": 8.724,
      "eval_nq_n_ngrams_match_3": 4.104,
      "eval_nq_num_pred_words": 48.942,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.186961904364422,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4597328167668949,
      "eval_nq_runtime": 14.1255,
      "eval_nq_samples_per_second": 35.397,
      "eval_nq_steps_per_second": 0.071,
      "eval_nq_token_set_f1": 0.4718487581369448,
      "eval_nq_token_set_f1_sem": 0.0050176425751479094,
      "eval_nq_token_set_precision": 0.43057553402009285,
      "eval_nq_token_set_recall": 0.5304307727196864,
      "eval_nq_true_num_tokens": 64.0,
      "step": 257500
    },
    {
      "epoch": 49.44,
      "learning_rate": 0.001,
      "loss": 2.4736,
      "step": 257508
    },
    {
      "epoch": 49.45,
      "learning_rate": 0.001,
      "loss": 2.4751,
      "step": 257520
    },
    {
      "epoch": 49.45,
      "learning_rate": 0.001,
      "loss": 2.4764,
      "step": 257532
    },
    {
      "epoch": 49.45,
      "learning_rate": 0.001,
      "loss": 2.4837,
      "step": 257544
    },
    {
      "epoch": 49.45,
      "learning_rate": 0.001,
      "loss": 2.4849,
      "step": 257556
    },
    {
      "epoch": 49.46,
      "learning_rate": 0.001,
      "loss": 2.4792,
      "step": 257568
    },
    {
      "epoch": 49.46,
      "learning_rate": 0.001,
      "loss": 2.4751,
      "step": 257580
    },
    {
      "epoch": 49.46,
      "learning_rate": 0.001,
      "loss": 2.4884,
      "step": 257592
    },
    {
      "epoch": 49.46,
      "learning_rate": 0.001,
      "loss": 2.4783,
      "step": 257604
    },
    {
      "epoch": 49.47,
      "learning_rate": 0.001,
      "loss": 2.4853,
      "step": 257616
    },
    {
      "epoch": 49.47,
      "learning_rate": 0.001,
      "loss": 2.475,
      "step": 257628
    },
    {
      "epoch": 49.47,
      "learning_rate": 0.001,
      "loss": 2.4766,
      "step": 257640
    },
    {
      "epoch": 49.47,
      "learning_rate": 0.001,
      "loss": 2.4839,
      "step": 257652
    },
    {
      "epoch": 49.47,
      "learning_rate": 0.001,
      "loss": 2.4791,
      "step": 257664
    },
    {
      "epoch": 49.48,
      "learning_rate": 0.001,
      "loss": 2.4771,
      "step": 257676
    },
    {
      "epoch": 49.48,
      "learning_rate": 0.001,
      "loss": 2.4871,
      "step": 257688
    },
    {
      "epoch": 49.48,
      "learning_rate": 0.001,
      "loss": 2.4859,
      "step": 257700
    },
    {
      "epoch": 49.48,
      "learning_rate": 0.001,
      "loss": 2.4857,
      "step": 257712
    },
    {
      "epoch": 49.49,
      "learning_rate": 0.001,
      "loss": 2.486,
      "step": 257724
    },
    {
      "epoch": 49.49,
      "learning_rate": 0.001,
      "loss": 2.4759,
      "step": 257736
    },
    {
      "epoch": 49.49,
      "learning_rate": 0.001,
      "loss": 2.4765,
      "step": 257748
    },
    {
      "epoch": 49.49,
      "learning_rate": 0.001,
      "loss": 2.4808,
      "step": 257760
    },
    {
      "epoch": 49.5,
      "learning_rate": 0.001,
      "loss": 2.4809,
      "step": 257772
    },
    {
      "epoch": 49.5,
      "learning_rate": 0.001,
      "loss": 2.4882,
      "step": 257784
    },
    {
      "epoch": 49.5,
      "learning_rate": 0.001,
      "loss": 2.489,
      "step": 257796
    },
    {
      "epoch": 49.5,
      "learning_rate": 0.001,
      "loss": 2.4774,
      "step": 257808
    },
    {
      "epoch": 49.5,
      "learning_rate": 0.001,
      "loss": 2.4856,
      "step": 257820
    },
    {
      "epoch": 49.51,
      "learning_rate": 0.001,
      "loss": 2.4882,
      "step": 257832
    },
    {
      "epoch": 49.51,
      "learning_rate": 0.001,
      "loss": 2.4732,
      "step": 257844
    },
    {
      "epoch": 49.51,
      "learning_rate": 0.001,
      "loss": 2.4754,
      "step": 257856
    },
    {
      "epoch": 49.51,
      "learning_rate": 0.001,
      "loss": 2.4673,
      "step": 257868
    },
    {
      "epoch": 49.52,
      "learning_rate": 0.001,
      "loss": 2.4715,
      "step": 257880
    },
    {
      "epoch": 49.52,
      "learning_rate": 0.001,
      "loss": 2.487,
      "step": 257892
    },
    {
      "epoch": 49.52,
      "learning_rate": 0.001,
      "loss": 2.4833,
      "step": 257904
    },
    {
      "epoch": 49.52,
      "learning_rate": 0.001,
      "loss": 2.4784,
      "step": 257916
    },
    {
      "epoch": 49.53,
      "learning_rate": 0.001,
      "loss": 2.4795,
      "step": 257928
    },
    {
      "epoch": 49.53,
      "learning_rate": 0.001,
      "loss": 2.4837,
      "step": 257940
    },
    {
      "epoch": 49.53,
      "learning_rate": 0.001,
      "loss": 2.4854,
      "step": 257952
    },
    {
      "epoch": 49.53,
      "learning_rate": 0.001,
      "loss": 2.4762,
      "step": 257964
    },
    {
      "epoch": 49.53,
      "learning_rate": 0.001,
      "loss": 2.4646,
      "step": 257976
    },
    {
      "epoch": 49.54,
      "learning_rate": 0.001,
      "loss": 2.485,
      "step": 257988
    },
    {
      "epoch": 49.54,
      "learning_rate": 0.001,
      "loss": 2.4802,
      "step": 258000
    },
    {
      "epoch": 49.54,
      "learning_rate": 0.001,
      "loss": 2.4811,
      "step": 258012
    },
    {
      "epoch": 49.54,
      "learning_rate": 0.001,
      "loss": 2.4755,
      "step": 258024
    },
    {
      "epoch": 49.55,
      "learning_rate": 0.001,
      "loss": 2.4818,
      "step": 258036
    },
    {
      "epoch": 49.55,
      "learning_rate": 0.001,
      "loss": 2.4826,
      "step": 258048
    },
    {
      "epoch": 49.55,
      "learning_rate": 0.001,
      "loss": 2.4782,
      "step": 258060
    },
    {
      "epoch": 49.55,
      "learning_rate": 0.001,
      "loss": 2.4872,
      "step": 258072
    },
    {
      "epoch": 49.56,
      "learning_rate": 0.001,
      "loss": 2.4815,
      "step": 258084
    },
    {
      "epoch": 49.56,
      "learning_rate": 0.001,
      "loss": 2.487,
      "step": 258096
    },
    {
      "epoch": 49.56,
      "learning_rate": 0.001,
      "loss": 2.4803,
      "step": 258108
    },
    {
      "epoch": 49.56,
      "learning_rate": 0.001,
      "loss": 2.4834,
      "step": 258120
    },
    {
      "epoch": 49.56,
      "eval_ag_news_accuracy": 0.33271875,
      "eval_ag_news_bleu_score": 5.045128992550928,
      "eval_ag_news_bleu_score_sem": 0.1529287698263182,
      "eval_ag_news_emb_cos_sim": 0.8254880905151367,
      "eval_ag_news_emb_cos_sim_sem": 0.00673620106422812,
      "eval_ag_news_emb_top1_equal": 0.2890625,
      "eval_ag_news_emb_top1_equal_sem": 0.04022626667363519,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.440908670425415,
      "eval_ag_news_n_ngrams_match_1": 14.736,
      "eval_ag_news_n_ngrams_match_2": 3.296,
      "eval_ag_news_n_ngrams_match_3": 0.944,
      "eval_ag_news_num_pred_words": 47.0,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.215309714010427,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.3667432944430328,
      "eval_ag_news_runtime": 17.6279,
      "eval_ag_news_samples_per_second": 28.364,
      "eval_ag_news_steps_per_second": 0.057,
      "eval_ag_news_token_set_f1": 0.3644557274858799,
      "eval_ag_news_token_set_f1_sem": 0.004389778163579907,
      "eval_ag_news_token_set_precision": 0.3523713704177665,
      "eval_ag_news_token_set_recall": 0.39104628547716985,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 258125
    },
    {
      "epoch": 49.56,
      "eval_anthropic_toxic_prompts_accuracy": 0.1185625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.197570500488512,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12216970018893475,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6847611665725708,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.01041867263447916,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.142594814300537,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.346,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.008,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.75,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.774,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 23.163894950038728,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.216561625281712,
      "eval_anthropic_toxic_prompts_runtime": 19.4697,
      "eval_anthropic_toxic_prompts_samples_per_second": 25.681,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.051,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.36554749152382593,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006791511000486857,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4515452745725378,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.3347656964259751,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 258125
    },
    {
      "epoch": 49.56,
      "eval_arxiv_accuracy": 0.35440625,
      "eval_arxiv_bleu_score": 4.444683078472756,
      "eval_arxiv_bleu_score_sem": 0.13296104559803884,
      "eval_arxiv_emb_cos_sim": 0.7827038764953613,
      "eval_arxiv_emb_cos_sim_sem": 0.006676158927257304,
      "eval_arxiv_emb_top1_equal": 0.28125,
      "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.307297945022583,
      "eval_arxiv_n_ngrams_match_1": 15.374,
      "eval_arxiv_n_ngrams_match_2": 3.066,
      "eval_arxiv_n_ngrams_match_3": 0.7,
      "eval_arxiv_num_pred_words": 40.528,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.311229238118145,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.36843238097775527,
      "eval_arxiv_runtime": 21.8423,
      "eval_arxiv_samples_per_second": 22.891,
      "eval_arxiv_steps_per_second": 0.046,
      "eval_arxiv_token_set_f1": 0.3612567192170928,
      "eval_arxiv_token_set_f1_sem": 0.00428581343570969,
      "eval_arxiv_token_set_precision": 0.3151330291346013,
      "eval_arxiv_token_set_recall": 0.43962445118036747,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 258125
    },
    {
      "epoch": 49.56,
      "eval_python_code_alpaca_accuracy": 0.1643125,
      "eval_python_code_alpaca_bleu_score": 4.831610690207038,
      "eval_python_code_alpaca_bleu_score_sem": 0.1472396786805451,
      "eval_python_code_alpaca_emb_cos_sim": 0.7790913581848145,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.00708565770738915,
      "eval_python_code_alpaca_emb_top1_equal": 0.203125,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03570055125142555,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8174359798431396,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.196,
      "eval_python_code_alpaca_n_ngrams_match_2": 3.06,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.036,
      "eval_python_code_alpaca_num_pred_words": 43.82,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 16.733889588925756,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3475238271830434,
      "eval_python_code_alpaca_runtime": 15.5486,
      "eval_python_code_alpaca_samples_per_second": 32.157,
      "eval_python_code_alpaca_steps_per_second": 0.064,
      "eval_python_code_alpaca_token_set_f1": 0.4887089110297281,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005312195430781894,
      "eval_python_code_alpaca_token_set_precision": 0.5612977019349719,
      "eval_python_code_alpaca_token_set_recall": 0.4552000565625454,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 258125
    },
    {
      "epoch": 49.56,
      "eval_wikibio_accuracy": 0.3281875,
      "eval_wikibio_bleu_score": 5.823480451513563,
      "eval_wikibio_bleu_score_sem": 0.20514314672935266,
      "eval_wikibio_emb_cos_sim": 0.7469199895858765,
      "eval_wikibio_emb_cos_sim_sem": 0.00945570604951381,
      "eval_wikibio_emb_top1_equal": 0.203125,
      "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6378750801086426,
      "eval_wikibio_n_ngrams_match_1": 9.896,
      "eval_wikibio_n_ngrams_match_2": 3.324,
      "eval_wikibio_n_ngrams_match_3": 1.204,
      "eval_wikibio_num_pred_words": 35.426,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 38.010980560703274,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.35008114938468204,
      "eval_wikibio_runtime": 30.5772,
      "eval_wikibio_samples_per_second": 16.352,
      "eval_wikibio_steps_per_second": 0.033,
      "eval_wikibio_token_set_f1": 0.3117331201833853,
      "eval_wikibio_token_set_f1_sem": 0.0058107660718428435,
      "eval_wikibio_token_set_precision": 0.32022907286668956,
      "eval_wikibio_token_set_recall": 0.32288410213510443,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 258125
    },
    {
      "epoch": 49.56,
      "eval_nq_accuracy": 0.54128125,
      "eval_nq_bleu_score": 12.260514324446934,
      "eval_nq_bleu_score_sem": 0.49662803297191177,
      "eval_nq_emb_cos_sim": 0.8366309404373169,
      "eval_nq_emb_cos_sim_sem": 0.007653960544353982,
      "eval_nq_emb_top1_equal": 0.2265625,
      "eval_nq_emb_top1_equal_sem": 0.03714537682851538,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.09952974319458,
      "eval_nq_n_ngrams_match_1": 23.684,
      "eval_nq_n_ngrams_match_2": 8.744,
      "eval_nq_n_ngrams_match_3": 4.088,
      "eval_nq_num_pred_words": 49.208,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.162330618389909,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.45968197880089,
      "eval_nq_runtime": 18.5772,
      "eval_nq_samples_per_second": 26.915,
      "eval_nq_steps_per_second": 0.054,
      "eval_nq_token_set_f1": 0.4712345158338733,
      "eval_nq_token_set_f1_sem": 0.005061503177211867,
      "eval_nq_token_set_precision": 0.43193471309365156,
      "eval_nq_token_set_recall": 0.5267336983653503,
      "eval_nq_true_num_tokens": 64.0,
      "step": 258125
    },
    {
      "epoch": 49.56,
      "learning_rate": 0.001,
      "loss": 2.4889,
      "step": 258132
    },
    {
      "epoch": 49.57,
      "learning_rate": 0.001,
      "loss": 2.4841,
      "step": 258144
    },
    {
      "epoch": 49.57,
      "learning_rate": 0.001,
      "loss": 2.4868,
      "step": 258156
    },
    {
      "epoch": 49.57,
      "learning_rate": 0.001,
      "loss": 2.4856,
      "step": 258168
    },
    {
      "epoch": 49.57,
      "learning_rate": 0.001,
      "loss": 2.4803,
      "step": 258180
    },
    {
      "epoch": 49.58,
      "learning_rate": 0.001,
      "loss": 2.4734,
      "step": 258192
    },
    {
      "epoch": 49.58,
      "learning_rate": 0.001,
      "loss": 2.4789,
      "step": 258204
    },
    {
      "epoch": 49.58,
      "learning_rate": 0.001,
      "loss": 2.4872,
      "step": 258216
    },
    {
      "epoch": 49.58,
      "learning_rate": 0.001,
      "loss": 2.4856,
      "step": 258228
    },
    {
      "epoch": 49.59,
      "learning_rate": 0.001,
      "loss": 2.4905,
      "step": 258240
    },
    {
      "epoch": 49.59,
      "learning_rate": 0.001,
      "loss": 2.4752,
      "step": 258252
    },
    {
      "epoch": 49.59,
      "learning_rate": 0.001,
      "loss": 2.4804,
      "step": 258264
    },
    {
      "epoch": 49.59,
      "learning_rate": 0.001,
      "loss": 2.4729,
      "step": 258276
    },
    {
      "epoch": 49.59,
      "learning_rate": 0.001,
      "loss": 2.4853,
      "step": 258288
    },
    {
      "epoch": 49.6,
      "learning_rate": 0.001,
      "loss": 2.4837,
      "step": 258300
    },
    {
      "epoch": 49.6,
      "learning_rate": 0.001,
      "loss": 2.4838,
      "step": 258312
    },
    {
      "epoch": 49.6,
      "learning_rate": 0.001,
      "loss": 2.4857,
      "step": 258324
    },
    {
      "epoch": 49.6,
      "learning_rate": 0.001,
      "loss": 2.4845,
      "step": 258336
    },
    {
      "epoch": 49.61,
      "learning_rate": 0.001,
      "loss": 2.4777,
      "step": 258348
    },
    {
      "epoch": 49.61,
      "learning_rate": 0.001,
      "loss": 2.4761,
      "step": 258360
    },
    {
      "epoch": 49.61,
      "learning_rate": 0.001,
      "loss": 2.4885,
      "step": 258372
    },
    {
      "epoch": 49.61,
      "learning_rate": 0.001,
      "loss": 2.4848,
      "step": 258384
    },
    {
      "epoch": 49.62,
      "learning_rate": 0.001,
      "loss": 2.4886,
      "step": 258396
    },
    {
      "epoch": 49.62,
      "learning_rate": 0.001,
      "loss": 2.497,
      "step": 258408
    },
    {
      "epoch": 49.62,
      "learning_rate": 0.001,
      "loss": 2.4857,
      "step": 258420
    },
    {
      "epoch": 49.62,
      "learning_rate": 0.001,
      "loss": 2.4783,
      "step": 258432
    },
    {
      "epoch": 49.62,
      "learning_rate": 0.001,
      "loss": 2.4813,
      "step": 258444
    },
    {
      "epoch": 49.63,
      "learning_rate": 0.001,
      "loss": 2.4888,
      "step": 258456
    },
    {
      "epoch": 49.63,
      "learning_rate": 0.001,
      "loss": 2.4778,
      "step": 258468
    },
    {
      "epoch": 49.63,
      "learning_rate": 0.001,
      "loss": 2.4707,
      "step": 258480
    },
    {
      "epoch": 49.63,
      "learning_rate": 0.001,
      "loss": 2.4848,
      "step": 258492
    },
    {
      "epoch": 49.64,
      "learning_rate": 0.001,
      "loss": 2.4858,
      "step": 258504
    },
    {
      "epoch": 49.64,
      "learning_rate": 0.001,
      "loss": 2.4857,
      "step": 258516
    },
    {
      "epoch": 49.64,
      "learning_rate": 0.001,
      "loss": 2.4893,
      "step": 258528
    },
    {
      "epoch": 49.64,
      "learning_rate": 0.001,
      "loss": 2.4764,
      "step": 258540
    },
    {
      "epoch": 49.65,
      "learning_rate": 0.001,
      "loss": 2.4822,
      "step": 258552
    },
    {
      "epoch": 49.65,
      "learning_rate": 0.001,
      "loss": 2.4883,
      "step": 258564
    },
    {
      "epoch": 49.65,
      "learning_rate": 0.001,
      "loss": 2.4755,
      "step": 258576
    },
    {
      "epoch": 49.65,
      "learning_rate": 0.001,
      "loss": 2.4718,
      "step": 258588
    },
    {
      "epoch": 49.65,
      "learning_rate": 0.001,
      "loss": 2.4801,
      "step": 258600
    },
    {
      "epoch": 49.66,
      "learning_rate": 0.001,
      "loss": 2.4772,
      "step": 258612
    },
    {
      "epoch": 49.66,
      "learning_rate": 0.001,
      "loss": 2.4808,
      "step": 258624
    },
    {
      "epoch": 49.66,
      "learning_rate": 0.001,
      "loss": 2.491,
      "step": 258636
    },
    {
      "epoch": 49.66,
      "learning_rate": 0.001,
      "loss": 2.4972,
      "step": 258648
    },
    {
      "epoch": 49.67,
      "learning_rate": 0.001,
      "loss": 2.4771,
      "step": 258660
    },
    {
      "epoch": 49.67,
      "learning_rate": 0.001,
      "loss": 2.4763,
      "step": 258672
    },
    {
      "epoch": 49.67,
      "learning_rate": 0.001,
      "loss": 2.4794,
      "step": 258684
    },
    {
      "epoch": 49.67,
      "learning_rate": 0.001,
      "loss": 2.4825,
      "step": 258696
    },
    {
      "epoch": 49.68,
      "learning_rate": 0.001,
      "loss": 2.4757,
      "step": 258708
    },
    {
      "epoch": 49.68,
      "learning_rate": 0.001,
      "loss": 2.4932,
      "step": 258720
    },
    {
      "epoch": 49.68,
      "learning_rate": 0.001,
      "loss": 2.4871,
      "step": 258732
    },
    {
      "epoch": 49.68,
      "learning_rate": 0.001,
      "loss": 2.489,
      "step": 258744
    },
    {
      "epoch": 49.68,
      "eval_ag_news_accuracy": 0.3313125,
      "eval_ag_news_bleu_score": 5.026388187917658,
      "eval_ag_news_bleu_score_sem": 0.15984461876813788,
      "eval_ag_news_emb_cos_sim": 0.8151041269302368,
      "eval_ag_news_emb_cos_sim_sem": 0.007203880934551224,
      "eval_ag_news_emb_top1_equal": 0.2109375,
      "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.4551503658294678,
      "eval_ag_news_n_ngrams_match_1": 14.622,
      "eval_ag_news_n_ngrams_match_2": 3.35,
      "eval_ag_news_n_ngrams_match_3": 0.968,
      "eval_ag_news_num_pred_words": 47.042,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.663049365025124,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.36154966371497654,
      "eval_ag_news_runtime": 18.308,
      "eval_ag_news_samples_per_second": 27.31,
      "eval_ag_news_steps_per_second": 0.055,
      "eval_ag_news_token_set_f1": 0.3623605357325836,
      "eval_ag_news_token_set_f1_sem": 0.004426005081012672,
      "eval_ag_news_token_set_precision": 0.34952832898902786,
      "eval_ag_news_token_set_recall": 0.39070160009853905,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 258750
    },
    {
      "epoch": 49.68,
      "eval_anthropic_toxic_prompts_accuracy": 0.118625,
      "eval_anthropic_toxic_prompts_bleu_score": 3.3133452280034077,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12967709966693836,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6886178851127625,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008527596883900493,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.141610860824585,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.398,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.04,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.792,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.876,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 23.141113964638908,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21812681908562342,
      "eval_anthropic_toxic_prompts_runtime": 18.9633,
      "eval_anthropic_toxic_prompts_samples_per_second": 26.367,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.053,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3671589512071575,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006774948452226139,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.45554466526800835,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.33826798163114935,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 258750
    },
    {
      "epoch": 49.68,
      "eval_arxiv_accuracy": 0.35453125,
      "eval_arxiv_bleu_score": 4.425632613346473,
      "eval_arxiv_bleu_score_sem": 0.1313574698536943,
      "eval_arxiv_emb_cos_sim": 0.7889786958694458,
      "eval_arxiv_emb_cos_sim_sem": 0.006259094129273244,
      "eval_arxiv_emb_top1_equal": 0.2890625,
      "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.316903829574585,
      "eval_arxiv_n_ngrams_match_1": 15.586,
      "eval_arxiv_n_ngrams_match_2": 3.072,
      "eval_arxiv_n_ngrams_match_3": 0.668,
      "eval_arxiv_num_pred_words": 40.736,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.574841842249207,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.372673177400283,
      "eval_arxiv_runtime": 15.0144,
      "eval_arxiv_samples_per_second": 33.301,
      "eval_arxiv_steps_per_second": 0.067,
      "eval_arxiv_token_set_f1": 0.3663227317283526,
      "eval_arxiv_token_set_f1_sem": 0.004295528750382863,
      "eval_arxiv_token_set_precision": 0.3185767354422295,
      "eval_arxiv_token_set_recall": 0.4488085183419965,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 258750
    },
    {
      "epoch": 49.68,
      "eval_python_code_alpaca_accuracy": 0.1651875,
      "eval_python_code_alpaca_bleu_score": 4.7027181483117,
      "eval_python_code_alpaca_bleu_score_sem": 0.15003966165738836,
      "eval_python_code_alpaca_emb_cos_sim": 0.7729647159576416,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007068172692157701,
      "eval_python_code_alpaca_emb_top1_equal": 0.1640625,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.80320143699646,
      "eval_python_code_alpaca_n_ngrams_match_1": 10.058,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.944,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.004,
      "eval_python_code_alpaca_num_pred_words": 43.774,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 16.497377633893844,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.3432791431731794,
      "eval_python_code_alpaca_runtime": 19.2727,
      "eval_python_code_alpaca_samples_per_second": 25.943,
      "eval_python_code_alpaca_steps_per_second": 0.052,
      "eval_python_code_alpaca_token_set_f1": 0.4815326733307023,
      "eval_python_code_alpaca_token_set_f1_sem": 0.005268090701621846,
      "eval_python_code_alpaca_token_set_precision": 0.5475949658199415,
      "eval_python_code_alpaca_token_set_recall": 0.4494336779185127,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 258750
    },
    {
      "epoch": 49.68,
      "eval_wikibio_accuracy": 0.3321875,
      "eval_wikibio_bleu_score": 6.285614792164227,
      "eval_wikibio_bleu_score_sem": 0.21793175034967222,
      "eval_wikibio_emb_cos_sim": 0.7413852214813232,
      "eval_wikibio_emb_cos_sim_sem": 0.010085725702133258,
      "eval_wikibio_emb_top1_equal": 0.25,
      "eval_wikibio_emb_top1_equal_sem": 0.03842366440207048,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.5919065475463867,
      "eval_wikibio_n_ngrams_match_1": 10.02,
      "eval_wikibio_n_ngrams_match_2": 3.498,
      "eval_wikibio_n_ngrams_match_3": 1.346,
      "eval_wikibio_num_pred_words": 35.98,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 36.303223810948815,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3532924181443103,
      "eval_wikibio_runtime": 14.1132,
      "eval_wikibio_samples_per_second": 35.428,
      "eval_wikibio_steps_per_second": 0.071,
      "eval_wikibio_token_set_f1": 0.320521546391255,
      "eval_wikibio_token_set_f1_sem": 0.005959177613992733,
      "eval_wikibio_token_set_precision": 0.32700298192726723,
      "eval_wikibio_token_set_recall": 0.3307985375555874,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 258750
    },
    {
      "epoch": 49.68,
      "eval_nq_accuracy": 0.5409375,
      "eval_nq_bleu_score": 12.177007682206469,
      "eval_nq_bleu_score_sem": 0.4982037183398605,
      "eval_nq_emb_cos_sim": 0.8359046578407288,
      "eval_nq_emb_cos_sim_sem": 0.007212094301654784,
      "eval_nq_emb_top1_equal": 0.3125,
      "eval_nq_emb_top1_equal_sem": 0.041130074229814934,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1012213230133057,
      "eval_nq_n_ngrams_match_1": 23.598,
      "eval_nq_n_ngrams_match_2": 8.808,
      "eval_nq_n_ngrams_match_3": 4.068,
      "eval_nq_num_pred_words": 49.248,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.176149536744276,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.456941885633945,
      "eval_nq_runtime": 14.5098,
      "eval_nq_samples_per_second": 34.459,
      "eval_nq_steps_per_second": 0.069,
      "eval_nq_token_set_f1": 0.4701322877336389,
      "eval_nq_token_set_f1_sem": 0.004956829214956096,
      "eval_nq_token_set_precision": 0.4292128193509934,
      "eval_nq_token_set_recall": 0.5278939867536141,
      "eval_nq_true_num_tokens": 64.0,
      "step": 258750
    },
    {
      "epoch": 49.68,
      "learning_rate": 0.001,
      "loss": 2.483,
      "step": 258756
    },
    {
      "epoch": 49.69,
      "learning_rate": 0.001,
      "loss": 2.4855,
      "step": 258768
    },
    {
      "epoch": 49.69,
      "learning_rate": 0.001,
      "loss": 2.4879,
      "step": 258780
    },
    {
      "epoch": 49.69,
      "learning_rate": 0.001,
      "loss": 2.4785,
      "step": 258792
    },
    {
      "epoch": 49.69,
      "learning_rate": 0.001,
      "loss": 2.4774,
      "step": 258804
    },
    {
      "epoch": 49.7,
      "learning_rate": 0.001,
      "loss": 2.4881,
      "step": 258816
    },
    {
      "epoch": 49.7,
      "learning_rate": 0.001,
      "loss": 2.4975,
      "step": 258828
    },
    {
      "epoch": 49.7,
      "learning_rate": 0.001,
      "loss": 2.4766,
      "step": 258840
    },
    {
      "epoch": 49.7,
      "learning_rate": 0.001,
      "loss": 2.4799,
      "step": 258852
    },
    {
      "epoch": 49.71,
      "learning_rate": 0.001,
      "loss": 2.4936,
      "step": 258864
    },
    {
      "epoch": 49.71,
      "learning_rate": 0.001,
      "loss": 2.4829,
      "step": 258876
    },
    {
      "epoch": 49.71,
      "learning_rate": 0.001,
      "loss": 2.4837,
      "step": 258888
    },
    {
      "epoch": 49.71,
      "learning_rate": 0.001,
      "loss": 2.4774,
      "step": 258900
    },
    {
      "epoch": 49.71,
      "learning_rate": 0.001,
      "loss": 2.4945,
      "step": 258912
    },
    {
      "epoch": 49.72,
      "learning_rate": 0.001,
      "loss": 2.4826,
      "step": 258924
    },
    {
      "epoch": 49.72,
      "learning_rate": 0.001,
      "loss": 2.4836,
      "step": 258936
    },
    {
      "epoch": 49.72,
      "learning_rate": 0.001,
      "loss": 2.4822,
      "step": 258948
    },
    {
      "epoch": 49.72,
      "learning_rate": 0.001,
      "loss": 2.4884,
      "step": 258960
    },
    {
      "epoch": 49.73,
      "learning_rate": 0.001,
      "loss": 2.4926,
      "step": 258972
    },
    {
      "epoch": 49.73,
      "learning_rate": 0.001,
      "loss": 2.4903,
      "step": 258984
    },
    {
      "epoch": 49.73,
      "learning_rate": 0.001,
      "loss": 2.4931,
      "step": 258996
    },
    {
      "epoch": 49.73,
      "learning_rate": 0.001,
      "loss": 2.4871,
      "step": 259008
    },
    {
      "epoch": 49.74,
      "learning_rate": 0.001,
      "loss": 2.4821,
      "step": 259020
    },
    {
      "epoch": 49.74,
      "learning_rate": 0.001,
      "loss": 2.4979,
      "step": 259032
    },
    {
      "epoch": 49.74,
      "learning_rate": 0.001,
      "loss": 2.4824,
      "step": 259044
    },
    {
      "epoch": 49.74,
      "learning_rate": 0.001,
      "loss": 2.49,
      "step": 259056
    },
    {
      "epoch": 49.74,
      "learning_rate": 0.001,
      "loss": 2.4857,
      "step": 259068
    },
    {
      "epoch": 49.75,
      "learning_rate": 0.001,
      "loss": 2.4773,
      "step": 259080
    },
    {
      "epoch": 49.75,
      "learning_rate": 0.001,
      "loss": 2.4712,
      "step": 259092
    },
    {
      "epoch": 49.75,
      "learning_rate": 0.001,
      "loss": 2.4902,
      "step": 259104
    },
    {
      "epoch": 49.75,
      "learning_rate": 0.001,
      "loss": 2.4849,
      "step": 259116
    },
    {
      "epoch": 49.76,
      "learning_rate": 0.001,
      "loss": 2.4862,
      "step": 259128
    },
    {
      "epoch": 49.76,
      "learning_rate": 0.001,
      "loss": 2.4856,
      "step": 259140
    },
    {
      "epoch": 49.76,
      "learning_rate": 0.001,
      "loss": 2.4901,
      "step": 259152
    },
    {
      "epoch": 49.76,
      "learning_rate": 0.001,
      "loss": 2.4776,
      "step": 259164
    },
    {
      "epoch": 49.76,
      "learning_rate": 0.001,
      "loss": 2.4766,
      "step": 259176
    },
    {
      "epoch": 49.77,
      "learning_rate": 0.001,
      "loss": 2.4835,
      "step": 259188
    },
    {
      "epoch": 49.77,
      "learning_rate": 0.001,
      "loss": 2.4888,
      "step": 259200
    },
    {
      "epoch": 49.77,
      "learning_rate": 0.001,
      "loss": 2.4813,
      "step": 259212
    },
    {
      "epoch": 49.77,
      "learning_rate": 0.001,
      "loss": 2.4942,
      "step": 259224
    },
    {
      "epoch": 49.78,
      "learning_rate": 0.001,
      "loss": 2.4833,
      "step": 259236
    },
    {
      "epoch": 49.78,
      "learning_rate": 0.001,
      "loss": 2.488,
      "step": 259248
    },
    {
      "epoch": 49.78,
      "learning_rate": 0.001,
      "loss": 2.4824,
      "step": 259260
    },
    {
      "epoch": 49.78,
      "learning_rate": 0.001,
      "loss": 2.4831,
      "step": 259272
    },
    {
      "epoch": 49.79,
      "learning_rate": 0.001,
      "loss": 2.4793,
      "step": 259284
    },
    {
      "epoch": 49.79,
      "learning_rate": 0.001,
      "loss": 2.4885,
      "step": 259296
    },
    {
      "epoch": 49.79,
      "learning_rate": 0.001,
      "loss": 2.4951,
      "step": 259308
    },
    {
      "epoch": 49.79,
      "learning_rate": 0.001,
      "loss": 2.481,
      "step": 259320
    },
    {
      "epoch": 49.79,
      "learning_rate": 0.001,
      "loss": 2.4881,
      "step": 259332
    },
    {
      "epoch": 49.8,
      "learning_rate": 0.001,
      "loss": 2.4745,
      "step": 259344
    },
    {
      "epoch": 49.8,
      "learning_rate": 0.001,
      "loss": 2.4892,
      "step": 259356
    },
    {
      "epoch": 49.8,
      "learning_rate": 0.001,
      "loss": 2.4896,
      "step": 259368
    },
    {
      "epoch": 49.8,
      "eval_ag_news_accuracy": 0.33125,
      "eval_ag_news_bleu_score": 4.9935354565193615,
      "eval_ag_news_bleu_score_sem": 0.15510571057256645,
      "eval_ag_news_emb_cos_sim": 0.8198114633560181,
      "eval_ag_news_emb_cos_sim_sem": 0.006765341658706355,
      "eval_ag_news_emb_top1_equal": 0.234375,
      "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201,
      "eval_ag_news_exact_match": 0.0,
      "eval_ag_news_exact_match_sem": 0.0,
      "eval_ag_news_loss": 3.45509934425354,
      "eval_ag_news_n_ngrams_match_1": 14.532,
      "eval_ag_news_n_ngrams_match_2": 3.326,
      "eval_ag_news_n_ngrams_match_3": 0.92,
      "eval_ag_news_num_pred_words": 47.01,
      "eval_ag_news_num_true_words": 39.994,
      "eval_ag_news_perplexity": 31.661433907559786,
      "eval_ag_news_pred_num_tokens": 63.0,
      "eval_ag_news_rouge_score": 0.36059860284351575,
      "eval_ag_news_runtime": 14.4972,
      "eval_ag_news_samples_per_second": 34.489,
      "eval_ag_news_steps_per_second": 0.069,
      "eval_ag_news_token_set_f1": 0.36093227554643503,
      "eval_ag_news_token_set_f1_sem": 0.004427950073237712,
      "eval_ag_news_token_set_precision": 0.34603615110360586,
      "eval_ag_news_token_set_recall": 0.3929845565245582,
      "eval_ag_news_true_num_tokens": 56.09375,
      "step": 259375
    },
    {
      "epoch": 49.8,
      "eval_anthropic_toxic_prompts_accuracy": 0.1176875,
      "eval_anthropic_toxic_prompts_bleu_score": 3.253135907178858,
      "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11851330226531899,
      "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6837367415428162,
      "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00865816518640921,
      "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125,
      "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397,
      "eval_anthropic_toxic_prompts_exact_match": 0.0,
      "eval_anthropic_toxic_prompts_exact_match_sem": 0.0,
      "eval_anthropic_toxic_prompts_loss": 3.197890520095825,
      "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.346,
      "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.998,
      "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.782,
      "eval_anthropic_toxic_prompts_num_pred_words": 47.42,
      "eval_anthropic_toxic_prompts_num_true_words": 14.584,
      "eval_anthropic_toxic_prompts_perplexity": 24.480833863019242,
      "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0,
      "eval_anthropic_toxic_prompts_rouge_score": 0.21850487522570472,
      "eval_anthropic_toxic_prompts_runtime": 14.4457,
      "eval_anthropic_toxic_prompts_samples_per_second": 34.612,
      "eval_anthropic_toxic_prompts_steps_per_second": 0.069,
      "eval_anthropic_toxic_prompts_token_set_f1": 0.3598482503018601,
      "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006412825510789877,
      "eval_anthropic_toxic_prompts_token_set_precision": 0.4436025870010036,
      "eval_anthropic_toxic_prompts_token_set_recall": 0.32932732711768936,
      "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625,
      "step": 259375
    },
    {
      "epoch": 49.8,
      "eval_arxiv_accuracy": 0.354375,
      "eval_arxiv_bleu_score": 4.40920830892549,
      "eval_arxiv_bleu_score_sem": 0.13379961042053165,
      "eval_arxiv_emb_cos_sim": 0.7760727405548096,
      "eval_arxiv_emb_cos_sim_sem": 0.006924069324376414,
      "eval_arxiv_emb_top1_equal": 0.3046875,
      "eval_arxiv_emb_top1_equal_sem": 0.04084279867618665,
      "eval_arxiv_exact_match": 0.0,
      "eval_arxiv_exact_match_sem": 0.0,
      "eval_arxiv_loss": 3.3212099075317383,
      "eval_arxiv_n_ngrams_match_1": 15.464,
      "eval_arxiv_n_ngrams_match_2": 3.02,
      "eval_arxiv_n_ngrams_match_3": 0.674,
      "eval_arxiv_num_pred_words": 40.57,
      "eval_arxiv_num_true_words": 43.592,
      "eval_arxiv_perplexity": 27.693837278820542,
      "eval_arxiv_pred_num_tokens": 63.0,
      "eval_arxiv_rouge_score": 0.3711305269417794,
      "eval_arxiv_runtime": 14.6293,
      "eval_arxiv_samples_per_second": 34.178,
      "eval_arxiv_steps_per_second": 0.068,
      "eval_arxiv_token_set_f1": 0.3646692453807495,
      "eval_arxiv_token_set_f1_sem": 0.004317602221869201,
      "eval_arxiv_token_set_precision": 0.3161505585548265,
      "eval_arxiv_token_set_recall": 0.44766296422486834,
      "eval_arxiv_true_num_tokens": 64.0,
      "step": 259375
    },
    {
      "epoch": 49.8,
      "eval_python_code_alpaca_accuracy": 0.164,
      "eval_python_code_alpaca_bleu_score": 4.690671898848345,
      "eval_python_code_alpaca_bleu_score_sem": 0.14029483247587066,
      "eval_python_code_alpaca_emb_cos_sim": 0.7661554217338562,
      "eval_python_code_alpaca_emb_cos_sim_sem": 0.007425235524385931,
      "eval_python_code_alpaca_emb_top1_equal": 0.171875,
      "eval_python_code_alpaca_emb_top1_equal_sem": 0.03347745514062371,
      "eval_python_code_alpaca_exact_match": 0.0,
      "eval_python_code_alpaca_exact_match_sem": 0.0,
      "eval_python_code_alpaca_loss": 2.8167998790740967,
      "eval_python_code_alpaca_n_ngrams_match_1": 9.954,
      "eval_python_code_alpaca_n_ngrams_match_2": 2.926,
      "eval_python_code_alpaca_n_ngrams_match_3": 1.0,
      "eval_python_code_alpaca_num_pred_words": 43.148,
      "eval_python_code_alpaca_num_true_words": 18.128,
      "eval_python_code_alpaca_perplexity": 16.723248533639687,
      "eval_python_code_alpaca_pred_num_tokens": 63.0,
      "eval_python_code_alpaca_rouge_score": 0.34367616277343627,
      "eval_python_code_alpaca_runtime": 13.8569,
      "eval_python_code_alpaca_samples_per_second": 36.083,
      "eval_python_code_alpaca_steps_per_second": 0.072,
      "eval_python_code_alpaca_token_set_f1": 0.47920856608859286,
      "eval_python_code_alpaca_token_set_f1_sem": 0.00520049240427338,
      "eval_python_code_alpaca_token_set_precision": 0.545327103607184,
      "eval_python_code_alpaca_token_set_recall": 0.44997447735357915,
      "eval_python_code_alpaca_true_num_tokens": 23.359375,
      "step": 259375
    },
    {
      "epoch": 49.8,
      "eval_wikibio_accuracy": 0.33165625,
      "eval_wikibio_bleu_score": 6.246138423663437,
      "eval_wikibio_bleu_score_sem": 0.22274419811649707,
      "eval_wikibio_emb_cos_sim": 0.7574476599693298,
      "eval_wikibio_emb_cos_sim_sem": 0.00904951184572302,
      "eval_wikibio_emb_top1_equal": 0.2265625,
      "eval_wikibio_emb_top1_equal_sem": 0.03714537682851538,
      "eval_wikibio_exact_match": 0.0,
      "eval_wikibio_exact_match_sem": 0.0,
      "eval_wikibio_loss": 3.6061134338378906,
      "eval_wikibio_n_ngrams_match_1": 10.386,
      "eval_wikibio_n_ngrams_match_2": 3.528,
      "eval_wikibio_n_ngrams_match_3": 1.322,
      "eval_wikibio_num_pred_words": 36.602,
      "eval_wikibio_num_true_words": 29.41,
      "eval_wikibio_perplexity": 36.82266063689858,
      "eval_wikibio_pred_num_tokens": 63.0,
      "eval_wikibio_rouge_score": 0.3647958944360802,
      "eval_wikibio_runtime": 13.4607,
      "eval_wikibio_samples_per_second": 37.145,
      "eval_wikibio_steps_per_second": 0.074,
      "eval_wikibio_token_set_f1": 0.3278060448010174,
      "eval_wikibio_token_set_f1_sem": 0.005511555343504312,
      "eval_wikibio_token_set_precision": 0.33910962220973623,
      "eval_wikibio_token_set_recall": 0.3326095488671017,
      "eval_wikibio_true_num_tokens": 61.1328125,
      "step": 259375
    },
    {
      "epoch": 49.8,
      "eval_nq_accuracy": 0.54090625,
      "eval_nq_bleu_score": 12.494611504861831,
      "eval_nq_bleu_score_sem": 0.5066426279855258,
      "eval_nq_emb_cos_sim": 0.8411279916763306,
      "eval_nq_emb_cos_sim_sem": 0.006997197399265355,
      "eval_nq_emb_top1_equal": 0.328125,
      "eval_nq_emb_top1_equal_sem": 0.041664103776406315,
      "eval_nq_exact_match": 0.0,
      "eval_nq_exact_match_sem": 0.0,
      "eval_nq_loss": 2.1031692028045654,
      "eval_nq_n_ngrams_match_1": 23.658,
      "eval_nq_n_ngrams_match_2": 8.892,
      "eval_nq_n_ngrams_match_3": 4.2,
      "eval_nq_num_pred_words": 49.114,
      "eval_nq_num_true_words": 49.824,
      "eval_nq_perplexity": 8.192091214392544,
      "eval_nq_pred_num_tokens": 63.0,
      "eval_nq_rouge_score": 0.4595867352122004,
      "eval_nq_runtime": 14.3062,
      "eval_nq_samples_per_second": 34.95,
      "eval_nq_steps_per_second": 0.07,
      "eval_nq_token_set_f1": 0.47280429920137734,
      "eval_nq_token_set_f1_sem": 0.004972447546981815,
      "eval_nq_token_set_precision": 0.431889686886597,
      "eval_nq_token_set_recall": 0.5302696249153123,
      "eval_nq_true_num_tokens": 64.0,
      "step": 259375
    },
    {
      "epoch": 49.8,
      "learning_rate": 0.001,
      "loss": 2.4856,
      "step": 259380
    },
    {
      "epoch": 49.81,
      "learning_rate": 0.001,
      "loss": 2.4897,
      "step": 259392
    },
    {
      "epoch": 49.81,
      "learning_rate": 0.001,
      "loss": 2.4851,
      "step": 259404
    },
    {
      "epoch": 49.81,
      "learning_rate": 0.001,
      "loss": 2.4956,
      "step": 259416
    },
    {
      "epoch": 49.81,
      "learning_rate": 0.001,
      "loss": 2.4822,
      "step": 259428
    },
    {
      "epoch": 49.82,
      "learning_rate": 0.001,
      "loss": 2.4881,
      "step": 259440
    },
    {
      "epoch": 49.82,
      "learning_rate": 0.001,
      "loss": 2.4893,
      "step": 259452
    },
    {
      "epoch": 49.82,
      "learning_rate": 0.001,
      "loss": 2.4687,
      "step": 259464
    },
    {
      "epoch": 49.82,
      "learning_rate": 0.001,
      "loss": 2.4813,
      "step": 259476
    },
    {
      "epoch": 49.82,
      "learning_rate": 0.001,
      "loss": 2.4905,
      "step": 259488
    },
    {
      "epoch": 49.83,
      "learning_rate": 0.001,
      "loss": 2.484,
      "step": 259500
    },
    {
      "epoch": 49.83,
      "learning_rate": 0.001,
      "loss": 2.4895,
      "step": 259512
    },
    {
      "epoch": 49.83,
      "learning_rate": 0.001,
      "loss": 2.4789,
      "step": 259524
    },
    {
      "epoch": 49.83,
      "learning_rate": 0.001,
      "loss": 2.4804,
      "step": 259536
    },
    {
      "epoch": 49.84,
      "learning_rate": 0.001,
      "loss": 2.4929,
      "step": 259548
    },
    {
      "epoch": 49.84,
      "learning_rate": 0.001,
      "loss": 2.4837,
      "step": 259560
    },
    {
      "epoch": 49.84,
      "learning_rate": 0.001,
      "loss": 2.4848,
      "step": 259572
    },
    {
      "epoch": 49.84,
      "learning_rate": 0.001,
      "loss": 2.4914,
      "step": 259584
    },
    {
      "epoch": 49.85,
      "learning_rate": 0.001,
      "loss": 2.4845,
      "step": 259596
    },
    {
      "epoch": 49.85,
      "learning_rate": 0.001,
      "loss": 2.4892,
      "step": 259608
    },
    {
      "epoch": 49.85,
      "learning_rate": 0.001,
      "loss": 2.4799,
      "step": 259620
    },
    {
      "epoch": 49.85,
      "learning_rate": 0.001,
      "loss": 2.486,
      "step": 259632
    },
    {
      "epoch": 49.85,
      "learning_rate": 0.001,
      "loss": 2.4845,
      "step": 259644
    },
    {
      "epoch": 49.86,
      "learning_rate": 0.001,
      "loss": 2.4896,
      "step": 259656
    },
    {
      "epoch": 49.86,
      "learning_rate": 0.001,
      "loss": 2.4804,
      "step": 259668
    },
    {
      "epoch": 49.86,
      "learning_rate": 0.001,
      "loss": 2.4968,
      "step": 259680
    },
    {
      "epoch": 49.86,
      "learning_rate": 0.001,
      "loss": 2.4859,
      "step": 259692
    },
    {
      "epoch": 49.87,
      "learning_rate": 0.001,
      "loss": 2.4805,
      "step": 259704
    },
    {
      "epoch": 49.87,
      "learning_rate": 0.001,
      "loss": 2.4899,
      "step": 259716
    },
    {
      "epoch": 49.87,
      "learning_rate": 0.001,
      "loss": 2.4795,
      "step": 259728
    },
    {
      "epoch": 49.87,
      "learning_rate": 0.001,
      "loss": 2.4885,
      "step": 259740
    },
    {
      "epoch": 49.88,
      "learning_rate": 0.001,
      "loss": 2.4909,
      "step": 259752
    },
    {
      "epoch": 49.88,
      "learning_rate": 0.001,
      "loss": 2.4825,
      "step": 259764
    },
    {
      "epoch": 49.88,
      "learning_rate": 0.001,
      "loss": 2.482,
      "step": 259776
    },
    {
      "epoch": 49.88,
      "learning_rate": 0.001,
      "loss": 2.4869,
      "step": 259788
    },
    {
      "epoch": 49.88,
      "learning_rate": 0.001,
      "loss": 2.4828,
      "step": 259800
    },
    {
      "epoch": 49.89,
      "learning_rate": 0.001,
      "loss": 2.4728,
      "step": 259812
    },
    {
      "epoch": 49.89,
      "learning_rate": 0.001,
      "loss": 2.4868,
      "step": 259824
    },
    {
      "epoch": 49.89,
      "learning_rate": 0.001,
      "loss": 2.4823,
      "step": 259836
    },
    {
      "epoch": 49.89,
      "learning_rate": 0.001,
      "loss": 2.4937,
      "step": 259848
    },
    {
      "epoch": 49.9,
      "learning_rate": 0.001,
      "loss": 2.4793,
      "step": 259860
    },
    {
      "epoch": 49.9,
      "learning_rate": 0.001,
      "loss": 2.4939,
      "step": 259872
    },
    {
      "epoch": 49.9,
      "learning_rate": 0.001,
      "loss": 2.4843,
      "step": 259884
    },
    {
      "epoch": 49.9,
      "learning_rate": 0.001,
      "loss": 2.4732,
      "step": 259896
    },
    {
      "epoch": 49.91,
      "learning_rate": 0.001,
      "loss": 2.4932,
      "step": 259908
    },
    {
      "epoch": 49.91,
      "learning_rate": 0.001,
      "loss": 2.4767,
      "step": 259920
    },
    {
      "epoch": 49.91,
      "learning_rate": 0.001,
      "loss": 2.4781,
      "step": 259932
    },
    {
      "epoch": 49.91,
      "learning_rate": 0.001,
      "loss": 2.4872,
      "step": 259944
    },
    {
      "epoch": 49.91,
      "learning_rate": 0.001,
      "loss": 2.4894,
      "step": 259956
    }
  ],
  "logging_steps": 12,
  "max_steps": 494760,
  "num_train_epochs": 95,
  "save_steps": 62,
  "total_flos": 1.3889668107139744e+19,
  "trial_name": null,
  "trial_params": null
}