{ "best_metric": 1.7094721794128418, "best_model_checkpoint": "/home/nlp/matan_avitan/git/vec2text/saves/train_on_bios/output-checkpoin-259966/checkpoint-2232", "epoch": 9.832599118942731, "eval_steps": 31, "global_step": 2232, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 0.001, "loss": 2.7429, "step": 12 }, { "epoch": 0.11, "learning_rate": 0.001, "loss": 2.5522, "step": 24 }, { "epoch": 0.14, "eval_ag_news_accuracy": 0.31696875, "eval_ag_news_bleu_score": 4.693673734147203, "eval_ag_news_bleu_score_sem": 0.15842407029281247, "eval_ag_news_emb_cos_sim": 0.8161767721176147, "eval_ag_news_emb_cos_sim_sem": 0.004149520028885719, "eval_ag_news_emb_top1_equal": 0.2919999957084656, "eval_ag_news_emb_top1_equal_sem": 0.020354375386612028, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4767088890075684, "eval_ag_news_n_ngrams_match_1": 13.35, "eval_ag_news_n_ngrams_match_2": 2.99, "eval_ag_news_n_ngrams_match_3": 0.828, "eval_ag_news_num_pred_words": 40.846, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.35306912734109, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3556909202875407, "eval_ag_news_runtime": 10.7445, "eval_ag_news_samples_per_second": 46.536, "eval_ag_news_steps_per_second": 0.093, "eval_ag_news_token_set_f1": 0.3507447087858559, "eval_ag_news_token_set_f1_sem": 0.00481906834298769, "eval_ag_news_token_set_precision": 0.32442194612796943, "eval_ag_news_token_set_recall": 0.40673198714311515, "eval_ag_news_true_num_tokens": 56.09375, "step": 31 }, { "epoch": 0.14, "eval_anthropic_toxic_prompts_accuracy": 0.11178125, "eval_anthropic_toxic_prompts_bleu_score": 3.5514153333761613, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1376082181977126, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6868857741355896, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004651350024453609, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.12800000607967377, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.014955912783191019, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.0755512714385986, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.14, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.952, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.734, "eval_anthropic_toxic_prompts_num_pred_words": 41.272, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 21.661820163365203, "eval_anthropic_toxic_prompts_pred_num_tokens": 62.9296875, "eval_anthropic_toxic_prompts_rouge_score": 0.24930881983630065, "eval_anthropic_toxic_prompts_runtime": 6.6738, "eval_anthropic_toxic_prompts_samples_per_second": 74.92, "eval_anthropic_toxic_prompts_steps_per_second": 0.15, "eval_anthropic_toxic_prompts_token_set_f1": 0.34382614982619525, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006466915071716929, "eval_anthropic_toxic_prompts_token_set_precision": 0.4350558077494649, "eval_anthropic_toxic_prompts_token_set_recall": 0.3121233769279072, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 31 }, { "epoch": 0.14, "eval_arxiv_accuracy": 0.40021875, "eval_arxiv_bleu_score": 4.158222701612027, "eval_arxiv_bleu_score_sem": 0.13321196613584635, "eval_arxiv_emb_cos_sim": 0.7486079335212708, "eval_arxiv_emb_cos_sim_sem": 0.006440433274933822, "eval_arxiv_emb_top1_equal": 0.2460000067949295, "eval_arxiv_emb_top1_equal_sem": 0.01927982107793324, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.041785717010498, "eval_arxiv_n_ngrams_match_1": 14.34, "eval_arxiv_n_ngrams_match_2": 2.772, "eval_arxiv_n_ngrams_match_3": 0.628, "eval_arxiv_num_pred_words": 36.496, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 20.94260743456068, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3550765439293301, "eval_arxiv_runtime": 7.8486, "eval_arxiv_samples_per_second": 63.705, "eval_arxiv_steps_per_second": 0.127, "eval_arxiv_token_set_f1": 0.35341010605025447, "eval_arxiv_token_set_f1_sem": 0.005413458965286179, "eval_arxiv_token_set_precision": 0.3008597523478743, "eval_arxiv_token_set_recall": 0.46438469321665715, "eval_arxiv_true_num_tokens": 64.0, "step": 31 }, { "epoch": 0.14, "eval_python_code_alpaca_accuracy": 0.1539375, "eval_python_code_alpaca_bleu_score": 4.375912111669264, "eval_python_code_alpaca_bleu_score_sem": 0.14286387437922407, "eval_python_code_alpaca_emb_cos_sim": 0.733892560005188, "eval_python_code_alpaca_emb_cos_sim_sem": 0.006095502538702667, "eval_python_code_alpaca_emb_top1_equal": 0.16200000047683716, "eval_python_code_alpaca_emb_top1_equal_sem": 0.016494123019099097, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.7376487255096436, "eval_python_code_alpaca_n_ngrams_match_1": 8.704, "eval_python_code_alpaca_n_ngrams_match_2": 2.286, "eval_python_code_alpaca_n_ngrams_match_3": 0.656, "eval_python_code_alpaca_num_pred_words": 35.038, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 15.450613719652043, "eval_python_code_alpaca_pred_num_tokens": 62.984375, "eval_python_code_alpaca_rouge_score": 0.3670313706522858, "eval_python_code_alpaca_runtime": 6.6362, "eval_python_code_alpaca_samples_per_second": 75.344, "eval_python_code_alpaca_steps_per_second": 0.151, "eval_python_code_alpaca_token_set_f1": 0.43887632040645924, "eval_python_code_alpaca_token_set_f1_sem": 0.00656663675900497, "eval_python_code_alpaca_token_set_precision": 0.4829332550075322, "eval_python_code_alpaca_token_set_recall": 0.4324919006662028, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 31 }, { "epoch": 0.14, "eval_wikibio_accuracy": 0.35665625, "eval_wikibio_bleu_score": 5.6953724260148775, "eval_wikibio_bleu_score_sem": 0.2116118501994311, "eval_wikibio_emb_cos_sim": 0.7341107130050659, "eval_wikibio_emb_cos_sim_sem": 0.005868252744375891, "eval_wikibio_emb_top1_equal": 0.18400000035762787, "eval_wikibio_emb_top1_equal_sem": 0.017346172969186033, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.472095489501953, "eval_wikibio_n_ngrams_match_1": 9.376, "eval_wikibio_n_ngrams_match_2": 3.148, "eval_wikibio_n_ngrams_match_3": 1.21, "eval_wikibio_num_pred_words": 34.388, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 32.20415525792636, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3396970245008327, "eval_wikibio_runtime": 7.8165, "eval_wikibio_samples_per_second": 63.967, "eval_wikibio_steps_per_second": 0.128, "eval_wikibio_token_set_f1": 0.30196565654624985, "eval_wikibio_token_set_f1_sem": 0.006037672926570891, "eval_wikibio_token_set_precision": 0.30395106669379673, "eval_wikibio_token_set_recall": 0.3225063022698392, "eval_wikibio_true_num_tokens": 61.1328125, "step": 31 }, { "epoch": 0.14, "eval_bias-bios_accuracy": 0.43903125, "eval_bias-bios_bleu_score": 9.97709652864065, "eval_bias-bios_bleu_score_sem": 0.3171072933989879, "eval_bias-bios_emb_cos_sim": 0.846991240978241, "eval_bias-bios_emb_cos_sim_sem": 0.0033278412673423867, "eval_bias-bios_emb_top1_equal": 0.257999986410141, "eval_bias-bios_emb_top1_equal_sem": 0.019586710359463095, "eval_bias-bios_exact_match": 0.0, "eval_bias-bios_exact_match_sem": 0.0, "eval_bias-bios_loss": 2.272756814956665, "eval_bias-bios_n_ngrams_match_1": 19.672, "eval_bias-bios_n_ngrams_match_2": 6.802, "eval_bias-bios_n_ngrams_match_3": 2.788, "eval_bias-bios_num_pred_words": 46.306, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 9.706121946694221, "eval_bias-bios_pred_num_tokens": 62.9921875, "eval_bias-bios_rouge_score": 0.45800074150132053, "eval_bias-bios_runtime": 7.6527, "eval_bias-bios_samples_per_second": 65.336, "eval_bias-bios_steps_per_second": 0.131, "eval_bias-bios_token_set_f1": 0.48702588662994584, "eval_bias-bios_token_set_f1_sem": 0.004756417593574572, "eval_bias-bios_token_set_precision": 0.4642910225543334, "eval_bias-bios_token_set_recall": 0.5280679233880424, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 31 }, { "epoch": 0.16, "learning_rate": 0.001, "loss": 2.5171, "step": 36 }, { "epoch": 0.21, "learning_rate": 0.001, "loss": 2.2642, "step": 48 }, { "epoch": 0.26, "learning_rate": 0.001, "loss": 2.8679, "step": 60 }, { "epoch": 0.27, "eval_ag_news_accuracy": 0.304875, "eval_ag_news_bleu_score": 5.060497511269132, "eval_ag_news_bleu_score_sem": 0.1588487365663699, "eval_ag_news_emb_cos_sim": 0.8225099444389343, "eval_ag_news_emb_cos_sim_sem": 0.0037236120070751506, "eval_ag_news_emb_top1_equal": 0.31200000643730164, "eval_ag_news_emb_top1_equal_sem": 0.020740595612058172, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4802517890930176, "eval_ag_news_n_ngrams_match_1": 13.956, "eval_ag_news_n_ngrams_match_2": 3.176, "eval_ag_news_n_ngrams_match_3": 0.898, "eval_ag_news_num_pred_words": 42.868, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.467896108867876, "eval_ag_news_pred_num_tokens": 62.4140625, "eval_ag_news_rouge_score": 0.3692136347481344, "eval_ag_news_runtime": 7.1641, "eval_ag_news_samples_per_second": 69.793, "eval_ag_news_steps_per_second": 0.14, "eval_ag_news_token_set_f1": 0.36540067573157586, "eval_ag_news_token_set_f1_sem": 0.004698136906103602, "eval_ag_news_token_set_precision": 0.3389609317064429, "eval_ag_news_token_set_recall": 0.42054779375425144, "eval_ag_news_true_num_tokens": 56.09375, "step": 62 }, { "epoch": 0.27, "eval_anthropic_toxic_prompts_accuracy": 0.10921875, "eval_anthropic_toxic_prompts_bleu_score": 3.514336801925988, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1289825795751676, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.7021579742431641, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004256657187024065, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1340000033378601, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015249692640233114, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 2.930422306060791, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.578, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.156, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.834, "eval_anthropic_toxic_prompts_num_pred_words": 45.428, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 18.735540958736387, "eval_anthropic_toxic_prompts_pred_num_tokens": 62.0078125, "eval_anthropic_toxic_prompts_rouge_score": 0.23798547473021991, "eval_anthropic_toxic_prompts_runtime": 6.8257, "eval_anthropic_toxic_prompts_samples_per_second": 73.252, "eval_anthropic_toxic_prompts_steps_per_second": 0.147, "eval_anthropic_toxic_prompts_token_set_f1": 0.3503551059292943, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0060456741682062545, "eval_anthropic_toxic_prompts_token_set_precision": 0.4663104252542219, "eval_anthropic_toxic_prompts_token_set_recall": 0.3055933594976192, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 62 }, { "epoch": 0.27, "eval_arxiv_accuracy": 0.40415625, "eval_arxiv_bleu_score": 4.32018626719132, "eval_arxiv_bleu_score_sem": 0.12063743691178247, "eval_arxiv_emb_cos_sim": 0.7646932601928711, "eval_arxiv_emb_cos_sim_sem": 0.004794754346794133, "eval_arxiv_emb_top1_equal": 0.23199999332427979, "eval_arxiv_emb_top1_equal_sem": 0.018896193149216322, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.0367958545684814, "eval_arxiv_n_ngrams_match_1": 14.982, "eval_arxiv_n_ngrams_match_2": 2.936, "eval_arxiv_n_ngrams_match_3": 0.628, "eval_arxiv_num_pred_words": 39.184, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 20.83836699330422, "eval_arxiv_pred_num_tokens": 61.8125, "eval_arxiv_rouge_score": 0.36987343880561163, "eval_arxiv_runtime": 7.1902, "eval_arxiv_samples_per_second": 69.539, "eval_arxiv_steps_per_second": 0.139, "eval_arxiv_token_set_f1": 0.37145593312975383, "eval_arxiv_token_set_f1_sem": 0.004854050823526144, "eval_arxiv_token_set_precision": 0.31398716496476536, "eval_arxiv_token_set_recall": 0.4893776089225949, "eval_arxiv_true_num_tokens": 64.0, "step": 62 }, { "epoch": 0.27, "eval_python_code_alpaca_accuracy": 0.153875, "eval_python_code_alpaca_bleu_score": 5.218403720836803, "eval_python_code_alpaca_bleu_score_sem": 0.16420659278980215, "eval_python_code_alpaca_emb_cos_sim": 0.7770482301712036, "eval_python_code_alpaca_emb_cos_sim_sem": 0.004089995498141099, "eval_python_code_alpaca_emb_top1_equal": 0.20000000298023224, "eval_python_code_alpaca_emb_top1_equal_sem": 0.017906459589198134, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.5997347831726074, "eval_python_code_alpaca_n_ngrams_match_1": 10.17, "eval_python_code_alpaca_n_ngrams_match_2": 3.094, "eval_python_code_alpaca_n_ngrams_match_3": 1.046, "eval_python_code_alpaca_num_pred_words": 39.394, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 13.460167698592779, "eval_python_code_alpaca_pred_num_tokens": 61.890625, "eval_python_code_alpaca_rouge_score": 0.3838419247498926, "eval_python_code_alpaca_runtime": 6.9264, "eval_python_code_alpaca_samples_per_second": 72.187, "eval_python_code_alpaca_steps_per_second": 0.144, "eval_python_code_alpaca_token_set_f1": 0.4857091392562985, "eval_python_code_alpaca_token_set_f1_sem": 0.005462245820208245, "eval_python_code_alpaca_token_set_precision": 0.5659594722840119, "eval_python_code_alpaca_token_set_recall": 0.44529530817081914, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 62 }, { "epoch": 0.27, "eval_wikibio_accuracy": 0.34871875, "eval_wikibio_bleu_score": 5.131162518306524, "eval_wikibio_bleu_score_sem": 0.20245051705727532, "eval_wikibio_emb_cos_sim": 0.7161974310874939, "eval_wikibio_emb_cos_sim_sem": 0.006247003956520955, "eval_wikibio_emb_top1_equal": 0.1420000046491623, "eval_wikibio_emb_top1_equal_sem": 0.015625630310786714, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.4194374084472656, "eval_wikibio_n_ngrams_match_1": 8.386, "eval_wikibio_n_ngrams_match_2": 2.642, "eval_wikibio_n_ngrams_match_3": 0.982, "eval_wikibio_num_pred_words": 31.434, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 30.55222176323163, "eval_wikibio_pred_num_tokens": 62.9140625, "eval_wikibio_rouge_score": 0.31856747306238054, "eval_wikibio_runtime": 6.9661, "eval_wikibio_samples_per_second": 71.777, "eval_wikibio_steps_per_second": 0.144, "eval_wikibio_token_set_f1": 0.28387075920802823, "eval_wikibio_token_set_f1_sem": 0.006648036834838477, "eval_wikibio_token_set_precision": 0.27314929855068715, "eval_wikibio_token_set_recall": 0.32361977068815584, "eval_wikibio_true_num_tokens": 61.1328125, "step": 62 }, { "epoch": 0.27, "eval_bias-bios_accuracy": 0.44571875, "eval_bias-bios_bleu_score": 10.748684882655063, "eval_bias-bios_bleu_score_sem": 0.33633789418984067, "eval_bias-bios_emb_cos_sim": 0.8556249141693115, "eval_bias-bios_emb_cos_sim_sem": 0.0028325299984082552, "eval_bias-bios_emb_top1_equal": 0.30799999833106995, "eval_bias-bios_emb_top1_equal_sem": 0.020667033028164562, "eval_bias-bios_exact_match": 0.0, "eval_bias-bios_exact_match_sem": 0.0, "eval_bias-bios_loss": 2.164238214492798, "eval_bias-bios_n_ngrams_match_1": 20.344, "eval_bias-bios_n_ngrams_match_2": 7.47, "eval_bias-bios_n_ngrams_match_3": 3.198, "eval_bias-bios_num_pred_words": 48.886, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 8.707965787384495, "eval_bias-bios_pred_num_tokens": 62.3828125, "eval_bias-bios_rouge_score": 0.46276773387866293, "eval_bias-bios_runtime": 7.4252, "eval_bias-bios_samples_per_second": 67.338, "eval_bias-bios_steps_per_second": 0.135, "eval_bias-bios_token_set_f1": 0.5012030916495526, "eval_bias-bios_token_set_f1_sem": 0.004882132968376436, "eval_bias-bios_token_set_precision": 0.4824323129514482, "eval_bias-bios_token_set_recall": 0.535152442471092, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 62 }, { "epoch": 0.32, "learning_rate": 0.001, "loss": 2.4842, "step": 72 }, { "epoch": 0.37, "learning_rate": 0.001, "loss": 2.3402, "step": 84 }, { "epoch": 0.41, "eval_ag_news_accuracy": 0.31328125, "eval_ag_news_bleu_score": 4.516750644571753, "eval_ag_news_bleu_score_sem": 0.17149583918765143, "eval_ag_news_emb_cos_sim": 0.793439507484436, "eval_ag_news_emb_cos_sim_sem": 0.004841141464243117, "eval_ag_news_emb_top1_equal": 0.24400000274181366, "eval_ag_news_emb_top1_equal_sem": 0.0192267343061996, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4803214073181152, "eval_ag_news_n_ngrams_match_1": 11.478, "eval_ag_news_n_ngrams_match_2": 2.472, "eval_ag_news_n_ngrams_match_3": 0.702, "eval_ag_news_num_pred_words": 31.22, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.47015654485049, "eval_ag_news_pred_num_tokens": 49.0, "eval_ag_news_rouge_score": 0.35275194042143654, "eval_ag_news_runtime": 30.8963, "eval_ag_news_samples_per_second": 16.183, "eval_ag_news_steps_per_second": 0.032, "eval_ag_news_token_set_f1": 0.3386501404441052, "eval_ag_news_token_set_f1_sem": 0.005318713349884382, "eval_ag_news_token_set_precision": 0.2896306380985742, "eval_ag_news_token_set_recall": 0.4466289829368145, "eval_ag_news_true_num_tokens": 56.09375, "step": 93 }, { "epoch": 0.41, "eval_anthropic_toxic_prompts_accuracy": 0.11359375, "eval_anthropic_toxic_prompts_bleu_score": 5.140854055881747, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.21195092740800459, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6991554498672485, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004861811532057735, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.17399999499320984, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.016971269551723376, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 2.741424322128296, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.802, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.944, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.756, "eval_anthropic_toxic_prompts_num_pred_words": 29.662, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 15.5090592685474, "eval_anthropic_toxic_prompts_pred_num_tokens": 45.1328125, "eval_anthropic_toxic_prompts_rouge_score": 0.3009768237903324, "eval_anthropic_toxic_prompts_runtime": 6.6478, "eval_anthropic_toxic_prompts_samples_per_second": 75.213, "eval_anthropic_toxic_prompts_steps_per_second": 0.15, "eval_anthropic_toxic_prompts_token_set_f1": 0.3644166633832432, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0067735124312273635, "eval_anthropic_toxic_prompts_token_set_precision": 0.4294945619168725, "eval_anthropic_toxic_prompts_token_set_recall": 0.34899513455575765, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 93 }, { "epoch": 0.41, "eval_arxiv_accuracy": 0.40621875, "eval_arxiv_bleu_score": 3.486266642764337, "eval_arxiv_bleu_score_sem": 0.11682124642354311, "eval_arxiv_emb_cos_sim": 0.7276380062103271, "eval_arxiv_emb_cos_sim_sem": 0.006449194438198392, "eval_arxiv_emb_top1_equal": 0.17000000178813934, "eval_arxiv_emb_top1_equal_sem": 0.016815633120741882, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.0975985527038574, "eval_arxiv_n_ngrams_match_1": 12.438, "eval_arxiv_n_ngrams_match_2": 2.302, "eval_arxiv_n_ngrams_match_3": 0.498, "eval_arxiv_num_pred_words": 29.636, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 22.144708027378755, "eval_arxiv_pred_num_tokens": 50.4609375, "eval_arxiv_rouge_score": 0.34343007600768327, "eval_arxiv_runtime": 6.9219, "eval_arxiv_samples_per_second": 72.235, "eval_arxiv_steps_per_second": 0.144, "eval_arxiv_token_set_f1": 0.34406688703760474, "eval_arxiv_token_set_f1_sem": 0.0052970795722565266, "eval_arxiv_token_set_precision": 0.2720488131413779, "eval_arxiv_token_set_recall": 0.5071973334549789, "eval_arxiv_true_num_tokens": 64.0, "step": 93 }, { "epoch": 0.41, "eval_python_code_alpaca_accuracy": 0.15865625, "eval_python_code_alpaca_bleu_score": 5.943762510754415, "eval_python_code_alpaca_bleu_score_sem": 0.2175054353006214, "eval_python_code_alpaca_emb_cos_sim": 0.7461953163146973, "eval_python_code_alpaca_emb_cos_sim_sem": 0.00596813547007352, "eval_python_code_alpaca_emb_top1_equal": 0.19200000166893005, "eval_python_code_alpaca_emb_top1_equal_sem": 0.01763218126724194, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.4123122692108154, "eval_python_code_alpaca_n_ngrams_match_1": 8.432, "eval_python_code_alpaca_n_ngrams_match_2": 2.242, "eval_python_code_alpaca_n_ngrams_match_3": 0.706, "eval_python_code_alpaca_num_pred_words": 26.672, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 11.159735647926345, "eval_python_code_alpaca_pred_num_tokens": 47.8203125, "eval_python_code_alpaca_rouge_score": 0.40986058218769317, "eval_python_code_alpaca_runtime": 6.8363, "eval_python_code_alpaca_samples_per_second": 73.139, "eval_python_code_alpaca_steps_per_second": 0.146, "eval_python_code_alpaca_token_set_f1": 0.4641263193534667, "eval_python_code_alpaca_token_set_f1_sem": 0.006759693397650342, "eval_python_code_alpaca_token_set_precision": 0.4751880021278726, "eval_python_code_alpaca_token_set_recall": 0.4834618991897721, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 93 }, { "epoch": 0.41, "eval_wikibio_accuracy": 0.3475, "eval_wikibio_bleu_score": 5.7450436332065165, "eval_wikibio_bleu_score_sem": 0.23459594560767927, "eval_wikibio_emb_cos_sim": 0.7174302339553833, "eval_wikibio_emb_cos_sim_sem": 0.006967267271935492, "eval_wikibio_emb_top1_equal": 0.16599999368190765, "eval_wikibio_emb_top1_equal_sem": 0.01665661404240883, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.5982017517089844, "eval_wikibio_n_ngrams_match_1": 8.528, "eval_wikibio_n_ngrams_match_2": 2.744, "eval_wikibio_n_ngrams_match_3": 1.016, "eval_wikibio_num_pred_words": 29.094, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 36.532480869480594, "eval_wikibio_pred_num_tokens": 53.8671875, "eval_wikibio_rouge_score": 0.33783075470658863, "eval_wikibio_runtime": 6.9323, "eval_wikibio_samples_per_second": 72.126, "eval_wikibio_steps_per_second": 0.144, "eval_wikibio_token_set_f1": 0.2926915338296796, "eval_wikibio_token_set_f1_sem": 0.006339384709369148, "eval_wikibio_token_set_precision": 0.28480366224074966, "eval_wikibio_token_set_recall": 0.3280943627476054, "eval_wikibio_true_num_tokens": 61.1328125, "step": 93 }, { "epoch": 0.41, "eval_bias-bios_accuracy": 0.47609375, "eval_bias-bios_bleu_score": 14.143628030554162, "eval_bias-bios_bleu_score_sem": 0.5952833016066833, "eval_bias-bios_emb_cos_sim": 0.8480692505836487, "eval_bias-bios_emb_cos_sim_sem": 0.0034921289055575185, "eval_bias-bios_emb_top1_equal": 0.2840000092983246, "eval_bias-bios_emb_top1_equal_sem": 0.020186705101045338, "eval_bias-bios_exact_match": 0.0, "eval_bias-bios_exact_match_sem": 0.0, "eval_bias-bios_loss": 2.0621674060821533, "eval_bias-bios_n_ngrams_match_1": 18.828, "eval_bias-bios_n_ngrams_match_2": 7.64, "eval_bias-bios_n_ngrams_match_3": 3.826, "eval_bias-bios_num_pred_words": 36.126, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 7.862993655133785, "eval_bias-bios_pred_num_tokens": 47.9921875, "eval_bias-bios_rouge_score": 0.5012418979790247, "eval_bias-bios_runtime": 7.4397, "eval_bias-bios_samples_per_second": 67.207, "eval_bias-bios_steps_per_second": 0.134, "eval_bias-bios_token_set_f1": 0.5158033024363126, "eval_bias-bios_token_set_f1_sem": 0.006050981474164487, "eval_bias-bios_token_set_precision": 0.46195844368781785, "eval_bias-bios_token_set_recall": 0.6017904464338891, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 93 }, { "epoch": 0.42, "learning_rate": 0.001, "loss": 2.1097, "step": 96 }, { "epoch": 0.48, "learning_rate": 0.001, "loss": 2.3791, "step": 108 }, { "epoch": 0.53, "learning_rate": 0.001, "loss": 2.3587, "step": 120 }, { "epoch": 0.55, "eval_ag_news_accuracy": 0.31, "eval_ag_news_bleu_score": 4.7225283432858465, "eval_ag_news_bleu_score_sem": 0.15513744356970763, "eval_ag_news_emb_cos_sim": 0.8204448819160461, "eval_ag_news_emb_cos_sim_sem": 0.004084354753754322, "eval_ag_news_emb_top1_equal": 0.28200000524520874, "eval_ag_news_emb_top1_equal_sem": 0.02014357434811239, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.450395345687866, "eval_ag_news_n_ngrams_match_1": 13.582, "eval_ag_news_n_ngrams_match_2": 2.918, "eval_ag_news_n_ngrams_match_3": 0.774, "eval_ag_news_num_pred_words": 42.082, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.512848315065245, "eval_ag_news_pred_num_tokens": 62.9140625, "eval_ag_news_rouge_score": 0.3611285789834328, "eval_ag_news_runtime": 7.7072, "eval_ag_news_samples_per_second": 64.874, "eval_ag_news_steps_per_second": 0.13, "eval_ag_news_token_set_f1": 0.35262006963334624, "eval_ag_news_token_set_f1_sem": 0.004891780010880941, "eval_ag_news_token_set_precision": 0.32770115783501474, "eval_ag_news_token_set_recall": 0.40682450130250397, "eval_ag_news_true_num_tokens": 56.09375, "step": 124 }, { "epoch": 0.55, "eval_anthropic_toxic_prompts_accuracy": 0.11134375, "eval_anthropic_toxic_prompts_bleu_score": 3.588870597966171, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12529714132983094, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.7048087120056152, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004388768025387664, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.12200000137090683, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.014651325247908655, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 2.969221353530884, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.566, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.188, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.804, "eval_anthropic_toxic_prompts_num_pred_words": 44.512, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 19.476748189009147, "eval_anthropic_toxic_prompts_pred_num_tokens": 62.921875, "eval_anthropic_toxic_prompts_rouge_score": 0.24404335146473338, "eval_anthropic_toxic_prompts_runtime": 6.7594, "eval_anthropic_toxic_prompts_samples_per_second": 73.971, "eval_anthropic_toxic_prompts_steps_per_second": 0.148, "eval_anthropic_toxic_prompts_token_set_f1": 0.35608243439319104, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006070834817090331, "eval_anthropic_toxic_prompts_token_set_precision": 0.47290911056237195, "eval_anthropic_toxic_prompts_token_set_recall": 0.3119626371016283, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 124 }, { "epoch": 0.55, "eval_arxiv_accuracy": 0.41953125, "eval_arxiv_bleu_score": 4.034091269472269, "eval_arxiv_bleu_score_sem": 0.12793967624084907, "eval_arxiv_emb_cos_sim": 0.7384451031684875, "eval_arxiv_emb_cos_sim_sem": 0.0063196562377584925, "eval_arxiv_emb_top1_equal": 0.24799999594688416, "eval_arxiv_emb_top1_equal_sem": 0.01933234140950753, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 2.965681314468384, "eval_arxiv_n_ngrams_match_1": 13.97, "eval_arxiv_n_ngrams_match_2": 2.662, "eval_arxiv_n_ngrams_match_3": 0.574, "eval_arxiv_num_pred_words": 36.738, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 19.407921635830117, "eval_arxiv_pred_num_tokens": 62.9140625, "eval_arxiv_rouge_score": 0.3471889053815287, "eval_arxiv_runtime": 7.1394, "eval_arxiv_samples_per_second": 70.034, "eval_arxiv_steps_per_second": 0.14, "eval_arxiv_token_set_f1": 0.3505383309264076, "eval_arxiv_token_set_f1_sem": 0.0053464458822839395, "eval_arxiv_token_set_precision": 0.2922403618830216, "eval_arxiv_token_set_recall": 0.4868777158351232, "eval_arxiv_true_num_tokens": 64.0, "step": 124 }, { "epoch": 0.55, "eval_python_code_alpaca_accuracy": 0.1574375, "eval_python_code_alpaca_bleu_score": 4.901041283031989, "eval_python_code_alpaca_bleu_score_sem": 0.15128683929670117, "eval_python_code_alpaca_emb_cos_sim": 0.7697573900222778, "eval_python_code_alpaca_emb_cos_sim_sem": 0.0049162044481588, "eval_python_code_alpaca_emb_top1_equal": 0.14800000190734863, "eval_python_code_alpaca_emb_top1_equal_sem": 0.015896458012572223, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.595602512359619, "eval_python_code_alpaca_n_ngrams_match_1": 9.89, "eval_python_code_alpaca_n_ngrams_match_2": 2.874, "eval_python_code_alpaca_n_ngrams_match_3": 0.936, "eval_python_code_alpaca_num_pred_words": 39.19, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 13.404661402980476, "eval_python_code_alpaca_pred_num_tokens": 62.8203125, "eval_python_code_alpaca_rouge_score": 0.37669615985353927, "eval_python_code_alpaca_runtime": 6.8719, "eval_python_code_alpaca_samples_per_second": 72.76, "eval_python_code_alpaca_steps_per_second": 0.146, "eval_python_code_alpaca_token_set_f1": 0.4669983325014964, "eval_python_code_alpaca_token_set_f1_sem": 0.006102331677873762, "eval_python_code_alpaca_token_set_precision": 0.5525666056613069, "eval_python_code_alpaca_token_set_recall": 0.42905431288408613, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 124 }, { "epoch": 0.55, "eval_wikibio_accuracy": 0.36175, "eval_wikibio_bleu_score": 4.969333362935701, "eval_wikibio_bleu_score_sem": 0.20690804856768155, "eval_wikibio_emb_cos_sim": 0.7222297191619873, "eval_wikibio_emb_cos_sim_sem": 0.00648223522589297, "eval_wikibio_emb_top1_equal": 0.17000000178813934, "eval_wikibio_emb_top1_equal_sem": 0.016815633120741882, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.389568567276001, "eval_wikibio_n_ngrams_match_1": 8.624, "eval_wikibio_n_ngrams_match_2": 2.76, "eval_wikibio_n_ngrams_match_3": 1.06, "eval_wikibio_num_pred_words": 33.248, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 29.65315616829789, "eval_wikibio_pred_num_tokens": 62.9921875, "eval_wikibio_rouge_score": 0.30751147754995894, "eval_wikibio_runtime": 6.8812, "eval_wikibio_samples_per_second": 72.661, "eval_wikibio_steps_per_second": 0.145, "eval_wikibio_token_set_f1": 0.2744272465841197, "eval_wikibio_token_set_f1_sem": 0.006671840977191013, "eval_wikibio_token_set_precision": 0.27600638412423095, "eval_wikibio_token_set_recall": 0.2994770253871302, "eval_wikibio_true_num_tokens": 61.1328125, "step": 124 }, { "epoch": 0.55, "eval_bias-bios_accuracy": 0.47809375, "eval_bias-bios_bleu_score": 13.904461612933037, "eval_bias-bios_bleu_score_sem": 0.5622423795520898, "eval_bias-bios_emb_cos_sim": 0.8658474683761597, "eval_bias-bios_emb_cos_sim_sem": 0.0030276376456932464, "eval_bias-bios_emb_top1_equal": 0.3160000145435333, "eval_bias-bios_emb_top1_equal_sem": 0.020812358915445636, "eval_bias-bios_exact_match": 0.0, "eval_bias-bios_exact_match_sem": 0.0, "eval_bias-bios_loss": 2.0254554748535156, "eval_bias-bios_n_ngrams_match_1": 21.246, "eval_bias-bios_n_ngrams_match_2": 8.81, "eval_bias-bios_n_ngrams_match_3": 4.386, "eval_bias-bios_num_pred_words": 47.536, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 7.5795624586388195, "eval_bias-bios_pred_num_tokens": 62.890625, "eval_bias-bios_rouge_score": 0.4883049409995084, "eval_bias-bios_runtime": 7.322, "eval_bias-bios_samples_per_second": 68.287, "eval_bias-bios_steps_per_second": 0.137, "eval_bias-bios_token_set_f1": 0.522675177087755, "eval_bias-bios_token_set_f1_sem": 0.0059232907683261175, "eval_bias-bios_token_set_precision": 0.5037827360959235, "eval_bias-bios_token_set_recall": 0.559121364032907, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 124 }, { "epoch": 0.58, "learning_rate": 0.001, "loss": 2.2958, "step": 132 }, { "epoch": 0.63, "learning_rate": 0.001, "loss": 2.0545, "step": 144 }, { "epoch": 0.68, "eval_ag_news_accuracy": 0.311, "eval_ag_news_bleu_score": 5.003358972744811, "eval_ag_news_bleu_score_sem": 0.1684136330974139, "eval_ag_news_emb_cos_sim": 0.8219054341316223, "eval_ag_news_emb_cos_sim_sem": 0.003758947543815256, "eval_ag_news_emb_top1_equal": 0.28999999165534973, "eval_ag_news_emb_top1_equal_sem": 0.02031317985982347, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4661929607391357, "eval_ag_news_n_ngrams_match_1": 12.61, "eval_ag_news_n_ngrams_match_2": 2.782, "eval_ag_news_n_ngrams_match_3": 0.79, "eval_ag_news_num_pred_words": 34.54, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.01462919700174, "eval_ag_news_pred_num_tokens": 49.7265625, "eval_ag_news_rouge_score": 0.3713894017865863, "eval_ag_news_runtime": 6.9815, "eval_ag_news_samples_per_second": 71.618, "eval_ag_news_steps_per_second": 0.143, "eval_ag_news_token_set_f1": 0.3567973665596035, "eval_ag_news_token_set_f1_sem": 0.004808474411403699, "eval_ag_news_token_set_precision": 0.3171467469913889, "eval_ag_news_token_set_recall": 0.4300849194020247, "eval_ag_news_true_num_tokens": 56.09375, "step": 155 }, { "epoch": 0.68, "eval_anthropic_toxic_prompts_accuracy": 0.1133125, "eval_anthropic_toxic_prompts_bleu_score": 4.991091639968256, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.18676981446968294, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.7204309701919556, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004466604566883565, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.17000000178813934, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.016815633120741882, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 2.8461811542510986, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.34, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.166, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.842, "eval_anthropic_toxic_prompts_num_pred_words": 33.19, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 17.221888366963853, "eval_anthropic_toxic_prompts_pred_num_tokens": 46.6640625, "eval_anthropic_toxic_prompts_rouge_score": 0.29269005770970935, "eval_anthropic_toxic_prompts_runtime": 6.7114, "eval_anthropic_toxic_prompts_samples_per_second": 74.5, "eval_anthropic_toxic_prompts_steps_per_second": 0.149, "eval_anthropic_toxic_prompts_token_set_f1": 0.3702121335287552, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0065356531821970775, "eval_anthropic_toxic_prompts_token_set_precision": 0.46274202329252623, "eval_anthropic_toxic_prompts_token_set_recall": 0.33499300377237795, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 155 }, { "epoch": 0.68, "eval_arxiv_accuracy": 0.4100625, "eval_arxiv_bleu_score": 3.9930895970733804, "eval_arxiv_bleu_score_sem": 0.12444908662542796, "eval_arxiv_emb_cos_sim": 0.7626603841781616, "eval_arxiv_emb_cos_sim_sem": 0.005201230139138295, "eval_arxiv_emb_top1_equal": 0.22599999606609344, "eval_arxiv_emb_top1_equal_sem": 0.018722957089283943, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.02276873588562, "eval_arxiv_n_ngrams_match_1": 13.88, "eval_arxiv_n_ngrams_match_2": 2.696, "eval_arxiv_n_ngrams_match_3": 0.612, "eval_arxiv_num_pred_words": 31.28, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 20.548105273439415, "eval_arxiv_pred_num_tokens": 50.9765625, "eval_arxiv_rouge_score": 0.37042341511355137, "eval_arxiv_runtime": 7.071, "eval_arxiv_samples_per_second": 70.712, "eval_arxiv_steps_per_second": 0.141, "eval_arxiv_token_set_f1": 0.3713592970804077, "eval_arxiv_token_set_f1_sem": 0.0049161836131566845, "eval_arxiv_token_set_precision": 0.30512914849944406, "eval_arxiv_token_set_recall": 0.5025705117271739, "eval_arxiv_true_num_tokens": 64.0, "step": 155 }, { "epoch": 0.68, "eval_python_code_alpaca_accuracy": 0.16328125, "eval_python_code_alpaca_bleu_score": 7.516714220519165, "eval_python_code_alpaca_bleu_score_sem": 0.2405822858298543, "eval_python_code_alpaca_emb_cos_sim": 0.8032925128936768, "eval_python_code_alpaca_emb_cos_sim_sem": 0.00434053064781819, "eval_python_code_alpaca_emb_top1_equal": 0.2160000056028366, "eval_python_code_alpaca_emb_top1_equal_sem": 0.018421909471797383, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.440660238265991, "eval_python_code_alpaca_n_ngrams_match_1": 9.808, "eval_python_code_alpaca_n_ngrams_match_2": 3.082, "eval_python_code_alpaca_n_ngrams_match_3": 1.114, "eval_python_code_alpaca_num_pred_words": 28.448, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 11.48061818450373, "eval_python_code_alpaca_pred_num_tokens": 44.390625, "eval_python_code_alpaca_rouge_score": 0.45539690730460003, "eval_python_code_alpaca_runtime": 6.7768, "eval_python_code_alpaca_samples_per_second": 73.781, "eval_python_code_alpaca_steps_per_second": 0.148, "eval_python_code_alpaca_token_set_f1": 0.5142372467494214, "eval_python_code_alpaca_token_set_f1_sem": 0.005921915329945734, "eval_python_code_alpaca_token_set_precision": 0.557454862667378, "eval_python_code_alpaca_token_set_recall": 0.49660464965394135, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 155 }, { "epoch": 0.68, "eval_wikibio_accuracy": 0.35134375, "eval_wikibio_bleu_score": 5.640416639205569, "eval_wikibio_bleu_score_sem": 0.22432806451151702, "eval_wikibio_emb_cos_sim": 0.7272863984107971, "eval_wikibio_emb_cos_sim_sem": 0.006542096621932352, "eval_wikibio_emb_top1_equal": 0.1720000058412552, "eval_wikibio_emb_top1_equal_sem": 0.01689386850274998, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.401550769805908, "eval_wikibio_n_ngrams_match_1": 8.604, "eval_wikibio_n_ngrams_match_2": 2.838, "eval_wikibio_n_ngrams_match_3": 1.082, "eval_wikibio_num_pred_words": 29.89, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 30.010603517780968, "eval_wikibio_pred_num_tokens": 56.3046875, "eval_wikibio_rouge_score": 0.3404123434832165, "eval_wikibio_runtime": 8.3562, "eval_wikibio_samples_per_second": 59.835, "eval_wikibio_steps_per_second": 0.12, "eval_wikibio_token_set_f1": 0.29415853483245885, "eval_wikibio_token_set_f1_sem": 0.006476544753051358, "eval_wikibio_token_set_precision": 0.28791783614238897, "eval_wikibio_token_set_recall": 0.33035313035858804, "eval_wikibio_true_num_tokens": 61.1328125, "step": 155 }, { "epoch": 0.68, "eval_bias-bios_accuracy": 0.4783125, "eval_bias-bios_bleu_score": 15.409677713634471, "eval_bias-bios_bleu_score_sem": 0.6793448382828893, "eval_bias-bios_emb_cos_sim": 0.8709923624992371, "eval_bias-bios_emb_cos_sim_sem": 0.0027106197477141454, "eval_bias-bios_emb_top1_equal": 0.31200000643730164, "eval_bias-bios_emb_top1_equal_sem": 0.0207405942792578, "eval_bias-bios_exact_match": 0.0, "eval_bias-bios_exact_match_sem": 0.0, "eval_bias-bios_loss": 1.9928849935531616, "eval_bias-bios_n_ngrams_match_1": 20.156, "eval_bias-bios_n_ngrams_match_2": 8.406, "eval_bias-bios_n_ngrams_match_3": 4.272, "eval_bias-bios_num_pred_words": 38.084, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 7.336669503630765, "eval_bias-bios_pred_num_tokens": 49.7421875, "eval_bias-bios_rouge_score": 0.5228778033893537, "eval_bias-bios_runtime": 56.2212, "eval_bias-bios_samples_per_second": 8.893, "eval_bias-bios_steps_per_second": 0.018, "eval_bias-bios_token_set_f1": 0.5380957539166968, "eval_bias-bios_token_set_f1_sem": 0.006057211310424036, "eval_bias-bios_token_set_precision": 0.5007483299431186, "eval_bias-bios_token_set_recall": 0.5938445162067718, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 155 }, { "epoch": 0.69, "learning_rate": 0.001, "loss": 2.2312, "step": 156 }, { "epoch": 0.74, "learning_rate": 0.001, "loss": 2.303, "step": 168 }, { "epoch": 0.79, "learning_rate": 0.001, "loss": 2.2642, "step": 180 }, { "epoch": 0.82, "eval_ag_news_accuracy": 0.3155, "eval_ag_news_bleu_score": 4.6238494026674, "eval_ag_news_bleu_score_sem": 0.16422759660282926, "eval_ag_news_emb_cos_sim": 0.7977774739265442, "eval_ag_news_emb_cos_sim_sem": 0.004886535312212688, "eval_ag_news_emb_top1_equal": 0.2619999945163727, "eval_ag_news_emb_top1_equal_sem": 0.019684691179025708, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.440540075302124, "eval_ag_news_n_ngrams_match_1": 12.182, "eval_ag_news_n_ngrams_match_2": 2.652, "eval_ag_news_n_ngrams_match_3": 0.724, "eval_ag_news_num_pred_words": 34.344, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.203806023310214, "eval_ag_news_pred_num_tokens": 51.265625, "eval_ag_news_rouge_score": 0.3575247504591853, "eval_ag_news_runtime": 7.0458, "eval_ag_news_samples_per_second": 70.964, "eval_ag_news_steps_per_second": 0.142, "eval_ag_news_token_set_f1": 0.3476905400092977, "eval_ag_news_token_set_f1_sem": 0.0050474775376901995, "eval_ag_news_token_set_precision": 0.30283073407725536, "eval_ag_news_token_set_recall": 0.43549270766889003, "eval_ag_news_true_num_tokens": 56.09375, "step": 186 }, { "epoch": 0.82, "eval_anthropic_toxic_prompts_accuracy": 0.1125625, "eval_anthropic_toxic_prompts_bleu_score": 4.581247855788949, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.18571587878474477, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6939055323600769, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004781795528748738, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1340000033378601, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015249692640233114, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 2.7806220054626465, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.726, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.83, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.678, "eval_anthropic_toxic_prompts_num_pred_words": 32.324, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 16.129050185906753, "eval_anthropic_toxic_prompts_pred_num_tokens": 47.3125, "eval_anthropic_toxic_prompts_rouge_score": 0.27780555495384585, "eval_anthropic_toxic_prompts_runtime": 6.8387, "eval_anthropic_toxic_prompts_samples_per_second": 73.113, "eval_anthropic_toxic_prompts_steps_per_second": 0.146, "eval_anthropic_toxic_prompts_token_set_f1": 0.35234555947824386, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006273855655020789, "eval_anthropic_toxic_prompts_token_set_precision": 0.4208779132955721, "eval_anthropic_toxic_prompts_token_set_recall": 0.33397278331070135, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 186 }, { "epoch": 0.82, "eval_arxiv_accuracy": 0.41828125, "eval_arxiv_bleu_score": 3.7199294246295924, "eval_arxiv_bleu_score_sem": 0.12395209188721983, "eval_arxiv_emb_cos_sim": 0.7246992588043213, "eval_arxiv_emb_cos_sim_sem": 0.00647072716105568, "eval_arxiv_emb_top1_equal": 0.1860000044107437, "eval_arxiv_emb_top1_equal_sem": 0.017418806591218323, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.0102925300598145, "eval_arxiv_n_ngrams_match_1": 13.088, "eval_arxiv_n_ngrams_match_2": 2.46, "eval_arxiv_n_ngrams_match_3": 0.548, "eval_arxiv_num_pred_words": 31.562, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 20.293335467674506, "eval_arxiv_pred_num_tokens": 54.765625, "eval_arxiv_rouge_score": 0.35168978112341753, "eval_arxiv_runtime": 7.2086, "eval_arxiv_samples_per_second": 69.362, "eval_arxiv_steps_per_second": 0.139, "eval_arxiv_token_set_f1": 0.3489796464277637, "eval_arxiv_token_set_f1_sem": 0.005302434445957346, "eval_arxiv_token_set_precision": 0.2804699900490891, "eval_arxiv_token_set_recall": 0.5019397954958496, "eval_arxiv_true_num_tokens": 64.0, "step": 186 }, { "epoch": 0.82, "eval_python_code_alpaca_accuracy": 0.1608125, "eval_python_code_alpaca_bleu_score": 6.1128485412798765, "eval_python_code_alpaca_bleu_score_sem": 0.2066307880111669, "eval_python_code_alpaca_emb_cos_sim": 0.7717223763465881, "eval_python_code_alpaca_emb_cos_sim_sem": 0.0050387301194260315, "eval_python_code_alpaca_emb_top1_equal": 0.21400000154972076, "eval_python_code_alpaca_emb_top1_equal_sem": 0.018359796975924752, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.430375814437866, "eval_python_code_alpaca_n_ngrams_match_1": 9.032, "eval_python_code_alpaca_n_ngrams_match_2": 2.526, "eval_python_code_alpaca_n_ngrams_match_3": 0.822, "eval_python_code_alpaca_num_pred_words": 29.698, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 11.363151714129962, "eval_python_code_alpaca_pred_num_tokens": 48.4140625, "eval_python_code_alpaca_rouge_score": 0.4158035294654079, "eval_python_code_alpaca_runtime": 7.3079, "eval_python_code_alpaca_samples_per_second": 68.419, "eval_python_code_alpaca_steps_per_second": 0.137, "eval_python_code_alpaca_token_set_f1": 0.48281370346698166, "eval_python_code_alpaca_token_set_f1_sem": 0.0060842657633341036, "eval_python_code_alpaca_token_set_precision": 0.5093156211832727, "eval_python_code_alpaca_token_set_recall": 0.4823383370687486, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 186 }, { "epoch": 0.82, "eval_wikibio_accuracy": 0.3616875, "eval_wikibio_bleu_score": 5.537482915642417, "eval_wikibio_bleu_score_sem": 0.23603302250627642, "eval_wikibio_emb_cos_sim": 0.699407160282135, "eval_wikibio_emb_cos_sim_sem": 0.007551687574736549, "eval_wikibio_emb_top1_equal": 0.15000000596046448, "eval_wikibio_emb_top1_equal_sem": 0.01598471338779901, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.424071788787842, "eval_wikibio_n_ngrams_match_1": 8.252, "eval_wikibio_n_ngrams_match_2": 2.666, "eval_wikibio_n_ngrams_match_3": 1.03, "eval_wikibio_num_pred_words": 29.136, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 30.694140978938165, "eval_wikibio_pred_num_tokens": 57.2421875, "eval_wikibio_rouge_score": 0.3120185963272658, "eval_wikibio_runtime": 7.0014, "eval_wikibio_samples_per_second": 71.415, "eval_wikibio_steps_per_second": 0.143, "eval_wikibio_token_set_f1": 0.27915879560872614, "eval_wikibio_token_set_f1_sem": 0.006843179479202318, "eval_wikibio_token_set_precision": 0.2695843454340866, "eval_wikibio_token_set_recall": 0.31745052828855647, "eval_wikibio_true_num_tokens": 61.1328125, "step": 186 }, { "epoch": 0.82, "eval_bias-bios_accuracy": 0.49115625, "eval_bias-bios_bleu_score": 15.202837676179207, "eval_bias-bios_bleu_score_sem": 0.6997835059838426, "eval_bias-bios_emb_cos_sim": 0.855296790599823, "eval_bias-bios_emb_cos_sim_sem": 0.003214230031382842, "eval_bias-bios_emb_top1_equal": 0.26600000262260437, "eval_bias-bios_emb_top1_equal_sem": 0.01978055817719369, "eval_bias-bios_exact_match": 0.0, "eval_bias-bios_exact_match_sem": 0.0, "eval_bias-bios_loss": 1.9403151273727417, "eval_bias-bios_n_ngrams_match_1": 19.532, "eval_bias-bios_n_ngrams_match_2": 8.136, "eval_bias-bios_n_ngrams_match_3": 4.216, "eval_bias-bios_num_pred_words": 36.87, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 6.960944209104801, "eval_bias-bios_pred_num_tokens": 50.6484375, "eval_bias-bios_rouge_score": 0.5109369957489318, "eval_bias-bios_runtime": 7.1885, "eval_bias-bios_samples_per_second": 69.556, "eval_bias-bios_steps_per_second": 0.139, "eval_bias-bios_token_set_f1": 0.5280290804881462, "eval_bias-bios_token_set_f1_sem": 0.006173417362359757, "eval_bias-bios_token_set_precision": 0.4782727189071511, "eval_bias-bios_token_set_recall": 0.6061540264575703, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 186 }, { "epoch": 0.85, "learning_rate": 0.001, "loss": 2.0709, "step": 192 }, { "epoch": 0.9, "learning_rate": 0.001, "loss": 2.117, "step": 204 }, { "epoch": 0.95, "learning_rate": 0.001, "loss": 2.2981, "step": 216 }, { "epoch": 0.96, "eval_ag_news_accuracy": 0.30978125, "eval_ag_news_bleu_score": 4.457379225242967, "eval_ag_news_bleu_score_sem": 0.14593041092341294, "eval_ag_news_emb_cos_sim": 0.8129716515541077, "eval_ag_news_emb_cos_sim_sem": 0.004289844915959979, "eval_ag_news_emb_top1_equal": 0.2540000081062317, "eval_ag_news_emb_top1_equal_sem": 0.019486597059300604, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4657886028289795, "eval_ag_news_n_ngrams_match_1": 13.404, "eval_ag_news_n_ngrams_match_2": 2.836, "eval_ag_news_n_ngrams_match_3": 0.748, "eval_ag_news_num_pred_words": 42.124, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.001686445373565, "eval_ag_news_pred_num_tokens": 62.734375, "eval_ag_news_rouge_score": 0.357087203201098, "eval_ag_news_runtime": 7.1579, "eval_ag_news_samples_per_second": 69.853, "eval_ag_news_steps_per_second": 0.14, "eval_ag_news_token_set_f1": 0.3502126568889519, "eval_ag_news_token_set_f1_sem": 0.004873482067898881, "eval_ag_news_token_set_precision": 0.3262434355520826, "eval_ag_news_token_set_recall": 0.4070500176528214, "eval_ag_news_true_num_tokens": 56.09375, "step": 217 }, { "epoch": 0.96, "eval_anthropic_toxic_prompts_accuracy": 0.10990625, "eval_anthropic_toxic_prompts_bleu_score": 3.6506017226905993, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.13345084858914963, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.7062422633171082, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004333991929178808, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1420000046491623, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015625630310786714, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 2.999067783355713, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.474, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.128, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.834, "eval_anthropic_toxic_prompts_num_pred_words": 44.49, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 20.066821576092806, "eval_anthropic_toxic_prompts_pred_num_tokens": 62.2265625, "eval_anthropic_toxic_prompts_rouge_score": 0.24249253007085042, "eval_anthropic_toxic_prompts_runtime": 7.1104, "eval_anthropic_toxic_prompts_samples_per_second": 70.32, "eval_anthropic_toxic_prompts_steps_per_second": 0.141, "eval_anthropic_toxic_prompts_token_set_f1": 0.3484126557045319, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00591580762460389, "eval_anthropic_toxic_prompts_token_set_precision": 0.4671694321444695, "eval_anthropic_toxic_prompts_token_set_recall": 0.3032982618338858, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 217 }, { "epoch": 0.96, "eval_arxiv_accuracy": 0.4244375, "eval_arxiv_bleu_score": 4.147123947233683, "eval_arxiv_bleu_score_sem": 0.1266643362812505, "eval_arxiv_emb_cos_sim": 0.7450116276741028, "eval_arxiv_emb_cos_sim_sem": 0.005653072123844248, "eval_arxiv_emb_top1_equal": 0.24400000274181366, "eval_arxiv_emb_top1_equal_sem": 0.0192267343061996, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 2.9606425762176514, "eval_arxiv_n_ngrams_match_1": 14.408, "eval_arxiv_n_ngrams_match_2": 2.74, "eval_arxiv_n_ngrams_match_3": 0.596, "eval_arxiv_num_pred_words": 37.866, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 19.310376158162004, "eval_arxiv_pred_num_tokens": 62.9375, "eval_arxiv_rouge_score": 0.3543904040906273, "eval_arxiv_runtime": 7.2195, "eval_arxiv_samples_per_second": 69.257, "eval_arxiv_steps_per_second": 0.139, "eval_arxiv_token_set_f1": 0.35833270808713374, "eval_arxiv_token_set_f1_sem": 0.004897445521183248, "eval_arxiv_token_set_precision": 0.30320829802083726, "eval_arxiv_token_set_recall": 0.4739172424026966, "eval_arxiv_true_num_tokens": 64.0, "step": 217 }, { "epoch": 0.96, "eval_python_code_alpaca_accuracy": 0.1595625, "eval_python_code_alpaca_bleu_score": 5.186692098353156, "eval_python_code_alpaca_bleu_score_sem": 0.15960643693191615, "eval_python_code_alpaca_emb_cos_sim": 0.7835527658462524, "eval_python_code_alpaca_emb_cos_sim_sem": 0.003771730432211162, "eval_python_code_alpaca_emb_top1_equal": 0.18400000035762787, "eval_python_code_alpaca_emb_top1_equal_sem": 0.017346174301986407, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.6080775260925293, "eval_python_code_alpaca_n_ngrams_match_1": 10.23, "eval_python_code_alpaca_n_ngrams_match_2": 3.044, "eval_python_code_alpaca_n_ngrams_match_3": 1.038, "eval_python_code_alpaca_num_pred_words": 40.37, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 13.572932145711775, "eval_python_code_alpaca_pred_num_tokens": 62.0703125, "eval_python_code_alpaca_rouge_score": 0.37938408557690495, "eval_python_code_alpaca_runtime": 7.0219, "eval_python_code_alpaca_samples_per_second": 71.206, "eval_python_code_alpaca_steps_per_second": 0.142, "eval_python_code_alpaca_token_set_f1": 0.4818092806192162, "eval_python_code_alpaca_token_set_f1_sem": 0.005350169286529162, "eval_python_code_alpaca_token_set_precision": 0.5706438293206109, "eval_python_code_alpaca_token_set_recall": 0.4376943035920839, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 217 }, { "epoch": 0.96, "eval_wikibio_accuracy": 0.36484375, "eval_wikibio_bleu_score": 5.0133655577159395, "eval_wikibio_bleu_score_sem": 0.21321050674285638, "eval_wikibio_emb_cos_sim": 0.7070615291595459, "eval_wikibio_emb_cos_sim_sem": 0.006583151538281355, "eval_wikibio_emb_top1_equal": 0.16599999368190765, "eval_wikibio_emb_top1_equal_sem": 0.01665661404240883, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.3749570846557617, "eval_wikibio_n_ngrams_match_1": 8.646, "eval_wikibio_n_ngrams_match_2": 2.724, "eval_wikibio_n_ngrams_match_3": 1.032, "eval_wikibio_num_pred_words": 32.776, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 29.22302963794754, "eval_wikibio_pred_num_tokens": 62.9453125, "eval_wikibio_rouge_score": 0.302933650392108, "eval_wikibio_runtime": 7.0614, "eval_wikibio_samples_per_second": 70.807, "eval_wikibio_steps_per_second": 0.142, "eval_wikibio_token_set_f1": 0.2754425983911362, "eval_wikibio_token_set_f1_sem": 0.006761952543645421, "eval_wikibio_token_set_precision": 0.27522435313018684, "eval_wikibio_token_set_recall": 0.30822619478242524, "eval_wikibio_true_num_tokens": 61.1328125, "step": 217 }, { "epoch": 0.96, "eval_bias-bios_accuracy": 0.48771875, "eval_bias-bios_bleu_score": 15.05513983287074, "eval_bias-bios_bleu_score_sem": 0.5941410638440162, "eval_bias-bios_emb_cos_sim": 0.869806706905365, "eval_bias-bios_emb_cos_sim_sem": 0.0030029188635396996, "eval_bias-bios_emb_top1_equal": 0.33000001311302185, "eval_bias-bios_emb_top1_equal_sem": 0.021049612042986412, "eval_bias-bios_exact_match": 0.0, "eval_bias-bios_exact_match_sem": 0.0, "eval_bias-bios_loss": 1.9613577127456665, "eval_bias-bios_n_ngrams_match_1": 21.756, "eval_bias-bios_n_ngrams_match_2": 9.298, "eval_bias-bios_n_ngrams_match_3": 4.902, "eval_bias-bios_num_pred_words": 47.754, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 7.108972458353611, "eval_bias-bios_pred_num_tokens": 62.640625, "eval_bias-bios_rouge_score": 0.4947836024474072, "eval_bias-bios_runtime": 8.4086, "eval_bias-bios_samples_per_second": 59.463, "eval_bias-bios_steps_per_second": 0.119, "eval_bias-bios_token_set_f1": 0.5331869447306742, "eval_bias-bios_token_set_f1_sem": 0.0060534508998137575, "eval_bias-bios_token_set_precision": 0.519431249760555, "eval_bias-bios_token_set_recall": 0.5606207070026014, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 217 }, { "epoch": 1.0, "learning_rate": 0.001, "loss": 2.0476, "step": 228 }, { "epoch": 1.06, "learning_rate": 0.001, "loss": 2.2821, "step": 240 }, { "epoch": 1.09, "eval_ag_news_accuracy": 0.30928125, "eval_ag_news_bleu_score": 4.791953510451809, "eval_ag_news_bleu_score_sem": 0.15930662519567532, "eval_ag_news_emb_cos_sim": 0.8136026263237, "eval_ag_news_emb_cos_sim_sem": 0.004870167857208421, "eval_ag_news_emb_top1_equal": 0.2800000011920929, "eval_ag_news_emb_top1_equal_sem": 0.02009995045904072, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.455613851547241, "eval_ag_news_n_ngrams_match_1": 13.48, "eval_ag_news_n_ngrams_match_2": 2.948, "eval_ag_news_n_ngrams_match_3": 0.85, "eval_ag_news_num_pred_words": 41.558, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.67772813762492, "eval_ag_news_pred_num_tokens": 62.875, "eval_ag_news_rouge_score": 0.36344000370285456, "eval_ag_news_runtime": 7.2817, "eval_ag_news_samples_per_second": 68.665, "eval_ag_news_steps_per_second": 0.137, "eval_ag_news_token_set_f1": 0.3561913692182338, "eval_ag_news_token_set_f1_sem": 0.005130586740939777, "eval_ag_news_token_set_precision": 0.3272258207777092, "eval_ag_news_token_set_recall": 0.42120484031248273, "eval_ag_news_true_num_tokens": 56.09375, "step": 248 }, { "epoch": 1.09, "eval_anthropic_toxic_prompts_accuracy": 0.10884375, "eval_anthropic_toxic_prompts_bleu_score": 3.65233656559296, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12890462494559987, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.698917031288147, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004542096712519974, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1459999978542328, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015807205702664997, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 2.9917824268341064, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.254, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.144, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.786, "eval_anthropic_toxic_prompts_num_pred_words": 43.646, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 19.921158873287087, "eval_anthropic_toxic_prompts_pred_num_tokens": 62.75, "eval_anthropic_toxic_prompts_rouge_score": 0.2387839420169841, "eval_anthropic_toxic_prompts_runtime": 53.6519, "eval_anthropic_toxic_prompts_samples_per_second": 9.319, "eval_anthropic_toxic_prompts_steps_per_second": 0.019, "eval_anthropic_toxic_prompts_token_set_f1": 0.34714249758448196, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006222371501576447, "eval_anthropic_toxic_prompts_token_set_precision": 0.45275869291025683, "eval_anthropic_toxic_prompts_token_set_recall": 0.31389262373602134, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 248 }, { "epoch": 1.09, "eval_arxiv_accuracy": 0.423125, "eval_arxiv_bleu_score": 4.093781013555646, "eval_arxiv_bleu_score_sem": 0.12570296310978762, "eval_arxiv_emb_cos_sim": 0.7403872609138489, "eval_arxiv_emb_cos_sim_sem": 0.00561281955332095, "eval_arxiv_emb_top1_equal": 0.23999999463558197, "eval_arxiv_emb_top1_equal_sem": 0.019118866773455794, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 2.958312749862671, "eval_arxiv_n_ngrams_match_1": 13.988, "eval_arxiv_n_ngrams_match_2": 2.676, "eval_arxiv_n_ngrams_match_3": 0.594, "eval_arxiv_num_pred_words": 36.748, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 19.265438703424365, "eval_arxiv_pred_num_tokens": 62.96875, "eval_arxiv_rouge_score": 0.3475298145049057, "eval_arxiv_runtime": 7.4776, "eval_arxiv_samples_per_second": 66.866, "eval_arxiv_steps_per_second": 0.134, "eval_arxiv_token_set_f1": 0.3533324229050544, "eval_arxiv_token_set_f1_sem": 0.004971204765366318, "eval_arxiv_token_set_precision": 0.2929452599792598, "eval_arxiv_token_set_recall": 0.4866153026712255, "eval_arxiv_true_num_tokens": 64.0, "step": 248 }, { "epoch": 1.09, "eval_python_code_alpaca_accuracy": 0.15734375, "eval_python_code_alpaca_bleu_score": 5.042946138157769, "eval_python_code_alpaca_bleu_score_sem": 0.160790946633052, "eval_python_code_alpaca_emb_cos_sim": 0.7753879427909851, "eval_python_code_alpaca_emb_cos_sim_sem": 0.004412812077351404, "eval_python_code_alpaca_emb_top1_equal": 0.17399999499320984, "eval_python_code_alpaca_emb_top1_equal_sem": 0.016971269551723376, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.589085102081299, "eval_python_code_alpaca_n_ngrams_match_1": 9.824, "eval_python_code_alpaca_n_ngrams_match_2": 2.964, "eval_python_code_alpaca_n_ngrams_match_3": 0.972, "eval_python_code_alpaca_num_pred_words": 39.166, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 13.31758179996233, "eval_python_code_alpaca_pred_num_tokens": 62.5546875, "eval_python_code_alpaca_rouge_score": 0.3776958727600159, "eval_python_code_alpaca_runtime": 7.7405, "eval_python_code_alpaca_samples_per_second": 64.595, "eval_python_code_alpaca_steps_per_second": 0.129, "eval_python_code_alpaca_token_set_f1": 0.47429047711353645, "eval_python_code_alpaca_token_set_f1_sem": 0.005526526442278936, "eval_python_code_alpaca_token_set_precision": 0.546630656850991, "eval_python_code_alpaca_token_set_recall": 0.44059115919441477, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 248 }, { "epoch": 1.09, "eval_wikibio_accuracy": 0.36778125, "eval_wikibio_bleu_score": 4.660533381235605, "eval_wikibio_bleu_score_sem": 0.2076411844668876, "eval_wikibio_emb_cos_sim": 0.6815629005432129, "eval_wikibio_emb_cos_sim_sem": 0.007799395854818424, "eval_wikibio_emb_top1_equal": 0.15399999916553497, "eval_wikibio_emb_top1_equal_sem": 0.016158283980625493, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.345656156539917, "eval_wikibio_n_ngrams_match_1": 7.7, "eval_wikibio_n_ngrams_match_2": 2.462, "eval_wikibio_n_ngrams_match_3": 0.938, "eval_wikibio_num_pred_words": 30.724, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 28.37919075038963, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.2744365265889154, "eval_wikibio_runtime": 7.0186, "eval_wikibio_samples_per_second": 71.24, "eval_wikibio_steps_per_second": 0.142, "eval_wikibio_token_set_f1": 0.25223998792231406, "eval_wikibio_token_set_f1_sem": 0.007338838842555383, "eval_wikibio_token_set_precision": 0.24746688907945488, "eval_wikibio_token_set_recall": 0.2920593541010084, "eval_wikibio_true_num_tokens": 61.1328125, "step": 248 }, { "epoch": 1.09, "eval_bias-bios_accuracy": 0.49, "eval_bias-bios_bleu_score": 15.187181809196383, "eval_bias-bios_bleu_score_sem": 0.6108940925547736, "eval_bias-bios_emb_cos_sim": 0.8737805485725403, "eval_bias-bios_emb_cos_sim_sem": 0.002709689119852354, "eval_bias-bios_emb_top1_equal": 0.30799999833106995, "eval_bias-bios_emb_top1_equal_sem": 0.020667033028164562, "eval_bias-bios_exact_match": 0.0, "eval_bias-bios_exact_match_sem": 0.0, "eval_bias-bios_loss": 1.9598242044448853, "eval_bias-bios_n_ngrams_match_1": 21.678, "eval_bias-bios_n_ngrams_match_2": 9.286, "eval_bias-bios_n_ngrams_match_3": 4.858, "eval_bias-bios_num_pred_words": 46.952, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 7.0980791447069205, "eval_bias-bios_pred_num_tokens": 62.9296875, "eval_bias-bios_rouge_score": 0.5001935759082592, "eval_bias-bios_runtime": 7.964, "eval_bias-bios_samples_per_second": 62.783, "eval_bias-bios_steps_per_second": 0.126, "eval_bias-bios_token_set_f1": 0.5352723142499153, "eval_bias-bios_token_set_f1_sem": 0.006176020693455797, "eval_bias-bios_token_set_precision": 0.5163630183545604, "eval_bias-bios_token_set_recall": 0.569456990893491, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 248 }, { "epoch": 1.11, "learning_rate": 0.001, "loss": 2.2284, "step": 252 }, { "epoch": 1.16, "learning_rate": 0.001, "loss": 2.1384, "step": 264 }, { "epoch": 1.22, "learning_rate": 0.001, "loss": 1.9197, "step": 276 }, { "epoch": 1.23, "eval_ag_news_accuracy": 0.3075625, "eval_ag_news_bleu_score": 4.12785289110093, "eval_ag_news_bleu_score_sem": 0.15900310936221984, "eval_ag_news_emb_cos_sim": 0.7948484420776367, "eval_ag_news_emb_cos_sim_sem": 0.004320750557453928, "eval_ag_news_emb_top1_equal": 0.25200000405311584, "eval_ag_news_emb_top1_equal_sem": 0.019435728067390842, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.555957794189453, "eval_ag_news_n_ngrams_match_1": 10.78, "eval_ag_news_n_ngrams_match_2": 2.354, "eval_ag_news_n_ngrams_match_3": 0.628, "eval_ag_news_num_pred_words": 25.994, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 35.02134715186829, "eval_ag_news_pred_num_tokens": 38.1875, "eval_ag_news_rouge_score": 0.35369982193370303, "eval_ag_news_runtime": 6.9396, "eval_ag_news_samples_per_second": 72.05, "eval_ag_news_steps_per_second": 0.144, "eval_ag_news_token_set_f1": 0.34152207043374166, "eval_ag_news_token_set_f1_sem": 0.004979529346488383, "eval_ag_news_token_set_precision": 0.2837138845906317, "eval_ag_news_token_set_recall": 0.4584907965605788, "eval_ag_news_true_num_tokens": 56.09375, "step": 279 }, { "epoch": 1.23, "eval_anthropic_toxic_prompts_accuracy": 0.11478125, "eval_anthropic_toxic_prompts_bleu_score": 6.3220699630707715, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.25242026056820677, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.7130110263824463, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004697909073150958, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.17599999904632568, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.017047853594066943, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 2.773521900177002, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.608, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.842, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.734, "eval_anthropic_toxic_prompts_num_pred_words": 23.556, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 16.014937815731553, "eval_anthropic_toxic_prompts_pred_num_tokens": 32.7109375, "eval_anthropic_toxic_prompts_rouge_score": 0.3300720544583728, "eval_anthropic_toxic_prompts_runtime": 6.6988, "eval_anthropic_toxic_prompts_samples_per_second": 74.64, "eval_anthropic_toxic_prompts_steps_per_second": 0.149, "eval_anthropic_toxic_prompts_token_set_f1": 0.3603427310230735, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0064971793770156085, "eval_anthropic_toxic_prompts_token_set_precision": 0.4225042332482681, "eval_anthropic_toxic_prompts_token_set_recall": 0.34628301551963964, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 279 }, { "epoch": 1.23, "eval_arxiv_accuracy": 0.40384375, "eval_arxiv_bleu_score": 3.082662931867365, "eval_arxiv_bleu_score_sem": 0.09880748146822564, "eval_arxiv_emb_cos_sim": 0.7372804284095764, "eval_arxiv_emb_cos_sim_sem": 0.005092747519021461, "eval_arxiv_emb_top1_equal": 0.11599999666213989, "eval_arxiv_emb_top1_equal_sem": 0.014335236978191066, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.1328988075256348, "eval_arxiv_n_ngrams_match_1": 11.846, "eval_arxiv_n_ngrams_match_2": 2.19, "eval_arxiv_n_ngrams_match_3": 0.468, "eval_arxiv_num_pred_words": 25.296, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 22.940383005409256, "eval_arxiv_pred_num_tokens": 41.46875, "eval_arxiv_rouge_score": 0.34774279797636015, "eval_arxiv_runtime": 7.0058, "eval_arxiv_samples_per_second": 71.369, "eval_arxiv_steps_per_second": 0.143, "eval_arxiv_token_set_f1": 0.35113133933307145, "eval_arxiv_token_set_f1_sem": 0.004619234430328476, "eval_arxiv_token_set_precision": 0.2752955816735799, "eval_arxiv_token_set_recall": 0.5065531056992818, "eval_arxiv_true_num_tokens": 64.0, "step": 279 }, { "epoch": 1.23, "eval_python_code_alpaca_accuracy": 0.1686875, "eval_python_code_alpaca_bleu_score": 8.305728890617798, "eval_python_code_alpaca_bleu_score_sem": 0.2805659938677739, "eval_python_code_alpaca_emb_cos_sim": 0.8004173040390015, "eval_python_code_alpaca_emb_cos_sim_sem": 0.004346704512354936, "eval_python_code_alpaca_emb_top1_equal": 0.2199999988079071, "eval_python_code_alpaca_emb_top1_equal_sem": 0.01854420989980125, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.3644516468048096, "eval_python_code_alpaca_n_ngrams_match_1": 9.022, "eval_python_code_alpaca_n_ngrams_match_2": 2.61, "eval_python_code_alpaca_n_ngrams_match_3": 0.888, "eval_python_code_alpaca_num_pred_words": 22.57, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 10.638203724806774, "eval_python_code_alpaca_pred_num_tokens": 35.8203125, "eval_python_code_alpaca_rouge_score": 0.47919721432489615, "eval_python_code_alpaca_runtime": 6.8787, "eval_python_code_alpaca_samples_per_second": 72.689, "eval_python_code_alpaca_steps_per_second": 0.145, "eval_python_code_alpaca_token_set_f1": 0.5057425160250362, "eval_python_code_alpaca_token_set_f1_sem": 0.005917123864345868, "eval_python_code_alpaca_token_set_precision": 0.5222648491118239, "eval_python_code_alpaca_token_set_recall": 0.5106662898011187, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 279 }, { "epoch": 1.23, "eval_wikibio_accuracy": 0.35221875, "eval_wikibio_bleu_score": 6.087351841555892, "eval_wikibio_bleu_score_sem": 0.24588335047545587, "eval_wikibio_emb_cos_sim": 0.7169853448867798, "eval_wikibio_emb_cos_sim_sem": 0.0066086913254658555, "eval_wikibio_emb_top1_equal": 0.15600000321865082, "eval_wikibio_emb_top1_equal_sem": 0.01624363651663569, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.538841485977173, "eval_wikibio_n_ngrams_match_1": 8.42, "eval_wikibio_n_ngrams_match_2": 2.744, "eval_wikibio_n_ngrams_match_3": 1.036, "eval_wikibio_num_pred_words": 26.898, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 34.427011902691085, "eval_wikibio_pred_num_tokens": 47.8671875, "eval_wikibio_rouge_score": 0.3474562861357006, "eval_wikibio_runtime": 7.8621, "eval_wikibio_samples_per_second": 63.597, "eval_wikibio_steps_per_second": 0.127, "eval_wikibio_token_set_f1": 0.29907487029588065, "eval_wikibio_token_set_f1_sem": 0.0062630922566897064, "eval_wikibio_token_set_precision": 0.2846087659111068, "eval_wikibio_token_set_recall": 0.33428339505042515, "eval_wikibio_true_num_tokens": 61.1328125, "step": 279 }, { "epoch": 1.23, "eval_bias-bios_accuracy": 0.48678125, "eval_bias-bios_bleu_score": 15.07724608098222, "eval_bias-bios_bleu_score_sem": 0.731810536667803, "eval_bias-bios_emb_cos_sim": 0.8541937470436096, "eval_bias-bios_emb_cos_sim_sem": 0.003292646007841742, "eval_bias-bios_emb_top1_equal": 0.2540000081062317, "eval_bias-bios_emb_top1_equal_sem": 0.019486597059300604, "eval_bias-bios_exact_match": 0.0, "eval_bias-bios_exact_match_sem": 0.0, "eval_bias-bios_loss": 1.9884752035140991, "eval_bias-bios_n_ngrams_match_1": 18.086, "eval_bias-bios_n_ngrams_match_2": 7.742, "eval_bias-bios_n_ngrams_match_3": 4.148, "eval_bias-bios_num_pred_words": 30.046, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 7.304387562139313, "eval_bias-bios_pred_num_tokens": 39.796875, "eval_bias-bios_rouge_score": 0.5196057757221662, "eval_bias-bios_runtime": 7.1371, "eval_bias-bios_samples_per_second": 70.056, "eval_bias-bios_steps_per_second": 0.14, "eval_bias-bios_token_set_f1": 0.5309677565984916, "eval_bias-bios_token_set_f1_sem": 0.00656729720405154, "eval_bias-bios_token_set_precision": 0.4679258027534948, "eval_bias-bios_token_set_recall": 0.6306590632503222, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 279 }, { "epoch": 1.27, "learning_rate": 0.001, "loss": 2.2776, "step": 288 }, { "epoch": 1.32, "learning_rate": 0.001, "loss": 2.2042, "step": 300 }, { "epoch": 1.37, "eval_ag_news_accuracy": 0.31025, "eval_ag_news_bleu_score": 4.693319706150456, "eval_ag_news_bleu_score_sem": 0.15935382004813128, "eval_ag_news_emb_cos_sim": 0.8091204166412354, "eval_ag_news_emb_cos_sim_sem": 0.00475525047688145, "eval_ag_news_emb_top1_equal": 0.2680000066757202, "eval_ag_news_emb_top1_equal_sem": 0.019827715320059287, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.466686487197876, "eval_ag_news_n_ngrams_match_1": 13.084, "eval_ag_news_n_ngrams_match_2": 2.798, "eval_ag_news_n_ngrams_match_3": 0.768, "eval_ag_news_num_pred_words": 39.898, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.03043316309415, "eval_ag_news_pred_num_tokens": 61.5390625, "eval_ag_news_rouge_score": 0.3608276579045019, "eval_ag_news_runtime": 8.1216, "eval_ag_news_samples_per_second": 61.564, "eval_ag_news_steps_per_second": 0.123, "eval_ag_news_token_set_f1": 0.3483842640025073, "eval_ag_news_token_set_f1_sem": 0.005037946090827277, "eval_ag_news_token_set_precision": 0.31860558977002634, "eval_ag_news_token_set_recall": 0.414745433715474, "eval_ag_news_true_num_tokens": 56.09375, "step": 310 }, { "epoch": 1.37, "eval_anthropic_toxic_prompts_accuracy": 0.11003125, "eval_anthropic_toxic_prompts_bleu_score": 3.6853009424635483, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1363010756669311, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6960632801055908, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004544997885736092, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.13600000739097595, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015345322399934358, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 2.927776575088501, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.02, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.948, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.72, "eval_anthropic_toxic_prompts_num_pred_words": 40.326, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 18.68603727333281, "eval_anthropic_toxic_prompts_pred_num_tokens": 59.15625, "eval_anthropic_toxic_prompts_rouge_score": 0.24376940297754532, "eval_anthropic_toxic_prompts_runtime": 7.3659, "eval_anthropic_toxic_prompts_samples_per_second": 67.881, "eval_anthropic_toxic_prompts_steps_per_second": 0.136, "eval_anthropic_toxic_prompts_token_set_f1": 0.33859596445362705, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006132379569659659, "eval_anthropic_toxic_prompts_token_set_precision": 0.4414920439613121, "eval_anthropic_toxic_prompts_token_set_recall": 0.30141584905289376, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 310 }, { "epoch": 1.37, "eval_arxiv_accuracy": 0.4235, "eval_arxiv_bleu_score": 4.218543068302892, "eval_arxiv_bleu_score_sem": 0.13150743222363528, "eval_arxiv_emb_cos_sim": 0.7446539998054504, "eval_arxiv_emb_cos_sim_sem": 0.005929241023132271, "eval_arxiv_emb_top1_equal": 0.25600001215934753, "eval_arxiv_emb_top1_equal_sem": 0.019536923601457774, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 2.970463275909424, "eval_arxiv_n_ngrams_match_1": 14.362, "eval_arxiv_n_ngrams_match_2": 2.72, "eval_arxiv_n_ngrams_match_3": 0.63, "eval_arxiv_num_pred_words": 36.08, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 19.50095182485385, "eval_arxiv_pred_num_tokens": 62.0390625, "eval_arxiv_rouge_score": 0.3580856168544577, "eval_arxiv_runtime": 7.3514, "eval_arxiv_samples_per_second": 68.014, "eval_arxiv_steps_per_second": 0.136, "eval_arxiv_token_set_f1": 0.3597396879590065, "eval_arxiv_token_set_f1_sem": 0.004979600128843339, "eval_arxiv_token_set_precision": 0.30250953382052703, "eval_arxiv_token_set_recall": 0.47400110874266405, "eval_arxiv_true_num_tokens": 64.0, "step": 310 }, { "epoch": 1.37, "eval_python_code_alpaca_accuracy": 0.15559375, "eval_python_code_alpaca_bleu_score": 5.160248775704173, "eval_python_code_alpaca_bleu_score_sem": 0.16995230777879264, "eval_python_code_alpaca_emb_cos_sim": 0.7683752775192261, "eval_python_code_alpaca_emb_cos_sim_sem": 0.004745081210020772, "eval_python_code_alpaca_emb_top1_equal": 0.18400000035762787, "eval_python_code_alpaca_emb_top1_equal_sem": 0.017346174301986407, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.561814069747925, "eval_python_code_alpaca_n_ngrams_match_1": 9.544, "eval_python_code_alpaca_n_ngrams_match_2": 2.728, "eval_python_code_alpaca_n_ngrams_match_3": 0.898, "eval_python_code_alpaca_num_pred_words": 36.88, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 12.959305088187559, "eval_python_code_alpaca_pred_num_tokens": 59.703125, "eval_python_code_alpaca_rouge_score": 0.37555207515750494, "eval_python_code_alpaca_runtime": 7.0304, "eval_python_code_alpaca_samples_per_second": 71.12, "eval_python_code_alpaca_steps_per_second": 0.142, "eval_python_code_alpaca_token_set_f1": 0.4665177670255288, "eval_python_code_alpaca_token_set_f1_sem": 0.0058097335617844715, "eval_python_code_alpaca_token_set_precision": 0.5313029679560264, "eval_python_code_alpaca_token_set_recall": 0.4370481057831364, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 310 }, { "epoch": 1.37, "eval_wikibio_accuracy": 0.36859375, "eval_wikibio_bleu_score": 4.81258814988866, "eval_wikibio_bleu_score_sem": 0.22299112466528667, "eval_wikibio_emb_cos_sim": 0.6779038906097412, "eval_wikibio_emb_cos_sim_sem": 0.008168129739345551, "eval_wikibio_emb_top1_equal": 0.12399999797344208, "eval_wikibio_emb_top1_equal_sem": 0.014754096152018748, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.3508071899414062, "eval_wikibio_n_ngrams_match_1": 7.686, "eval_wikibio_n_ngrams_match_2": 2.39, "eval_wikibio_n_ngrams_match_3": 0.942, "eval_wikibio_num_pred_words": 30.312, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 28.525750051723897, "eval_wikibio_pred_num_tokens": 62.8125, "eval_wikibio_rouge_score": 0.2758010943062876, "eval_wikibio_runtime": 8.08, "eval_wikibio_samples_per_second": 61.881, "eval_wikibio_steps_per_second": 0.124, "eval_wikibio_token_set_f1": 0.252609745548634, "eval_wikibio_token_set_f1_sem": 0.0070662065239258855, "eval_wikibio_token_set_precision": 0.2461256238356199, "eval_wikibio_token_set_recall": 0.2928348544405085, "eval_wikibio_true_num_tokens": 61.1328125, "step": 310 }, { "epoch": 1.37, "eval_bias-bios_accuracy": 0.4968125, "eval_bias-bios_bleu_score": 16.103834432664616, "eval_bias-bios_bleu_score_sem": 0.6847879152877013, "eval_bias-bios_emb_cos_sim": 0.8709802627563477, "eval_bias-bios_emb_cos_sim_sem": 0.0028683356804806155, "eval_bias-bios_emb_top1_equal": 0.31200000643730164, "eval_bias-bios_emb_top1_equal_sem": 0.0207405942792578, "eval_bias-bios_exact_match": 0.0, "eval_bias-bios_exact_match_sem": 0.0, "eval_bias-bios_loss": 1.8962446451187134, "eval_bias-bios_n_ngrams_match_1": 21.612, "eval_bias-bios_n_ngrams_match_2": 9.308, "eval_bias-bios_n_ngrams_match_3": 4.906, "eval_bias-bios_num_pred_words": 45.066, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 6.660833621539512, "eval_bias-bios_pred_num_tokens": 60.7890625, "eval_bias-bios_rouge_score": 0.5095092930898859, "eval_bias-bios_runtime": 8.1234, "eval_bias-bios_samples_per_second": 61.55, "eval_bias-bios_steps_per_second": 0.123, "eval_bias-bios_token_set_f1": 0.5380991233090037, "eval_bias-bios_token_set_f1_sem": 0.006292962348520924, "eval_bias-bios_token_set_precision": 0.5172965980264513, "eval_bias-bios_token_set_recall": 0.5718470133513807, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 310 }, { "epoch": 1.37, "learning_rate": 0.001, "loss": 2.1261, "step": 312 }, { "epoch": 1.43, "learning_rate": 0.001, "loss": 1.9135, "step": 324 }, { "epoch": 1.48, "learning_rate": 0.001, "loss": 2.2064, "step": 336 }, { "epoch": 1.5, "eval_ag_news_accuracy": 0.30609375, "eval_ag_news_bleu_score": 4.719656086935481, "eval_ag_news_bleu_score_sem": 0.15740510117641943, "eval_ag_news_emb_cos_sim": 0.804071307182312, "eval_ag_news_emb_cos_sim_sem": 0.005197311039635828, "eval_ag_news_emb_top1_equal": 0.25600001215934753, "eval_ag_news_emb_top1_equal_sem": 0.019536923601457774, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.493675470352173, "eval_ag_news_n_ngrams_match_1": 13.14, "eval_ag_news_n_ngrams_match_2": 2.93, "eval_ag_news_n_ngrams_match_3": 0.822, "eval_ag_news_num_pred_words": 42.324, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.90667321059251, "eval_ag_news_pred_num_tokens": 62.5078125, "eval_ag_news_rouge_score": 0.3529883145442065, "eval_ag_news_runtime": 19.2463, "eval_ag_news_samples_per_second": 25.979, "eval_ag_news_steps_per_second": 0.052, "eval_ag_news_token_set_f1": 0.34624435301840834, "eval_ag_news_token_set_f1_sem": 0.005172152165157513, "eval_ag_news_token_set_precision": 0.31946456007584323, "eval_ag_news_token_set_recall": 0.4099159459965073, "eval_ag_news_true_num_tokens": 56.09375, "step": 341 }, { "epoch": 1.5, "eval_anthropic_toxic_prompts_accuracy": 0.10834375, "eval_anthropic_toxic_prompts_bleu_score": 3.655266926251835, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1304988346023646, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6967235207557678, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004537156687930315, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.12999999523162842, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015055009156667442, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.007629871368408, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.31, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.136, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.8, "eval_anthropic_toxic_prompts_num_pred_words": 43.484, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 20.23937311515898, "eval_anthropic_toxic_prompts_pred_num_tokens": 61.6015625, "eval_anthropic_toxic_prompts_rouge_score": 0.2394587760880788, "eval_anthropic_toxic_prompts_runtime": 8.2771, "eval_anthropic_toxic_prompts_samples_per_second": 60.408, "eval_anthropic_toxic_prompts_steps_per_second": 0.121, "eval_anthropic_toxic_prompts_token_set_f1": 0.3446042686958433, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005849076849865448, "eval_anthropic_toxic_prompts_token_set_precision": 0.4542920684406606, "eval_anthropic_toxic_prompts_token_set_recall": 0.3043220484487612, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 341 }, { "epoch": 1.5, "eval_arxiv_accuracy": 0.42690625, "eval_arxiv_bleu_score": 4.013162696446355, "eval_arxiv_bleu_score_sem": 0.12361634310105885, "eval_arxiv_emb_cos_sim": 0.7171130180358887, "eval_arxiv_emb_cos_sim_sem": 0.006713028936416672, "eval_arxiv_emb_top1_equal": 0.20000000298023224, "eval_arxiv_emb_top1_equal_sem": 0.017906459589198134, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 2.9682939052581787, "eval_arxiv_n_ngrams_match_1": 13.592, "eval_arxiv_n_ngrams_match_2": 2.658, "eval_arxiv_n_ngrams_match_3": 0.58, "eval_arxiv_num_pred_words": 37.424, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 19.4586928865171, "eval_arxiv_pred_num_tokens": 62.859375, "eval_arxiv_rouge_score": 0.3330393479935816, "eval_arxiv_runtime": 11.536, "eval_arxiv_samples_per_second": 43.343, "eval_arxiv_steps_per_second": 0.087, "eval_arxiv_token_set_f1": 0.3419093272156702, "eval_arxiv_token_set_f1_sem": 0.00535918406194554, "eval_arxiv_token_set_precision": 0.28198100534330306, "eval_arxiv_token_set_recall": 0.4941405864834983, "eval_arxiv_true_num_tokens": 64.0, "step": 341 }, { "epoch": 1.5, "eval_python_code_alpaca_accuracy": 0.15521875, "eval_python_code_alpaca_bleu_score": 5.032228005406816, "eval_python_code_alpaca_bleu_score_sem": 0.16508121574472945, "eval_python_code_alpaca_emb_cos_sim": 0.7669004201889038, "eval_python_code_alpaca_emb_cos_sim_sem": 0.004512636826232725, "eval_python_code_alpaca_emb_top1_equal": 0.18799999356269836, "eval_python_code_alpaca_emb_top1_equal_sem": 0.017490679184236527, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.653953790664673, "eval_python_code_alpaca_n_ngrams_match_1": 9.894, "eval_python_code_alpaca_n_ngrams_match_2": 2.998, "eval_python_code_alpaca_n_ngrams_match_3": 0.95, "eval_python_code_alpaca_num_pred_words": 39.462, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 14.210111528413647, "eval_python_code_alpaca_pred_num_tokens": 61.59375, "eval_python_code_alpaca_rouge_score": 0.37453041601046916, "eval_python_code_alpaca_runtime": 11.8851, "eval_python_code_alpaca_samples_per_second": 42.07, "eval_python_code_alpaca_steps_per_second": 0.084, "eval_python_code_alpaca_token_set_f1": 0.4676837883490022, "eval_python_code_alpaca_token_set_f1_sem": 0.005546046178802121, "eval_python_code_alpaca_token_set_precision": 0.5459422436754255, "eval_python_code_alpaca_token_set_recall": 0.4287897598195947, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 341 }, { "epoch": 1.5, "eval_wikibio_accuracy": 0.37015625, "eval_wikibio_bleu_score": 4.113717211494571, "eval_wikibio_bleu_score_sem": 0.2014863036543364, "eval_wikibio_emb_cos_sim": 0.6409357190132141, "eval_wikibio_emb_cos_sim_sem": 0.008766130611566907, "eval_wikibio_emb_top1_equal": 0.16200000047683716, "eval_wikibio_emb_top1_equal_sem": 0.016494123019099097, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.347160577774048, "eval_wikibio_n_ngrams_match_1": 6.912, "eval_wikibio_n_ngrams_match_2": 2.12, "eval_wikibio_n_ngrams_match_3": 0.782, "eval_wikibio_num_pred_words": 28.338, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 28.421917138746423, "eval_wikibio_pred_num_tokens": 62.96875, "eval_wikibio_rouge_score": 0.24506871939674832, "eval_wikibio_runtime": 8.7393, "eval_wikibio_samples_per_second": 57.213, "eval_wikibio_steps_per_second": 0.114, "eval_wikibio_token_set_f1": 0.225897167864661, "eval_wikibio_token_set_f1_sem": 0.0075924411709069875, "eval_wikibio_token_set_precision": 0.2193463805245025, "eval_wikibio_token_set_recall": 0.2687629784354538, "eval_wikibio_true_num_tokens": 61.1328125, "step": 341 }, { "epoch": 1.5, "eval_bias-bios_accuracy": 0.49153125, "eval_bias-bios_bleu_score": 15.395719862226972, "eval_bias-bios_bleu_score_sem": 0.6157561000113725, "eval_bias-bios_emb_cos_sim": 0.8640764951705933, "eval_bias-bios_emb_cos_sim_sem": 0.0031932264302743985, "eval_bias-bios_emb_top1_equal": 0.2980000078678131, "eval_bias-bios_emb_top1_equal_sem": 0.020475119103777986, "eval_bias-bios_exact_match": 0.0, "eval_bias-bios_exact_match_sem": 0.0, "eval_bias-bios_loss": 1.94288969039917, "eval_bias-bios_n_ngrams_match_1": 21.756, "eval_bias-bios_n_ngrams_match_2": 9.39, "eval_bias-bios_n_ngrams_match_3": 4.956, "eval_bias-bios_num_pred_words": 47.264, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 6.978888688379133, "eval_bias-bios_pred_num_tokens": 62.4921875, "eval_bias-bios_rouge_score": 0.49805101735783913, "eval_bias-bios_runtime": 8.6552, "eval_bias-bios_samples_per_second": 57.769, "eval_bias-bios_steps_per_second": 0.116, "eval_bias-bios_token_set_f1": 0.5376376099546539, "eval_bias-bios_token_set_f1_sem": 0.006270653343759874, "eval_bias-bios_token_set_precision": 0.5173410887209161, "eval_bias-bios_token_set_recall": 0.5767708356294744, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 341 }, { "epoch": 1.53, "learning_rate": 0.001, "loss": 2.2102, "step": 348 }, { "epoch": 1.59, "learning_rate": 0.001, "loss": 2.1441, "step": 360 }, { "epoch": 1.64, "learning_rate": 0.001, "loss": 1.9089, "step": 372 }, { "epoch": 1.64, "eval_ag_news_accuracy": 0.30925, "eval_ag_news_bleu_score": 4.688042448833678, "eval_ag_news_bleu_score_sem": 0.16248013966675176, "eval_ag_news_emb_cos_sim": 0.7975764870643616, "eval_ag_news_emb_cos_sim_sem": 0.005275002305890863, "eval_ag_news_emb_top1_equal": 0.257999986410141, "eval_ag_news_emb_top1_equal_sem": 0.019586711692263472, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.494887113571167, "eval_ag_news_n_ngrams_match_1": 11.944, "eval_ag_news_n_ngrams_match_2": 2.586, "eval_ag_news_n_ngrams_match_3": 0.704, "eval_ag_news_num_pred_words": 32.084, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.94656852260913, "eval_ag_news_pred_num_tokens": 48.0078125, "eval_ag_news_rouge_score": 0.36139502901690423, "eval_ag_news_runtime": 7.5346, "eval_ag_news_samples_per_second": 66.361, "eval_ag_news_steps_per_second": 0.133, "eval_ag_news_token_set_f1": 0.34298495738014895, "eval_ag_news_token_set_f1_sem": 0.004847096265450112, "eval_ag_news_token_set_precision": 0.29882186799241595, "eval_ag_news_token_set_recall": 0.42795943849577756, "eval_ag_news_true_num_tokens": 56.09375, "step": 372 }, { "epoch": 1.64, "eval_anthropic_toxic_prompts_accuracy": 0.111375, "eval_anthropic_toxic_prompts_bleu_score": 5.018590065002315, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.18062605094910428, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.7002917528152466, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.005111374737061413, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1420000046491623, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015625630310786714, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 2.8211557865142822, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.738, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.876, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.718, "eval_anthropic_toxic_prompts_num_pred_words": 29.236, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 16.796252339831998, "eval_anthropic_toxic_prompts_pred_num_tokens": 43.2734375, "eval_anthropic_toxic_prompts_rouge_score": 0.2933406224694952, "eval_anthropic_toxic_prompts_runtime": 8.6745, "eval_anthropic_toxic_prompts_samples_per_second": 57.64, "eval_anthropic_toxic_prompts_steps_per_second": 0.115, "eval_anthropic_toxic_prompts_token_set_f1": 0.35145152234689003, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0065480762736281565, "eval_anthropic_toxic_prompts_token_set_precision": 0.4267148235801868, "eval_anthropic_toxic_prompts_token_set_recall": 0.32579150192546075, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 372 }, { "epoch": 1.64, "eval_arxiv_accuracy": 0.415125, "eval_arxiv_bleu_score": 3.8688483882338414, "eval_arxiv_bleu_score_sem": 0.12425377464639385, "eval_arxiv_emb_cos_sim": 0.7446158528327942, "eval_arxiv_emb_cos_sim_sem": 0.00546308976040103, "eval_arxiv_emb_top1_equal": 0.1940000057220459, "eval_arxiv_emb_top1_equal_sem": 0.017701828083634023, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.0469841957092285, "eval_arxiv_n_ngrams_match_1": 13.512, "eval_arxiv_n_ngrams_match_2": 2.482, "eval_arxiv_n_ngrams_match_3": 0.548, "eval_arxiv_num_pred_words": 31.494, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 21.051760602600126, "eval_arxiv_pred_num_tokens": 53.2578125, "eval_arxiv_rouge_score": 0.359901260786132, "eval_arxiv_runtime": 7.5017, "eval_arxiv_samples_per_second": 66.652, "eval_arxiv_steps_per_second": 0.133, "eval_arxiv_token_set_f1": 0.35944233136816656, "eval_arxiv_token_set_f1_sem": 0.004761192847849154, "eval_arxiv_token_set_precision": 0.2945148751601931, "eval_arxiv_token_set_recall": 0.4859506595277603, "eval_arxiv_true_num_tokens": 64.0, "step": 372 }, { "epoch": 1.64, "eval_python_code_alpaca_accuracy": 0.16, "eval_python_code_alpaca_bleu_score": 6.502313122815586, "eval_python_code_alpaca_bleu_score_sem": 0.2160554941968861, "eval_python_code_alpaca_emb_cos_sim": 0.7811670303344727, "eval_python_code_alpaca_emb_cos_sim_sem": 0.004759169576383917, "eval_python_code_alpaca_emb_top1_equal": 0.20399999618530273, "eval_python_code_alpaca_emb_top1_equal_sem": 0.018039369108186407, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.4665610790252686, "eval_python_code_alpaca_n_ngrams_match_1": 9.412, "eval_python_code_alpaca_n_ngrams_match_2": 2.678, "eval_python_code_alpaca_n_ngrams_match_3": 0.86, "eval_python_code_alpaca_num_pred_words": 28.818, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 11.78186021830665, "eval_python_code_alpaca_pred_num_tokens": 46.53125, "eval_python_code_alpaca_rouge_score": 0.43334761445749104, "eval_python_code_alpaca_runtime": 26.2085, "eval_python_code_alpaca_samples_per_second": 19.078, "eval_python_code_alpaca_steps_per_second": 0.038, "eval_python_code_alpaca_token_set_f1": 0.48901485926774935, "eval_python_code_alpaca_token_set_f1_sem": 0.00599859770187912, "eval_python_code_alpaca_token_set_precision": 0.5291827441521135, "eval_python_code_alpaca_token_set_recall": 0.4760439727772603, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 372 }, { "epoch": 1.64, "eval_wikibio_accuracy": 0.357375, "eval_wikibio_bleu_score": 5.612141235113202, "eval_wikibio_bleu_score_sem": 0.23968984385259356, "eval_wikibio_emb_cos_sim": 0.705348789691925, "eval_wikibio_emb_cos_sim_sem": 0.0076665762998554595, "eval_wikibio_emb_top1_equal": 0.17599999904632568, "eval_wikibio_emb_top1_equal_sem": 0.017047853594066943, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.4384877681732178, "eval_wikibio_n_ngrams_match_1": 8.348, "eval_wikibio_n_ngrams_match_2": 2.648, "eval_wikibio_n_ngrams_match_3": 0.996, "eval_wikibio_num_pred_words": 28.77, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 31.139831899489486, "eval_wikibio_pred_num_tokens": 55.4453125, "eval_wikibio_rouge_score": 0.3224364790302755, "eval_wikibio_runtime": 171.104, "eval_wikibio_samples_per_second": 2.922, "eval_wikibio_steps_per_second": 0.006, "eval_wikibio_token_set_f1": 0.28131254068170125, "eval_wikibio_token_set_f1_sem": 0.006813958761638849, "eval_wikibio_token_set_precision": 0.2743595630282444, "eval_wikibio_token_set_recall": 0.31132292902546804, "eval_wikibio_true_num_tokens": 61.1328125, "step": 372 }, { "epoch": 1.64, "eval_bias-bios_accuracy": 0.500125, "eval_bias-bios_bleu_score": 16.492712649226394, "eval_bias-bios_bleu_score_sem": 0.7277856036871401, "eval_bias-bios_emb_cos_sim": 0.8637259006500244, "eval_bias-bios_emb_cos_sim_sem": 0.0030929461968649467, "eval_bias-bios_emb_top1_equal": 0.2759999930858612, "eval_bias-bios_emb_top1_equal_sem": 0.02001121794127971, "eval_bias-bios_exact_match": 0.0, "eval_bias-bios_exact_match_sem": 0.0, "eval_bias-bios_loss": 1.886657953262329, "eval_bias-bios_n_ngrams_match_1": 20.186, "eval_bias-bios_n_ngrams_match_2": 8.578, "eval_bias-bios_n_ngrams_match_3": 4.562, "eval_bias-bios_num_pred_words": 36.648, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 6.597283367169158, "eval_bias-bios_pred_num_tokens": 48.828125, "eval_bias-bios_rouge_score": 0.5275492861718887, "eval_bias-bios_runtime": 38.8094, "eval_bias-bios_samples_per_second": 12.883, "eval_bias-bios_steps_per_second": 0.026, "eval_bias-bios_token_set_f1": 0.5402486356387385, "eval_bias-bios_token_set_f1_sem": 0.0064675749666265585, "eval_bias-bios_token_set_precision": 0.4971851498629837, "eval_bias-bios_token_set_recall": 0.6049556338933638, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 372 }, { "epoch": 1.69, "learning_rate": 0.001, "loss": 2.135, "step": 384 }, { "epoch": 1.74, "learning_rate": 0.001, "loss": 2.1987, "step": 396 }, { "epoch": 1.78, "eval_ag_news_accuracy": 0.30578125, "eval_ag_news_bleu_score": 4.625886016349093, "eval_ag_news_bleu_score_sem": 0.14406997539717725, "eval_ag_news_emb_cos_sim": 0.8163206577301025, "eval_ag_news_emb_cos_sim_sem": 0.004300577290979063, "eval_ag_news_emb_top1_equal": 0.25600001215934753, "eval_ag_news_emb_top1_equal_sem": 0.019536923601457774, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4820914268493652, "eval_ag_news_n_ngrams_match_1": 13.566, "eval_ag_news_n_ngrams_match_2": 2.89, "eval_ag_news_n_ngrams_match_3": 0.756, "eval_ag_news_num_pred_words": 42.24, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.52768025014022, "eval_ag_news_pred_num_tokens": 62.8984375, "eval_ag_news_rouge_score": 0.36302604683503525, "eval_ag_news_runtime": 18.2763, "eval_ag_news_samples_per_second": 27.358, "eval_ag_news_steps_per_second": 0.055, "eval_ag_news_token_set_f1": 0.3577522859271455, "eval_ag_news_token_set_f1_sem": 0.004878435934676312, "eval_ag_news_token_set_precision": 0.3290160449978668, "eval_ag_news_token_set_recall": 0.4168787106703573, "eval_ag_news_true_num_tokens": 56.09375, "step": 403 }, { "epoch": 1.78, "eval_anthropic_toxic_prompts_accuracy": 0.1088125, "eval_anthropic_toxic_prompts_bleu_score": 3.650061382857713, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1321186844449668, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6943516731262207, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.0044313876493768285, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.12800000607967377, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.014955914115991394, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.054664134979248, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.188, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.1, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.83, "eval_anthropic_toxic_prompts_num_pred_words": 43.168, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 21.214059269791896, "eval_anthropic_toxic_prompts_pred_num_tokens": 62.828125, "eval_anthropic_toxic_prompts_rouge_score": 0.23684138554996986, "eval_anthropic_toxic_prompts_runtime": 8.2401, "eval_anthropic_toxic_prompts_samples_per_second": 60.679, "eval_anthropic_toxic_prompts_steps_per_second": 0.121, "eval_anthropic_toxic_prompts_token_set_f1": 0.34346371837251183, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006149349002418812, "eval_anthropic_toxic_prompts_token_set_precision": 0.45335623185190155, "eval_anthropic_toxic_prompts_token_set_recall": 0.30478903824776704, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 403 }, { "epoch": 1.78, "eval_arxiv_accuracy": 0.42478125, "eval_arxiv_bleu_score": 4.28136821937714, "eval_arxiv_bleu_score_sem": 0.13025702484443386, "eval_arxiv_emb_cos_sim": 0.7463322877883911, "eval_arxiv_emb_cos_sim_sem": 0.005858995112971661, "eval_arxiv_emb_top1_equal": 0.23399999737739563, "eval_arxiv_emb_top1_equal_sem": 0.01895274120352364, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 2.958116292953491, "eval_arxiv_n_ngrams_match_1": 14.482, "eval_arxiv_n_ngrams_match_2": 2.812, "eval_arxiv_n_ngrams_match_3": 0.646, "eval_arxiv_num_pred_words": 37.338, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 19.261654246636216, "eval_arxiv_pred_num_tokens": 62.984375, "eval_arxiv_rouge_score": 0.3559682047719085, "eval_arxiv_runtime": 8.7138, "eval_arxiv_samples_per_second": 57.38, "eval_arxiv_steps_per_second": 0.115, "eval_arxiv_token_set_f1": 0.3626915967922936, "eval_arxiv_token_set_f1_sem": 0.0051081697883503824, "eval_arxiv_token_set_precision": 0.3045233229125568, "eval_arxiv_token_set_recall": 0.48386160444507925, "eval_arxiv_true_num_tokens": 64.0, "step": 403 }, { "epoch": 1.78, "eval_python_code_alpaca_accuracy": 0.15565625, "eval_python_code_alpaca_bleu_score": 5.16488546776206, "eval_python_code_alpaca_bleu_score_sem": 0.17072322807655255, "eval_python_code_alpaca_emb_cos_sim": 0.774861752986908, "eval_python_code_alpaca_emb_cos_sim_sem": 0.004186832108581838, "eval_python_code_alpaca_emb_top1_equal": 0.1860000044107437, "eval_python_code_alpaca_emb_top1_equal_sem": 0.017418806591218323, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.666151762008667, "eval_python_code_alpaca_n_ngrams_match_1": 9.89, "eval_python_code_alpaca_n_ngrams_match_2": 3.0, "eval_python_code_alpaca_n_ngrams_match_3": 1.044, "eval_python_code_alpaca_num_pred_words": 39.832, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 14.384507538028931, "eval_python_code_alpaca_pred_num_tokens": 62.6953125, "eval_python_code_alpaca_rouge_score": 0.3792787066426493, "eval_python_code_alpaca_runtime": 7.7257, "eval_python_code_alpaca_samples_per_second": 64.719, "eval_python_code_alpaca_steps_per_second": 0.129, "eval_python_code_alpaca_token_set_f1": 0.46963115529803334, "eval_python_code_alpaca_token_set_f1_sem": 0.005420621063290702, "eval_python_code_alpaca_token_set_precision": 0.5468159527558938, "eval_python_code_alpaca_token_set_recall": 0.4311605987905754, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 403 }, { "epoch": 1.78, "eval_wikibio_accuracy": 0.36740625, "eval_wikibio_bleu_score": 4.623217624073544, "eval_wikibio_bleu_score_sem": 0.2091302090800041, "eval_wikibio_emb_cos_sim": 0.6995702385902405, "eval_wikibio_emb_cos_sim_sem": 0.006969743615032104, "eval_wikibio_emb_top1_equal": 0.15199999511241913, "eval_wikibio_emb_top1_equal_sem": 0.01607198249074835, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.3406922817230225, "eval_wikibio_n_ngrams_match_1": 7.92, "eval_wikibio_n_ngrams_match_2": 2.5, "eval_wikibio_n_ngrams_match_3": 0.946, "eval_wikibio_num_pred_words": 30.96, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 28.23866905469156, "eval_wikibio_pred_num_tokens": 62.984375, "eval_wikibio_rouge_score": 0.27730911242226863, "eval_wikibio_runtime": 169.5553, "eval_wikibio_samples_per_second": 2.949, "eval_wikibio_steps_per_second": 0.006, "eval_wikibio_token_set_f1": 0.2573279276017233, "eval_wikibio_token_set_f1_sem": 0.007053153351734212, "eval_wikibio_token_set_precision": 0.2515329644167605, "eval_wikibio_token_set_recall": 0.2959943478405537, "eval_wikibio_true_num_tokens": 61.1328125, "step": 403 }, { "epoch": 1.78, "eval_bias-bios_accuracy": 0.49528125, "eval_bias-bios_bleu_score": 15.562289011586145, "eval_bias-bios_bleu_score_sem": 0.6052003609209405, "eval_bias-bios_emb_cos_sim": 0.8724682331085205, "eval_bias-bios_emb_cos_sim_sem": 0.0028584383048964224, "eval_bias-bios_emb_top1_equal": 0.30000001192092896, "eval_bias-bios_emb_top1_equal_sem": 0.020514426052435274, "eval_bias-bios_exact_match": 0.0, "eval_bias-bios_exact_match_sem": 0.0, "eval_bias-bios_loss": 1.929211139678955, "eval_bias-bios_n_ngrams_match_1": 21.876, "eval_bias-bios_n_ngrams_match_2": 9.522, "eval_bias-bios_n_ngrams_match_3": 5.054, "eval_bias-bios_num_pred_words": 46.974, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 6.884077523429902, "eval_bias-bios_pred_num_tokens": 62.8671875, "eval_bias-bios_rouge_score": 0.5015844383406751, "eval_bias-bios_runtime": 43.9646, "eval_bias-bios_samples_per_second": 11.373, "eval_bias-bios_steps_per_second": 0.023, "eval_bias-bios_token_set_f1": 0.5400765163098435, "eval_bias-bios_token_set_f1_sem": 0.006176592482609455, "eval_bias-bios_token_set_precision": 0.5211310372476144, "eval_bias-bios_token_set_recall": 0.5756587114679573, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 403 }, { "epoch": 1.8, "learning_rate": 0.001, "loss": 2.1609, "step": 408 }, { "epoch": 1.85, "learning_rate": 0.001, "loss": 1.9403, "step": 420 }, { "epoch": 1.9, "learning_rate": 0.001, "loss": 2.0764, "step": 432 }, { "epoch": 1.91, "eval_ag_news_accuracy": 0.3091875, "eval_ag_news_bleu_score": 4.55914803176902, "eval_ag_news_bleu_score_sem": 0.16542642739054694, "eval_ag_news_emb_cos_sim": 0.8069360852241516, "eval_ag_news_emb_cos_sim_sem": 0.004155177433277181, "eval_ag_news_emb_top1_equal": 0.28200000524520874, "eval_ag_news_emb_top1_equal_sem": 0.020143573015312013, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.504622220993042, "eval_ag_news_n_ngrams_match_1": 11.554, "eval_ag_news_n_ngrams_match_2": 2.572, "eval_ag_news_n_ngrams_match_3": 0.704, "eval_ag_news_num_pred_words": 30.168, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.268873196209576, "eval_ag_news_pred_num_tokens": 43.34375, "eval_ag_news_rouge_score": 0.3611896576013237, "eval_ag_news_runtime": 65.9447, "eval_ag_news_samples_per_second": 7.582, "eval_ag_news_steps_per_second": 0.015, "eval_ag_news_token_set_f1": 0.34635594938922654, "eval_ag_news_token_set_f1_sem": 0.00483248319105664, "eval_ag_news_token_set_precision": 0.29541295491834085, "eval_ag_news_token_set_recall": 0.4391054759993797, "eval_ag_news_true_num_tokens": 56.09375, "step": 434 }, { "epoch": 1.91, "eval_anthropic_toxic_prompts_accuracy": 0.11271875, "eval_anthropic_toxic_prompts_bleu_score": 5.6427665524664095, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1959700760747935, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.7147530913352966, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00433352245024674, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1459999978542328, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015807205702664997, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 2.8229117393493652, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.834, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.922, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.752, "eval_anthropic_toxic_prompts_num_pred_words": 27.112, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 16.82577167644338, "eval_anthropic_toxic_prompts_pred_num_tokens": 37.3125, "eval_anthropic_toxic_prompts_rouge_score": 0.31049158171250457, "eval_anthropic_toxic_prompts_runtime": 188.6449, "eval_anthropic_toxic_prompts_samples_per_second": 2.65, "eval_anthropic_toxic_prompts_steps_per_second": 0.005, "eval_anthropic_toxic_prompts_token_set_f1": 0.36958399302570577, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006043389481903698, "eval_anthropic_toxic_prompts_token_set_precision": 0.43776933009113417, "eval_anthropic_toxic_prompts_token_set_recall": 0.34707368376528736, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 434 }, { "epoch": 1.91, "eval_arxiv_accuracy": 0.40725, "eval_arxiv_bleu_score": 3.6418646525368423, "eval_arxiv_bleu_score_sem": 0.11629012868979755, "eval_arxiv_emb_cos_sim": 0.7531729340553284, "eval_arxiv_emb_cos_sim_sem": 0.00491539443873094, "eval_arxiv_emb_top1_equal": 0.15199999511241913, "eval_arxiv_emb_top1_equal_sem": 0.01607198249074835, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.079040288925171, "eval_arxiv_n_ngrams_match_1": 13.058, "eval_arxiv_n_ngrams_match_2": 2.494, "eval_arxiv_n_ngrams_match_3": 0.576, "eval_arxiv_num_pred_words": 28.4, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 21.73753063347976, "eval_arxiv_pred_num_tokens": 45.5703125, "eval_arxiv_rouge_score": 0.3647748498973116, "eval_arxiv_runtime": 56.9032, "eval_arxiv_samples_per_second": 8.787, "eval_arxiv_steps_per_second": 0.018, "eval_arxiv_token_set_f1": 0.3648733612451334, "eval_arxiv_token_set_f1_sem": 0.0045413721621260245, "eval_arxiv_token_set_precision": 0.2944474299834195, "eval_arxiv_token_set_recall": 0.4991810318663169, "eval_arxiv_true_num_tokens": 64.0, "step": 434 }, { "epoch": 1.91, "eval_python_code_alpaca_accuracy": 0.1664375, "eval_python_code_alpaca_bleu_score": 8.43929602287766, "eval_python_code_alpaca_bleu_score_sem": 0.26301217317333, "eval_python_code_alpaca_emb_cos_sim": 0.8045483231544495, "eval_python_code_alpaca_emb_cos_sim_sem": 0.004009448707082787, "eval_python_code_alpaca_emb_top1_equal": 0.20200000703334808, "eval_python_code_alpaca_emb_top1_equal_sem": 0.017973259543989376, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.4012229442596436, "eval_python_code_alpaca_n_ngrams_match_1": 9.258, "eval_python_code_alpaca_n_ngrams_match_2": 2.752, "eval_python_code_alpaca_n_ngrams_match_3": 0.984, "eval_python_code_alpaca_num_pred_words": 23.718, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 11.036665357371508, "eval_python_code_alpaca_pred_num_tokens": 36.3515625, "eval_python_code_alpaca_rouge_score": 0.4748925266615377, "eval_python_code_alpaca_runtime": 7.87, "eval_python_code_alpaca_samples_per_second": 63.532, "eval_python_code_alpaca_steps_per_second": 0.127, "eval_python_code_alpaca_token_set_f1": 0.5204629328352092, "eval_python_code_alpaca_token_set_f1_sem": 0.005860754538010466, "eval_python_code_alpaca_token_set_precision": 0.5424322102606044, "eval_python_code_alpaca_token_set_recall": 0.5191744318018764, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 434 }, { "epoch": 1.91, "eval_wikibio_accuracy": 0.35740625, "eval_wikibio_bleu_score": 5.9326789113657705, "eval_wikibio_bleu_score_sem": 0.22446540672582094, "eval_wikibio_emb_cos_sim": 0.7443342208862305, "eval_wikibio_emb_cos_sim_sem": 0.005850383889749271, "eval_wikibio_emb_top1_equal": 0.1720000058412552, "eval_wikibio_emb_top1_equal_sem": 0.01689386850274998, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.4056010246276855, "eval_wikibio_n_ngrams_match_1": 9.216, "eval_wikibio_n_ngrams_match_2": 2.928, "eval_wikibio_n_ngrams_match_3": 1.076, "eval_wikibio_num_pred_words": 31.292, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 30.132400597485695, "eval_wikibio_pred_num_tokens": 53.8203125, "eval_wikibio_rouge_score": 0.357777507241817, "eval_wikibio_runtime": 8.583, "eval_wikibio_samples_per_second": 58.254, "eval_wikibio_steps_per_second": 0.117, "eval_wikibio_token_set_f1": 0.30733041663159916, "eval_wikibio_token_set_f1_sem": 0.0060036693992158545, "eval_wikibio_token_set_precision": 0.3026905663203406, "eval_wikibio_token_set_recall": 0.3309501986443075, "eval_wikibio_true_num_tokens": 61.1328125, "step": 434 }, { "epoch": 1.91, "eval_bias-bios_accuracy": 0.48434375, "eval_bias-bios_bleu_score": 16.377499841333293, "eval_bias-bios_bleu_score_sem": 0.7691216166778587, "eval_bias-bios_emb_cos_sim": 0.8604341149330139, "eval_bias-bios_emb_cos_sim_sem": 0.003229498259277889, "eval_bias-bios_emb_top1_equal": 0.28200000524520874, "eval_bias-bios_emb_top1_equal_sem": 0.020143573015312013, "eval_bias-bios_exact_match": 0.0, "eval_bias-bios_exact_match_sem": 0.0, "eval_bias-bios_loss": 1.9417322874069214, "eval_bias-bios_n_ngrams_match_1": 18.712, "eval_bias-bios_n_ngrams_match_2": 8.296, "eval_bias-bios_n_ngrams_match_3": 4.604, "eval_bias-bios_num_pred_words": 32.288, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 6.970815974321508, "eval_bias-bios_pred_num_tokens": 42.203125, "eval_bias-bios_rouge_score": 0.5305062820350761, "eval_bias-bios_runtime": 55.3521, "eval_bias-bios_samples_per_second": 9.033, "eval_bias-bios_steps_per_second": 0.018, "eval_bias-bios_token_set_f1": 0.5359290413191452, "eval_bias-bios_token_set_f1_sem": 0.006735846184355515, "eval_bias-bios_token_set_precision": 0.4766454705041298, "eval_bias-bios_token_set_recall": 0.627384497190948, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 434 }, { "epoch": 1.96, "learning_rate": 0.001, "loss": 2.2159, "step": 444 }, { "epoch": 2.01, "learning_rate": 0.001, "loss": 1.9549, "step": 456 }, { "epoch": 2.05, "eval_ag_news_accuracy": 0.30590625, "eval_ag_news_bleu_score": 4.822678096872545, "eval_ag_news_bleu_score_sem": 0.16083087908591537, "eval_ag_news_emb_cos_sim": 0.8099291920661926, "eval_ag_news_emb_cos_sim_sem": 0.00472055568392159, "eval_ag_news_emb_top1_equal": 0.28200000524520874, "eval_ag_news_emb_top1_equal_sem": 0.020143573015312013, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4794719219207764, "eval_ag_news_n_ngrams_match_1": 13.164, "eval_ag_news_n_ngrams_match_2": 2.948, "eval_ag_news_n_ngrams_match_3": 0.802, "eval_ag_news_num_pred_words": 39.948, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.442585333345114, "eval_ag_news_pred_num_tokens": 60.078125, "eval_ag_news_rouge_score": 0.36153641474600484, "eval_ag_news_runtime": 46.6624, "eval_ag_news_samples_per_second": 10.715, "eval_ag_news_steps_per_second": 0.021, "eval_ag_news_token_set_f1": 0.3532317796867009, "eval_ag_news_token_set_f1_sem": 0.005142214646342723, "eval_ag_news_token_set_precision": 0.32060392153292905, "eval_ag_news_token_set_recall": 0.4227661607319599, "eval_ag_news_true_num_tokens": 56.09375, "step": 465 }, { "epoch": 2.05, "eval_anthropic_toxic_prompts_accuracy": 0.10884375, "eval_anthropic_toxic_prompts_bleu_score": 3.959874623385117, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.13816720459798965, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.699434220790863, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004483873661341469, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.13600000739097595, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015345323732734733, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 2.9180328845977783, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.084, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.054, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.8, "eval_anthropic_toxic_prompts_num_pred_words": 38.804, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 18.504850457251738, "eval_anthropic_toxic_prompts_pred_num_tokens": 56.421875, "eval_anthropic_toxic_prompts_rouge_score": 0.25094133174243116, "eval_anthropic_toxic_prompts_runtime": 7.2708, "eval_anthropic_toxic_prompts_samples_per_second": 68.768, "eval_anthropic_toxic_prompts_steps_per_second": 0.138, "eval_anthropic_toxic_prompts_token_set_f1": 0.3514441073423798, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0060101823762325176, "eval_anthropic_toxic_prompts_token_set_precision": 0.44442348087365346, "eval_anthropic_toxic_prompts_token_set_recall": 0.3166909629988077, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 465 }, { "epoch": 2.05, "eval_arxiv_accuracy": 0.4220625, "eval_arxiv_bleu_score": 4.262612265285608, "eval_arxiv_bleu_score_sem": 0.1264903806145487, "eval_arxiv_emb_cos_sim": 0.7446135878562927, "eval_arxiv_emb_cos_sim_sem": 0.006035515859428042, "eval_arxiv_emb_top1_equal": 0.2240000069141388, "eval_arxiv_emb_top1_equal_sem": 0.01866399400069726, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 2.983703374862671, "eval_arxiv_n_ngrams_match_1": 14.304, "eval_arxiv_n_ngrams_match_2": 2.81, "eval_arxiv_n_ngrams_match_3": 0.616, "eval_arxiv_num_pred_words": 36.052, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 19.760863185901616, "eval_arxiv_pred_num_tokens": 61.8125, "eval_arxiv_rouge_score": 0.3556709302874719, "eval_arxiv_runtime": 95.7762, "eval_arxiv_samples_per_second": 5.221, "eval_arxiv_steps_per_second": 0.01, "eval_arxiv_token_set_f1": 0.35673938740964367, "eval_arxiv_token_set_f1_sem": 0.005106620682017066, "eval_arxiv_token_set_precision": 0.29837823430756255, "eval_arxiv_token_set_recall": 0.4749938707041415, "eval_arxiv_true_num_tokens": 64.0, "step": 465 }, { "epoch": 2.05, "eval_python_code_alpaca_accuracy": 0.15734375, "eval_python_code_alpaca_bleu_score": 6.366092347760931, "eval_python_code_alpaca_bleu_score_sem": 0.2046752995963428, "eval_python_code_alpaca_emb_cos_sim": 0.7874159216880798, "eval_python_code_alpaca_emb_cos_sim_sem": 0.00428374135664301, "eval_python_code_alpaca_emb_top1_equal": 0.21199999749660492, "eval_python_code_alpaca_emb_top1_equal_sem": 0.01829703673906991, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.538551092147827, "eval_python_code_alpaca_n_ngrams_match_1": 9.946, "eval_python_code_alpaca_n_ngrams_match_2": 3.158, "eval_python_code_alpaca_n_ngrams_match_3": 1.128, "eval_python_code_alpaca_num_pred_words": 34.552, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 12.661312599007509, "eval_python_code_alpaca_pred_num_tokens": 54.9609375, "eval_python_code_alpaca_rouge_score": 0.4099865377696851, "eval_python_code_alpaca_runtime": 10.0581, "eval_python_code_alpaca_samples_per_second": 49.711, "eval_python_code_alpaca_steps_per_second": 0.099, "eval_python_code_alpaca_token_set_f1": 0.4949877378470005, "eval_python_code_alpaca_token_set_f1_sem": 0.0058422653141193045, "eval_python_code_alpaca_token_set_precision": 0.5573490328307835, "eval_python_code_alpaca_token_set_recall": 0.461735096324448, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 465 }, { "epoch": 2.05, "eval_wikibio_accuracy": 0.37009375, "eval_wikibio_bleu_score": 4.919931286316885, "eval_wikibio_bleu_score_sem": 0.21407350443020087, "eval_wikibio_emb_cos_sim": 0.6965718865394592, "eval_wikibio_emb_cos_sim_sem": 0.007230857201284975, "eval_wikibio_emb_top1_equal": 0.1459999978542328, "eval_wikibio_emb_top1_equal_sem": 0.015807205702664997, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.3172950744628906, "eval_wikibio_n_ngrams_match_1": 8.042, "eval_wikibio_n_ngrams_match_2": 2.538, "eval_wikibio_n_ngrams_match_3": 0.978, "eval_wikibio_num_pred_words": 30.932, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 27.58563246891694, "eval_wikibio_pred_num_tokens": 62.5, "eval_wikibio_rouge_score": 0.2864471028441363, "eval_wikibio_runtime": 7.7415, "eval_wikibio_samples_per_second": 64.587, "eval_wikibio_steps_per_second": 0.129, "eval_wikibio_token_set_f1": 0.26047722756912484, "eval_wikibio_token_set_f1_sem": 0.007163829543694026, "eval_wikibio_token_set_precision": 0.256809804635162, "eval_wikibio_token_set_recall": 0.2936675715034865, "eval_wikibio_true_num_tokens": 61.1328125, "step": 465 }, { "epoch": 2.05, "eval_bias-bios_accuracy": 0.49628125, "eval_bias-bios_bleu_score": 16.98683275890492, "eval_bias-bios_bleu_score_sem": 0.7217988752336373, "eval_bias-bios_emb_cos_sim": 0.8703109622001648, "eval_bias-bios_emb_cos_sim_sem": 0.0029760236183739, "eval_bias-bios_emb_top1_equal": 0.30399999022483826, "eval_bias-bios_emb_top1_equal_sem": 0.020591649838958805, "eval_bias-bios_exact_match": 0.0, "eval_bias-bios_exact_match_sem": 0.0, "eval_bias-bios_loss": 1.8828622102737427, "eval_bias-bios_n_ngrams_match_1": 21.976, "eval_bias-bios_n_ngrams_match_2": 9.688, "eval_bias-bios_n_ngrams_match_3": 5.232, "eval_bias-bios_num_pred_words": 45.322, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 6.572289240733505, "eval_bias-bios_pred_num_tokens": 60.9921875, "eval_bias-bios_rouge_score": 0.5184656659022286, "eval_bias-bios_runtime": 8.3916, "eval_bias-bios_samples_per_second": 59.584, "eval_bias-bios_steps_per_second": 0.119, "eval_bias-bios_token_set_f1": 0.5493044015119622, "eval_bias-bios_token_set_f1_sem": 0.006290346034109139, "eval_bias-bios_token_set_precision": 0.525720347228826, "eval_bias-bios_token_set_recall": 0.587055844834679, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 465 }, { "epoch": 2.06, "learning_rate": 0.001, "loss": 2.2039, "step": 468 }, { "epoch": 2.11, "learning_rate": 0.001, "loss": 2.1345, "step": 480 }, { "epoch": 2.17, "learning_rate": 0.001, "loss": 2.0227, "step": 492 }, { "epoch": 2.19, "eval_ag_news_accuracy": 0.30540625, "eval_ag_news_bleu_score": 4.668250881532122, "eval_ag_news_bleu_score_sem": 0.15988691631826146, "eval_ag_news_emb_cos_sim": 0.8072042465209961, "eval_ag_news_emb_cos_sim_sem": 0.0046731316477809145, "eval_ag_news_emb_top1_equal": 0.24199999868869781, "eval_ag_news_emb_top1_equal_sem": 0.019173085092707744, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.51320219039917, "eval_ag_news_n_ngrams_match_1": 12.482, "eval_ag_news_n_ngrams_match_2": 2.586, "eval_ag_news_n_ngrams_match_3": 0.718, "eval_ag_news_num_pred_words": 36.002, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.55554717876515, "eval_ag_news_pred_num_tokens": 56.2734375, "eval_ag_news_rouge_score": 0.36259427294137575, "eval_ag_news_runtime": 178.0758, "eval_ag_news_samples_per_second": 2.808, "eval_ag_news_steps_per_second": 0.006, "eval_ag_news_token_set_f1": 0.3456775660047359, "eval_ag_news_token_set_f1_sem": 0.005010765573039154, "eval_ag_news_token_set_precision": 0.3088513221500971, "eval_ag_news_token_set_recall": 0.41505437710191306, "eval_ag_news_true_num_tokens": 56.09375, "step": 496 }, { "epoch": 2.19, "eval_anthropic_toxic_prompts_accuracy": 0.10984375, "eval_anthropic_toxic_prompts_bleu_score": 4.4912971391928895, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.16681860043496632, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6957270503044128, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00475449144706793, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.15199999511241913, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01607198249074835, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 2.8501698970794678, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.858, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.928, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.77, "eval_anthropic_toxic_prompts_num_pred_words": 33.982, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 17.29071923373302, "eval_anthropic_toxic_prompts_pred_num_tokens": 49.703125, "eval_anthropic_toxic_prompts_rouge_score": 0.2722764609394385, "eval_anthropic_toxic_prompts_runtime": 7.3923, "eval_anthropic_toxic_prompts_samples_per_second": 67.638, "eval_anthropic_toxic_prompts_steps_per_second": 0.135, "eval_anthropic_toxic_prompts_token_set_f1": 0.3534656539571027, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006123770732202779, "eval_anthropic_toxic_prompts_token_set_precision": 0.43206533638597494, "eval_anthropic_toxic_prompts_token_set_recall": 0.3274092318834864, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 496 }, { "epoch": 2.19, "eval_arxiv_accuracy": 0.41796875, "eval_arxiv_bleu_score": 4.164145065124605, "eval_arxiv_bleu_score_sem": 0.11827294796233954, "eval_arxiv_emb_cos_sim": 0.7487242817878723, "eval_arxiv_emb_cos_sim_sem": 0.005828643916433681, "eval_arxiv_emb_top1_equal": 0.21400000154972076, "eval_arxiv_emb_top1_equal_sem": 0.01835979564312438, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.0731747150421143, "eval_arxiv_n_ngrams_match_1": 14.366, "eval_arxiv_n_ngrams_match_2": 2.74, "eval_arxiv_n_ngrams_match_3": 0.59, "eval_arxiv_num_pred_words": 34.388, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 21.610400750864795, "eval_arxiv_pred_num_tokens": 58.109375, "eval_arxiv_rouge_score": 0.36396878972148394, "eval_arxiv_runtime": 7.7906, "eval_arxiv_samples_per_second": 64.18, "eval_arxiv_steps_per_second": 0.128, "eval_arxiv_token_set_f1": 0.36518113630628457, "eval_arxiv_token_set_f1_sem": 0.0047476112934269005, "eval_arxiv_token_set_precision": 0.3069979542932561, "eval_arxiv_token_set_recall": 0.4743010616126002, "eval_arxiv_true_num_tokens": 64.0, "step": 496 }, { "epoch": 2.19, "eval_python_code_alpaca_accuracy": 0.15715625, "eval_python_code_alpaca_bleu_score": 6.223549285381672, "eval_python_code_alpaca_bleu_score_sem": 0.206436710584848, "eval_python_code_alpaca_emb_cos_sim": 0.7807042598724365, "eval_python_code_alpaca_emb_cos_sim_sem": 0.0045085411306805805, "eval_python_code_alpaca_emb_top1_equal": 0.2160000056028366, "eval_python_code_alpaca_emb_top1_equal_sem": 0.018421909471797383, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.5024209022521973, "eval_python_code_alpaca_n_ngrams_match_1": 9.552, "eval_python_code_alpaca_n_ngrams_match_2": 2.754, "eval_python_code_alpaca_n_ngrams_match_3": 0.916, "eval_python_code_alpaca_num_pred_words": 31.346, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 12.212022315979649, "eval_python_code_alpaca_pred_num_tokens": 50.46875, "eval_python_code_alpaca_rouge_score": 0.4221245259695163, "eval_python_code_alpaca_runtime": 7.3188, "eval_python_code_alpaca_samples_per_second": 68.317, "eval_python_code_alpaca_steps_per_second": 0.137, "eval_python_code_alpaca_token_set_f1": 0.48784522361653215, "eval_python_code_alpaca_token_set_f1_sem": 0.0058729159175178874, "eval_python_code_alpaca_token_set_precision": 0.5370299536629132, "eval_python_code_alpaca_token_set_recall": 0.4647996593704775, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 496 }, { "epoch": 2.19, "eval_wikibio_accuracy": 0.35896875, "eval_wikibio_bleu_score": 5.454117014872539, "eval_wikibio_bleu_score_sem": 0.2204721001818411, "eval_wikibio_emb_cos_sim": 0.719548761844635, "eval_wikibio_emb_cos_sim_sem": 0.006735396658708852, "eval_wikibio_emb_top1_equal": 0.1720000058412552, "eval_wikibio_emb_top1_equal_sem": 0.01689386850274998, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.470444917678833, "eval_wikibio_n_ngrams_match_1": 8.478, "eval_wikibio_n_ngrams_match_2": 2.72, "eval_wikibio_n_ngrams_match_3": 1.01, "eval_wikibio_num_pred_words": 30.79, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 32.15104383084076, "eval_wikibio_pred_num_tokens": 59.2734375, "eval_wikibio_rouge_score": 0.31754796272682606, "eval_wikibio_runtime": 7.6641, "eval_wikibio_samples_per_second": 65.239, "eval_wikibio_steps_per_second": 0.13, "eval_wikibio_token_set_f1": 0.2818988883768649, "eval_wikibio_token_set_f1_sem": 0.006598024742830464, "eval_wikibio_token_set_precision": 0.27590995276757235, "eval_wikibio_token_set_recall": 0.31430922001444034, "eval_wikibio_true_num_tokens": 61.1328125, "step": 496 }, { "epoch": 2.19, "eval_bias-bios_accuracy": 0.50121875, "eval_bias-bios_bleu_score": 17.215132232614394, "eval_bias-bios_bleu_score_sem": 0.7343335349514112, "eval_bias-bios_emb_cos_sim": 0.8732749223709106, "eval_bias-bios_emb_cos_sim_sem": 0.002797716253014933, "eval_bias-bios_emb_top1_equal": 0.2840000092983246, "eval_bias-bios_emb_top1_equal_sem": 0.020186705101045338, "eval_bias-bios_exact_match": 0.0, "eval_bias-bios_exact_match_sem": 0.0, "eval_bias-bios_loss": 1.8789442777633667, "eval_bias-bios_n_ngrams_match_1": 21.28, "eval_bias-bios_n_ngrams_match_2": 9.284, "eval_bias-bios_n_ngrams_match_3": 5.012, "eval_bias-bios_num_pred_words": 41.134, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 6.546589832197976, "eval_bias-bios_pred_num_tokens": 56.4453125, "eval_bias-bios_rouge_score": 0.5248652780897624, "eval_bias-bios_runtime": 8.232, "eval_bias-bios_samples_per_second": 60.739, "eval_bias-bios_steps_per_second": 0.121, "eval_bias-bios_token_set_f1": 0.5451987629708742, "eval_bias-bios_token_set_f1_sem": 0.006575960199532963, "eval_bias-bios_token_set_precision": 0.5150912165153675, "eval_bias-bios_token_set_recall": 0.5888287947698326, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 496 }, { "epoch": 2.22, "learning_rate": 0.001, "loss": 1.8183, "step": 504 }, { "epoch": 2.27, "learning_rate": 0.001, "loss": 2.2791, "step": 516 }, { "epoch": 2.32, "eval_ag_news_accuracy": 0.306125, "eval_ag_news_bleu_score": 4.7201156512212785, "eval_ag_news_bleu_score_sem": 0.15726246147004735, "eval_ag_news_emb_cos_sim": 0.8120728731155396, "eval_ag_news_emb_cos_sim_sem": 0.004673524823891517, "eval_ag_news_emb_top1_equal": 0.27799999713897705, "eval_ag_news_emb_top1_equal_sem": 0.0200558347666307, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4953548908233643, "eval_ag_news_n_ngrams_match_1": 13.506, "eval_ag_news_n_ngrams_match_2": 2.876, "eval_ag_news_n_ngrams_match_3": 0.796, "eval_ag_news_num_pred_words": 42.818, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.96198378307496, "eval_ag_news_pred_num_tokens": 62.71875, "eval_ag_news_rouge_score": 0.35785736889738995, "eval_ag_news_runtime": 202.08, "eval_ag_news_samples_per_second": 2.474, "eval_ag_news_steps_per_second": 0.005, "eval_ag_news_token_set_f1": 0.351394254769294, "eval_ag_news_token_set_f1_sem": 0.004847415999858728, "eval_ag_news_token_set_precision": 0.3297289801897619, "eval_ag_news_token_set_recall": 0.3999756145796819, "eval_ag_news_true_num_tokens": 56.09375, "step": 527 }, { "epoch": 2.32, "eval_anthropic_toxic_prompts_accuracy": 0.109, "eval_anthropic_toxic_prompts_bleu_score": 3.659994327362634, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1316802797000685, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.7012580633163452, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004484919909635785, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.15000000596046448, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01598471338779901, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.0036044120788574, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.412, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.082, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.846, "eval_anthropic_toxic_prompts_num_pred_words": 43.826, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 20.158064105488453, "eval_anthropic_toxic_prompts_pred_num_tokens": 61.8515625, "eval_anthropic_toxic_prompts_rouge_score": 0.23854465360649846, "eval_anthropic_toxic_prompts_runtime": 67.8278, "eval_anthropic_toxic_prompts_samples_per_second": 7.372, "eval_anthropic_toxic_prompts_steps_per_second": 0.015, "eval_anthropic_toxic_prompts_token_set_f1": 0.3379282851444896, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005772174167376433, "eval_anthropic_toxic_prompts_token_set_precision": 0.46172926677129095, "eval_anthropic_toxic_prompts_token_set_recall": 0.2918538154689661, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 527 }, { "epoch": 2.32, "eval_arxiv_accuracy": 0.4254375, "eval_arxiv_bleu_score": 4.145314235326048, "eval_arxiv_bleu_score_sem": 0.11821382340409703, "eval_arxiv_emb_cos_sim": 0.7381144165992737, "eval_arxiv_emb_cos_sim_sem": 0.006309414999677569, "eval_arxiv_emb_top1_equal": 0.25600001215934753, "eval_arxiv_emb_top1_equal_sem": 0.019536923601457774, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 2.979003429412842, "eval_arxiv_n_ngrams_match_1": 14.622, "eval_arxiv_n_ngrams_match_2": 2.772, "eval_arxiv_n_ngrams_match_3": 0.57, "eval_arxiv_num_pred_words": 38.324, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 19.668206119028458, "eval_arxiv_pred_num_tokens": 62.9140625, "eval_arxiv_rouge_score": 0.35247261343150804, "eval_arxiv_runtime": 130.6734, "eval_arxiv_samples_per_second": 3.826, "eval_arxiv_steps_per_second": 0.008, "eval_arxiv_token_set_f1": 0.35541673856122147, "eval_arxiv_token_set_f1_sem": 0.005079548130897894, "eval_arxiv_token_set_precision": 0.30314107444116767, "eval_arxiv_token_set_recall": 0.46493872398715763, "eval_arxiv_true_num_tokens": 64.0, "step": 527 }, { "epoch": 2.32, "eval_python_code_alpaca_accuracy": 0.156, "eval_python_code_alpaca_bleu_score": 5.21731257718578, "eval_python_code_alpaca_bleu_score_sem": 0.1584529336329874, "eval_python_code_alpaca_emb_cos_sim": 0.7805857062339783, "eval_python_code_alpaca_emb_cos_sim_sem": 0.004051591521737822, "eval_python_code_alpaca_emb_top1_equal": 0.1940000057220459, "eval_python_code_alpaca_emb_top1_equal_sem": 0.017701826750833646, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.6478047370910645, "eval_python_code_alpaca_n_ngrams_match_1": 10.218, "eval_python_code_alpaca_n_ngrams_match_2": 3.078, "eval_python_code_alpaca_n_ngrams_match_3": 1.028, "eval_python_code_alpaca_num_pred_words": 40.092, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 14.123000889807464, "eval_python_code_alpaca_pred_num_tokens": 61.7265625, "eval_python_code_alpaca_rouge_score": 0.38214918518907504, "eval_python_code_alpaca_runtime": 92.9861, "eval_python_code_alpaca_samples_per_second": 5.377, "eval_python_code_alpaca_steps_per_second": 0.011, "eval_python_code_alpaca_token_set_f1": 0.4712166923748369, "eval_python_code_alpaca_token_set_f1_sem": 0.0053732417427534845, "eval_python_code_alpaca_token_set_precision": 0.5681916403675348, "eval_python_code_alpaca_token_set_recall": 0.41949507105905237, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 527 }, { "epoch": 2.32, "eval_wikibio_accuracy": 0.36740625, "eval_wikibio_bleu_score": 4.968187379806609, "eval_wikibio_bleu_score_sem": 0.2135883339125878, "eval_wikibio_emb_cos_sim": 0.7012571096420288, "eval_wikibio_emb_cos_sim_sem": 0.006859008562281646, "eval_wikibio_emb_top1_equal": 0.15800000727176666, "eval_wikibio_emb_top1_equal_sem": 0.01632805076118194, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.3648173809051514, "eval_wikibio_n_ngrams_match_1": 8.462, "eval_wikibio_n_ngrams_match_2": 2.71, "eval_wikibio_n_ngrams_match_3": 1.024, "eval_wikibio_num_pred_words": 32.64, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 28.92821397239545, "eval_wikibio_pred_num_tokens": 62.8984375, "eval_wikibio_rouge_score": 0.2981068112492569, "eval_wikibio_runtime": 7.308, "eval_wikibio_samples_per_second": 68.418, "eval_wikibio_steps_per_second": 0.137, "eval_wikibio_token_set_f1": 0.2712183003628458, "eval_wikibio_token_set_f1_sem": 0.006982557410045616, "eval_wikibio_token_set_precision": 0.270139310786642, "eval_wikibio_token_set_recall": 0.2990774367348193, "eval_wikibio_true_num_tokens": 61.1328125, "step": 527 }, { "epoch": 2.32, "eval_bias-bios_accuracy": 0.501375, "eval_bias-bios_bleu_score": 16.21711718360367, "eval_bias-bios_bleu_score_sem": 0.6479797993591269, "eval_bias-bios_emb_cos_sim": 0.8746238350868225, "eval_bias-bios_emb_cos_sim_sem": 0.002782096499020999, "eval_bias-bios_emb_top1_equal": 0.335999995470047, "eval_bias-bios_emb_top1_equal_sem": 0.02114479131616093, "eval_bias-bios_exact_match": 0.0, "eval_bias-bios_exact_match_sem": 0.0, "eval_bias-bios_loss": 1.8893781900405884, "eval_bias-bios_n_ngrams_match_1": 22.216, "eval_bias-bios_n_ngrams_match_2": 9.848, "eval_bias-bios_n_ngrams_match_3": 5.332, "eval_bias-bios_num_pred_words": 47.686, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 6.615253971088581, "eval_bias-bios_pred_num_tokens": 62.8125, "eval_bias-bios_rouge_score": 0.5050361311054431, "eval_bias-bios_runtime": 7.4646, "eval_bias-bios_samples_per_second": 66.983, "eval_bias-bios_steps_per_second": 0.134, "eval_bias-bios_token_set_f1": 0.5426654504434694, "eval_bias-bios_token_set_f1_sem": 0.0063148590036132875, "eval_bias-bios_token_set_precision": 0.5333376764248211, "eval_bias-bios_token_set_recall": 0.5644582496982357, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 527 }, { "epoch": 2.33, "learning_rate": 0.001, "loss": 2.1562, "step": 528 }, { "epoch": 2.38, "learning_rate": 0.001, "loss": 2.0533, "step": 540 }, { "epoch": 2.43, "learning_rate": 0.001, "loss": 1.8237, "step": 552 }, { "epoch": 2.46, "eval_ag_news_accuracy": 0.305, "eval_ag_news_bleu_score": 4.17830284982609, "eval_ag_news_bleu_score_sem": 0.1646197309754618, "eval_ag_news_emb_cos_sim": 0.794195830821991, "eval_ag_news_emb_cos_sim_sem": 0.004617011089193853, "eval_ag_news_emb_top1_equal": 0.2540000081062317, "eval_ag_news_emb_top1_equal_sem": 0.019486597059300604, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.561718225479126, "eval_ag_news_n_ngrams_match_1": 10.746, "eval_ag_news_n_ngrams_match_2": 2.262, "eval_ag_news_n_ngrams_match_3": 0.63, "eval_ag_news_num_pred_words": 26.666, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 35.223667382242766, "eval_ag_news_pred_num_tokens": 40.484375, "eval_ag_news_rouge_score": 0.35462382870198267, "eval_ag_news_runtime": 142.7305, "eval_ag_news_samples_per_second": 3.503, "eval_ag_news_steps_per_second": 0.007, "eval_ag_news_token_set_f1": 0.33559673288595876, "eval_ag_news_token_set_f1_sem": 0.005097527789773717, "eval_ag_news_token_set_precision": 0.279699209130729, "eval_ag_news_token_set_recall": 0.4440647256504291, "eval_ag_news_true_num_tokens": 56.09375, "step": 558 }, { "epoch": 2.46, "eval_anthropic_toxic_prompts_accuracy": 0.11325, "eval_anthropic_toxic_prompts_bleu_score": 6.4154336131325485, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.2339881642763996, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.7061901688575745, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.005124980296488741, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.15800000727176666, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.016328049428381567, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 2.805786609649658, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.594, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.868, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.7, "eval_anthropic_toxic_prompts_num_pred_words": 23.004, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 16.54008137789522, "eval_anthropic_toxic_prompts_pred_num_tokens": 33.375, "eval_anthropic_toxic_prompts_rouge_score": 0.3356250127185185, "eval_anthropic_toxic_prompts_runtime": 6.8433, "eval_anthropic_toxic_prompts_samples_per_second": 73.064, "eval_anthropic_toxic_prompts_steps_per_second": 0.146, "eval_anthropic_toxic_prompts_token_set_f1": 0.3638587971000849, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0063676985920877545, "eval_anthropic_toxic_prompts_token_set_precision": 0.4205459789432383, "eval_anthropic_toxic_prompts_token_set_recall": 0.34815521435094127, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 558 }, { "epoch": 2.46, "eval_arxiv_accuracy": 0.407375, "eval_arxiv_bleu_score": 3.1676163846149956, "eval_arxiv_bleu_score_sem": 0.09382946641044632, "eval_arxiv_emb_cos_sim": 0.7366307973861694, "eval_arxiv_emb_cos_sim_sem": 0.00562818807444421, "eval_arxiv_emb_top1_equal": 0.14000000059604645, "eval_arxiv_emb_top1_equal_sem": 0.015533272576005909, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.1155734062194824, "eval_arxiv_n_ngrams_match_1": 12.256, "eval_arxiv_n_ngrams_match_2": 2.306, "eval_arxiv_n_ngrams_match_3": 0.478, "eval_arxiv_num_pred_words": 24.856, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 22.546354867749912, "eval_arxiv_pred_num_tokens": 40.1171875, "eval_arxiv_rouge_score": 0.3596539092889215, "eval_arxiv_runtime": 7.2229, "eval_arxiv_samples_per_second": 69.225, "eval_arxiv_steps_per_second": 0.138, "eval_arxiv_token_set_f1": 0.3574995386820636, "eval_arxiv_token_set_f1_sem": 0.004623552007997876, "eval_arxiv_token_set_precision": 0.28173053784426294, "eval_arxiv_token_set_recall": 0.5071577095029983, "eval_arxiv_true_num_tokens": 64.0, "step": 558 }, { "epoch": 2.46, "eval_python_code_alpaca_accuracy": 0.16434375, "eval_python_code_alpaca_bleu_score": 8.688607617329108, "eval_python_code_alpaca_bleu_score_sem": 0.302776161989141, "eval_python_code_alpaca_emb_cos_sim": 0.8036087155342102, "eval_python_code_alpaca_emb_cos_sim_sem": 0.004132519493703362, "eval_python_code_alpaca_emb_top1_equal": 0.23600000143051147, "eval_python_code_alpaca_emb_top1_equal_sem": 0.019008700160065242, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.3998489379882812, "eval_python_code_alpaca_n_ngrams_match_1": 8.842, "eval_python_code_alpaca_n_ngrams_match_2": 2.514, "eval_python_code_alpaca_n_ngrams_match_3": 0.838, "eval_python_code_alpaca_num_pred_words": 20.93, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 11.02151132320864, "eval_python_code_alpaca_pred_num_tokens": 31.46875, "eval_python_code_alpaca_rouge_score": 0.4927890115023661, "eval_python_code_alpaca_runtime": 6.8949, "eval_python_code_alpaca_samples_per_second": 72.517, "eval_python_code_alpaca_steps_per_second": 0.145, "eval_python_code_alpaca_token_set_f1": 0.5100735743849811, "eval_python_code_alpaca_token_set_f1_sem": 0.005861874662831224, "eval_python_code_alpaca_token_set_precision": 0.5207688191851509, "eval_python_code_alpaca_token_set_recall": 0.5181048782640013, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 558 }, { "epoch": 2.46, "eval_wikibio_accuracy": 0.35678125, "eval_wikibio_bleu_score": 5.5551563909930115, "eval_wikibio_bleu_score_sem": 0.22468231620514026, "eval_wikibio_emb_cos_sim": 0.7146796584129333, "eval_wikibio_emb_cos_sim_sem": 0.006399664244265241, "eval_wikibio_emb_top1_equal": 0.15000000596046448, "eval_wikibio_emb_top1_equal_sem": 0.01598471338779901, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.4578168392181396, "eval_wikibio_n_ngrams_match_1": 7.942, "eval_wikibio_n_ngrams_match_2": 2.468, "eval_wikibio_n_ngrams_match_3": 0.9, "eval_wikibio_num_pred_words": 26.488, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 31.74759070708929, "eval_wikibio_pred_num_tokens": 49.1640625, "eval_wikibio_rouge_score": 0.32450583191323545, "eval_wikibio_runtime": 9.7808, "eval_wikibio_samples_per_second": 51.12, "eval_wikibio_steps_per_second": 0.102, "eval_wikibio_token_set_f1": 0.28186914042405625, "eval_wikibio_token_set_f1_sem": 0.006523463043082381, "eval_wikibio_token_set_precision": 0.2670717795693041, "eval_wikibio_token_set_recall": 0.3212183854151821, "eval_wikibio_true_num_tokens": 61.1328125, "step": 558 }, { "epoch": 2.46, "eval_bias-bios_accuracy": 0.49503125, "eval_bias-bios_bleu_score": 15.804996894943608, "eval_bias-bios_bleu_score_sem": 0.7817717179636867, "eval_bias-bios_emb_cos_sim": 0.857208788394928, "eval_bias-bios_emb_cos_sim_sem": 0.0032387648870846487, "eval_bias-bios_emb_top1_equal": 0.257999986410141, "eval_bias-bios_emb_top1_equal_sem": 0.019586711692263472, "eval_bias-bios_exact_match": 0.0, "eval_bias-bios_exact_match_sem": 0.0, "eval_bias-bios_loss": 1.9215714931488037, "eval_bias-bios_n_ngrams_match_1": 17.874, "eval_bias-bios_n_ngrams_match_2": 7.994, "eval_bias-bios_n_ngrams_match_3": 4.424, "eval_bias-bios_num_pred_words": 28.774, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 6.831685985695067, "eval_bias-bios_pred_num_tokens": 38.359375, "eval_bias-bios_rouge_score": 0.5230174325337358, "eval_bias-bios_runtime": 7.248, "eval_bias-bios_samples_per_second": 68.985, "eval_bias-bios_steps_per_second": 0.138, "eval_bias-bios_token_set_f1": 0.5333727037684048, "eval_bias-bios_token_set_f1_sem": 0.00680824842461018, "eval_bias-bios_token_set_precision": 0.46402045143806697, "eval_bias-bios_token_set_recall": 0.6437579715420798, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 558 }, { "epoch": 2.48, "learning_rate": 0.001, "loss": 2.1767, "step": 564 }, { "epoch": 2.54, "learning_rate": 0.001, "loss": 2.125, "step": 576 }, { "epoch": 2.59, "learning_rate": 0.001, "loss": 2.0658, "step": 588 }, { "epoch": 2.59, "eval_ag_news_accuracy": 0.308375, "eval_ag_news_bleu_score": 4.825938925986893, "eval_ag_news_bleu_score_sem": 0.17537066666011905, "eval_ag_news_emb_cos_sim": 0.8115467429161072, "eval_ag_news_emb_cos_sim_sem": 0.004464737979958483, "eval_ag_news_emb_top1_equal": 0.2720000147819519, "eval_ag_news_emb_top1_equal_sem": 0.019920483557355567, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.498967170715332, "eval_ag_news_n_ngrams_match_1": 12.898, "eval_ag_news_n_ngrams_match_2": 2.776, "eval_ag_news_n_ngrams_match_3": 0.74, "eval_ag_news_num_pred_words": 38.22, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.08126700678294, "eval_ag_news_pred_num_tokens": 58.78125, "eval_ag_news_rouge_score": 0.3649716418907004, "eval_ag_news_runtime": 104.8253, "eval_ag_news_samples_per_second": 4.77, "eval_ag_news_steps_per_second": 0.01, "eval_ag_news_token_set_f1": 0.35050368829625284, "eval_ag_news_token_set_f1_sem": 0.004982748591545938, "eval_ag_news_token_set_precision": 0.3167835362371466, "eval_ag_news_token_set_recall": 0.4121145961802362, "eval_ag_news_true_num_tokens": 56.09375, "step": 589 }, { "epoch": 2.59, "eval_anthropic_toxic_prompts_accuracy": 0.10878125, "eval_anthropic_toxic_prompts_bleu_score": 4.171680099560288, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1593958533538672, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6833683252334595, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.005055784299823478, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.12999999523162842, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015055009156667442, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 2.92020845413208, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.804, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.862, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.726, "eval_anthropic_toxic_prompts_num_pred_words": 35.886, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 18.545152870598372, "eval_anthropic_toxic_prompts_pred_num_tokens": 52.578125, "eval_anthropic_toxic_prompts_rouge_score": 0.2567360788471482, "eval_anthropic_toxic_prompts_runtime": 53.4305, "eval_anthropic_toxic_prompts_samples_per_second": 9.358, "eval_anthropic_toxic_prompts_steps_per_second": 0.019, "eval_anthropic_toxic_prompts_token_set_f1": 0.3451024730372745, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006029522771575968, "eval_anthropic_toxic_prompts_token_set_precision": 0.4288044873442379, "eval_anthropic_toxic_prompts_token_set_recall": 0.31550608856249085, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 589 }, { "epoch": 2.59, "eval_arxiv_accuracy": 0.4219375, "eval_arxiv_bleu_score": 4.140443900056267, "eval_arxiv_bleu_score_sem": 0.12015419626352614, "eval_arxiv_emb_cos_sim": 0.7514610886573792, "eval_arxiv_emb_cos_sim_sem": 0.0054524023673946, "eval_arxiv_emb_top1_equal": 0.21199999749660492, "eval_arxiv_emb_top1_equal_sem": 0.01829703673906991, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.029334783554077, "eval_arxiv_n_ngrams_match_1": 14.722, "eval_arxiv_n_ngrams_match_2": 2.734, "eval_arxiv_n_ngrams_match_3": 0.554, "eval_arxiv_num_pred_words": 35.644, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 20.683469028267105, "eval_arxiv_pred_num_tokens": 59.8203125, "eval_arxiv_rouge_score": 0.3687405463610957, "eval_arxiv_runtime": 24.1607, "eval_arxiv_samples_per_second": 20.695, "eval_arxiv_steps_per_second": 0.041, "eval_arxiv_token_set_f1": 0.3668459906499189, "eval_arxiv_token_set_f1_sem": 0.004671911518585731, "eval_arxiv_token_set_precision": 0.3116453335220418, "eval_arxiv_token_set_recall": 0.46383344443392316, "eval_arxiv_true_num_tokens": 64.0, "step": 589 }, { "epoch": 2.59, "eval_python_code_alpaca_accuracy": 0.15428125, "eval_python_code_alpaca_bleu_score": 5.864369043870192, "eval_python_code_alpaca_bleu_score_sem": 0.19204430713131432, "eval_python_code_alpaca_emb_cos_sim": 0.774185836315155, "eval_python_code_alpaca_emb_cos_sim_sem": 0.004486241714407617, "eval_python_code_alpaca_emb_top1_equal": 0.1979999989271164, "eval_python_code_alpaca_emb_top1_equal_sem": 0.017838958581409683, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.568850040435791, "eval_python_code_alpaca_n_ngrams_match_1": 9.564, "eval_python_code_alpaca_n_ngrams_match_2": 2.782, "eval_python_code_alpaca_n_ngrams_match_3": 0.926, "eval_python_code_alpaca_num_pred_words": 33.608, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 13.050807907012663, "eval_python_code_alpaca_pred_num_tokens": 53.8359375, "eval_python_code_alpaca_rouge_score": 0.40507381092872385, "eval_python_code_alpaca_runtime": 77.0073, "eval_python_code_alpaca_samples_per_second": 6.493, "eval_python_code_alpaca_steps_per_second": 0.013, "eval_python_code_alpaca_token_set_f1": 0.4784863540932467, "eval_python_code_alpaca_token_set_f1_sem": 0.0057110262975351034, "eval_python_code_alpaca_token_set_precision": 0.5350334116878842, "eval_python_code_alpaca_token_set_recall": 0.4515350360067306, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 589 }, { "epoch": 2.59, "eval_wikibio_accuracy": 0.3678125, "eval_wikibio_bleu_score": 5.08333193380027, "eval_wikibio_bleu_score_sem": 0.21579463987083333, "eval_wikibio_emb_cos_sim": 0.7033864855766296, "eval_wikibio_emb_cos_sim_sem": 0.0071376558038668434, "eval_wikibio_emb_top1_equal": 0.1860000044107437, "eval_wikibio_emb_top1_equal_sem": 0.017418806591218323, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.3738596439361572, "eval_wikibio_n_ngrams_match_1": 8.356, "eval_wikibio_n_ngrams_match_2": 2.586, "eval_wikibio_n_ngrams_match_3": 0.97, "eval_wikibio_num_pred_words": 30.618, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 29.190976686596628, "eval_wikibio_pred_num_tokens": 61.5078125, "eval_wikibio_rouge_score": 0.3009390844051775, "eval_wikibio_runtime": 104.4552, "eval_wikibio_samples_per_second": 4.787, "eval_wikibio_steps_per_second": 0.01, "eval_wikibio_token_set_f1": 0.27607929651752483, "eval_wikibio_token_set_f1_sem": 0.006884092160953384, "eval_wikibio_token_set_precision": 0.2687024695785008, "eval_wikibio_token_set_recall": 0.31430470518840553, "eval_wikibio_true_num_tokens": 61.1328125, "step": 589 }, { "epoch": 2.59, "eval_bias-bios_accuracy": 0.5073125, "eval_bias-bios_bleu_score": 17.78100403924648, "eval_bias-bios_bleu_score_sem": 0.7668617383166656, "eval_bias-bios_emb_cos_sim": 0.8714081048965454, "eval_bias-bios_emb_cos_sim_sem": 0.003140707764700572, "eval_bias-bios_emb_top1_equal": 0.3319999873638153, "eval_bias-bios_emb_top1_equal_sem": 0.02108176585203148, "eval_bias-bios_exact_match": 0.0, "eval_bias-bios_exact_match_sem": 0.0, "eval_bias-bios_loss": 1.832431674003601, "eval_bias-bios_n_ngrams_match_1": 21.574, "eval_bias-bios_n_ngrams_match_2": 9.494, "eval_bias-bios_n_ngrams_match_3": 5.212, "eval_bias-bios_num_pred_words": 42.016, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 6.249063884207591, "eval_bias-bios_pred_num_tokens": 57.7578125, "eval_bias-bios_rouge_score": 0.5275146750904657, "eval_bias-bios_runtime": 46.2079, "eval_bias-bios_samples_per_second": 10.821, "eval_bias-bios_steps_per_second": 0.022, "eval_bias-bios_token_set_f1": 0.5490236205565298, "eval_bias-bios_token_set_f1_sem": 0.006594456906976342, "eval_bias-bios_token_set_precision": 0.52137228578852, "eval_bias-bios_token_set_recall": 0.5902546299142385, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 589 }, { "epoch": 2.64, "learning_rate": 0.001, "loss": 1.8307, "step": 600 }, { "epoch": 2.7, "learning_rate": 0.001, "loss": 2.1034, "step": 612 }, { "epoch": 2.73, "eval_ag_news_accuracy": 0.30353125, "eval_ag_news_bleu_score": 4.53689408224436, "eval_ag_news_bleu_score_sem": 0.1407891868623165, "eval_ag_news_emb_cos_sim": 0.808462381362915, "eval_ag_news_emb_cos_sim_sem": 0.004580344751279384, "eval_ag_news_emb_top1_equal": 0.2639999985694885, "eval_ag_news_emb_top1_equal_sem": 0.019732885240582997, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.506286144256592, "eval_ag_news_n_ngrams_match_1": 13.222, "eval_ag_news_n_ngrams_match_2": 2.836, "eval_ag_news_n_ngrams_match_3": 0.734, "eval_ag_news_num_pred_words": 42.192, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.32427612860431, "eval_ag_news_pred_num_tokens": 62.1015625, "eval_ag_news_rouge_score": 0.35460702911253306, "eval_ag_news_runtime": 22.9681, "eval_ag_news_samples_per_second": 21.769, "eval_ag_news_steps_per_second": 0.044, "eval_ag_news_token_set_f1": 0.34999672868570797, "eval_ag_news_token_set_f1_sem": 0.0049020199900705235, "eval_ag_news_token_set_precision": 0.3237693505139728, "eval_ag_news_token_set_recall": 0.4076253755054518, "eval_ag_news_true_num_tokens": 56.09375, "step": 620 }, { "epoch": 2.73, "eval_anthropic_toxic_prompts_accuracy": 0.107875, "eval_anthropic_toxic_prompts_bleu_score": 3.63208410886937, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.13144121713299511, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6909038424491882, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00449909224242255, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.15600000321865082, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.016243635183835314, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.0125765800476074, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.2, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.98, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.782, "eval_anthropic_toxic_prompts_num_pred_words": 41.784, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 20.339739434618117, "eval_anthropic_toxic_prompts_pred_num_tokens": 60.3984375, "eval_anthropic_toxic_prompts_rouge_score": 0.24196796556361308, "eval_anthropic_toxic_prompts_runtime": 6.9661, "eval_anthropic_toxic_prompts_samples_per_second": 71.776, "eval_anthropic_toxic_prompts_steps_per_second": 0.144, "eval_anthropic_toxic_prompts_token_set_f1": 0.3394931645156119, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005807740971075464, "eval_anthropic_toxic_prompts_token_set_precision": 0.44422747807761476, "eval_anthropic_toxic_prompts_token_set_recall": 0.2990046287227596, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 620 }, { "epoch": 2.73, "eval_arxiv_accuracy": 0.4233125, "eval_arxiv_bleu_score": 4.284955203954001, "eval_arxiv_bleu_score_sem": 0.12627207210690689, "eval_arxiv_emb_cos_sim": 0.741362452507019, "eval_arxiv_emb_cos_sim_sem": 0.005599541529585759, "eval_arxiv_emb_top1_equal": 0.27799999713897705, "eval_arxiv_emb_top1_equal_sem": 0.0200558347666307, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.0010297298431396, "eval_arxiv_n_ngrams_match_1": 14.89, "eval_arxiv_n_ngrams_match_2": 2.868, "eval_arxiv_n_ngrams_match_3": 0.614, "eval_arxiv_num_pred_words": 38.106, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 20.106230252413784, "eval_arxiv_pred_num_tokens": 61.8046875, "eval_arxiv_rouge_score": 0.3562602292924252, "eval_arxiv_runtime": 7.3893, "eval_arxiv_samples_per_second": 67.665, "eval_arxiv_steps_per_second": 0.135, "eval_arxiv_token_set_f1": 0.3608819560059443, "eval_arxiv_token_set_f1_sem": 0.004948931806275159, "eval_arxiv_token_set_precision": 0.30757811736542806, "eval_arxiv_token_set_recall": 0.4654241563081048, "eval_arxiv_true_num_tokens": 64.0, "step": 620 }, { "epoch": 2.73, "eval_python_code_alpaca_accuracy": 0.152125, "eval_python_code_alpaca_bleu_score": 5.215313521009815, "eval_python_code_alpaca_bleu_score_sem": 0.16132353607710642, "eval_python_code_alpaca_emb_cos_sim": 0.7622130513191223, "eval_python_code_alpaca_emb_cos_sim_sem": 0.004359099555841739, "eval_python_code_alpaca_emb_top1_equal": 0.15600000321865082, "eval_python_code_alpaca_emb_top1_equal_sem": 0.01624363651663569, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.671508312225342, "eval_python_code_alpaca_n_ngrams_match_1": 9.816, "eval_python_code_alpaca_n_ngrams_match_2": 2.878, "eval_python_code_alpaca_n_ngrams_match_3": 0.96, "eval_python_code_alpaca_num_pred_words": 37.962, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 14.461765608637934, "eval_python_code_alpaca_pred_num_tokens": 59.875, "eval_python_code_alpaca_rouge_score": 0.3860518554997959, "eval_python_code_alpaca_runtime": 98.8415, "eval_python_code_alpaca_samples_per_second": 5.059, "eval_python_code_alpaca_steps_per_second": 0.01, "eval_python_code_alpaca_token_set_f1": 0.4681935618748671, "eval_python_code_alpaca_token_set_f1_sem": 0.005613756137103073, "eval_python_code_alpaca_token_set_precision": 0.5427428112223796, "eval_python_code_alpaca_token_set_recall": 0.4299327669158068, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 620 }, { "epoch": 2.73, "eval_wikibio_accuracy": 0.3721875, "eval_wikibio_bleu_score": 4.7620090481459, "eval_wikibio_bleu_score_sem": 0.20192915764833883, "eval_wikibio_emb_cos_sim": 0.7033131122589111, "eval_wikibio_emb_cos_sim_sem": 0.007050469334540749, "eval_wikibio_emb_top1_equal": 0.16200000047683716, "eval_wikibio_emb_top1_equal_sem": 0.016494123019099097, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.3212852478027344, "eval_wikibio_n_ngrams_match_1": 8.292, "eval_wikibio_n_ngrams_match_2": 2.552, "eval_wikibio_n_ngrams_match_3": 0.932, "eval_wikibio_num_pred_words": 31.282, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 27.6959238186253, "eval_wikibio_pred_num_tokens": 62.828125, "eval_wikibio_rouge_score": 0.2892639774887934, "eval_wikibio_runtime": 7.1225, "eval_wikibio_samples_per_second": 70.2, "eval_wikibio_steps_per_second": 0.14, "eval_wikibio_token_set_f1": 0.26863722552618025, "eval_wikibio_token_set_f1_sem": 0.00692945422994504, "eval_wikibio_token_set_precision": 0.26243191660598264, "eval_wikibio_token_set_recall": 0.3061311954238753, "eval_wikibio_true_num_tokens": 61.1328125, "step": 620 }, { "epoch": 2.73, "eval_bias-bios_accuracy": 0.5038125, "eval_bias-bios_bleu_score": 16.77870924904314, "eval_bias-bios_bleu_score_sem": 0.7032499787869223, "eval_bias-bios_emb_cos_sim": 0.8721063137054443, "eval_bias-bios_emb_cos_sim_sem": 0.0027367717900708775, "eval_bias-bios_emb_top1_equal": 0.32199999690055847, "eval_bias-bios_emb_top1_equal_sem": 0.020916667871188392, "eval_bias-bios_exact_match": 0.0, "eval_bias-bios_exact_match_sem": 0.0, "eval_bias-bios_loss": 1.8617655038833618, "eval_bias-bios_n_ngrams_match_1": 22.31, "eval_bias-bios_n_ngrams_match_2": 9.792, "eval_bias-bios_n_ngrams_match_3": 5.316, "eval_bias-bios_num_pred_words": 46.466, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 6.435087920945597, "eval_bias-bios_pred_num_tokens": 61.8046875, "eval_bias-bios_rouge_score": 0.5140055790947482, "eval_bias-bios_runtime": 8.3194, "eval_bias-bios_samples_per_second": 60.1, "eval_bias-bios_steps_per_second": 0.12, "eval_bias-bios_token_set_f1": 0.5454037892631416, "eval_bias-bios_token_set_f1_sem": 0.006311858395773849, "eval_bias-bios_token_set_precision": 0.5311017560395707, "eval_bias-bios_token_set_recall": 0.5703081426345601, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 620 }, { "epoch": 2.75, "learning_rate": 0.001, "loss": 2.1311, "step": 624 }, { "epoch": 2.8, "learning_rate": 0.001, "loss": 2.0834, "step": 636 }, { "epoch": 2.85, "learning_rate": 0.001, "loss": 1.8634, "step": 648 }, { "epoch": 2.87, "eval_ag_news_accuracy": 0.30515625, "eval_ag_news_bleu_score": 4.473343439625787, "eval_ag_news_bleu_score_sem": 0.15810206804676052, "eval_ag_news_emb_cos_sim": 0.8005340695381165, "eval_ag_news_emb_cos_sim_sem": 0.004695550349687434, "eval_ag_news_emb_top1_equal": 0.2879999876022339, "eval_ag_news_emb_top1_equal_sem": 0.020271503192099565, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5658531188964844, "eval_ag_news_n_ngrams_match_1": 11.65, "eval_ag_news_n_ngrams_match_2": 2.418, "eval_ag_news_n_ngrams_match_3": 0.658, "eval_ag_news_num_pred_words": 30.516, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 35.3696150236647, "eval_ag_news_pred_num_tokens": 46.015625, "eval_ag_news_rouge_score": 0.3610716279802496, "eval_ag_news_runtime": 7.1672, "eval_ag_news_samples_per_second": 69.762, "eval_ag_news_steps_per_second": 0.14, "eval_ag_news_token_set_f1": 0.34436389102369475, "eval_ag_news_token_set_f1_sem": 0.004961501646001305, "eval_ag_news_token_set_precision": 0.29844150324042096, "eval_ag_news_token_set_recall": 0.42716703328221317, "eval_ag_news_true_num_tokens": 56.09375, "step": 651 }, { "epoch": 2.87, "eval_anthropic_toxic_prompts_accuracy": 0.11078125, "eval_anthropic_toxic_prompts_bleu_score": 5.265969515801792, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.19656878745061954, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.7007984519004822, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004628107985115478, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.15600000321865082, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.016243635183835314, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 2.847456455230713, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.706, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.832, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.684, "eval_anthropic_toxic_prompts_num_pred_words": 27.388, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 17.243865468800184, "eval_anthropic_toxic_prompts_pred_num_tokens": 40.8984375, "eval_anthropic_toxic_prompts_rouge_score": 0.30469045903105896, "eval_anthropic_toxic_prompts_runtime": 6.928, "eval_anthropic_toxic_prompts_samples_per_second": 72.171, "eval_anthropic_toxic_prompts_steps_per_second": 0.144, "eval_anthropic_toxic_prompts_token_set_f1": 0.34764036986523544, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006320732915520322, "eval_anthropic_toxic_prompts_token_set_precision": 0.4238956512970005, "eval_anthropic_toxic_prompts_token_set_recall": 0.3209230248717735, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 651 }, { "epoch": 2.87, "eval_arxiv_accuracy": 0.4124375, "eval_arxiv_bleu_score": 3.725053262080295, "eval_arxiv_bleu_score_sem": 0.11067579921127993, "eval_arxiv_emb_cos_sim": 0.7445892095565796, "eval_arxiv_emb_cos_sim_sem": 0.005560418840580304, "eval_arxiv_emb_top1_equal": 0.17399999499320984, "eval_arxiv_emb_top1_equal_sem": 0.016971270884523753, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.109774112701416, "eval_arxiv_n_ngrams_match_1": 13.534, "eval_arxiv_n_ngrams_match_2": 2.47, "eval_arxiv_n_ngrams_match_3": 0.534, "eval_arxiv_num_pred_words": 29.738, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 22.41598034356986, "eval_arxiv_pred_num_tokens": 50.703125, "eval_arxiv_rouge_score": 0.36506026933321756, "eval_arxiv_runtime": 7.2303, "eval_arxiv_samples_per_second": 69.154, "eval_arxiv_steps_per_second": 0.138, "eval_arxiv_token_set_f1": 0.36532853948773913, "eval_arxiv_token_set_f1_sem": 0.004653775964601775, "eval_arxiv_token_set_precision": 0.30101683877486113, "eval_arxiv_token_set_recall": 0.4827124457846711, "eval_arxiv_true_num_tokens": 64.0, "step": 651 }, { "epoch": 2.87, "eval_python_code_alpaca_accuracy": 0.1569375, "eval_python_code_alpaca_bleu_score": 7.137868958322952, "eval_python_code_alpaca_bleu_score_sem": 0.2364063665756055, "eval_python_code_alpaca_emb_cos_sim": 0.7880061268806458, "eval_python_code_alpaca_emb_cos_sim_sem": 0.004177613128388481, "eval_python_code_alpaca_emb_top1_equal": 0.1899999976158142, "eval_python_code_alpaca_emb_top1_equal_sem": 0.017561800077843276, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.4980950355529785, "eval_python_code_alpaca_n_ngrams_match_1": 9.17, "eval_python_code_alpaca_n_ngrams_match_2": 2.492, "eval_python_code_alpaca_n_ngrams_match_3": 0.788, "eval_python_code_alpaca_num_pred_words": 25.402, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 12.159308833265369, "eval_python_code_alpaca_pred_num_tokens": 41.65625, "eval_python_code_alpaca_rouge_score": 0.4624655642995602, "eval_python_code_alpaca_runtime": 26.2419, "eval_python_code_alpaca_samples_per_second": 19.053, "eval_python_code_alpaca_steps_per_second": 0.038, "eval_python_code_alpaca_token_set_f1": 0.4860980481971889, "eval_python_code_alpaca_token_set_f1_sem": 0.00580305500608555, "eval_python_code_alpaca_token_set_precision": 0.5229148183140903, "eval_python_code_alpaca_token_set_recall": 0.4709995327571117, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 651 }, { "epoch": 2.87, "eval_wikibio_accuracy": 0.35765625, "eval_wikibio_bleu_score": 5.85492257184771, "eval_wikibio_bleu_score_sem": 0.23014008018986573, "eval_wikibio_emb_cos_sim": 0.7288503646850586, "eval_wikibio_emb_cos_sim_sem": 0.006177860272270408, "eval_wikibio_emb_top1_equal": 0.16200000047683716, "eval_wikibio_emb_top1_equal_sem": 0.016494123019099097, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.4955646991729736, "eval_wikibio_n_ngrams_match_1": 8.694, "eval_wikibio_n_ngrams_match_2": 2.738, "eval_wikibio_n_ngrams_match_3": 1.004, "eval_wikibio_num_pred_words": 29.054, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 32.96890020802882, "eval_wikibio_pred_num_tokens": 52.8046875, "eval_wikibio_rouge_score": 0.3403089616964229, "eval_wikibio_runtime": 32.6626, "eval_wikibio_samples_per_second": 15.308, "eval_wikibio_steps_per_second": 0.031, "eval_wikibio_token_set_f1": 0.29763297721712917, "eval_wikibio_token_set_f1_sem": 0.006160074286052675, "eval_wikibio_token_set_precision": 0.2869254675733824, "eval_wikibio_token_set_recall": 0.33152872531158917, "eval_wikibio_true_num_tokens": 61.1328125, "step": 651 }, { "epoch": 2.87, "eval_bias-bios_accuracy": 0.50496875, "eval_bias-bios_bleu_score": 17.404360770996192, "eval_bias-bios_bleu_score_sem": 0.8065126842992738, "eval_bias-bios_emb_cos_sim": 0.865310549736023, "eval_bias-bios_emb_cos_sim_sem": 0.003374083442750598, "eval_bias-bios_emb_top1_equal": 0.33000001311302185, "eval_bias-bios_emb_top1_equal_sem": 0.021049612042986412, "eval_bias-bios_exact_match": 0.002, "eval_bias-bios_exact_match_sem": 0.002, "eval_bias-bios_loss": 1.858695387840271, "eval_bias-bios_n_ngrams_match_1": 20.136, "eval_bias-bios_n_ngrams_match_2": 8.792, "eval_bias-bios_n_ngrams_match_3": 4.816, "eval_bias-bios_num_pred_words": 35.484, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 6.415361750591107, "eval_bias-bios_pred_num_tokens": 48.4296875, "eval_bias-bios_rouge_score": 0.5350815981968027, "eval_bias-bios_runtime": 33.4966, "eval_bias-bios_samples_per_second": 14.927, "eval_bias-bios_steps_per_second": 0.03, "eval_bias-bios_token_set_f1": 0.5462957317603716, "eval_bias-bios_token_set_f1_sem": 0.006750628440175147, "eval_bias-bios_token_set_precision": 0.5028559854585791, "eval_bias-bios_token_set_recall": 0.6104946925640782, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 651 }, { "epoch": 2.91, "learning_rate": 0.001, "loss": 2.0138, "step": 660 }, { "epoch": 2.96, "learning_rate": 0.001, "loss": 2.11, "step": 672 }, { "epoch": 3.0, "eval_ag_news_accuracy": 0.3066875, "eval_ag_news_bleu_score": 3.9999289719672073, "eval_ag_news_bleu_score_sem": 0.14619700926918378, "eval_ag_news_emb_cos_sim": 0.7976146936416626, "eval_ag_news_emb_cos_sim_sem": 0.0042587303580072615, "eval_ag_news_emb_top1_equal": 0.2540000081062317, "eval_ag_news_emb_top1_equal_sem": 0.019486597059300604, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.557628870010376, "eval_ag_news_n_ngrams_match_1": 10.736, "eval_ag_news_n_ngrams_match_2": 2.24, "eval_ag_news_n_ngrams_match_3": 0.614, "eval_ag_news_num_pred_words": 25.642, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 35.07991940401688, "eval_ag_news_pred_num_tokens": 37.46875, "eval_ag_news_rouge_score": 0.35758803023918306, "eval_ag_news_runtime": 8.2019, "eval_ag_news_samples_per_second": 60.962, "eval_ag_news_steps_per_second": 0.122, "eval_ag_news_token_set_f1": 0.3417297542358024, "eval_ag_news_token_set_f1_sem": 0.0049447789658150195, "eval_ag_news_token_set_precision": 0.2815595439954063, "eval_ag_news_token_set_recall": 0.45808750249581603, "eval_ag_news_true_num_tokens": 56.09375, "step": 682 }, { "epoch": 3.0, "eval_anthropic_toxic_prompts_accuracy": 0.11465625, "eval_anthropic_toxic_prompts_bleu_score": 6.620783700595906, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.2504343615589252, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.708827793598175, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004979361193125184, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1720000058412552, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01689386850274998, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 2.778644323348999, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.462, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.776, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.694, "eval_anthropic_toxic_prompts_num_pred_words": 21.826, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 16.097183573245367, "eval_anthropic_toxic_prompts_pred_num_tokens": 30.984375, "eval_anthropic_toxic_prompts_rouge_score": 0.3427616398928224, "eval_anthropic_toxic_prompts_runtime": 7.17, "eval_anthropic_toxic_prompts_samples_per_second": 69.735, "eval_anthropic_toxic_prompts_steps_per_second": 0.139, "eval_anthropic_toxic_prompts_token_set_f1": 0.36721266069559144, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006592633901857433, "eval_anthropic_toxic_prompts_token_set_precision": 0.4157216533469948, "eval_anthropic_toxic_prompts_token_set_recall": 0.3591411989218731, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 682 }, { "epoch": 3.0, "eval_arxiv_accuracy": 0.41234375, "eval_arxiv_bleu_score": 3.2402474085140467, "eval_arxiv_bleu_score_sem": 0.11054316602251692, "eval_arxiv_emb_cos_sim": 0.7450404763221741, "eval_arxiv_emb_cos_sim_sem": 0.005016281096310982, "eval_arxiv_emb_top1_equal": 0.1459999978542328, "eval_arxiv_emb_top1_equal_sem": 0.01580720436986462, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.100191593170166, "eval_arxiv_n_ngrams_match_1": 12.564, "eval_arxiv_n_ngrams_match_2": 2.338, "eval_arxiv_n_ngrams_match_3": 0.484, "eval_arxiv_num_pred_words": 25.684, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 22.20220466474542, "eval_arxiv_pred_num_tokens": 39.765625, "eval_arxiv_rouge_score": 0.3643006369430338, "eval_arxiv_runtime": 7.1688, "eval_arxiv_samples_per_second": 69.747, "eval_arxiv_steps_per_second": 0.139, "eval_arxiv_token_set_f1": 0.36394150696450756, "eval_arxiv_token_set_f1_sem": 0.004365745503615316, "eval_arxiv_token_set_precision": 0.288894392717936, "eval_arxiv_token_set_recall": 0.5062836949338945, "eval_arxiv_true_num_tokens": 64.0, "step": 682 }, { "epoch": 3.0, "eval_python_code_alpaca_accuracy": 0.17009375, "eval_python_code_alpaca_bleu_score": 8.932379578566175, "eval_python_code_alpaca_bleu_score_sem": 0.33461997730428233, "eval_python_code_alpaca_emb_cos_sim": 0.8038181662559509, "eval_python_code_alpaca_emb_cos_sim_sem": 0.004188257205382726, "eval_python_code_alpaca_emb_top1_equal": 0.24400000274181366, "eval_python_code_alpaca_emb_top1_equal_sem": 0.0192267343061996, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.3401527404785156, "eval_python_code_alpaca_n_ngrams_match_1": 8.898, "eval_python_code_alpaca_n_ngrams_match_2": 2.476, "eval_python_code_alpaca_n_ngrams_match_3": 0.838, "eval_python_code_alpaca_num_pred_words": 20.412, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 10.382822318873513, "eval_python_code_alpaca_pred_num_tokens": 31.5078125, "eval_python_code_alpaca_rouge_score": 0.4983959997409262, "eval_python_code_alpaca_runtime": 55.2072, "eval_python_code_alpaca_samples_per_second": 9.057, "eval_python_code_alpaca_steps_per_second": 0.018, "eval_python_code_alpaca_token_set_f1": 0.5136739256107152, "eval_python_code_alpaca_token_set_f1_sem": 0.005907835885019311, "eval_python_code_alpaca_token_set_precision": 0.5236014854603724, "eval_python_code_alpaca_token_set_recall": 0.5256264438798506, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 682 }, { "epoch": 3.0, "eval_wikibio_accuracy": 0.357625, "eval_wikibio_bleu_score": 5.884401182468587, "eval_wikibio_bleu_score_sem": 0.2169968312792518, "eval_wikibio_emb_cos_sim": 0.7289230823516846, "eval_wikibio_emb_cos_sim_sem": 0.005783947123060196, "eval_wikibio_emb_top1_equal": 0.12200000137090683, "eval_wikibio_emb_top1_equal_sem": 0.014651325247908655, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.4971401691436768, "eval_wikibio_n_ngrams_match_1": 8.648, "eval_wikibio_n_ngrams_match_2": 2.708, "eval_wikibio_n_ngrams_match_3": 0.984, "eval_wikibio_num_pred_words": 27.808, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 33.02088265791592, "eval_wikibio_pred_num_tokens": 48.890625, "eval_wikibio_rouge_score": 0.3478947595641799, "eval_wikibio_runtime": 7.1125, "eval_wikibio_samples_per_second": 70.298, "eval_wikibio_steps_per_second": 0.141, "eval_wikibio_token_set_f1": 0.30490824395081195, "eval_wikibio_token_set_f1_sem": 0.0059019070100866785, "eval_wikibio_token_set_precision": 0.2908642059105115, "eval_wikibio_token_set_recall": 0.33954481636458195, "eval_wikibio_true_num_tokens": 61.1328125, "step": 682 }, { "epoch": 3.0, "eval_bias-bios_accuracy": 0.50125, "eval_bias-bios_bleu_score": 15.98849001010628, "eval_bias-bios_bleu_score_sem": 0.7974510881576483, "eval_bias-bios_emb_cos_sim": 0.8551180958747864, "eval_bias-bios_emb_cos_sim_sem": 0.00377614433385282, "eval_bias-bios_emb_top1_equal": 0.2720000147819519, "eval_bias-bios_emb_top1_equal_sem": 0.019920483557355567, "eval_bias-bios_exact_match": 0.002, "eval_bias-bios_exact_match_sem": 0.002, "eval_bias-bios_loss": 1.8715966939926147, "eval_bias-bios_n_ngrams_match_1": 18.124, "eval_bias-bios_n_ngrams_match_2": 8.164, "eval_bias-bios_n_ngrams_match_3": 4.542, "eval_bias-bios_num_pred_words": 28.884, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 6.498664498307737, "eval_bias-bios_pred_num_tokens": 38.703125, "eval_bias-bios_rouge_score": 0.5264331775686338, "eval_bias-bios_runtime": 7.7438, "eval_bias-bios_samples_per_second": 64.568, "eval_bias-bios_steps_per_second": 0.129, "eval_bias-bios_token_set_f1": 0.5371140795022766, "eval_bias-bios_token_set_f1_sem": 0.006801180559363128, "eval_bias-bios_token_set_precision": 0.46791660140110625, "eval_bias-bios_token_set_recall": 0.6506526258419852, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 682 }, { "epoch": 3.01, "learning_rate": 0.001, "loss": 1.9015, "step": 684 }, { "epoch": 3.07, "learning_rate": 0.001, "loss": 2.1372, "step": 696 }, { "epoch": 3.12, "learning_rate": 0.001, "loss": 2.0761, "step": 708 }, { "epoch": 3.14, "eval_ag_news_accuracy": 0.30365625, "eval_ag_news_bleu_score": 4.807231978089563, "eval_ag_news_bleu_score_sem": 0.16163813088315626, "eval_ag_news_emb_cos_sim": 0.8174247145652771, "eval_ag_news_emb_cos_sim_sem": 0.004546760847432024, "eval_ag_news_emb_top1_equal": 0.25999999046325684, "eval_ag_news_emb_top1_equal_sem": 0.0196359666629192, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5345346927642822, "eval_ag_news_n_ngrams_match_1": 13.404, "eval_ag_news_n_ngrams_match_2": 2.846, "eval_ag_news_n_ngrams_match_3": 0.77, "eval_ag_news_num_pred_words": 41.124, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.27906070777844, "eval_ag_news_pred_num_tokens": 61.5234375, "eval_ag_news_rouge_score": 0.36701003791655307, "eval_ag_news_runtime": 7.3142, "eval_ag_news_samples_per_second": 68.36, "eval_ag_news_steps_per_second": 0.137, "eval_ag_news_token_set_f1": 0.35534880626682386, "eval_ag_news_token_set_f1_sem": 0.004803456660972898, "eval_ag_news_token_set_precision": 0.32871916471570295, "eval_ag_news_token_set_recall": 0.4079597792220003, "eval_ag_news_true_num_tokens": 56.09375, "step": 713 }, { "epoch": 3.14, "eval_anthropic_toxic_prompts_accuracy": 0.10734375, "eval_anthropic_toxic_prompts_bleu_score": 3.7004674765467946, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1351453047196849, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.693345308303833, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004463090638695075, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.12200000137090683, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.014651325247908655, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.01737380027771, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.18, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.95, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.726, "eval_anthropic_toxic_prompts_num_pred_words": 40.516, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 20.43754806129512, "eval_anthropic_toxic_prompts_pred_num_tokens": 59.2265625, "eval_anthropic_toxic_prompts_rouge_score": 0.24905914379520377, "eval_anthropic_toxic_prompts_runtime": 7.0211, "eval_anthropic_toxic_prompts_samples_per_second": 71.214, "eval_anthropic_toxic_prompts_steps_per_second": 0.142, "eval_anthropic_toxic_prompts_token_set_f1": 0.34231952265276866, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00582109173922676, "eval_anthropic_toxic_prompts_token_set_precision": 0.4466406316128299, "eval_anthropic_toxic_prompts_token_set_recall": 0.3017256420993679, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 713 }, { "epoch": 3.14, "eval_arxiv_accuracy": 0.42584375, "eval_arxiv_bleu_score": 4.510228645858232, "eval_arxiv_bleu_score_sem": 0.12525631185548242, "eval_arxiv_emb_cos_sim": 0.7616528272628784, "eval_arxiv_emb_cos_sim_sem": 0.004894787345734036, "eval_arxiv_emb_top1_equal": 0.2540000081062317, "eval_arxiv_emb_top1_equal_sem": 0.019486597059300604, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 2.996361017227173, "eval_arxiv_n_ngrams_match_1": 15.418, "eval_arxiv_n_ngrams_match_2": 2.996, "eval_arxiv_n_ngrams_match_3": 0.676, "eval_arxiv_num_pred_words": 37.702, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 20.012578827478347, "eval_arxiv_pred_num_tokens": 62.390625, "eval_arxiv_rouge_score": 0.37442979613112276, "eval_arxiv_runtime": 99.6345, "eval_arxiv_samples_per_second": 5.018, "eval_arxiv_steps_per_second": 0.01, "eval_arxiv_token_set_f1": 0.3760448173627775, "eval_arxiv_token_set_f1_sem": 0.004571537213954523, "eval_arxiv_token_set_precision": 0.32504903646266936, "eval_arxiv_token_set_recall": 0.4649487924076311, "eval_arxiv_true_num_tokens": 64.0, "step": 713 }, { "epoch": 3.14, "eval_python_code_alpaca_accuracy": 0.15446875, "eval_python_code_alpaca_bleu_score": 5.5939644522079615, "eval_python_code_alpaca_bleu_score_sem": 0.17768506193955427, "eval_python_code_alpaca_emb_cos_sim": 0.7780863642692566, "eval_python_code_alpaca_emb_cos_sim_sem": 0.0038236100200052118, "eval_python_code_alpaca_emb_top1_equal": 0.1940000057220459, "eval_python_code_alpaca_emb_top1_equal_sem": 0.017701828083634023, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.6528801918029785, "eval_python_code_alpaca_n_ngrams_match_1": 10.01, "eval_python_code_alpaca_n_ngrams_match_2": 2.964, "eval_python_code_alpaca_n_ngrams_match_3": 0.988, "eval_python_code_alpaca_num_pred_words": 36.804, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 14.194863755312682, "eval_python_code_alpaca_pred_num_tokens": 59.1875, "eval_python_code_alpaca_rouge_score": 0.39856666647945677, "eval_python_code_alpaca_runtime": 7.0358, "eval_python_code_alpaca_samples_per_second": 71.065, "eval_python_code_alpaca_steps_per_second": 0.142, "eval_python_code_alpaca_token_set_f1": 0.47840825139573334, "eval_python_code_alpaca_token_set_f1_sem": 0.0054816918459534405, "eval_python_code_alpaca_token_set_precision": 0.5579828664085862, "eval_python_code_alpaca_token_set_recall": 0.4363118980299957, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 713 }, { "epoch": 3.14, "eval_wikibio_accuracy": 0.367875, "eval_wikibio_bleu_score": 4.735244091280166, "eval_wikibio_bleu_score_sem": 0.20963187820347412, "eval_wikibio_emb_cos_sim": 0.6974137425422668, "eval_wikibio_emb_cos_sim_sem": 0.007348966638509892, "eval_wikibio_emb_top1_equal": 0.17399999499320984, "eval_wikibio_emb_top1_equal_sem": 0.016971270884523753, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.351142644882202, "eval_wikibio_n_ngrams_match_1": 7.906, "eval_wikibio_n_ngrams_match_2": 2.508, "eval_wikibio_n_ngrams_match_3": 0.934, "eval_wikibio_num_pred_words": 30.27, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 28.535320760699715, "eval_wikibio_pred_num_tokens": 62.890625, "eval_wikibio_rouge_score": 0.2857146526105555, "eval_wikibio_runtime": 7.1168, "eval_wikibio_samples_per_second": 70.257, "eval_wikibio_steps_per_second": 0.141, "eval_wikibio_token_set_f1": 0.25992420696120655, "eval_wikibio_token_set_f1_sem": 0.007061032205840481, "eval_wikibio_token_set_precision": 0.25434709052883214, "eval_wikibio_token_set_recall": 0.2923511219456792, "eval_wikibio_true_num_tokens": 61.1328125, "step": 713 }, { "epoch": 3.14, "eval_bias-bios_accuracy": 0.510375, "eval_bias-bios_bleu_score": 17.46569017961834, "eval_bias-bios_bleu_score_sem": 0.7332074185725256, "eval_bias-bios_emb_cos_sim": 0.8748842477798462, "eval_bias-bios_emb_cos_sim_sem": 0.0030136818929674064, "eval_bias-bios_emb_top1_equal": 0.3619999885559082, "eval_bias-bios_emb_top1_equal_sem": 0.02151366247912668, "eval_bias-bios_exact_match": 0.0, "eval_bias-bios_exact_match_sem": 0.0, "eval_bias-bios_loss": 1.8190138339996338, "eval_bias-bios_n_ngrams_match_1": 22.302, "eval_bias-bios_n_ngrams_match_2": 9.934, "eval_bias-bios_n_ngrams_match_3": 5.444, "eval_bias-bios_num_pred_words": 45.506, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 6.165774973073229, "eval_bias-bios_pred_num_tokens": 61.265625, "eval_bias-bios_rouge_score": 0.5202713511356998, "eval_bias-bios_runtime": 7.4369, "eval_bias-bios_samples_per_second": 67.232, "eval_bias-bios_steps_per_second": 0.134, "eval_bias-bios_token_set_f1": 0.549705304733179, "eval_bias-bios_token_set_f1_sem": 0.006451127709977456, "eval_bias-bios_token_set_precision": 0.5326632628189287, "eval_bias-bios_token_set_recall": 0.5795277275593579, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 713 }, { "epoch": 3.17, "learning_rate": 0.001, "loss": 1.944, "step": 720 }, { "epoch": 3.22, "learning_rate": 0.001, "loss": 1.7949, "step": 732 }, { "epoch": 3.28, "learning_rate": 0.001, "loss": 2.1684, "step": 744 }, { "epoch": 3.28, "eval_ag_news_accuracy": 0.3044375, "eval_ag_news_bleu_score": 4.672991899082724, "eval_ag_news_bleu_score_sem": 0.15476185602666204, "eval_ag_news_emb_cos_sim": 0.811370849609375, "eval_ag_news_emb_cos_sim_sem": 0.004829103278056701, "eval_ag_news_emb_top1_equal": 0.25600001215934753, "eval_ag_news_emb_top1_equal_sem": 0.019536923601457774, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.504905939102173, "eval_ag_news_n_ngrams_match_1": 13.25, "eval_ag_news_n_ngrams_match_2": 2.822, "eval_ag_news_n_ngrams_match_3": 0.754, "eval_ag_news_num_pred_words": 41.12, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.27831351713739, "eval_ag_news_pred_num_tokens": 60.546875, "eval_ag_news_rouge_score": 0.3606610416794005, "eval_ag_news_runtime": 7.4579, "eval_ag_news_samples_per_second": 67.043, "eval_ag_news_steps_per_second": 0.134, "eval_ag_news_token_set_f1": 0.3520361867777557, "eval_ag_news_token_set_f1_sem": 0.004946594020772925, "eval_ag_news_token_set_precision": 0.3243223940123956, "eval_ag_news_token_set_recall": 0.4096605761331192, "eval_ag_news_true_num_tokens": 56.09375, "step": 744 }, { "epoch": 3.28, "eval_anthropic_toxic_prompts_accuracy": 0.10796875, "eval_anthropic_toxic_prompts_bleu_score": 3.9505558874336377, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.14533239487420707, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.691646933555603, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00474973001772851, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.14800000190734863, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015896458012572223, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.0150554180145264, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.154, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.02, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.818, "eval_anthropic_toxic_prompts_num_pred_words": 40.01, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 20.390220894796755, "eval_anthropic_toxic_prompts_pred_num_tokens": 55.5703125, "eval_anthropic_toxic_prompts_rouge_score": 0.2475437197594741, "eval_anthropic_toxic_prompts_runtime": 7.0734, "eval_anthropic_toxic_prompts_samples_per_second": 70.687, "eval_anthropic_toxic_prompts_steps_per_second": 0.141, "eval_anthropic_toxic_prompts_token_set_f1": 0.3445415672624848, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005892741461353698, "eval_anthropic_toxic_prompts_token_set_precision": 0.44540730751231317, "eval_anthropic_toxic_prompts_token_set_recall": 0.3069377172835802, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 744 }, { "epoch": 3.28, "eval_arxiv_accuracy": 0.42346875, "eval_arxiv_bleu_score": 4.406566856808188, "eval_arxiv_bleu_score_sem": 0.1332551318523246, "eval_arxiv_emb_cos_sim": 0.7289456129074097, "eval_arxiv_emb_cos_sim_sem": 0.00676433508684941, "eval_arxiv_emb_top1_equal": 0.25200000405311584, "eval_arxiv_emb_top1_equal_sem": 0.019435728067390842, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 2.984241008758545, "eval_arxiv_n_ngrams_match_1": 14.726, "eval_arxiv_n_ngrams_match_2": 2.96, "eval_arxiv_n_ngrams_match_3": 0.69, "eval_arxiv_num_pred_words": 37.278, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 19.77149015221476, "eval_arxiv_pred_num_tokens": 61.8984375, "eval_arxiv_rouge_score": 0.35180234388079, "eval_arxiv_runtime": 8.2469, "eval_arxiv_samples_per_second": 60.629, "eval_arxiv_steps_per_second": 0.121, "eval_arxiv_token_set_f1": 0.35925759294812704, "eval_arxiv_token_set_f1_sem": 0.005295346068408739, "eval_arxiv_token_set_precision": 0.30474853688198655, "eval_arxiv_token_set_recall": 0.4687242650845372, "eval_arxiv_true_num_tokens": 64.0, "step": 744 }, { "epoch": 3.28, "eval_python_code_alpaca_accuracy": 0.15509375, "eval_python_code_alpaca_bleu_score": 6.1177832646981205, "eval_python_code_alpaca_bleu_score_sem": 0.2048795479105771, "eval_python_code_alpaca_emb_cos_sim": 0.7686617374420166, "eval_python_code_alpaca_emb_cos_sim_sem": 0.004338874310152252, "eval_python_code_alpaca_emb_top1_equal": 0.15800000727176666, "eval_python_code_alpaca_emb_top1_equal_sem": 0.01632805076118194, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.619371175765991, "eval_python_code_alpaca_n_ngrams_match_1": 9.736, "eval_python_code_alpaca_n_ngrams_match_2": 2.998, "eval_python_code_alpaca_n_ngrams_match_3": 1.076, "eval_python_code_alpaca_num_pred_words": 35.354, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 13.727088944336183, "eval_python_code_alpaca_pred_num_tokens": 55.4296875, "eval_python_code_alpaca_rouge_score": 0.4021383557670485, "eval_python_code_alpaca_runtime": 7.0106, "eval_python_code_alpaca_samples_per_second": 71.32, "eval_python_code_alpaca_steps_per_second": 0.143, "eval_python_code_alpaca_token_set_f1": 0.4800895200151073, "eval_python_code_alpaca_token_set_f1_sem": 0.005615133377042818, "eval_python_code_alpaca_token_set_precision": 0.5409584793979987, "eval_python_code_alpaca_token_set_recall": 0.4499633268060651, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 744 }, { "epoch": 3.28, "eval_wikibio_accuracy": 0.37146875, "eval_wikibio_bleu_score": 4.702581855515065, "eval_wikibio_bleu_score_sem": 0.20529887964322238, "eval_wikibio_emb_cos_sim": 0.682021975517273, "eval_wikibio_emb_cos_sim_sem": 0.007582080088086151, "eval_wikibio_emb_top1_equal": 0.164000004529953, "eval_wikibio_emb_top1_equal_sem": 0.016575811686878626, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.2913637161254883, "eval_wikibio_n_ngrams_match_1": 7.786, "eval_wikibio_n_ngrams_match_2": 2.476, "eval_wikibio_n_ngrams_match_3": 0.928, "eval_wikibio_num_pred_words": 29.428, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 26.879494673398067, "eval_wikibio_pred_num_tokens": 62.5703125, "eval_wikibio_rouge_score": 0.2758099282727248, "eval_wikibio_runtime": 7.0912, "eval_wikibio_samples_per_second": 70.51, "eval_wikibio_steps_per_second": 0.141, "eval_wikibio_token_set_f1": 0.2562598491451561, "eval_wikibio_token_set_f1_sem": 0.007338948588135056, "eval_wikibio_token_set_precision": 0.24709290983369253, "eval_wikibio_token_set_recall": 0.3000468657914441, "eval_wikibio_true_num_tokens": 61.1328125, "step": 744 }, { "epoch": 3.28, "eval_bias-bios_accuracy": 0.5049375, "eval_bias-bios_bleu_score": 17.635346680899122, "eval_bias-bios_bleu_score_sem": 0.7664231815693775, "eval_bias-bios_emb_cos_sim": 0.8713113069534302, "eval_bias-bios_emb_cos_sim_sem": 0.003266782016966041, "eval_bias-bios_emb_top1_equal": 0.33000001311302185, "eval_bias-bios_emb_top1_equal_sem": 0.021049612042986412, "eval_bias-bios_exact_match": 0.0, "eval_bias-bios_exact_match_sem": 0.0, "eval_bias-bios_loss": 1.8327958583831787, "eval_bias-bios_n_ngrams_match_1": 22.168, "eval_bias-bios_n_ngrams_match_2": 9.896, "eval_bias-bios_n_ngrams_match_3": 5.414, "eval_bias-bios_num_pred_words": 45.57, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 6.251340110119005, "eval_bias-bios_pred_num_tokens": 60.21875, "eval_bias-bios_rouge_score": 0.5185499712129331, "eval_bias-bios_runtime": 22.9658, "eval_bias-bios_samples_per_second": 21.771, "eval_bias-bios_steps_per_second": 0.044, "eval_bias-bios_token_set_f1": 0.5517670294522569, "eval_bias-bios_token_set_f1_sem": 0.00664155954121532, "eval_bias-bios_token_set_precision": 0.5298568658456634, "eval_bias-bios_token_set_recall": 0.58644952362815, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 744 }, { "epoch": 3.33, "learning_rate": 0.001, "loss": 2.0783, "step": 756 }, { "epoch": 3.38, "learning_rate": 0.001, "loss": 1.9614, "step": 768 }, { "epoch": 3.41, "eval_ag_news_accuracy": 0.3060625, "eval_ag_news_bleu_score": 4.830644842559454, "eval_ag_news_bleu_score_sem": 0.17131361214852292, "eval_ag_news_emb_cos_sim": 0.8080878257751465, "eval_ag_news_emb_cos_sim_sem": 0.004762135390418219, "eval_ag_news_emb_top1_equal": 0.2639999985694885, "eval_ag_news_emb_top1_equal_sem": 0.019732885240582997, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5362191200256348, "eval_ag_news_n_ngrams_match_1": 12.318, "eval_ag_news_n_ngrams_match_2": 2.566, "eval_ag_news_n_ngrams_match_3": 0.756, "eval_ag_news_num_pred_words": 33.394, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.33684994935138, "eval_ag_news_pred_num_tokens": 50.8125, "eval_ag_news_rouge_score": 0.3686651207110883, "eval_ag_news_runtime": 54.0329, "eval_ag_news_samples_per_second": 9.254, "eval_ag_news_steps_per_second": 0.019, "eval_ag_news_token_set_f1": 0.35017067187233036, "eval_ag_news_token_set_f1_sem": 0.004965635312749522, "eval_ag_news_token_set_precision": 0.30866475239635843, "eval_ag_news_token_set_recall": 0.4222894602236413, "eval_ag_news_true_num_tokens": 56.09375, "step": 775 }, { "epoch": 3.41, "eval_anthropic_toxic_prompts_accuracy": 0.11034375, "eval_anthropic_toxic_prompts_bleu_score": 4.791208765914185, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.17235693520941445, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6980757117271423, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004599339155822628, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.15800000727176666, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.016328049428381567, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 2.8975107669830322, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.794, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.804, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.67, "eval_anthropic_toxic_prompts_num_pred_words": 29.974, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 18.128961946099636, "eval_anthropic_toxic_prompts_pred_num_tokens": 43.9765625, "eval_anthropic_toxic_prompts_rouge_score": 0.28998494944399944, "eval_anthropic_toxic_prompts_runtime": 6.7811, "eval_anthropic_toxic_prompts_samples_per_second": 73.734, "eval_anthropic_toxic_prompts_steps_per_second": 0.147, "eval_anthropic_toxic_prompts_token_set_f1": 0.3506330942661759, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006079127723106288, "eval_anthropic_toxic_prompts_token_set_precision": 0.43022796801569213, "eval_anthropic_toxic_prompts_token_set_recall": 0.3216893300248196, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 775 }, { "epoch": 3.41, "eval_arxiv_accuracy": 0.41875, "eval_arxiv_bleu_score": 3.985202336909602, "eval_arxiv_bleu_score_sem": 0.11427823441579887, "eval_arxiv_emb_cos_sim": 0.7555274367332458, "eval_arxiv_emb_cos_sim_sem": 0.0048518951640682, "eval_arxiv_emb_top1_equal": 0.20399999618530273, "eval_arxiv_emb_top1_equal_sem": 0.018039369108186407, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.0569183826446533, "eval_arxiv_n_ngrams_match_1": 14.21, "eval_arxiv_n_ngrams_match_2": 2.628, "eval_arxiv_n_ngrams_match_3": 0.562, "eval_arxiv_num_pred_words": 32.458, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 21.26193495492643, "eval_arxiv_pred_num_tokens": 54.359375, "eval_arxiv_rouge_score": 0.37090033558555063, "eval_arxiv_runtime": 10.0922, "eval_arxiv_samples_per_second": 49.543, "eval_arxiv_steps_per_second": 0.099, "eval_arxiv_token_set_f1": 0.3694772950313383, "eval_arxiv_token_set_f1_sem": 0.004405857439219603, "eval_arxiv_token_set_precision": 0.3106585602925783, "eval_arxiv_token_set_recall": 0.4722938585818135, "eval_arxiv_true_num_tokens": 64.0, "step": 775 }, { "epoch": 3.41, "eval_python_code_alpaca_accuracy": 0.15715625, "eval_python_code_alpaca_bleu_score": 6.703733250427161, "eval_python_code_alpaca_bleu_score_sem": 0.21109497746566985, "eval_python_code_alpaca_emb_cos_sim": 0.7801447510719299, "eval_python_code_alpaca_emb_cos_sim_sem": 0.004055824828928678, "eval_python_code_alpaca_emb_top1_equal": 0.20000000298023224, "eval_python_code_alpaca_emb_top1_equal_sem": 0.017906459589198134, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.5180015563964844, "eval_python_code_alpaca_n_ngrams_match_1": 9.252, "eval_python_code_alpaca_n_ngrams_match_2": 2.58, "eval_python_code_alpaca_n_ngrams_match_3": 0.83, "eval_python_code_alpaca_num_pred_words": 28.04, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 12.403783616070726, "eval_python_code_alpaca_pred_num_tokens": 45.625, "eval_python_code_alpaca_rouge_score": 0.4425684197842046, "eval_python_code_alpaca_runtime": 7.0427, "eval_python_code_alpaca_samples_per_second": 70.996, "eval_python_code_alpaca_steps_per_second": 0.142, "eval_python_code_alpaca_token_set_f1": 0.48194826632417753, "eval_python_code_alpaca_token_set_f1_sem": 0.005645163528078008, "eval_python_code_alpaca_token_set_precision": 0.5236863218849204, "eval_python_code_alpaca_token_set_recall": 0.465454270189137, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 775 }, { "epoch": 3.41, "eval_wikibio_accuracy": 0.3623125, "eval_wikibio_bleu_score": 5.491105598896529, "eval_wikibio_bleu_score_sem": 0.22105851156678893, "eval_wikibio_emb_cos_sim": 0.7249027490615845, "eval_wikibio_emb_cos_sim_sem": 0.006267459110275117, "eval_wikibio_emb_top1_equal": 0.16599999368190765, "eval_wikibio_emb_top1_equal_sem": 0.016656615375209204, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.428741693496704, "eval_wikibio_n_ngrams_match_1": 8.588, "eval_wikibio_n_ngrams_match_2": 2.652, "eval_wikibio_n_ngrams_match_3": 0.99, "eval_wikibio_num_pred_words": 30.438, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 30.837814903094237, "eval_wikibio_pred_num_tokens": 57.0234375, "eval_wikibio_rouge_score": 0.32513790206198945, "eval_wikibio_runtime": 7.7874, "eval_wikibio_samples_per_second": 64.206, "eval_wikibio_steps_per_second": 0.128, "eval_wikibio_token_set_f1": 0.28688274851414197, "eval_wikibio_token_set_f1_sem": 0.006164553502671263, "eval_wikibio_token_set_precision": 0.2807405610348686, "eval_wikibio_token_set_recall": 0.31940677840867493, "eval_wikibio_true_num_tokens": 61.1328125, "step": 775 }, { "epoch": 3.41, "eval_bias-bios_accuracy": 0.51146875, "eval_bias-bios_bleu_score": 18.121918930666734, "eval_bias-bios_bleu_score_sem": 0.8110035460979876, "eval_bias-bios_emb_cos_sim": 0.8704509139060974, "eval_bias-bios_emb_cos_sim_sem": 0.003329327339760428, "eval_bias-bios_emb_top1_equal": 0.35199999809265137, "eval_bias-bios_emb_top1_equal_sem": 0.021380041244738194, "eval_bias-bios_exact_match": 0.002, "eval_bias-bios_exact_match_sem": 0.002, "eval_bias-bios_loss": 1.806106686592102, "eval_bias-bios_n_ngrams_match_1": 21.02, "eval_bias-bios_n_ngrams_match_2": 9.28, "eval_bias-bios_n_ngrams_match_3": 5.16, "eval_bias-bios_num_pred_words": 37.962, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 6.086703796015009, "eval_bias-bios_pred_num_tokens": 51.6171875, "eval_bias-bios_rouge_score": 0.5405338845092296, "eval_bias-bios_runtime": 7.2575, "eval_bias-bios_samples_per_second": 68.894, "eval_bias-bios_steps_per_second": 0.138, "eval_bias-bios_token_set_f1": 0.5530509264634874, "eval_bias-bios_token_set_f1_sem": 0.006721981375685361, "eval_bias-bios_token_set_precision": 0.5179105484815628, "eval_bias-bios_token_set_recall": 0.6050661590454904, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 775 }, { "epoch": 3.44, "learning_rate": 0.001, "loss": 1.7502, "step": 780 }, { "epoch": 3.49, "learning_rate": 0.001, "loss": 2.136, "step": 792 }, { "epoch": 3.54, "learning_rate": 0.001, "loss": 2.0723, "step": 804 }, { "epoch": 3.55, "eval_ag_news_accuracy": 0.304625, "eval_ag_news_bleu_score": 4.60205913529834, "eval_ag_news_bleu_score_sem": 0.15505346124207306, "eval_ag_news_emb_cos_sim": 0.8124366998672485, "eval_ag_news_emb_cos_sim_sem": 0.004842483927420761, "eval_ag_news_emb_top1_equal": 0.30000001192092896, "eval_ag_news_emb_top1_equal_sem": 0.020514426052435274, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5086352825164795, "eval_ag_news_n_ngrams_match_1": 13.326, "eval_ag_news_n_ngrams_match_2": 2.786, "eval_ag_news_n_ngrams_match_3": 0.78, "eval_ag_news_num_pred_words": 42.364, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.40265148186903, "eval_ag_news_pred_num_tokens": 62.515625, "eval_ag_news_rouge_score": 0.3569843557545479, "eval_ag_news_runtime": 7.0297, "eval_ag_news_samples_per_second": 71.126, "eval_ag_news_steps_per_second": 0.142, "eval_ag_news_token_set_f1": 0.3495832375018209, "eval_ag_news_token_set_f1_sem": 0.004923836215222782, "eval_ag_news_token_set_precision": 0.3228266549890825, "eval_ag_news_token_set_recall": 0.4072312643510978, "eval_ag_news_true_num_tokens": 56.09375, "step": 806 }, { "epoch": 3.55, "eval_anthropic_toxic_prompts_accuracy": 0.10778125, "eval_anthropic_toxic_prompts_bleu_score": 3.478540847435448, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11949226015249044, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6928165555000305, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004549284504941945, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.14399999380111694, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01571693380047095, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.05265212059021, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.078, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.922, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.728, "eval_anthropic_toxic_prompts_num_pred_words": 42.112, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 21.17141918790496, "eval_anthropic_toxic_prompts_pred_num_tokens": 60.5078125, "eval_anthropic_toxic_prompts_rouge_score": 0.23727029553517592, "eval_anthropic_toxic_prompts_runtime": 6.8213, "eval_anthropic_toxic_prompts_samples_per_second": 73.3, "eval_anthropic_toxic_prompts_steps_per_second": 0.147, "eval_anthropic_toxic_prompts_token_set_f1": 0.3390444125571501, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005674077469671755, "eval_anthropic_toxic_prompts_token_set_precision": 0.4443096392091491, "eval_anthropic_toxic_prompts_token_set_recall": 0.29979884045938265, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 806 }, { "epoch": 3.55, "eval_arxiv_accuracy": 0.42653125, "eval_arxiv_bleu_score": 4.436794308460483, "eval_arxiv_bleu_score_sem": 0.1341657588866233, "eval_arxiv_emb_cos_sim": 0.7514486312866211, "eval_arxiv_emb_cos_sim_sem": 0.005709252325248086, "eval_arxiv_emb_top1_equal": 0.27399998903274536, "eval_arxiv_emb_top1_equal_sem": 0.01996610531418925, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 2.9720091819763184, "eval_arxiv_n_ngrams_match_1": 14.996, "eval_arxiv_n_ngrams_match_2": 2.906, "eval_arxiv_n_ngrams_match_3": 0.702, "eval_arxiv_num_pred_words": 38.318, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 19.53112177853892, "eval_arxiv_pred_num_tokens": 62.4609375, "eval_arxiv_rouge_score": 0.3604203239944716, "eval_arxiv_runtime": 7.3771, "eval_arxiv_samples_per_second": 67.777, "eval_arxiv_steps_per_second": 0.136, "eval_arxiv_token_set_f1": 0.36417605653305596, "eval_arxiv_token_set_f1_sem": 0.004806454033055331, "eval_arxiv_token_set_precision": 0.31203181590774637, "eval_arxiv_token_set_recall": 0.4612931928641072, "eval_arxiv_true_num_tokens": 64.0, "step": 806 }, { "epoch": 3.55, "eval_python_code_alpaca_accuracy": 0.15625, "eval_python_code_alpaca_bleu_score": 5.248670371367343, "eval_python_code_alpaca_bleu_score_sem": 0.1658923075864523, "eval_python_code_alpaca_emb_cos_sim": 0.772881031036377, "eval_python_code_alpaca_emb_cos_sim_sem": 0.004222808389102189, "eval_python_code_alpaca_emb_top1_equal": 0.1940000057220459, "eval_python_code_alpaca_emb_top1_equal_sem": 0.017701828083634023, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.6592369079589844, "eval_python_code_alpaca_n_ngrams_match_1": 9.83, "eval_python_code_alpaca_n_ngrams_match_2": 2.906, "eval_python_code_alpaca_n_ngrams_match_3": 0.95, "eval_python_code_alpaca_num_pred_words": 37.892, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 14.285383875623985, "eval_python_code_alpaca_pred_num_tokens": 60.4453125, "eval_python_code_alpaca_rouge_score": 0.38629347615812193, "eval_python_code_alpaca_runtime": 6.9202, "eval_python_code_alpaca_samples_per_second": 72.253, "eval_python_code_alpaca_steps_per_second": 0.145, "eval_python_code_alpaca_token_set_f1": 0.4710159508399305, "eval_python_code_alpaca_token_set_f1_sem": 0.005425578193334968, "eval_python_code_alpaca_token_set_precision": 0.5464500732328734, "eval_python_code_alpaca_token_set_recall": 0.43287514402648203, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 806 }, { "epoch": 3.55, "eval_wikibio_accuracy": 0.3736875, "eval_wikibio_bleu_score": 4.874848732011922, "eval_wikibio_bleu_score_sem": 0.2031192765037749, "eval_wikibio_emb_cos_sim": 0.7125859260559082, "eval_wikibio_emb_cos_sim_sem": 0.00642388589187894, "eval_wikibio_emb_top1_equal": 0.17599999904632568, "eval_wikibio_emb_top1_equal_sem": 0.017047853594066943, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.30753231048584, "eval_wikibio_n_ngrams_match_1": 8.302, "eval_wikibio_n_ngrams_match_2": 2.6, "eval_wikibio_n_ngrams_match_3": 0.98, "eval_wikibio_num_pred_words": 31.694, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 27.31763079713352, "eval_wikibio_pred_num_tokens": 62.9765625, "eval_wikibio_rouge_score": 0.2901026002628322, "eval_wikibio_runtime": 7.0902, "eval_wikibio_samples_per_second": 70.52, "eval_wikibio_steps_per_second": 0.141, "eval_wikibio_token_set_f1": 0.2677244947313496, "eval_wikibio_token_set_f1_sem": 0.006927357451301914, "eval_wikibio_token_set_precision": 0.26264790463960996, "eval_wikibio_token_set_recall": 0.30136590727706775, "eval_wikibio_true_num_tokens": 61.1328125, "step": 806 }, { "epoch": 3.55, "eval_bias-bios_accuracy": 0.50834375, "eval_bias-bios_bleu_score": 17.11151424904348, "eval_bias-bios_bleu_score_sem": 0.7161483293889718, "eval_bias-bios_emb_cos_sim": 0.8768417239189148, "eval_bias-bios_emb_cos_sim_sem": 0.003176824322460384, "eval_bias-bios_emb_top1_equal": 0.335999995470047, "eval_bias-bios_emb_top1_equal_sem": 0.02114479131616093, "eval_bias-bios_exact_match": 0.0, "eval_bias-bios_exact_match_sem": 0.0, "eval_bias-bios_loss": 1.8117848634719849, "eval_bias-bios_n_ngrams_match_1": 22.56, "eval_bias-bios_n_ngrams_match_2": 10.072, "eval_bias-bios_n_ngrams_match_3": 5.464, "eval_bias-bios_num_pred_words": 46.834, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 6.121363485584354, "eval_bias-bios_pred_num_tokens": 62.03125, "eval_bias-bios_rouge_score": 0.5183632183459099, "eval_bias-bios_runtime": 7.4291, "eval_bias-bios_samples_per_second": 67.303, "eval_bias-bios_steps_per_second": 0.135, "eval_bias-bios_token_set_f1": 0.5541207654751751, "eval_bias-bios_token_set_f1_sem": 0.006455986166626027, "eval_bias-bios_token_set_precision": 0.5388610772243735, "eval_bias-bios_token_set_recall": 0.5830024266618952, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 806 }, { "epoch": 3.59, "learning_rate": 0.001, "loss": 1.9926, "step": 816 }, { "epoch": 3.65, "learning_rate": 0.001, "loss": 1.7553, "step": 828 }, { "epoch": 3.69, "eval_ag_news_accuracy": 0.3098125, "eval_ag_news_bleu_score": 4.778759162593388, "eval_ag_news_bleu_score_sem": 0.16529183512727746, "eval_ag_news_emb_cos_sim": 0.8075339794158936, "eval_ag_news_emb_cos_sim_sem": 0.004170562947605222, "eval_ag_news_emb_top1_equal": 0.2720000147819519, "eval_ag_news_emb_top1_equal_sem": 0.019920483557355567, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5137295722961426, "eval_ag_news_n_ngrams_match_1": 11.836, "eval_ag_news_n_ngrams_match_2": 2.59, "eval_ag_news_n_ngrams_match_3": 0.718, "eval_ag_news_num_pred_words": 30.43, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.57324843414079, "eval_ag_news_pred_num_tokens": 44.484375, "eval_ag_news_rouge_score": 0.3698284591121457, "eval_ag_news_runtime": 25.8981, "eval_ag_news_samples_per_second": 19.306, "eval_ag_news_steps_per_second": 0.039, "eval_ag_news_token_set_f1": 0.3519000375758839, "eval_ag_news_token_set_f1_sem": 0.004968601342353017, "eval_ag_news_token_set_precision": 0.2999630348624532, "eval_ag_news_token_set_recall": 0.4435569510076943, "eval_ag_news_true_num_tokens": 56.09375, "step": 837 }, { "epoch": 3.69, "eval_anthropic_toxic_prompts_accuracy": 0.1125625, "eval_anthropic_toxic_prompts_bleu_score": 6.020973366102066, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.23054669884648044, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.7090463042259216, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.0047895847473398935, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.164000004529953, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.016575810354078253, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 2.860866069793701, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.784, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.862, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.73, "eval_anthropic_toxic_prompts_num_pred_words": 25.654, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 17.476656388246514, "eval_anthropic_toxic_prompts_pred_num_tokens": 37.1171875, "eval_anthropic_toxic_prompts_rouge_score": 0.32144459354531985, "eval_anthropic_toxic_prompts_runtime": 32.6489, "eval_anthropic_toxic_prompts_samples_per_second": 15.314, "eval_anthropic_toxic_prompts_steps_per_second": 0.031, "eval_anthropic_toxic_prompts_token_set_f1": 0.36752908789633254, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006169528972758457, "eval_anthropic_toxic_prompts_token_set_precision": 0.4319967039477071, "eval_anthropic_toxic_prompts_token_set_recall": 0.3459659996987827, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 837 }, { "epoch": 3.69, "eval_arxiv_accuracy": 0.4183125, "eval_arxiv_bleu_score": 3.5945334417121058, "eval_arxiv_bleu_score_sem": 0.10744192704090952, "eval_arxiv_emb_cos_sim": 0.7484161853790283, "eval_arxiv_emb_cos_sim_sem": 0.0052398406995996846, "eval_arxiv_emb_top1_equal": 0.18000000715255737, "eval_arxiv_emb_top1_equal_sem": 0.017198593316470962, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.0348353385925293, "eval_arxiv_n_ngrams_match_1": 13.398, "eval_arxiv_n_ngrams_match_2": 2.454, "eval_arxiv_n_ngrams_match_3": 0.532, "eval_arxiv_num_pred_words": 29.392, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 20.797553063154922, "eval_arxiv_pred_num_tokens": 48.25, "eval_arxiv_rouge_score": 0.3667639714517909, "eval_arxiv_runtime": 7.4071, "eval_arxiv_samples_per_second": 67.503, "eval_arxiv_steps_per_second": 0.135, "eval_arxiv_token_set_f1": 0.3633731359269907, "eval_arxiv_token_set_f1_sem": 0.004638444139085674, "eval_arxiv_token_set_precision": 0.29672680849330163, "eval_arxiv_token_set_recall": 0.48394511783078614, "eval_arxiv_true_num_tokens": 64.0, "step": 837 }, { "epoch": 3.69, "eval_python_code_alpaca_accuracy": 0.16496875, "eval_python_code_alpaca_bleu_score": 8.204287617402592, "eval_python_code_alpaca_bleu_score_sem": 0.2525555146312472, "eval_python_code_alpaca_emb_cos_sim": 0.8038115501403809, "eval_python_code_alpaca_emb_cos_sim_sem": 0.0037339978539967537, "eval_python_code_alpaca_emb_top1_equal": 0.20399999618530273, "eval_python_code_alpaca_emb_top1_equal_sem": 0.01803936777538603, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.432777166366577, "eval_python_code_alpaca_n_ngrams_match_1": 9.406, "eval_python_code_alpaca_n_ngrams_match_2": 2.684, "eval_python_code_alpaca_n_ngrams_match_3": 0.878, "eval_python_code_alpaca_num_pred_words": 23.662, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 11.390471429412177, "eval_python_code_alpaca_pred_num_tokens": 36.5546875, "eval_python_code_alpaca_rouge_score": 0.4847810986828926, "eval_python_code_alpaca_runtime": 7.0241, "eval_python_code_alpaca_samples_per_second": 71.184, "eval_python_code_alpaca_steps_per_second": 0.142, "eval_python_code_alpaca_token_set_f1": 0.5182590446946109, "eval_python_code_alpaca_token_set_f1_sem": 0.00562349793252812, "eval_python_code_alpaca_token_set_precision": 0.5384421834239288, "eval_python_code_alpaca_token_set_recall": 0.5156545489627503, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 837 }, { "epoch": 3.69, "eval_wikibio_accuracy": 0.3656875, "eval_wikibio_bleu_score": 5.745106458262442, "eval_wikibio_bleu_score_sem": 0.22901795042124162, "eval_wikibio_emb_cos_sim": 0.7403951287269592, "eval_wikibio_emb_cos_sim_sem": 0.005469197984519312, "eval_wikibio_emb_top1_equal": 0.17599999904632568, "eval_wikibio_emb_top1_equal_sem": 0.017047853594066943, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.3659920692443848, "eval_wikibio_n_ngrams_match_1": 8.816, "eval_wikibio_n_ngrams_match_2": 2.794, "eval_wikibio_n_ngrams_match_3": 1.048, "eval_wikibio_num_pred_words": 30.122, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 28.962215574756698, "eval_wikibio_pred_num_tokens": 55.6640625, "eval_wikibio_rouge_score": 0.34001650158245367, "eval_wikibio_runtime": 7.2558, "eval_wikibio_samples_per_second": 68.91, "eval_wikibio_steps_per_second": 0.138, "eval_wikibio_token_set_f1": 0.29675582828204017, "eval_wikibio_token_set_f1_sem": 0.0062596510158436265, "eval_wikibio_token_set_precision": 0.2891007207276478, "eval_wikibio_token_set_recall": 0.3230393369655439, "eval_wikibio_true_num_tokens": 61.1328125, "step": 837 }, { "epoch": 3.69, "eval_bias-bios_accuracy": 0.50478125, "eval_bias-bios_bleu_score": 17.50427548811937, "eval_bias-bios_bleu_score_sem": 0.8169224344931361, "eval_bias-bios_emb_cos_sim": 0.8618428111076355, "eval_bias-bios_emb_cos_sim_sem": 0.0035489455191403843, "eval_bias-bios_emb_top1_equal": 0.28200000524520874, "eval_bias-bios_emb_top1_equal_sem": 0.02014357168251164, "eval_bias-bios_exact_match": 0.0, "eval_bias-bios_exact_match_sem": 0.0, "eval_bias-bios_loss": 1.8244915008544922, "eval_bias-bios_n_ngrams_match_1": 19.316, "eval_bias-bios_n_ngrams_match_2": 8.598, "eval_bias-bios_n_ngrams_match_3": 4.876, "eval_bias-bios_num_pred_words": 32.384, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 6.199641704934617, "eval_bias-bios_pred_num_tokens": 43.8203125, "eval_bias-bios_rouge_score": 0.5388605533551043, "eval_bias-bios_runtime": 7.3982, "eval_bias-bios_samples_per_second": 67.584, "eval_bias-bios_steps_per_second": 0.135, "eval_bias-bios_token_set_f1": 0.5473429726394254, "eval_bias-bios_token_set_f1_sem": 0.006850989831666622, "eval_bias-bios_token_set_precision": 0.48906374168199385, "eval_bias-bios_token_set_recall": 0.6374659787111201, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 837 }, { "epoch": 3.7, "learning_rate": 0.001, "loss": 2.0441, "step": 840 }, { "epoch": 3.75, "learning_rate": 0.001, "loss": 2.0718, "step": 852 }, { "epoch": 3.81, "learning_rate": 0.001, "loss": 2.0151, "step": 864 }, { "epoch": 3.82, "eval_ag_news_accuracy": 0.3065, "eval_ag_news_bleu_score": 4.649906244640959, "eval_ag_news_bleu_score_sem": 0.15743135612355671, "eval_ag_news_emb_cos_sim": 0.8059832453727722, "eval_ag_news_emb_cos_sim_sem": 0.004423913304874249, "eval_ag_news_emb_top1_equal": 0.24799999594688416, "eval_ag_news_emb_top1_equal_sem": 0.01933234140950753, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5127530097961426, "eval_ag_news_n_ngrams_match_1": 12.54, "eval_ag_news_n_ngrams_match_2": 2.6, "eval_ag_news_n_ngrams_match_3": 0.696, "eval_ag_news_num_pred_words": 35.938, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.54047806247922, "eval_ag_news_pred_num_tokens": 53.4375, "eval_ag_news_rouge_score": 0.36159762379126176, "eval_ag_news_runtime": 7.3824, "eval_ag_news_samples_per_second": 67.729, "eval_ag_news_steps_per_second": 0.135, "eval_ag_news_token_set_f1": 0.3494382517083244, "eval_ag_news_token_set_f1_sem": 0.004868961435080832, "eval_ag_news_token_set_precision": 0.3109381587573618, "eval_ag_news_token_set_recall": 0.41859546357524646, "eval_ag_news_true_num_tokens": 56.09375, "step": 868 }, { "epoch": 3.82, "eval_anthropic_toxic_prompts_accuracy": 0.10953125, "eval_anthropic_toxic_prompts_bleu_score": 4.555906116852318, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.16559701222456658, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6958400011062622, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004614752992158636, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.164000004529953, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.016575810354078253, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 2.9055869579315186, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.9, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.804, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.654, "eval_anthropic_toxic_prompts_num_pred_words": 31.814, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 18.27596772882499, "eval_anthropic_toxic_prompts_pred_num_tokens": 46.8984375, "eval_anthropic_toxic_prompts_rouge_score": 0.2803638376890342, "eval_anthropic_toxic_prompts_runtime": 8.594, "eval_anthropic_toxic_prompts_samples_per_second": 58.18, "eval_anthropic_toxic_prompts_steps_per_second": 0.116, "eval_anthropic_toxic_prompts_token_set_f1": 0.35203804284307416, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0059340870185835256, "eval_anthropic_toxic_prompts_token_set_precision": 0.43291347429398824, "eval_anthropic_toxic_prompts_token_set_recall": 0.3207922059739797, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 868 }, { "epoch": 3.82, "eval_arxiv_accuracy": 0.421, "eval_arxiv_bleu_score": 3.9870535349349185, "eval_arxiv_bleu_score_sem": 0.11244166499753491, "eval_arxiv_emb_cos_sim": 0.7511023879051208, "eval_arxiv_emb_cos_sim_sem": 0.005155412127449404, "eval_arxiv_emb_top1_equal": 0.20600000023841858, "eval_arxiv_emb_top1_equal_sem": 0.018104793612990725, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.038604736328125, "eval_arxiv_n_ngrams_match_1": 14.278, "eval_arxiv_n_ngrams_match_2": 2.6, "eval_arxiv_n_ngrams_match_3": 0.544, "eval_arxiv_num_pred_words": 33.842, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 20.876095247947585, "eval_arxiv_pred_num_tokens": 55.46875, "eval_arxiv_rouge_score": 0.3653415985199304, "eval_arxiv_runtime": 7.2009, "eval_arxiv_samples_per_second": 69.436, "eval_arxiv_steps_per_second": 0.139, "eval_arxiv_token_set_f1": 0.3664645739316681, "eval_arxiv_token_set_f1_sem": 0.0044187143685006655, "eval_arxiv_token_set_precision": 0.3076602694776184, "eval_arxiv_token_set_recall": 0.4728561521285042, "eval_arxiv_true_num_tokens": 64.0, "step": 868 }, { "epoch": 3.82, "eval_python_code_alpaca_accuracy": 0.15759375, "eval_python_code_alpaca_bleu_score": 6.532033427224206, "eval_python_code_alpaca_bleu_score_sem": 0.21411555875924942, "eval_python_code_alpaca_emb_cos_sim": 0.7827064394950867, "eval_python_code_alpaca_emb_cos_sim_sem": 0.0038833028167971654, "eval_python_code_alpaca_emb_top1_equal": 0.20000000298023224, "eval_python_code_alpaca_emb_top1_equal_sem": 0.017906459589198134, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.5585899353027344, "eval_python_code_alpaca_n_ngrams_match_1": 9.566, "eval_python_code_alpaca_n_ngrams_match_2": 2.766, "eval_python_code_alpaca_n_ngrams_match_3": 0.938, "eval_python_code_alpaca_num_pred_words": 30.694, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 12.917589830203067, "eval_python_code_alpaca_pred_num_tokens": 46.953125, "eval_python_code_alpaca_rouge_score": 0.4305157812205427, "eval_python_code_alpaca_runtime": 6.8722, "eval_python_code_alpaca_samples_per_second": 72.757, "eval_python_code_alpaca_steps_per_second": 0.146, "eval_python_code_alpaca_token_set_f1": 0.49402210084007936, "eval_python_code_alpaca_token_set_f1_sem": 0.005532471827537771, "eval_python_code_alpaca_token_set_precision": 0.539240847548361, "eval_python_code_alpaca_token_set_recall": 0.47278297846068446, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 868 }, { "epoch": 3.82, "eval_wikibio_accuracy": 0.36740625, "eval_wikibio_bleu_score": 5.1254217873397945, "eval_wikibio_bleu_score_sem": 0.21410165833187295, "eval_wikibio_emb_cos_sim": 0.7113275527954102, "eval_wikibio_emb_cos_sim_sem": 0.0068368860752584535, "eval_wikibio_emb_top1_equal": 0.15600000321865082, "eval_wikibio_emb_top1_equal_sem": 0.01624363651663569, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.370696783065796, "eval_wikibio_n_ngrams_match_1": 8.212, "eval_wikibio_n_ngrams_match_2": 2.554, "eval_wikibio_n_ngrams_match_3": 0.912, "eval_wikibio_num_pred_words": 29.378, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 29.098795543578024, "eval_wikibio_pred_num_tokens": 58.8515625, "eval_wikibio_rouge_score": 0.30634676609525147, "eval_wikibio_runtime": 7.0087, "eval_wikibio_samples_per_second": 71.34, "eval_wikibio_steps_per_second": 0.143, "eval_wikibio_token_set_f1": 0.2760021469603922, "eval_wikibio_token_set_f1_sem": 0.006655398358510342, "eval_wikibio_token_set_precision": 0.2669645781576738, "eval_wikibio_token_set_recall": 0.3113305789003813, "eval_wikibio_true_num_tokens": 61.1328125, "step": 868 }, { "epoch": 3.82, "eval_bias-bios_accuracy": 0.51403125, "eval_bias-bios_bleu_score": 18.309380581053627, "eval_bias-bios_bleu_score_sem": 0.7998042611430283, "eval_bias-bios_emb_cos_sim": 0.8731557130813599, "eval_bias-bios_emb_cos_sim_sem": 0.0032644259591032673, "eval_bias-bios_emb_top1_equal": 0.30399999022483826, "eval_bias-bios_emb_top1_equal_sem": 0.020591649838958805, "eval_bias-bios_exact_match": 0.002, "eval_bias-bios_exact_match_sem": 0.002, "eval_bias-bios_loss": 1.78922438621521, "eval_bias-bios_n_ngrams_match_1": 21.338, "eval_bias-bios_n_ngrams_match_2": 9.45, "eval_bias-bios_n_ngrams_match_3": 5.204, "eval_bias-bios_num_pred_words": 39.284, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 5.984808765578816, "eval_bias-bios_pred_num_tokens": 53.078125, "eval_bias-bios_rouge_score": 0.5363434462187412, "eval_bias-bios_runtime": 7.3659, "eval_bias-bios_samples_per_second": 67.88, "eval_bias-bios_steps_per_second": 0.136, "eval_bias-bios_token_set_f1": 0.5546380520588752, "eval_bias-bios_token_set_f1_sem": 0.006561511146551417, "eval_bias-bios_token_set_precision": 0.519204443494721, "eval_bias-bios_token_set_recall": 0.608071169462653, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 868 }, { "epoch": 3.86, "learning_rate": 0.001, "loss": 1.8108, "step": 876 }, { "epoch": 3.91, "learning_rate": 0.001, "loss": 1.9904, "step": 888 }, { "epoch": 3.96, "eval_ag_news_accuracy": 0.30365625, "eval_ag_news_bleu_score": 4.70485632940363, "eval_ag_news_bleu_score_sem": 0.1545656516010875, "eval_ag_news_emb_cos_sim": 0.8099994659423828, "eval_ag_news_emb_cos_sim_sem": 0.004615151499470739, "eval_ag_news_emb_top1_equal": 0.26600000262260437, "eval_ag_news_emb_top1_equal_sem": 0.01978055817719369, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.51027250289917, "eval_ag_news_n_ngrams_match_1": 13.276, "eval_ag_news_n_ngrams_match_2": 2.766, "eval_ag_news_n_ngrams_match_3": 0.722, "eval_ag_news_num_pred_words": 41.458, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.457383775898954, "eval_ag_news_pred_num_tokens": 61.1484375, "eval_ag_news_rouge_score": 0.3576016763421582, "eval_ag_news_runtime": 7.2607, "eval_ag_news_samples_per_second": 68.864, "eval_ag_news_steps_per_second": 0.138, "eval_ag_news_token_set_f1": 0.3483609027032103, "eval_ag_news_token_set_f1_sem": 0.004683975440046473, "eval_ag_news_token_set_precision": 0.32529803711985894, "eval_ag_news_token_set_recall": 0.3941194852067757, "eval_ag_news_true_num_tokens": 56.09375, "step": 899 }, { "epoch": 3.96, "eval_anthropic_toxic_prompts_accuracy": 0.10740625, "eval_anthropic_toxic_prompts_bleu_score": 3.6769418625327526, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.133806955295666, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6916981339454651, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00461967735634389, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.12999999523162842, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015055010489467818, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.042083978652954, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.084, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.976, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.788, "eval_anthropic_toxic_prompts_num_pred_words": 40.926, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 20.9488547426713, "eval_anthropic_toxic_prompts_pred_num_tokens": 57.859375, "eval_anthropic_toxic_prompts_rouge_score": 0.2433747464426228, "eval_anthropic_toxic_prompts_runtime": 47.0283, "eval_anthropic_toxic_prompts_samples_per_second": 10.632, "eval_anthropic_toxic_prompts_steps_per_second": 0.021, "eval_anthropic_toxic_prompts_token_set_f1": 0.3355777043880606, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00582801703663308, "eval_anthropic_toxic_prompts_token_set_precision": 0.4417152321391377, "eval_anthropic_toxic_prompts_token_set_recall": 0.2954359544567566, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 899 }, { "epoch": 3.96, "eval_arxiv_accuracy": 0.42559375, "eval_arxiv_bleu_score": 4.2724707434332805, "eval_arxiv_bleu_score_sem": 0.12141510014680776, "eval_arxiv_emb_cos_sim": 0.7448193430900574, "eval_arxiv_emb_cos_sim_sem": 0.005906011645397703, "eval_arxiv_emb_top1_equal": 0.257999986410141, "eval_arxiv_emb_top1_equal_sem": 0.019586711692263472, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 2.988633871078491, "eval_arxiv_n_ngrams_match_1": 14.812, "eval_arxiv_n_ngrams_match_2": 2.814, "eval_arxiv_n_ngrams_match_3": 0.622, "eval_arxiv_num_pred_words": 37.656, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 19.858534633548246, "eval_arxiv_pred_num_tokens": 61.75, "eval_arxiv_rouge_score": 0.3571695577267662, "eval_arxiv_runtime": 11.6894, "eval_arxiv_samples_per_second": 42.774, "eval_arxiv_steps_per_second": 0.086, "eval_arxiv_token_set_f1": 0.3610709746435564, "eval_arxiv_token_set_f1_sem": 0.00493953188589629, "eval_arxiv_token_set_precision": 0.31268383336107497, "eval_arxiv_token_set_recall": 0.4499879942866487, "eval_arxiv_true_num_tokens": 64.0, "step": 899 }, { "epoch": 3.96, "eval_python_code_alpaca_accuracy": 0.153, "eval_python_code_alpaca_bleu_score": 5.200061694757475, "eval_python_code_alpaca_bleu_score_sem": 0.17449763706947194, "eval_python_code_alpaca_emb_cos_sim": 0.7678459882736206, "eval_python_code_alpaca_emb_cos_sim_sem": 0.0037719813318817415, "eval_python_code_alpaca_emb_top1_equal": 0.1979999989271164, "eval_python_code_alpaca_emb_top1_equal_sem": 0.017838958581409683, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.7135307788848877, "eval_python_code_alpaca_n_ngrams_match_1": 9.646, "eval_python_code_alpaca_n_ngrams_match_2": 2.78, "eval_python_code_alpaca_n_ngrams_match_3": 0.904, "eval_python_code_alpaca_num_pred_words": 37.516, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 15.082434354347528, "eval_python_code_alpaca_pred_num_tokens": 58.8828125, "eval_python_code_alpaca_rouge_score": 0.3818168407275595, "eval_python_code_alpaca_runtime": 7.6206, "eval_python_code_alpaca_samples_per_second": 65.611, "eval_python_code_alpaca_steps_per_second": 0.131, "eval_python_code_alpaca_token_set_f1": 0.46320618527919866, "eval_python_code_alpaca_token_set_f1_sem": 0.0054899797669887875, "eval_python_code_alpaca_token_set_precision": 0.536733945796312, "eval_python_code_alpaca_token_set_recall": 0.42535964460210746, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 899 }, { "epoch": 3.96, "eval_wikibio_accuracy": 0.371625, "eval_wikibio_bleu_score": 4.961991532761174, "eval_wikibio_bleu_score_sem": 0.19391037642899922, "eval_wikibio_emb_cos_sim": 0.710414469242096, "eval_wikibio_emb_cos_sim_sem": 0.006885463983323721, "eval_wikibio_emb_top1_equal": 0.1679999977350235, "eval_wikibio_emb_top1_equal_sem": 0.016736554076096456, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.3331379890441895, "eval_wikibio_n_ngrams_match_1": 8.558, "eval_wikibio_n_ngrams_match_2": 2.624, "eval_wikibio_n_ngrams_match_3": 0.958, "eval_wikibio_num_pred_words": 32.418, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 28.02614961148761, "eval_wikibio_pred_num_tokens": 62.796875, "eval_wikibio_rouge_score": 0.301097672734814, "eval_wikibio_runtime": 7.0138, "eval_wikibio_samples_per_second": 71.288, "eval_wikibio_steps_per_second": 0.143, "eval_wikibio_token_set_f1": 0.27633025678840173, "eval_wikibio_token_set_f1_sem": 0.0066326804022172215, "eval_wikibio_token_set_precision": 0.27362565948405515, "eval_wikibio_token_set_recall": 0.30748851633911084, "eval_wikibio_true_num_tokens": 61.1328125, "step": 899 }, { "epoch": 3.96, "eval_bias-bios_accuracy": 0.5096875, "eval_bias-bios_bleu_score": 17.551685060770186, "eval_bias-bios_bleu_score_sem": 0.7535367337734199, "eval_bias-bios_emb_cos_sim": 0.8744969964027405, "eval_bias-bios_emb_cos_sim_sem": 0.0031596961716421285, "eval_bias-bios_emb_top1_equal": 0.33799999952316284, "eval_bias-bios_emb_top1_equal_sem": 0.02117566563684607, "eval_bias-bios_exact_match": 0.0, "eval_bias-bios_exact_match_sem": 0.0, "eval_bias-bios_loss": 1.8095989227294922, "eval_bias-bios_n_ngrams_match_1": 22.328, "eval_bias-bios_n_ngrams_match_2": 9.944, "eval_bias-bios_n_ngrams_match_3": 5.458, "eval_bias-bios_num_pred_words": 45.576, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 6.107997162059592, "eval_bias-bios_pred_num_tokens": 60.453125, "eval_bias-bios_rouge_score": 0.5208868368337067, "eval_bias-bios_runtime": 7.3719, "eval_bias-bios_samples_per_second": 67.825, "eval_bias-bios_steps_per_second": 0.136, "eval_bias-bios_token_set_f1": 0.5481558626317612, "eval_bias-bios_token_set_f1_sem": 0.0065506123204596995, "eval_bias-bios_token_set_precision": 0.5357022644757441, "eval_bias-bios_token_set_recall": 0.5720472461620488, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 899 }, { "epoch": 3.96, "learning_rate": 0.001, "loss": 2.0314, "step": 900 }, { "epoch": 4.02, "learning_rate": 0.001, "loss": 1.879, "step": 912 }, { "epoch": 4.07, "learning_rate": 0.001, "loss": 2.1048, "step": 924 }, { "epoch": 4.1, "eval_ag_news_accuracy": 0.30309375, "eval_ag_news_bleu_score": 4.649942117770666, "eval_ag_news_bleu_score_sem": 0.1556338609252372, "eval_ag_news_emb_cos_sim": 0.8118283152580261, "eval_ag_news_emb_cos_sim_sem": 0.004582791772767747, "eval_ag_news_emb_top1_equal": 0.2639999985694885, "eval_ag_news_emb_top1_equal_sem": 0.019732885240582997, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.52583646774292, "eval_ag_news_n_ngrams_match_1": 13.382, "eval_ag_news_n_ngrams_match_2": 2.81, "eval_ag_news_n_ngrams_match_3": 0.714, "eval_ag_news_num_pred_words": 41.96, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.98218673429963, "eval_ag_news_pred_num_tokens": 62.3828125, "eval_ag_news_rouge_score": 0.3575831135786729, "eval_ag_news_runtime": 7.3077, "eval_ag_news_samples_per_second": 68.421, "eval_ag_news_steps_per_second": 0.137, "eval_ag_news_token_set_f1": 0.3497080600987024, "eval_ag_news_token_set_f1_sem": 0.004748773824398774, "eval_ag_news_token_set_precision": 0.32643981583812415, "eval_ag_news_token_set_recall": 0.40006084076159165, "eval_ag_news_true_num_tokens": 56.09375, "step": 930 }, { "epoch": 4.1, "eval_anthropic_toxic_prompts_accuracy": 0.10728125, "eval_anthropic_toxic_prompts_bleu_score": 3.624649536553562, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12608760768171234, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.698235034942627, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004364685988613237, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1459999978542328, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015807205702664997, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.06230092048645, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.266, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.98, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.768, "eval_anthropic_toxic_prompts_num_pred_words": 42.214, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 21.376686675103247, "eval_anthropic_toxic_prompts_pred_num_tokens": 60.625, "eval_anthropic_toxic_prompts_rouge_score": 0.2454785616967755, "eval_anthropic_toxic_prompts_runtime": 7.1632, "eval_anthropic_toxic_prompts_samples_per_second": 69.801, "eval_anthropic_toxic_prompts_steps_per_second": 0.14, "eval_anthropic_toxic_prompts_token_set_f1": 0.3365336026554279, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005508481783304389, "eval_anthropic_toxic_prompts_token_set_precision": 0.4560143105044075, "eval_anthropic_toxic_prompts_token_set_recall": 0.28870884584713874, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 930 }, { "epoch": 4.1, "eval_arxiv_accuracy": 0.42790625, "eval_arxiv_bleu_score": 4.381704552162308, "eval_arxiv_bleu_score_sem": 0.12756008834149637, "eval_arxiv_emb_cos_sim": 0.7607366442680359, "eval_arxiv_emb_cos_sim_sem": 0.005076740919718705, "eval_arxiv_emb_top1_equal": 0.28600001335144043, "eval_arxiv_emb_top1_equal_sem": 0.020229345383440313, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 2.9774320125579834, "eval_arxiv_n_ngrams_match_1": 15.328, "eval_arxiv_n_ngrams_match_2": 2.892, "eval_arxiv_n_ngrams_match_3": 0.634, "eval_arxiv_num_pred_words": 38.774, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 19.63732343956472, "eval_arxiv_pred_num_tokens": 62.8515625, "eval_arxiv_rouge_score": 0.36766133482315777, "eval_arxiv_runtime": 7.3221, "eval_arxiv_samples_per_second": 68.287, "eval_arxiv_steps_per_second": 0.137, "eval_arxiv_token_set_f1": 0.37072349741716376, "eval_arxiv_token_set_f1_sem": 0.004570708041320284, "eval_arxiv_token_set_precision": 0.31969291145012907, "eval_arxiv_token_set_recall": 0.4626230168900101, "eval_arxiv_true_num_tokens": 64.0, "step": 930 }, { "epoch": 4.1, "eval_python_code_alpaca_accuracy": 0.15403125, "eval_python_code_alpaca_bleu_score": 5.122233724835081, "eval_python_code_alpaca_bleu_score_sem": 0.15628240003499672, "eval_python_code_alpaca_emb_cos_sim": 0.777225136756897, "eval_python_code_alpaca_emb_cos_sim_sem": 0.003568612987872965, "eval_python_code_alpaca_emb_top1_equal": 0.17599999904632568, "eval_python_code_alpaca_emb_top1_equal_sem": 0.017047853594066943, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.705463409423828, "eval_python_code_alpaca_n_ngrams_match_1": 9.852, "eval_python_code_alpaca_n_ngrams_match_2": 2.842, "eval_python_code_alpaca_n_ngrams_match_3": 0.954, "eval_python_code_alpaca_num_pred_words": 38.956, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 14.961248267762413, "eval_python_code_alpaca_pred_num_tokens": 61.1015625, "eval_python_code_alpaca_rouge_score": 0.38312828213368866, "eval_python_code_alpaca_runtime": 9.9902, "eval_python_code_alpaca_samples_per_second": 50.049, "eval_python_code_alpaca_steps_per_second": 0.1, "eval_python_code_alpaca_token_set_f1": 0.46499631424575827, "eval_python_code_alpaca_token_set_f1_sem": 0.00515758919249843, "eval_python_code_alpaca_token_set_precision": 0.5498324800101624, "eval_python_code_alpaca_token_set_recall": 0.420151556893869, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 930 }, { "epoch": 4.1, "eval_wikibio_accuracy": 0.3703125, "eval_wikibio_bleu_score": 5.0295081586948, "eval_wikibio_bleu_score_sem": 0.2129816091894679, "eval_wikibio_emb_cos_sim": 0.7173275351524353, "eval_wikibio_emb_cos_sim_sem": 0.006135841074850147, "eval_wikibio_emb_top1_equal": 0.1459999978542328, "eval_wikibio_emb_top1_equal_sem": 0.015807205702664997, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.323803186416626, "eval_wikibio_n_ngrams_match_1": 8.628, "eval_wikibio_n_ngrams_match_2": 2.744, "eval_wikibio_n_ngrams_match_3": 0.998, "eval_wikibio_num_pred_words": 31.958, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 27.76574832467491, "eval_wikibio_pred_num_tokens": 62.890625, "eval_wikibio_rouge_score": 0.3016818815426825, "eval_wikibio_runtime": 7.2876, "eval_wikibio_samples_per_second": 68.61, "eval_wikibio_steps_per_second": 0.137, "eval_wikibio_token_set_f1": 0.2809928597513778, "eval_wikibio_token_set_f1_sem": 0.006686818517156172, "eval_wikibio_token_set_precision": 0.2752523175790538, "eval_wikibio_token_set_recall": 0.31501951298363845, "eval_wikibio_true_num_tokens": 61.1328125, "step": 930 }, { "epoch": 4.1, "eval_bias-bios_accuracy": 0.51159375, "eval_bias-bios_bleu_score": 17.560732357402014, "eval_bias-bios_bleu_score_sem": 0.7532225324274616, "eval_bias-bios_emb_cos_sim": 0.8785954713821411, "eval_bias-bios_emb_cos_sim_sem": 0.0027169508826951332, "eval_bias-bios_emb_top1_equal": 0.36000001430511475, "eval_bias-bios_emb_top1_equal_sem": 0.021487751507037762, "eval_bias-bios_exact_match": 0.0, "eval_bias-bios_exact_match_sem": 0.0, "eval_bias-bios_loss": 1.7983323335647583, "eval_bias-bios_n_ngrams_match_1": 22.614, "eval_bias-bios_n_ngrams_match_2": 10.156, "eval_bias-bios_n_ngrams_match_3": 5.602, "eval_bias-bios_num_pred_words": 46.644, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 6.0395670780886865, "eval_bias-bios_pred_num_tokens": 62.125, "eval_bias-bios_rouge_score": 0.523223725376486, "eval_bias-bios_runtime": 7.4795, "eval_bias-bios_samples_per_second": 66.85, "eval_bias-bios_steps_per_second": 0.134, "eval_bias-bios_token_set_f1": 0.5539285540120094, "eval_bias-bios_token_set_f1_sem": 0.0064311476985358445, "eval_bias-bios_token_set_precision": 0.5429808276328268, "eval_bias-bios_token_set_recall": 0.5750748467740818, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 930 }, { "epoch": 4.12, "learning_rate": 0.001, "loss": 2.0386, "step": 936 }, { "epoch": 4.18, "learning_rate": 0.001, "loss": 1.8774, "step": 948 }, { "epoch": 4.23, "learning_rate": 0.001, "loss": 1.769, "step": 960 }, { "epoch": 4.23, "eval_ag_news_accuracy": 0.30234375, "eval_ag_news_bleu_score": 4.147391091922028, "eval_ag_news_bleu_score_sem": 0.15238698424262714, "eval_ag_news_emb_cos_sim": 0.7980059385299683, "eval_ag_news_emb_cos_sim_sem": 0.004560917853014478, "eval_ag_news_emb_top1_equal": 0.27000001072883606, "eval_ag_news_emb_top1_equal_sem": 0.019874356669179787, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.63006329536438, "eval_ag_news_n_ngrams_match_1": 10.884, "eval_ag_news_n_ngrams_match_2": 2.304, "eval_ag_news_n_ngrams_match_3": 0.594, "eval_ag_news_num_pred_words": 26.08, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 37.715203739197406, "eval_ag_news_pred_num_tokens": 38.3046875, "eval_ag_news_rouge_score": 0.36068826779136964, "eval_ag_news_runtime": 7.2741, "eval_ag_news_samples_per_second": 68.737, "eval_ag_news_steps_per_second": 0.137, "eval_ag_news_token_set_f1": 0.34376003652816817, "eval_ag_news_token_set_f1_sem": 0.004950570396033107, "eval_ag_news_token_set_precision": 0.285037592368919, "eval_ag_news_token_set_recall": 0.45211257290244156, "eval_ag_news_true_num_tokens": 56.09375, "step": 961 }, { "epoch": 4.23, "eval_anthropic_toxic_prompts_accuracy": 0.11275, "eval_anthropic_toxic_prompts_bleu_score": 6.354488424332666, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.23522810593773286, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.7129065990447998, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004523031336356766, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.14800000190734863, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015896458012572223, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 2.8465847969055176, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.62, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.794, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.694, "eval_anthropic_toxic_prompts_num_pred_words": 22.834, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 17.22884125884587, "eval_anthropic_toxic_prompts_pred_num_tokens": 32.046875, "eval_anthropic_toxic_prompts_rouge_score": 0.33907338462194764, "eval_anthropic_toxic_prompts_runtime": 6.8732, "eval_anthropic_toxic_prompts_samples_per_second": 72.747, "eval_anthropic_toxic_prompts_steps_per_second": 0.145, "eval_anthropic_toxic_prompts_token_set_f1": 0.3653409896107974, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006149537629688382, "eval_anthropic_toxic_prompts_token_set_precision": 0.4229459544561599, "eval_anthropic_toxic_prompts_token_set_recall": 0.34831732469949417, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 961 }, { "epoch": 4.23, "eval_arxiv_accuracy": 0.4078125, "eval_arxiv_bleu_score": 3.3433365195319773, "eval_arxiv_bleu_score_sem": 0.0999958730659908, "eval_arxiv_emb_cos_sim": 0.748033881187439, "eval_arxiv_emb_cos_sim_sem": 0.004871760220456363, "eval_arxiv_emb_top1_equal": 0.17000000178813934, "eval_arxiv_emb_top1_equal_sem": 0.016815633120741882, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.133113384246826, "eval_arxiv_n_ngrams_match_1": 12.732, "eval_arxiv_n_ngrams_match_2": 2.336, "eval_arxiv_n_ngrams_match_3": 0.508, "eval_arxiv_num_pred_words": 26.09, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 22.945306005739177, "eval_arxiv_pred_num_tokens": 41.4140625, "eval_arxiv_rouge_score": 0.3638914135343809, "eval_arxiv_runtime": 52.9647, "eval_arxiv_samples_per_second": 9.44, "eval_arxiv_steps_per_second": 0.019, "eval_arxiv_token_set_f1": 0.3630713709509955, "eval_arxiv_token_set_f1_sem": 0.0044595886226552126, "eval_arxiv_token_set_precision": 0.2937237459695398, "eval_arxiv_token_set_recall": 0.4890969392190314, "eval_arxiv_true_num_tokens": 64.0, "step": 961 }, { "epoch": 4.23, "eval_python_code_alpaca_accuracy": 0.16284375, "eval_python_code_alpaca_bleu_score": 8.244648427163419, "eval_python_code_alpaca_bleu_score_sem": 0.27193364945369436, "eval_python_code_alpaca_emb_cos_sim": 0.7976054549217224, "eval_python_code_alpaca_emb_cos_sim_sem": 0.00378482253029405, "eval_python_code_alpaca_emb_top1_equal": 0.2160000056028366, "eval_python_code_alpaca_emb_top1_equal_sem": 0.018421909471797383, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.4601211547851562, "eval_python_code_alpaca_n_ngrams_match_1": 8.85, "eval_python_code_alpaca_n_ngrams_match_2": 2.302, "eval_python_code_alpaca_n_ngrams_match_3": 0.768, "eval_python_code_alpaca_num_pred_words": 21.24, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 11.706229719816177, "eval_python_code_alpaca_pred_num_tokens": 32.0703125, "eval_python_code_alpaca_rouge_score": 0.4873818187316993, "eval_python_code_alpaca_runtime": 6.9102, "eval_python_code_alpaca_samples_per_second": 72.357, "eval_python_code_alpaca_steps_per_second": 0.145, "eval_python_code_alpaca_token_set_f1": 0.5018531295912411, "eval_python_code_alpaca_token_set_f1_sem": 0.005550867674810159, "eval_python_code_alpaca_token_set_precision": 0.5139247919672837, "eval_python_code_alpaca_token_set_recall": 0.5075546170413242, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 961 }, { "epoch": 4.23, "eval_wikibio_accuracy": 0.35096875, "eval_wikibio_bleu_score": 6.137347588754137, "eval_wikibio_bleu_score_sem": 0.24090041823655922, "eval_wikibio_emb_cos_sim": 0.7398593425750732, "eval_wikibio_emb_cos_sim_sem": 0.0057534999648952215, "eval_wikibio_emb_top1_equal": 0.1679999977350235, "eval_wikibio_emb_top1_equal_sem": 0.016736554076096456, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.5328280925750732, "eval_wikibio_n_ngrams_match_1": 8.75, "eval_wikibio_n_ngrams_match_2": 2.724, "eval_wikibio_n_ngrams_match_3": 1.0, "eval_wikibio_num_pred_words": 27.262, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 34.22060994651517, "eval_wikibio_pred_num_tokens": 48.1953125, "eval_wikibio_rouge_score": 0.3567907867484968, "eval_wikibio_runtime": 8.1224, "eval_wikibio_samples_per_second": 61.558, "eval_wikibio_steps_per_second": 0.123, "eval_wikibio_token_set_f1": 0.3103392604417974, "eval_wikibio_token_set_f1_sem": 0.005936015862774939, "eval_wikibio_token_set_precision": 0.29465665802898156, "eval_wikibio_token_set_recall": 0.34358783712012403, "eval_wikibio_true_num_tokens": 61.1328125, "step": 961 }, { "epoch": 4.23, "eval_bias-bios_accuracy": 0.506, "eval_bias-bios_bleu_score": 16.979589075313555, "eval_bias-bios_bleu_score_sem": 0.8283281982214029, "eval_bias-bios_emb_cos_sim": 0.8586215376853943, "eval_bias-bios_emb_cos_sim_sem": 0.003312062910103837, "eval_bias-bios_emb_top1_equal": 0.28200000524520874, "eval_bias-bios_emb_top1_equal_sem": 0.02014357434811239, "eval_bias-bios_exact_match": 0.002, "eval_bias-bios_exact_match_sem": 0.002, "eval_bias-bios_loss": 1.8640848398208618, "eval_bias-bios_n_ngrams_match_1": 18.688, "eval_bias-bios_n_ngrams_match_2": 8.448, "eval_bias-bios_n_ngrams_match_3": 4.798, "eval_bias-bios_num_pred_words": 29.634, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 6.45003037320653, "eval_bias-bios_pred_num_tokens": 39.71875, "eval_bias-bios_rouge_score": 0.5397636174739893, "eval_bias-bios_runtime": 7.1597, "eval_bias-bios_samples_per_second": 69.836, "eval_bias-bios_steps_per_second": 0.14, "eval_bias-bios_token_set_f1": 0.5489658834860061, "eval_bias-bios_token_set_f1_sem": 0.006750833075212532, "eval_bias-bios_token_set_precision": 0.48311541662210883, "eval_bias-bios_token_set_recall": 0.6515866266764235, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 961 }, { "epoch": 4.28, "learning_rate": 0.001, "loss": 2.1033, "step": 972 }, { "epoch": 4.33, "learning_rate": 0.001, "loss": 2.0322, "step": 984 }, { "epoch": 4.37, "eval_ag_news_accuracy": 0.302625, "eval_ag_news_bleu_score": 4.721562311279806, "eval_ag_news_bleu_score_sem": 0.15220658679469298, "eval_ag_news_emb_cos_sim": 0.8130433559417725, "eval_ag_news_emb_cos_sim_sem": 0.004464113896182924, "eval_ag_news_emb_top1_equal": 0.27399998903274536, "eval_ag_news_emb_top1_equal_sem": 0.019966103981388875, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5447354316711426, "eval_ag_news_n_ngrams_match_1": 13.134, "eval_ag_news_n_ngrams_match_2": 2.762, "eval_ag_news_n_ngrams_match_3": 0.746, "eval_ag_news_num_pred_words": 39.284, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.63052199081744, "eval_ag_news_pred_num_tokens": 59.6953125, "eval_ag_news_rouge_score": 0.3646941890955627, "eval_ag_news_runtime": 7.3132, "eval_ag_news_samples_per_second": 68.369, "eval_ag_news_steps_per_second": 0.137, "eval_ag_news_token_set_f1": 0.35109915978400064, "eval_ag_news_token_set_f1_sem": 0.0047277102046728026, "eval_ag_news_token_set_precision": 0.3224770229379847, "eval_ag_news_token_set_recall": 0.4050023141334169, "eval_ag_news_true_num_tokens": 56.09375, "step": 992 }, { "epoch": 4.37, "eval_anthropic_toxic_prompts_accuracy": 0.10625, "eval_anthropic_toxic_prompts_bleu_score": 3.857535866643261, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1364815254506636, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6963840126991272, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004629406799080843, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.15399999916553497, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.016158283980625493, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.0389821529388428, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.056, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.866, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.698, "eval_anthropic_toxic_prompts_num_pred_words": 37.574, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 20.883975720078084, "eval_anthropic_toxic_prompts_pred_num_tokens": 55.6796875, "eval_anthropic_toxic_prompts_rouge_score": 0.25629776628720247, "eval_anthropic_toxic_prompts_runtime": 6.8668, "eval_anthropic_toxic_prompts_samples_per_second": 72.815, "eval_anthropic_toxic_prompts_steps_per_second": 0.146, "eval_anthropic_toxic_prompts_token_set_f1": 0.34453358271344703, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005626971181626931, "eval_anthropic_toxic_prompts_token_set_precision": 0.443931261076189, "eval_anthropic_toxic_prompts_token_set_recall": 0.3034545321130603, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 992 }, { "epoch": 4.37, "eval_arxiv_accuracy": 0.42146875, "eval_arxiv_bleu_score": 4.467109503010037, "eval_arxiv_bleu_score_sem": 0.12513951968969245, "eval_arxiv_emb_cos_sim": 0.7589080929756165, "eval_arxiv_emb_cos_sim_sem": 0.0052892959235117465, "eval_arxiv_emb_top1_equal": 0.2759999930858612, "eval_arxiv_emb_top1_equal_sem": 0.02001121794127971, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.0264053344726562, "eval_arxiv_n_ngrams_match_1": 15.18, "eval_arxiv_n_ngrams_match_2": 2.974, "eval_arxiv_n_ngrams_match_3": 0.668, "eval_arxiv_num_pred_words": 36.86, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 20.622966521695513, "eval_arxiv_pred_num_tokens": 59.8984375, "eval_arxiv_rouge_score": 0.3693511442153703, "eval_arxiv_runtime": 7.306, "eval_arxiv_samples_per_second": 68.436, "eval_arxiv_steps_per_second": 0.137, "eval_arxiv_token_set_f1": 0.36958194757201546, "eval_arxiv_token_set_f1_sem": 0.0045242802112849066, "eval_arxiv_token_set_precision": 0.32082253661436355, "eval_arxiv_token_set_recall": 0.4518457470546488, "eval_arxiv_true_num_tokens": 64.0, "step": 992 }, { "epoch": 4.37, "eval_python_code_alpaca_accuracy": 0.1514375, "eval_python_code_alpaca_bleu_score": 5.4034348745502045, "eval_python_code_alpaca_bleu_score_sem": 0.16839146425841325, "eval_python_code_alpaca_emb_cos_sim": 0.7750210762023926, "eval_python_code_alpaca_emb_cos_sim_sem": 0.003707060625619144, "eval_python_code_alpaca_emb_top1_equal": 0.20999999344348907, "eval_python_code_alpaca_emb_top1_equal_sem": 0.018233622097230975, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.709430694580078, "eval_python_code_alpaca_n_ngrams_match_1": 9.768, "eval_python_code_alpaca_n_ngrams_match_2": 2.758, "eval_python_code_alpaca_n_ngrams_match_3": 0.88, "eval_python_code_alpaca_num_pred_words": 35.508, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 15.02072170196434, "eval_python_code_alpaca_pred_num_tokens": 56.53125, "eval_python_code_alpaca_rouge_score": 0.39803901244698126, "eval_python_code_alpaca_runtime": 6.8684, "eval_python_code_alpaca_samples_per_second": 72.797, "eval_python_code_alpaca_steps_per_second": 0.146, "eval_python_code_alpaca_token_set_f1": 0.47170758684628605, "eval_python_code_alpaca_token_set_f1_sem": 0.005351354049134305, "eval_python_code_alpaca_token_set_precision": 0.5434103449240332, "eval_python_code_alpaca_token_set_recall": 0.43454752980053024, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 992 }, { "epoch": 4.37, "eval_wikibio_accuracy": 0.36184375, "eval_wikibio_bleu_score": 5.358936275570008, "eval_wikibio_bleu_score_sem": 0.21306544598498858, "eval_wikibio_emb_cos_sim": 0.7233623266220093, "eval_wikibio_emb_cos_sim_sem": 0.006553808605227006, "eval_wikibio_emb_top1_equal": 0.18799999356269836, "eval_wikibio_emb_top1_equal_sem": 0.017490679184236527, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.4013521671295166, "eval_wikibio_n_ngrams_match_1": 8.566, "eval_wikibio_n_ngrams_match_2": 2.716, "eval_wikibio_n_ngrams_match_3": 1.036, "eval_wikibio_num_pred_words": 31.162, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 30.004643923417493, "eval_wikibio_pred_num_tokens": 61.953125, "eval_wikibio_rouge_score": 0.31111536849106736, "eval_wikibio_runtime": 7.0887, "eval_wikibio_samples_per_second": 70.535, "eval_wikibio_steps_per_second": 0.141, "eval_wikibio_token_set_f1": 0.28584804623663285, "eval_wikibio_token_set_f1_sem": 0.006453280164360098, "eval_wikibio_token_set_precision": 0.27848457039702645, "eval_wikibio_token_set_recall": 0.3193362754800391, "eval_wikibio_true_num_tokens": 61.1328125, "step": 992 }, { "epoch": 4.37, "eval_bias-bios_accuracy": 0.51265625, "eval_bias-bios_bleu_score": 17.964736092698793, "eval_bias-bios_bleu_score_sem": 0.7466608259028716, "eval_bias-bios_emb_cos_sim": 0.8767092823982239, "eval_bias-bios_emb_cos_sim_sem": 0.0030343106439704023, "eval_bias-bios_emb_top1_equal": 0.3580000102519989, "eval_bias-bios_emb_top1_equal_sem": 0.021461435363634866, "eval_bias-bios_exact_match": 0.0, "eval_bias-bios_exact_match_sem": 0.0, "eval_bias-bios_loss": 1.7962360382080078, "eval_bias-bios_n_ngrams_match_1": 22.106, "eval_bias-bios_n_ngrams_match_2": 9.886, "eval_bias-bios_n_ngrams_match_3": 5.454, "eval_bias-bios_num_pred_words": 43.802, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 6.026919622698665, "eval_bias-bios_pred_num_tokens": 59.5234375, "eval_bias-bios_rouge_score": 0.526119687478555, "eval_bias-bios_runtime": 7.8302, "eval_bias-bios_samples_per_second": 63.855, "eval_bias-bios_steps_per_second": 0.128, "eval_bias-bios_token_set_f1": 0.5506974926133947, "eval_bias-bios_token_set_f1_sem": 0.006418396953704471, "eval_bias-bios_token_set_precision": 0.5331520331431275, "eval_bias-bios_token_set_recall": 0.5807658588551965, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 992 }, { "epoch": 4.39, "learning_rate": 0.001, "loss": 1.9006, "step": 996 }, { "epoch": 4.44, "learning_rate": 0.001, "loss": 1.6887, "step": 1008 }, { "epoch": 4.49, "learning_rate": 0.001, "loss": 2.1588, "step": 1020 }, { "epoch": 4.51, "eval_ag_news_accuracy": 0.3020625, "eval_ag_news_bleu_score": 4.7932275818087815, "eval_ag_news_bleu_score_sem": 0.15605911700718159, "eval_ag_news_emb_cos_sim": 0.8050810694694519, "eval_ag_news_emb_cos_sim_sem": 0.004921948817774727, "eval_ag_news_emb_top1_equal": 0.25200000405311584, "eval_ag_news_emb_top1_equal_sem": 0.019435728067390842, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.521515130996704, "eval_ag_news_n_ngrams_match_1": 13.042, "eval_ag_news_n_ngrams_match_2": 2.88, "eval_ag_news_n_ngrams_match_3": 0.808, "eval_ag_news_num_pred_words": 40.654, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.8356550963507, "eval_ag_news_pred_num_tokens": 60.2109375, "eval_ag_news_rouge_score": 0.3544122061798759, "eval_ag_news_runtime": 7.6014, "eval_ag_news_samples_per_second": 65.777, "eval_ag_news_steps_per_second": 0.132, "eval_ag_news_token_set_f1": 0.34653211780222726, "eval_ag_news_token_set_f1_sem": 0.004903302915032384, "eval_ag_news_token_set_precision": 0.3193018972552045, "eval_ag_news_token_set_recall": 0.40219067047503776, "eval_ag_news_true_num_tokens": 56.09375, "step": 1023 }, { "epoch": 4.51, "eval_anthropic_toxic_prompts_accuracy": 0.1068125, "eval_anthropic_toxic_prompts_bleu_score": 3.862666125219136, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.14053569940919364, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6820381283760071, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.005041296759748043, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.13199999928474426, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015152928667412809, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.027169704437256, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.89, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.836, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.736, "eval_anthropic_toxic_prompts_num_pred_words": 38.55, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 20.63873612402303, "eval_anthropic_toxic_prompts_pred_num_tokens": 55.7109375, "eval_anthropic_toxic_prompts_rouge_score": 0.24442206374960782, "eval_anthropic_toxic_prompts_runtime": 7.0663, "eval_anthropic_toxic_prompts_samples_per_second": 70.758, "eval_anthropic_toxic_prompts_steps_per_second": 0.142, "eval_anthropic_toxic_prompts_token_set_f1": 0.3398768275342579, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005639698117516144, "eval_anthropic_toxic_prompts_token_set_precision": 0.4271537549157317, "eval_anthropic_toxic_prompts_token_set_recall": 0.30831531114690275, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 1023 }, { "epoch": 4.51, "eval_arxiv_accuracy": 0.42240625, "eval_arxiv_bleu_score": 4.171253458571327, "eval_arxiv_bleu_score_sem": 0.11767789407650925, "eval_arxiv_emb_cos_sim": 0.7384297251701355, "eval_arxiv_emb_cos_sim_sem": 0.005850672441030442, "eval_arxiv_emb_top1_equal": 0.2540000081062317, "eval_arxiv_emb_top1_equal_sem": 0.01948659572650023, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.008838415145874, "eval_arxiv_n_ngrams_match_1": 14.468, "eval_arxiv_n_ngrams_match_2": 2.732, "eval_arxiv_n_ngrams_match_3": 0.598, "eval_arxiv_num_pred_words": 36.848, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 20.263848070145425, "eval_arxiv_pred_num_tokens": 61.4609375, "eval_arxiv_rouge_score": 0.35353412039225657, "eval_arxiv_runtime": 24.2177, "eval_arxiv_samples_per_second": 20.646, "eval_arxiv_steps_per_second": 0.041, "eval_arxiv_token_set_f1": 0.35591688837586377, "eval_arxiv_token_set_f1_sem": 0.004753441988684993, "eval_arxiv_token_set_precision": 0.30392920024519354, "eval_arxiv_token_set_recall": 0.45268519173881183, "eval_arxiv_true_num_tokens": 64.0, "step": 1023 }, { "epoch": 4.51, "eval_python_code_alpaca_accuracy": 0.1519375, "eval_python_code_alpaca_bleu_score": 5.7001715909469395, "eval_python_code_alpaca_bleu_score_sem": 0.18245331772390755, "eval_python_code_alpaca_emb_cos_sim": 0.7690838575363159, "eval_python_code_alpaca_emb_cos_sim_sem": 0.0036216657731970425, "eval_python_code_alpaca_emb_top1_equal": 0.17599999904632568, "eval_python_code_alpaca_emb_top1_equal_sem": 0.017047853594066943, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.6924941539764404, "eval_python_code_alpaca_n_ngrams_match_1": 9.596, "eval_python_code_alpaca_n_ngrams_match_2": 2.94, "eval_python_code_alpaca_n_ngrams_match_3": 1.024, "eval_python_code_alpaca_num_pred_words": 35.698, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 14.768464848072853, "eval_python_code_alpaca_pred_num_tokens": 57.9609375, "eval_python_code_alpaca_rouge_score": 0.39724312590614486, "eval_python_code_alpaca_runtime": 36.425, "eval_python_code_alpaca_samples_per_second": 13.727, "eval_python_code_alpaca_steps_per_second": 0.027, "eval_python_code_alpaca_token_set_f1": 0.472579092229167, "eval_python_code_alpaca_token_set_f1_sem": 0.005334868398535928, "eval_python_code_alpaca_token_set_precision": 0.5364848862944417, "eval_python_code_alpaca_token_set_recall": 0.44106302354087196, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 1023 }, { "epoch": 4.51, "eval_wikibio_accuracy": 0.3705, "eval_wikibio_bleu_score": 4.88755922984987, "eval_wikibio_bleu_score_sem": 0.20708995809366076, "eval_wikibio_emb_cos_sim": 0.7016727924346924, "eval_wikibio_emb_cos_sim_sem": 0.007042247955428024, "eval_wikibio_emb_top1_equal": 0.14800000190734863, "eval_wikibio_emb_top1_equal_sem": 0.015896458012572223, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.313309907913208, "eval_wikibio_n_ngrams_match_1": 8.268, "eval_wikibio_n_ngrams_match_2": 2.63, "eval_wikibio_n_ngrams_match_3": 0.976, "eval_wikibio_num_pred_words": 31.332, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 27.475917889788466, "eval_wikibio_pred_num_tokens": 62.890625, "eval_wikibio_rouge_score": 0.2918099611132722, "eval_wikibio_runtime": 7.0935, "eval_wikibio_samples_per_second": 70.487, "eval_wikibio_steps_per_second": 0.141, "eval_wikibio_token_set_f1": 0.2694020672814847, "eval_wikibio_token_set_f1_sem": 0.006978672843545908, "eval_wikibio_token_set_precision": 0.26418978779936264, "eval_wikibio_token_set_recall": 0.30268523625472155, "eval_wikibio_true_num_tokens": 61.1328125, "step": 1023 }, { "epoch": 4.51, "eval_bias-bios_accuracy": 0.50959375, "eval_bias-bios_bleu_score": 17.670913883343722, "eval_bias-bios_bleu_score_sem": 0.7618895808099503, "eval_bias-bios_emb_cos_sim": 0.8704634308815002, "eval_bias-bios_emb_cos_sim_sem": 0.002964730134396973, "eval_bias-bios_emb_top1_equal": 0.32199999690055847, "eval_bias-bios_emb_top1_equal_sem": 0.02091666653838802, "eval_bias-bios_exact_match": 0.0, "eval_bias-bios_exact_match_sem": 0.0, "eval_bias-bios_loss": 1.806833267211914, "eval_bias-bios_n_ngrams_match_1": 22.242, "eval_bias-bios_n_ngrams_match_2": 9.992, "eval_bias-bios_n_ngrams_match_3": 5.52, "eval_bias-bios_num_pred_words": 45.478, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 6.091127884065418, "eval_bias-bios_pred_num_tokens": 60.0625, "eval_bias-bios_rouge_score": 0.5196810999362653, "eval_bias-bios_runtime": 7.4704, "eval_bias-bios_samples_per_second": 66.931, "eval_bias-bios_steps_per_second": 0.134, "eval_bias-bios_token_set_f1": 0.5505760399134664, "eval_bias-bios_token_set_f1_sem": 0.006596670216275572, "eval_bias-bios_token_set_precision": 0.5310260817494754, "eval_bias-bios_token_set_recall": 0.5836186251248524, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 1023 }, { "epoch": 4.55, "learning_rate": 0.001, "loss": 2.0462, "step": 1032 }, { "epoch": 4.6, "learning_rate": 0.001, "loss": 1.9454, "step": 1044 }, { "epoch": 4.64, "eval_ag_news_accuracy": 0.303625, "eval_ag_news_bleu_score": 4.607424035746342, "eval_ag_news_bleu_score_sem": 0.15785109714710252, "eval_ag_news_emb_cos_sim": 0.8068847060203552, "eval_ag_news_emb_cos_sim_sem": 0.004727621191909161, "eval_ag_news_emb_top1_equal": 0.2720000147819519, "eval_ag_news_emb_top1_equal_sem": 0.019920483557355567, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.562575101852417, "eval_ag_news_n_ngrams_match_1": 12.09, "eval_ag_news_n_ngrams_match_2": 2.424, "eval_ag_news_n_ngrams_match_3": 0.66, "eval_ag_news_num_pred_words": 32.644, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 35.25386264555959, "eval_ag_news_pred_num_tokens": 49.21875, "eval_ag_news_rouge_score": 0.36812557927211753, "eval_ag_news_runtime": 8.3463, "eval_ag_news_samples_per_second": 59.907, "eval_ag_news_steps_per_second": 0.12, "eval_ag_news_token_set_f1": 0.3474054458929426, "eval_ag_news_token_set_f1_sem": 0.004683477639321787, "eval_ag_news_token_set_precision": 0.30639136557472807, "eval_ag_news_token_set_recall": 0.4190597447752051, "eval_ag_news_true_num_tokens": 56.09375, "step": 1054 }, { "epoch": 4.64, "eval_anthropic_toxic_prompts_accuracy": 0.110125, "eval_anthropic_toxic_prompts_bleu_score": 5.163171339929796, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.19282672443503882, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.7005923986434937, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.0047202354786315135, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.15000000596046448, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01598471338779901, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 2.9066479206085205, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.904, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.854, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.704, "eval_anthropic_toxic_prompts_num_pred_words": 28.996, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 18.295368138206676, "eval_anthropic_toxic_prompts_pred_num_tokens": 41.984375, "eval_anthropic_toxic_prompts_rouge_score": 0.30157651451983636, "eval_anthropic_toxic_prompts_runtime": 6.9928, "eval_anthropic_toxic_prompts_samples_per_second": 71.502, "eval_anthropic_toxic_prompts_steps_per_second": 0.143, "eval_anthropic_toxic_prompts_token_set_f1": 0.35570073189073564, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0061060657186972804, "eval_anthropic_toxic_prompts_token_set_precision": 0.43802447258263794, "eval_anthropic_toxic_prompts_token_set_recall": 0.3248866462032167, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 1054 }, { "epoch": 4.64, "eval_arxiv_accuracy": 0.41659375, "eval_arxiv_bleu_score": 3.955898042661497, "eval_arxiv_bleu_score_sem": 0.11722614020701647, "eval_arxiv_emb_cos_sim": 0.7505651712417603, "eval_arxiv_emb_cos_sim_sem": 0.0055917039969810125, "eval_arxiv_emb_top1_equal": 0.18199999630451202, "eval_arxiv_emb_top1_equal_sem": 0.017272772986938162, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.0766408443450928, "eval_arxiv_n_ngrams_match_1": 14.196, "eval_arxiv_n_ngrams_match_2": 2.698, "eval_arxiv_n_ngrams_match_3": 0.568, "eval_arxiv_num_pred_words": 31.414, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 21.685435158513755, "eval_arxiv_pred_num_tokens": 50.890625, "eval_arxiv_rouge_score": 0.37409423610863923, "eval_arxiv_runtime": 7.3004, "eval_arxiv_samples_per_second": 68.49, "eval_arxiv_steps_per_second": 0.137, "eval_arxiv_token_set_f1": 0.3682223121599511, "eval_arxiv_token_set_f1_sem": 0.004627859384051069, "eval_arxiv_token_set_precision": 0.3100685398445484, "eval_arxiv_token_set_recall": 0.4692943092560599, "eval_arxiv_true_num_tokens": 64.0, "step": 1054 }, { "epoch": 4.64, "eval_python_code_alpaca_accuracy": 0.15634375, "eval_python_code_alpaca_bleu_score": 6.79960326411184, "eval_python_code_alpaca_bleu_score_sem": 0.22452214571025933, "eval_python_code_alpaca_emb_cos_sim": 0.7928995490074158, "eval_python_code_alpaca_emb_cos_sim_sem": 0.0037320899502600487, "eval_python_code_alpaca_emb_top1_equal": 0.2160000056028366, "eval_python_code_alpaca_emb_top1_equal_sem": 0.018421909471797383, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.5504839420318604, "eval_python_code_alpaca_n_ngrams_match_1": 9.574, "eval_python_code_alpaca_n_ngrams_match_2": 2.626, "eval_python_code_alpaca_n_ngrams_match_3": 0.856, "eval_python_code_alpaca_num_pred_words": 28.114, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 12.813303178442965, "eval_python_code_alpaca_pred_num_tokens": 43.5546875, "eval_python_code_alpaca_rouge_score": 0.4508352048107117, "eval_python_code_alpaca_runtime": 8.0363, "eval_python_code_alpaca_samples_per_second": 62.217, "eval_python_code_alpaca_steps_per_second": 0.124, "eval_python_code_alpaca_token_set_f1": 0.49210118813659204, "eval_python_code_alpaca_token_set_f1_sem": 0.005441665430889188, "eval_python_code_alpaca_token_set_precision": 0.5387057133146517, "eval_python_code_alpaca_token_set_recall": 0.46976460400315756, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 1054 }, { "epoch": 4.64, "eval_wikibio_accuracy": 0.358375, "eval_wikibio_bleu_score": 5.526680676705421, "eval_wikibio_bleu_score_sem": 0.22318342201779084, "eval_wikibio_emb_cos_sim": 0.7270724773406982, "eval_wikibio_emb_cos_sim_sem": 0.006319029155182091, "eval_wikibio_emb_top1_equal": 0.1720000058412552, "eval_wikibio_emb_top1_equal_sem": 0.01689386850274998, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.438575506210327, "eval_wikibio_n_ngrams_match_1": 8.586, "eval_wikibio_n_ngrams_match_2": 2.596, "eval_wikibio_n_ngrams_match_3": 0.942, "eval_wikibio_num_pred_words": 29.138, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 31.142564167076408, "eval_wikibio_pred_num_tokens": 56.09375, "eval_wikibio_rouge_score": 0.330226387836722, "eval_wikibio_runtime": 7.1468, "eval_wikibio_samples_per_second": 69.961, "eval_wikibio_steps_per_second": 0.14, "eval_wikibio_token_set_f1": 0.29123985480521514, "eval_wikibio_token_set_f1_sem": 0.006346977449761717, "eval_wikibio_token_set_precision": 0.28265532766102136, "eval_wikibio_token_set_recall": 0.3221428125241798, "eval_wikibio_true_num_tokens": 61.1328125, "step": 1054 }, { "epoch": 4.64, "eval_bias-bios_accuracy": 0.5160625, "eval_bias-bios_bleu_score": 18.49066373707306, "eval_bias-bios_bleu_score_sem": 0.824870220846744, "eval_bias-bios_emb_cos_sim": 0.8730402588844299, "eval_bias-bios_emb_cos_sim_sem": 0.0032544679411020154, "eval_bias-bios_emb_top1_equal": 0.3240000009536743, "eval_bias-bios_emb_top1_equal_sem": 0.020950555653521236, "eval_bias-bios_exact_match": 0.002, "eval_bias-bios_exact_match_sem": 0.002, "eval_bias-bios_loss": 1.7815382480621338, "eval_bias-bios_n_ngrams_match_1": 20.964, "eval_bias-bios_n_ngrams_match_2": 9.392, "eval_bias-bios_n_ngrams_match_3": 5.23, "eval_bias-bios_num_pred_words": 37.032, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 5.938985027968603, "eval_bias-bios_pred_num_tokens": 49.4609375, "eval_bias-bios_rouge_score": 0.5454588049088351, "eval_bias-bios_runtime": 8.2478, "eval_bias-bios_samples_per_second": 60.622, "eval_bias-bios_steps_per_second": 0.121, "eval_bias-bios_token_set_f1": 0.5572734682256394, "eval_bias-bios_token_set_f1_sem": 0.006750857588431413, "eval_bias-bios_token_set_precision": 0.5193511851690316, "eval_bias-bios_token_set_recall": 0.6139545043369491, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 1054 }, { "epoch": 4.65, "learning_rate": 0.001, "loss": 1.7226, "step": 1056 }, { "epoch": 4.7, "learning_rate": 0.001, "loss": 2.0507, "step": 1068 }, { "epoch": 4.76, "learning_rate": 0.001, "loss": 2.0374, "step": 1080 }, { "epoch": 4.78, "eval_ag_news_accuracy": 0.3025, "eval_ag_news_bleu_score": 4.782426774883124, "eval_ag_news_bleu_score_sem": 0.1495675503894871, "eval_ag_news_emb_cos_sim": 0.8184526562690735, "eval_ag_news_emb_cos_sim_sem": 0.004131730809081501, "eval_ag_news_emb_top1_equal": 0.25600001215934753, "eval_ag_news_emb_top1_equal_sem": 0.019536923601457774, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.540161609649658, "eval_ag_news_n_ngrams_match_1": 13.778, "eval_ag_news_n_ngrams_match_2": 2.968, "eval_ag_news_n_ngrams_match_3": 0.822, "eval_ag_news_num_pred_words": 42.946, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.47248982771483, "eval_ag_news_pred_num_tokens": 62.890625, "eval_ag_news_rouge_score": 0.36404645168806504, "eval_ag_news_runtime": 7.3968, "eval_ag_news_samples_per_second": 67.597, "eval_ag_news_steps_per_second": 0.135, "eval_ag_news_token_set_f1": 0.3549916818578896, "eval_ag_news_token_set_f1_sem": 0.004641491153567951, "eval_ag_news_token_set_precision": 0.33502667272869446, "eval_ag_news_token_set_recall": 0.39560025222116335, "eval_ag_news_true_num_tokens": 56.09375, "step": 1085 }, { "epoch": 4.78, "eval_anthropic_toxic_prompts_accuracy": 0.10765625, "eval_anthropic_toxic_prompts_bleu_score": 3.546828835753263, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12631718784025125, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.698980987071991, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004317670122587383, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.12999999523162842, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015055010489467818, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.107421636581421, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.374, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.032, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.784, "eval_anthropic_toxic_prompts_num_pred_words": 43.392, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 22.363309263125586, "eval_anthropic_toxic_prompts_pred_num_tokens": 62.65625, "eval_anthropic_toxic_prompts_rouge_score": 0.23832087035857508, "eval_anthropic_toxic_prompts_runtime": 7.0669, "eval_anthropic_toxic_prompts_samples_per_second": 70.752, "eval_anthropic_toxic_prompts_steps_per_second": 0.142, "eval_anthropic_toxic_prompts_token_set_f1": 0.33135563913515376, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005503391397071844, "eval_anthropic_toxic_prompts_token_set_precision": 0.4564273236473413, "eval_anthropic_toxic_prompts_token_set_recall": 0.28195039472658895, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 1085 }, { "epoch": 4.78, "eval_arxiv_accuracy": 0.4284375, "eval_arxiv_bleu_score": 4.4366820296721725, "eval_arxiv_bleu_score_sem": 0.12741093796420108, "eval_arxiv_emb_cos_sim": 0.7520226836204529, "eval_arxiv_emb_cos_sim_sem": 0.005331376429749253, "eval_arxiv_emb_top1_equal": 0.28600001335144043, "eval_arxiv_emb_top1_equal_sem": 0.020229345383440313, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 2.9971020221710205, "eval_arxiv_n_ngrams_match_1": 15.13, "eval_arxiv_n_ngrams_match_2": 2.982, "eval_arxiv_n_ngrams_match_3": 0.668, "eval_arxiv_num_pred_words": 38.58, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 20.027413743022723, "eval_arxiv_pred_num_tokens": 62.8984375, "eval_arxiv_rouge_score": 0.3612686858556743, "eval_arxiv_runtime": 7.4014, "eval_arxiv_samples_per_second": 67.554, "eval_arxiv_steps_per_second": 0.135, "eval_arxiv_token_set_f1": 0.3633472670510122, "eval_arxiv_token_set_f1_sem": 0.004831455911736207, "eval_arxiv_token_set_precision": 0.3146368295803298, "eval_arxiv_token_set_recall": 0.4532731992238954, "eval_arxiv_true_num_tokens": 64.0, "step": 1085 }, { "epoch": 4.78, "eval_python_code_alpaca_accuracy": 0.1553125, "eval_python_code_alpaca_bleu_score": 5.122593845366089, "eval_python_code_alpaca_bleu_score_sem": 0.15172203544953022, "eval_python_code_alpaca_emb_cos_sim": 0.7787840366363525, "eval_python_code_alpaca_emb_cos_sim_sem": 0.003238402698582763, "eval_python_code_alpaca_emb_top1_equal": 0.18000000715255737, "eval_python_code_alpaca_emb_top1_equal_sem": 0.017198591983670585, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.743194341659546, "eval_python_code_alpaca_n_ngrams_match_1": 10.09, "eval_python_code_alpaca_n_ngrams_match_2": 3.016, "eval_python_code_alpaca_n_ngrams_match_3": 1.034, "eval_python_code_alpaca_num_pred_words": 40.474, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 15.536534915406467, "eval_python_code_alpaca_pred_num_tokens": 62.7109375, "eval_python_code_alpaca_rouge_score": 0.3824998945624835, "eval_python_code_alpaca_runtime": 53.5162, "eval_python_code_alpaca_samples_per_second": 9.343, "eval_python_code_alpaca_steps_per_second": 0.019, "eval_python_code_alpaca_token_set_f1": 0.46524449680098723, "eval_python_code_alpaca_token_set_f1_sem": 0.005022565353754791, "eval_python_code_alpaca_token_set_precision": 0.5631874903720834, "eval_python_code_alpaca_token_set_recall": 0.41347913436767303, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 1085 }, { "epoch": 4.78, "eval_wikibio_accuracy": 0.37053125, "eval_wikibio_bleu_score": 5.041326690755854, "eval_wikibio_bleu_score_sem": 0.20965006906787886, "eval_wikibio_emb_cos_sim": 0.7176839113235474, "eval_wikibio_emb_cos_sim_sem": 0.006469605609540181, "eval_wikibio_emb_top1_equal": 0.16200000047683716, "eval_wikibio_emb_top1_equal_sem": 0.016494123019099097, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.337543487548828, "eval_wikibio_n_ngrams_match_1": 8.682, "eval_wikibio_n_ngrams_match_2": 2.672, "eval_wikibio_n_ngrams_match_3": 0.986, "eval_wikibio_num_pred_words": 31.822, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 28.149891143122552, "eval_wikibio_pred_num_tokens": 62.9765625, "eval_wikibio_rouge_score": 0.3031944117428183, "eval_wikibio_runtime": 7.3254, "eval_wikibio_samples_per_second": 68.255, "eval_wikibio_steps_per_second": 0.137, "eval_wikibio_token_set_f1": 0.28195704162537605, "eval_wikibio_token_set_f1_sem": 0.0066943326071722984, "eval_wikibio_token_set_precision": 0.27719178191689897, "eval_wikibio_token_set_recall": 0.3106932649060452, "eval_wikibio_true_num_tokens": 61.1328125, "step": 1085 }, { "epoch": 4.78, "eval_bias-bios_accuracy": 0.51134375, "eval_bias-bios_bleu_score": 17.141887959631816, "eval_bias-bios_bleu_score_sem": 0.6962557324948682, "eval_bias-bios_emb_cos_sim": 0.8785771727561951, "eval_bias-bios_emb_cos_sim_sem": 0.002824276632086532, "eval_bias-bios_emb_top1_equal": 0.3400000035762787, "eval_bias-bios_emb_top1_equal_sem": 0.021206118792612732, "eval_bias-bios_exact_match": 0.0, "eval_bias-bios_exact_match_sem": 0.0, "eval_bias-bios_loss": 1.8023052215576172, "eval_bias-bios_n_ngrams_match_1": 22.798, "eval_bias-bios_n_ngrams_match_2": 10.262, "eval_bias-bios_n_ngrams_match_3": 5.67, "eval_bias-bios_num_pred_words": 47.208, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 6.06360932857619, "eval_bias-bios_pred_num_tokens": 62.6875, "eval_bias-bios_rouge_score": 0.5199242058822136, "eval_bias-bios_runtime": 7.4164, "eval_bias-bios_samples_per_second": 67.418, "eval_bias-bios_steps_per_second": 0.135, "eval_bias-bios_token_set_f1": 0.5520066710280114, "eval_bias-bios_token_set_f1_sem": 0.006506081911934038, "eval_bias-bios_token_set_precision": 0.5448837177365514, "eval_bias-bios_token_set_recall": 0.5702486809543807, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 1085 }, { "epoch": 4.81, "learning_rate": 0.001, "loss": 1.9815, "step": 1092 }, { "epoch": 4.86, "learning_rate": 0.001, "loss": 1.7388, "step": 1104 }, { "epoch": 4.92, "learning_rate": 0.001, "loss": 1.9799, "step": 1116 }, { "epoch": 4.92, "eval_ag_news_accuracy": 0.30546875, "eval_ag_news_bleu_score": 4.628609823295798, "eval_ag_news_bleu_score_sem": 0.16814417003404045, "eval_ag_news_emb_cos_sim": 0.8036626577377319, "eval_ag_news_emb_cos_sim_sem": 0.004962024792248347, "eval_ag_news_emb_top1_equal": 0.25200000405311584, "eval_ag_news_emb_top1_equal_sem": 0.019435728067390842, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.502821683883667, "eval_ag_news_n_ngrams_match_1": 11.762, "eval_ag_news_n_ngrams_match_2": 2.468, "eval_ag_news_n_ngrams_match_3": 0.67, "eval_ag_news_num_pred_words": 32.45, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.20902525082459, "eval_ag_news_pred_num_tokens": 49.375, "eval_ag_news_rouge_score": 0.35847457034358254, "eval_ag_news_runtime": 7.1368, "eval_ag_news_samples_per_second": 70.06, "eval_ag_news_steps_per_second": 0.14, "eval_ag_news_token_set_f1": 0.34363322119153306, "eval_ag_news_token_set_f1_sem": 0.005106203858593481, "eval_ag_news_token_set_precision": 0.29934172472786247, "eval_ag_news_token_set_recall": 0.4300573230492533, "eval_ag_news_true_num_tokens": 56.09375, "step": 1116 }, { "epoch": 4.92, "eval_anthropic_toxic_prompts_accuracy": 0.10875, "eval_anthropic_toxic_prompts_bleu_score": 5.205884991798989, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1865173621633143, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6996538639068604, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004887105417573062, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.13600000739097595, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015345322399934358, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 2.9634671211242676, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.804, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.89, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.74, "eval_anthropic_toxic_prompts_num_pred_words": 28.776, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 19.36499628497135, "eval_anthropic_toxic_prompts_pred_num_tokens": 42.9765625, "eval_anthropic_toxic_prompts_rouge_score": 0.3026698968444351, "eval_anthropic_toxic_prompts_runtime": 6.9544, "eval_anthropic_toxic_prompts_samples_per_second": 71.896, "eval_anthropic_toxic_prompts_steps_per_second": 0.144, "eval_anthropic_toxic_prompts_token_set_f1": 0.35883487316207335, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006119140100683589, "eval_anthropic_toxic_prompts_token_set_precision": 0.43265422966135547, "eval_anthropic_toxic_prompts_token_set_recall": 0.33372935076770943, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 1116 }, { "epoch": 4.92, "eval_arxiv_accuracy": 0.41740625, "eval_arxiv_bleu_score": 3.872287891682529, "eval_arxiv_bleu_score_sem": 0.11487334041813434, "eval_arxiv_emb_cos_sim": 0.744478702545166, "eval_arxiv_emb_cos_sim_sem": 0.006124686202112212, "eval_arxiv_emb_top1_equal": 0.1720000058412552, "eval_arxiv_emb_top1_equal_sem": 0.016893869835550357, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.015841007232666, "eval_arxiv_n_ngrams_match_1": 13.73, "eval_arxiv_n_ngrams_match_2": 2.58, "eval_arxiv_n_ngrams_match_3": 0.586, "eval_arxiv_num_pred_words": 31.274, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 20.40624552605183, "eval_arxiv_pred_num_tokens": 54.0078125, "eval_arxiv_rouge_score": 0.3609061809223396, "eval_arxiv_runtime": 7.3764, "eval_arxiv_samples_per_second": 67.784, "eval_arxiv_steps_per_second": 0.136, "eval_arxiv_token_set_f1": 0.36442828917901954, "eval_arxiv_token_set_f1_sem": 0.0049055757087992, "eval_arxiv_token_set_precision": 0.30121546718967745, "eval_arxiv_token_set_recall": 0.47774161387520014, "eval_arxiv_true_num_tokens": 64.0, "step": 1116 }, { "epoch": 4.92, "eval_python_code_alpaca_accuracy": 0.1583125, "eval_python_code_alpaca_bleu_score": 7.442459981245183, "eval_python_code_alpaca_bleu_score_sem": 0.2587730959133015, "eval_python_code_alpaca_emb_cos_sim": 0.7887560129165649, "eval_python_code_alpaca_emb_cos_sim_sem": 0.0036784270755643492, "eval_python_code_alpaca_emb_top1_equal": 0.1979999989271164, "eval_python_code_alpaca_emb_top1_equal_sem": 0.017838958581409683, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.526684045791626, "eval_python_code_alpaca_n_ngrams_match_1": 9.42, "eval_python_code_alpaca_n_ngrams_match_2": 2.816, "eval_python_code_alpaca_n_ngrams_match_3": 0.932, "eval_python_code_alpaca_num_pred_words": 27.836, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 12.51194822530937, "eval_python_code_alpaca_pred_num_tokens": 46.1484375, "eval_python_code_alpaca_rouge_score": 0.45174719006638997, "eval_python_code_alpaca_runtime": 6.8582, "eval_python_code_alpaca_samples_per_second": 72.905, "eval_python_code_alpaca_steps_per_second": 0.146, "eval_python_code_alpaca_token_set_f1": 0.4940857636888703, "eval_python_code_alpaca_token_set_f1_sem": 0.00574673042749639, "eval_python_code_alpaca_token_set_precision": 0.5306708638242008, "eval_python_code_alpaca_token_set_recall": 0.4792872902979338, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 1116 }, { "epoch": 4.92, "eval_wikibio_accuracy": 0.37003125, "eval_wikibio_bleu_score": 5.271042883413745, "eval_wikibio_bleu_score_sem": 0.21668692554035335, "eval_wikibio_emb_cos_sim": 0.7317224144935608, "eval_wikibio_emb_cos_sim_sem": 0.006157877596249158, "eval_wikibio_emb_top1_equal": 0.20399999618530273, "eval_wikibio_emb_top1_equal_sem": 0.018039369108186407, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.2863619327545166, "eval_wikibio_n_ngrams_match_1": 8.604, "eval_wikibio_n_ngrams_match_2": 2.594, "eval_wikibio_n_ngrams_match_3": 0.97, "eval_wikibio_num_pred_words": 31.038, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 26.745384937438654, "eval_wikibio_pred_num_tokens": 59.7421875, "eval_wikibio_rouge_score": 0.3227614958549562, "eval_wikibio_runtime": 7.1142, "eval_wikibio_samples_per_second": 70.282, "eval_wikibio_steps_per_second": 0.141, "eval_wikibio_token_set_f1": 0.28746519585548624, "eval_wikibio_token_set_f1_sem": 0.006471419437000773, "eval_wikibio_token_set_precision": 0.2785137949281113, "eval_wikibio_token_set_recall": 0.3176058242144122, "eval_wikibio_true_num_tokens": 61.1328125, "step": 1116 }, { "epoch": 4.92, "eval_bias-bios_accuracy": 0.51221875, "eval_bias-bios_bleu_score": 18.650420864840633, "eval_bias-bios_bleu_score_sem": 0.8302788590751484, "eval_bias-bios_emb_cos_sim": 0.8733921647071838, "eval_bias-bios_emb_cos_sim_sem": 0.003121800325381792, "eval_bias-bios_emb_top1_equal": 0.28600001335144043, "eval_bias-bios_emb_top1_equal_sem": 0.020229345383440313, "eval_bias-bios_exact_match": 0.002, "eval_bias-bios_exact_match_sem": 0.002, "eval_bias-bios_loss": 1.7778593301773071, "eval_bias-bios_n_ngrams_match_1": 20.736, "eval_bias-bios_n_ngrams_match_2": 9.35, "eval_bias-bios_n_ngrams_match_3": 5.248, "eval_bias-bios_num_pred_words": 36.104, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 5.9171761308998185, "eval_bias-bios_pred_num_tokens": 50.6796875, "eval_bias-bios_rouge_score": 0.5457379709662528, "eval_bias-bios_runtime": 7.3864, "eval_bias-bios_samples_per_second": 67.692, "eval_bias-bios_steps_per_second": 0.135, "eval_bias-bios_token_set_f1": 0.5581272984981764, "eval_bias-bios_token_set_f1_sem": 0.00681536792835148, "eval_bias-bios_token_set_precision": 0.5114805191768111, "eval_bias-bios_token_set_recall": 0.6244049600041734, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 1116 }, { "epoch": 4.97, "learning_rate": 0.001, "loss": 1.9734, "step": 1128 }, { "epoch": 5.02, "learning_rate": 0.001, "loss": 1.8862, "step": 1140 }, { "epoch": 5.05, "eval_ag_news_accuracy": 0.3008125, "eval_ag_news_bleu_score": 4.751560589852374, "eval_ag_news_bleu_score_sem": 0.1603748846731102, "eval_ag_news_emb_cos_sim": 0.7978442311286926, "eval_ag_news_emb_cos_sim_sem": 0.005238710484881748, "eval_ag_news_emb_top1_equal": 0.24199999868869781, "eval_ag_news_emb_top1_equal_sem": 0.019173085092707744, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.528089761734009, "eval_ag_news_n_ngrams_match_1": 12.1, "eval_ag_news_n_ngrams_match_2": 2.658, "eval_ag_news_n_ngrams_match_3": 0.766, "eval_ag_news_num_pred_words": 35.188, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.058844925758244, "eval_ag_news_pred_num_tokens": 53.625, "eval_ag_news_rouge_score": 0.3522136604799726, "eval_ag_news_runtime": 7.297, "eval_ag_news_samples_per_second": 68.521, "eval_ag_news_steps_per_second": 0.137, "eval_ag_news_token_set_f1": 0.3421194657906841, "eval_ag_news_token_set_f1_sem": 0.00512623722261911, "eval_ag_news_token_set_precision": 0.3016949150083078, "eval_ag_news_token_set_recall": 0.42125821865705554, "eval_ag_news_true_num_tokens": 56.09375, "step": 1147 }, { "epoch": 5.05, "eval_anthropic_toxic_prompts_accuracy": 0.10775, "eval_anthropic_toxic_prompts_bleu_score": 4.824045774109767, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.17779367422682138, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6928799748420715, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004798333249000901, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1420000046491623, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015625630310786714, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 2.983499050140381, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.702, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.78, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.684, "eval_anthropic_toxic_prompts_num_pred_words": 30.648, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 19.75682596548496, "eval_anthropic_toxic_prompts_pred_num_tokens": 43.3984375, "eval_anthropic_toxic_prompts_rouge_score": 0.28186001156785756, "eval_anthropic_toxic_prompts_runtime": 6.8855, "eval_anthropic_toxic_prompts_samples_per_second": 72.617, "eval_anthropic_toxic_prompts_steps_per_second": 0.145, "eval_anthropic_toxic_prompts_token_set_f1": 0.3553819370248394, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005984688098916189, "eval_anthropic_toxic_prompts_token_set_precision": 0.42502285473371, "eval_anthropic_toxic_prompts_token_set_recall": 0.3329052847488175, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 1147 }, { "epoch": 5.05, "eval_arxiv_accuracy": 0.423875, "eval_arxiv_bleu_score": 4.2970257142972335, "eval_arxiv_bleu_score_sem": 0.1242882050526764, "eval_arxiv_emb_cos_sim": 0.744454026222229, "eval_arxiv_emb_cos_sim_sem": 0.00547184292686335, "eval_arxiv_emb_top1_equal": 0.21799999475479126, "eval_arxiv_emb_top1_equal_sem": 0.018483376892288548, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.0156562328338623, "eval_arxiv_n_ngrams_match_1": 14.664, "eval_arxiv_n_ngrams_match_2": 2.916, "eval_arxiv_n_ngrams_match_3": 0.634, "eval_arxiv_num_pred_words": 34.544, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 20.402475322632174, "eval_arxiv_pred_num_tokens": 58.109375, "eval_arxiv_rouge_score": 0.3705193267222546, "eval_arxiv_runtime": 7.2639, "eval_arxiv_samples_per_second": 68.833, "eval_arxiv_steps_per_second": 0.138, "eval_arxiv_token_set_f1": 0.36950581432877866, "eval_arxiv_token_set_f1_sem": 0.004561741515291868, "eval_arxiv_token_set_precision": 0.31003179888853766, "eval_arxiv_token_set_recall": 0.47645288506817857, "eval_arxiv_true_num_tokens": 64.0, "step": 1147 }, { "epoch": 5.05, "eval_python_code_alpaca_accuracy": 0.16003125, "eval_python_code_alpaca_bleu_score": 7.109532152823405, "eval_python_code_alpaca_bleu_score_sem": 0.23092374536841825, "eval_python_code_alpaca_emb_cos_sim": 0.7907090187072754, "eval_python_code_alpaca_emb_cos_sim_sem": 0.0038112096453169096, "eval_python_code_alpaca_emb_top1_equal": 0.21799999475479126, "eval_python_code_alpaca_emb_top1_equal_sem": 0.018483376892288548, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.54667329788208, "eval_python_code_alpaca_n_ngrams_match_1": 9.642, "eval_python_code_alpaca_n_ngrams_match_2": 2.98, "eval_python_code_alpaca_n_ngrams_match_3": 1.058, "eval_python_code_alpaca_num_pred_words": 30.814, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 12.764569152634095, "eval_python_code_alpaca_pred_num_tokens": 48.4921875, "eval_python_code_alpaca_rouge_score": 0.43772150015430555, "eval_python_code_alpaca_runtime": 23.6322, "eval_python_code_alpaca_samples_per_second": 21.158, "eval_python_code_alpaca_steps_per_second": 0.042, "eval_python_code_alpaca_token_set_f1": 0.5007978263098356, "eval_python_code_alpaca_token_set_f1_sem": 0.005543943666993787, "eval_python_code_alpaca_token_set_precision": 0.5448615964600481, "eval_python_code_alpaca_token_set_recall": 0.4806543376566172, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 1147 }, { "epoch": 5.05, "eval_wikibio_accuracy": 0.3758125, "eval_wikibio_bleu_score": 4.798178200869212, "eval_wikibio_bleu_score_sem": 0.22454293054991328, "eval_wikibio_emb_cos_sim": 0.6980165243148804, "eval_wikibio_emb_cos_sim_sem": 0.007304763647675317, "eval_wikibio_emb_top1_equal": 0.1599999964237213, "eval_wikibio_emb_top1_equal_sem": 0.016411540042267993, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.2932658195495605, "eval_wikibio_n_ngrams_match_1": 7.85, "eval_wikibio_n_ngrams_match_2": 2.37, "eval_wikibio_n_ngrams_match_3": 0.872, "eval_wikibio_num_pred_words": 29.784, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 26.930670908069647, "eval_wikibio_pred_num_tokens": 61.1796875, "eval_wikibio_rouge_score": 0.28846571983854963, "eval_wikibio_runtime": 36.7626, "eval_wikibio_samples_per_second": 13.601, "eval_wikibio_steps_per_second": 0.027, "eval_wikibio_token_set_f1": 0.26126189669956695, "eval_wikibio_token_set_f1_sem": 0.006870438775008788, "eval_wikibio_token_set_precision": 0.25099417299572807, "eval_wikibio_token_set_recall": 0.30064562359800917, "eval_wikibio_true_num_tokens": 61.1328125, "step": 1147 }, { "epoch": 5.05, "eval_bias-bios_accuracy": 0.511, "eval_bias-bios_bleu_score": 18.25300675333223, "eval_bias-bios_bleu_score_sem": 0.8207254399584505, "eval_bias-bios_emb_cos_sim": 0.8679388761520386, "eval_bias-bios_emb_cos_sim_sem": 0.0030592396753830933, "eval_bias-bios_emb_top1_equal": 0.328000009059906, "eval_bias-bios_emb_top1_equal_sem": 0.02101702640661987, "eval_bias-bios_exact_match": 0.002, "eval_bias-bios_exact_match_sem": 0.002, "eval_bias-bios_loss": 1.7814617156982422, "eval_bias-bios_n_ngrams_match_1": 21.26, "eval_bias-bios_n_ngrams_match_2": 9.388, "eval_bias-bios_n_ngrams_match_3": 5.212, "eval_bias-bios_num_pred_words": 39.654, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 5.938530520797771, "eval_bias-bios_pred_num_tokens": 53.5859375, "eval_bias-bios_rouge_score": 0.5355492854480801, "eval_bias-bios_runtime": 8.217, "eval_bias-bios_samples_per_second": 60.849, "eval_bias-bios_steps_per_second": 0.122, "eval_bias-bios_token_set_f1": 0.5539771399755472, "eval_bias-bios_token_set_f1_sem": 0.0066418923081446425, "eval_bias-bios_token_set_precision": 0.516171193789998, "eval_bias-bios_token_set_recall": 0.6108871601437479, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 1147 }, { "epoch": 5.07, "learning_rate": 0.001, "loss": 2.0541, "step": 1152 }, { "epoch": 5.13, "learning_rate": 0.001, "loss": 1.9959, "step": 1164 }, { "epoch": 5.18, "learning_rate": 0.001, "loss": 1.8152, "step": 1176 }, { "epoch": 5.19, "eval_ag_news_accuracy": 0.30721875, "eval_ag_news_bleu_score": 4.8485594330572, "eval_ag_news_bleu_score_sem": 0.16286452590046857, "eval_ag_news_emb_cos_sim": 0.8151611685752869, "eval_ag_news_emb_cos_sim_sem": 0.004361782482996463, "eval_ag_news_emb_top1_equal": 0.25600001215934753, "eval_ag_news_emb_top1_equal_sem": 0.019536923601457774, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.531165838241577, "eval_ag_news_n_ngrams_match_1": 12.808, "eval_ag_news_n_ngrams_match_2": 2.622, "eval_ag_news_n_ngrams_match_3": 0.728, "eval_ag_news_num_pred_words": 35.324, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.16377384045543, "eval_ag_news_pred_num_tokens": 54.0546875, "eval_ag_news_rouge_score": 0.37183171897717315, "eval_ag_news_runtime": 7.4621, "eval_ag_news_samples_per_second": 67.005, "eval_ag_news_steps_per_second": 0.134, "eval_ag_news_token_set_f1": 0.35285156123195754, "eval_ag_news_token_set_f1_sem": 0.004871174499504987, "eval_ag_news_token_set_precision": 0.3177566795698645, "eval_ag_news_token_set_recall": 0.41301102402278256, "eval_ag_news_true_num_tokens": 56.09375, "step": 1178 }, { "epoch": 5.19, "eval_anthropic_toxic_prompts_accuracy": 0.10784375, "eval_anthropic_toxic_prompts_bleu_score": 4.754495104532313, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.17230160467881073, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.7017025351524353, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004532275639757245, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.14800000190734863, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015896458012572223, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 2.959667682647705, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.864, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.796, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.678, "eval_anthropic_toxic_prompts_num_pred_words": 30.582, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 19.291559770090323, "eval_anthropic_toxic_prompts_pred_num_tokens": 45.6796875, "eval_anthropic_toxic_prompts_rouge_score": 0.2863486347156138, "eval_anthropic_toxic_prompts_runtime": 6.9905, "eval_anthropic_toxic_prompts_samples_per_second": 71.526, "eval_anthropic_toxic_prompts_steps_per_second": 0.143, "eval_anthropic_toxic_prompts_token_set_f1": 0.34945743337792634, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005829221450704748, "eval_anthropic_toxic_prompts_token_set_precision": 0.43410117442174334, "eval_anthropic_toxic_prompts_token_set_recall": 0.3167146498514724, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 1178 }, { "epoch": 5.19, "eval_arxiv_accuracy": 0.41959375, "eval_arxiv_bleu_score": 4.171391212411956, "eval_arxiv_bleu_score_sem": 0.12731606291014222, "eval_arxiv_emb_cos_sim": 0.7597247958183289, "eval_arxiv_emb_cos_sim_sem": 0.005162535612253285, "eval_arxiv_emb_top1_equal": 0.23800000548362732, "eval_arxiv_emb_top1_equal_sem": 0.019064072684441876, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.033210515975952, "eval_arxiv_n_ngrams_match_1": 14.73, "eval_arxiv_n_ngrams_match_2": 2.744, "eval_arxiv_n_ngrams_match_3": 0.578, "eval_arxiv_num_pred_words": 34.308, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 20.763788166980333, "eval_arxiv_pred_num_tokens": 56.265625, "eval_arxiv_rouge_score": 0.37153285277011533, "eval_arxiv_runtime": 7.1673, "eval_arxiv_samples_per_second": 69.762, "eval_arxiv_steps_per_second": 0.14, "eval_arxiv_token_set_f1": 0.3688551921177521, "eval_arxiv_token_set_f1_sem": 0.004459365592802378, "eval_arxiv_token_set_precision": 0.3162146658557265, "eval_arxiv_token_set_recall": 0.45769358422893, "eval_arxiv_true_num_tokens": 64.0, "step": 1178 }, { "epoch": 5.19, "eval_python_code_alpaca_accuracy": 0.157125, "eval_python_code_alpaca_bleu_score": 6.484596128014531, "eval_python_code_alpaca_bleu_score_sem": 0.19621191927439421, "eval_python_code_alpaca_emb_cos_sim": 0.7900090217590332, "eval_python_code_alpaca_emb_cos_sim_sem": 0.003316913303868283, "eval_python_code_alpaca_emb_top1_equal": 0.21199999749660492, "eval_python_code_alpaca_emb_top1_equal_sem": 0.01829703673906991, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.5738043785095215, "eval_python_code_alpaca_n_ngrams_match_1": 9.782, "eval_python_code_alpaca_n_ngrams_match_2": 2.848, "eval_python_code_alpaca_n_ngrams_match_3": 0.938, "eval_python_code_alpaca_num_pred_words": 31.012, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 13.11562645543709, "eval_python_code_alpaca_pred_num_tokens": 48.453125, "eval_python_code_alpaca_rouge_score": 0.43364782590062695, "eval_python_code_alpaca_runtime": 6.9669, "eval_python_code_alpaca_samples_per_second": 71.768, "eval_python_code_alpaca_steps_per_second": 0.144, "eval_python_code_alpaca_token_set_f1": 0.4890011179316131, "eval_python_code_alpaca_token_set_f1_sem": 0.005414977771996831, "eval_python_code_alpaca_token_set_precision": 0.5485789963357921, "eval_python_code_alpaca_token_set_recall": 0.4570615049588744, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 1178 }, { "epoch": 5.19, "eval_wikibio_accuracy": 0.3601875, "eval_wikibio_bleu_score": 5.865862348150063, "eval_wikibio_bleu_score_sem": 0.21719391064798121, "eval_wikibio_emb_cos_sim": 0.7565488815307617, "eval_wikibio_emb_cos_sim_sem": 0.005104238257453877, "eval_wikibio_emb_top1_equal": 0.16200000047683716, "eval_wikibio_emb_top1_equal_sem": 0.016494124351899474, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.4136314392089844, "eval_wikibio_n_ngrams_match_1": 9.378, "eval_wikibio_n_ngrams_match_2": 2.944, "eval_wikibio_n_ngrams_match_3": 1.1, "eval_wikibio_num_pred_words": 32.134, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 30.375350455050448, "eval_wikibio_pred_num_tokens": 58.1015625, "eval_wikibio_rouge_score": 0.3496561867392557, "eval_wikibio_runtime": 7.5836, "eval_wikibio_samples_per_second": 65.932, "eval_wikibio_steps_per_second": 0.132, "eval_wikibio_token_set_f1": 0.31190722363983975, "eval_wikibio_token_set_f1_sem": 0.0057202644914374, "eval_wikibio_token_set_precision": 0.3071798029170863, "eval_wikibio_token_set_recall": 0.33384434619047204, "eval_wikibio_true_num_tokens": 61.1328125, "step": 1178 }, { "epoch": 5.19, "eval_bias-bios_accuracy": 0.51753125, "eval_bias-bios_bleu_score": 18.51520860116379, "eval_bias-bios_bleu_score_sem": 0.8058685076135255, "eval_bias-bios_emb_cos_sim": 0.8777711391448975, "eval_bias-bios_emb_cos_sim_sem": 0.0031768019980541037, "eval_bias-bios_emb_top1_equal": 0.38199999928474426, "eval_bias-bios_emb_top1_equal_sem": 0.02175082231064121, "eval_bias-bios_exact_match": 0.002, "eval_bias-bios_exact_match_sem": 0.002, "eval_bias-bios_loss": 1.7584089040756226, "eval_bias-bios_n_ngrams_match_1": 21.4, "eval_bias-bios_n_ngrams_match_2": 9.614, "eval_bias-bios_n_ngrams_match_3": 5.364, "eval_bias-bios_num_pred_words": 38.926, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 5.803196602396394, "eval_bias-bios_pred_num_tokens": 53.125, "eval_bias-bios_rouge_score": 0.5422110673865645, "eval_bias-bios_runtime": 7.3443, "eval_bias-bios_samples_per_second": 68.08, "eval_bias-bios_steps_per_second": 0.136, "eval_bias-bios_token_set_f1": 0.5559782567207496, "eval_bias-bios_token_set_f1_sem": 0.006687994638385981, "eval_bias-bios_token_set_precision": 0.524176482491051, "eval_bias-bios_token_set_recall": 0.6043461224917017, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 1178 }, { "epoch": 5.23, "learning_rate": 0.001, "loss": 1.753, "step": 1188 }, { "epoch": 5.29, "learning_rate": 0.001, "loss": 2.0388, "step": 1200 }, { "epoch": 5.33, "eval_ag_news_accuracy": 0.30171875, "eval_ag_news_bleu_score": 4.66786630563764, "eval_ag_news_bleu_score_sem": 0.15154646564550858, "eval_ag_news_emb_cos_sim": 0.8145875334739685, "eval_ag_news_emb_cos_sim_sem": 0.004933308608570307, "eval_ag_news_emb_top1_equal": 0.3179999887943268, "eval_ag_news_emb_top1_equal_sem": 0.02084757283415153, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5350406169891357, "eval_ag_news_n_ngrams_match_1": 13.4, "eval_ag_news_n_ngrams_match_2": 2.782, "eval_ag_news_n_ngrams_match_3": 0.77, "eval_ag_news_num_pred_words": 41.146, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.296407702758216, "eval_ag_news_pred_num_tokens": 62.65625, "eval_ag_news_rouge_score": 0.35984020185901033, "eval_ag_news_runtime": 16.4581, "eval_ag_news_samples_per_second": 30.38, "eval_ag_news_steps_per_second": 0.061, "eval_ag_news_token_set_f1": 0.35244076946010666, "eval_ag_news_token_set_f1_sem": 0.004875945923880267, "eval_ag_news_token_set_precision": 0.3272305377699167, "eval_ag_news_token_set_recall": 0.40297350161661416, "eval_ag_news_true_num_tokens": 56.09375, "step": 1209 }, { "epoch": 5.33, "eval_anthropic_toxic_prompts_accuracy": 0.106125, "eval_anthropic_toxic_prompts_bleu_score": 3.557568302579417, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12541775282119264, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6978194713592529, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00425731925561031, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.13600000739097595, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015345323732734733, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.1190552711486816, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.284, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.006, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.776, "eval_anthropic_toxic_prompts_num_pred_words": 42.482, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 22.62499505785644, "eval_anthropic_toxic_prompts_pred_num_tokens": 61.359375, "eval_anthropic_toxic_prompts_rouge_score": 0.24315983815738715, "eval_anthropic_toxic_prompts_runtime": 7.1217, "eval_anthropic_toxic_prompts_samples_per_second": 70.207, "eval_anthropic_toxic_prompts_steps_per_second": 0.14, "eval_anthropic_toxic_prompts_token_set_f1": 0.33252564143152274, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005464439635387934, "eval_anthropic_toxic_prompts_token_set_precision": 0.45493149859126963, "eval_anthropic_toxic_prompts_token_set_recall": 0.2833442137428397, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 1209 }, { "epoch": 5.33, "eval_arxiv_accuracy": 0.42709375, "eval_arxiv_bleu_score": 4.502508568694519, "eval_arxiv_bleu_score_sem": 0.12789589710631666, "eval_arxiv_emb_cos_sim": 0.7558550834655762, "eval_arxiv_emb_cos_sim_sem": 0.005511884581727316, "eval_arxiv_emb_top1_equal": 0.2919999957084656, "eval_arxiv_emb_top1_equal_sem": 0.020354376719412405, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 2.991607427597046, "eval_arxiv_n_ngrams_match_1": 15.408, "eval_arxiv_n_ngrams_match_2": 2.98, "eval_arxiv_n_ngrams_match_3": 0.688, "eval_arxiv_num_pred_words": 38.328, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 19.917672990705192, "eval_arxiv_pred_num_tokens": 62.875, "eval_arxiv_rouge_score": 0.36719712273634053, "eval_arxiv_runtime": 7.4164, "eval_arxiv_samples_per_second": 67.418, "eval_arxiv_steps_per_second": 0.135, "eval_arxiv_token_set_f1": 0.37198021860120206, "eval_arxiv_token_set_f1_sem": 0.004738087124722292, "eval_arxiv_token_set_precision": 0.3226906643637732, "eval_arxiv_token_set_recall": 0.4562920586465375, "eval_arxiv_true_num_tokens": 64.0, "step": 1209 }, { "epoch": 5.33, "eval_python_code_alpaca_accuracy": 0.155625, "eval_python_code_alpaca_bleu_score": 5.315662477971717, "eval_python_code_alpaca_bleu_score_sem": 0.16177645145891859, "eval_python_code_alpaca_emb_cos_sim": 0.7812846899032593, "eval_python_code_alpaca_emb_cos_sim_sem": 0.003370661144587884, "eval_python_code_alpaca_emb_top1_equal": 0.1599999964237213, "eval_python_code_alpaca_emb_top1_equal_sem": 0.016411540042267993, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.7277579307556152, "eval_python_code_alpaca_n_ngrams_match_1": 10.196, "eval_python_code_alpaca_n_ngrams_match_2": 3.118, "eval_python_code_alpaca_n_ngrams_match_3": 1.054, "eval_python_code_alpaca_num_pred_words": 39.74, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 15.298548134955924, "eval_python_code_alpaca_pred_num_tokens": 61.9453125, "eval_python_code_alpaca_rouge_score": 0.38760636668818693, "eval_python_code_alpaca_runtime": 7.2205, "eval_python_code_alpaca_samples_per_second": 69.247, "eval_python_code_alpaca_steps_per_second": 0.138, "eval_python_code_alpaca_token_set_f1": 0.4748953776740159, "eval_python_code_alpaca_token_set_f1_sem": 0.005199266059350083, "eval_python_code_alpaca_token_set_precision": 0.5677066819762692, "eval_python_code_alpaca_token_set_recall": 0.42523396627114723, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 1209 }, { "epoch": 5.33, "eval_wikibio_accuracy": 0.36765625, "eval_wikibio_bleu_score": 5.031924327295865, "eval_wikibio_bleu_score_sem": 0.20167395499192434, "eval_wikibio_emb_cos_sim": 0.7238757610321045, "eval_wikibio_emb_cos_sim_sem": 0.0063559130727577655, "eval_wikibio_emb_top1_equal": 0.15800000727176666, "eval_wikibio_emb_top1_equal_sem": 0.016328049428381567, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.33139705657959, "eval_wikibio_n_ngrams_match_1": 8.876, "eval_wikibio_n_ngrams_match_2": 2.702, "eval_wikibio_n_ngrams_match_3": 1.0, "eval_wikibio_num_pred_words": 32.756, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 27.977400424604888, "eval_wikibio_pred_num_tokens": 62.984375, "eval_wikibio_rouge_score": 0.31450113973078364, "eval_wikibio_runtime": 8.0852, "eval_wikibio_samples_per_second": 61.841, "eval_wikibio_steps_per_second": 0.124, "eval_wikibio_token_set_f1": 0.2843410825236571, "eval_wikibio_token_set_f1_sem": 0.006492023793848845, "eval_wikibio_token_set_precision": 0.28389924039721054, "eval_wikibio_token_set_recall": 0.3075195178220353, "eval_wikibio_true_num_tokens": 61.1328125, "step": 1209 }, { "epoch": 5.33, "eval_bias-bios_accuracy": 0.51425, "eval_bias-bios_bleu_score": 17.79308714162288, "eval_bias-bios_bleu_score_sem": 0.7377277793877473, "eval_bias-bios_emb_cos_sim": 0.8805733919143677, "eval_bias-bios_emb_cos_sim_sem": 0.0029906071200763297, "eval_bias-bios_emb_top1_equal": 0.3720000088214874, "eval_bias-bios_emb_top1_equal_sem": 0.021637198413078103, "eval_bias-bios_exact_match": 0.0, "eval_bias-bios_exact_match_sem": 0.0, "eval_bias-bios_loss": 1.7840691804885864, "eval_bias-bios_n_ngrams_match_1": 22.868, "eval_bias-bios_n_ngrams_match_2": 10.366, "eval_bias-bios_n_ngrams_match_3": 5.742, "eval_bias-bios_num_pred_words": 46.95, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 5.9540352352511, "eval_bias-bios_pred_num_tokens": 62.234375, "eval_bias-bios_rouge_score": 0.5226844675142479, "eval_bias-bios_runtime": 8.2095, "eval_bias-bios_samples_per_second": 60.905, "eval_bias-bios_steps_per_second": 0.122, "eval_bias-bios_token_set_f1": 0.5565963382730305, "eval_bias-bios_token_set_f1_sem": 0.006584560499707022, "eval_bias-bios_token_set_precision": 0.548469607354972, "eval_bias-bios_token_set_recall": 0.5765310501058738, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 1209 }, { "epoch": 5.34, "learning_rate": 0.001, "loss": 2.0017, "step": 1212 }, { "epoch": 5.39, "learning_rate": 0.001, "loss": 1.8471, "step": 1224 }, { "epoch": 5.44, "learning_rate": 0.001, "loss": 1.6843, "step": 1236 }, { "epoch": 5.46, "eval_ag_news_accuracy": 0.3058125, "eval_ag_news_bleu_score": 4.228804372282374, "eval_ag_news_bleu_score_sem": 0.16390794415177778, "eval_ag_news_emb_cos_sim": 0.8003663420677185, "eval_ag_news_emb_cos_sim_sem": 0.004822506915801103, "eval_ag_news_emb_top1_equal": 0.2619999945163727, "eval_ag_news_emb_top1_equal_sem": 0.019684689846225335, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.58817720413208, "eval_ag_news_n_ngrams_match_1": 10.972, "eval_ag_news_n_ngrams_match_2": 2.226, "eval_ag_news_n_ngrams_match_3": 0.576, "eval_ag_news_num_pred_words": 27.282, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 36.16808876143634, "eval_ag_news_pred_num_tokens": 42.375, "eval_ag_news_rouge_score": 0.35711205410770497, "eval_ag_news_runtime": 7.3843, "eval_ag_news_samples_per_second": 67.711, "eval_ag_news_steps_per_second": 0.135, "eval_ag_news_token_set_f1": 0.3399839974085392, "eval_ag_news_token_set_f1_sem": 0.004867167151513755, "eval_ag_news_token_set_precision": 0.28651641622207985, "eval_ag_news_token_set_recall": 0.43905283960004043, "eval_ag_news_true_num_tokens": 56.09375, "step": 1240 }, { "epoch": 5.46, "eval_anthropic_toxic_prompts_accuracy": 0.110375, "eval_anthropic_toxic_prompts_bleu_score": 5.934900892843169, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.22076600473926677, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.7030026912689209, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004729080608319704, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.15000000596046448, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01598471338779901, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 2.920982599258423, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.55, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.668, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.616, "eval_anthropic_toxic_prompts_num_pred_words": 23.468, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 18.55951506880603, "eval_anthropic_toxic_prompts_pred_num_tokens": 33.9296875, "eval_anthropic_toxic_prompts_rouge_score": 0.32892340617179394, "eval_anthropic_toxic_prompts_runtime": 7.206, "eval_anthropic_toxic_prompts_samples_per_second": 69.386, "eval_anthropic_toxic_prompts_steps_per_second": 0.139, "eval_anthropic_toxic_prompts_token_set_f1": 0.35867153576994054, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00592727226660465, "eval_anthropic_toxic_prompts_token_set_precision": 0.4169835141258158, "eval_anthropic_toxic_prompts_token_set_recall": 0.3380541481103269, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 1240 }, { "epoch": 5.46, "eval_arxiv_accuracy": 0.415, "eval_arxiv_bleu_score": 3.549095877403326, "eval_arxiv_bleu_score_sem": 0.10135517036798959, "eval_arxiv_emb_cos_sim": 0.7511804103851318, "eval_arxiv_emb_cos_sim_sem": 0.00503978736332344, "eval_arxiv_emb_top1_equal": 0.17399999499320984, "eval_arxiv_emb_top1_equal_sem": 0.016971269551723376, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.0753817558288574, "eval_arxiv_n_ngrams_match_1": 13.332, "eval_arxiv_n_ngrams_match_2": 2.414, "eval_arxiv_n_ngrams_match_3": 0.494, "eval_arxiv_num_pred_words": 28.782, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 21.658148457926593, "eval_arxiv_pred_num_tokens": 47.6796875, "eval_arxiv_rouge_score": 0.36617538747351475, "eval_arxiv_runtime": 7.5013, "eval_arxiv_samples_per_second": 66.655, "eval_arxiv_steps_per_second": 0.133, "eval_arxiv_token_set_f1": 0.3651676595945922, "eval_arxiv_token_set_f1_sem": 0.004322873802341711, "eval_arxiv_token_set_precision": 0.3020699734995879, "eval_arxiv_token_set_recall": 0.475372664607063, "eval_arxiv_true_num_tokens": 64.0, "step": 1240 }, { "epoch": 5.46, "eval_python_code_alpaca_accuracy": 0.16234375, "eval_python_code_alpaca_bleu_score": 8.203572226303173, "eval_python_code_alpaca_bleu_score_sem": 0.2808840321062194, "eval_python_code_alpaca_emb_cos_sim": 0.7998011112213135, "eval_python_code_alpaca_emb_cos_sim_sem": 0.003771246625675064, "eval_python_code_alpaca_emb_top1_equal": 0.21199999749660492, "eval_python_code_alpaca_emb_top1_equal_sem": 0.01829703673906991, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.4834351539611816, "eval_python_code_alpaca_n_ngrams_match_1": 9.048, "eval_python_code_alpaca_n_ngrams_match_2": 2.492, "eval_python_code_alpaca_n_ngrams_match_3": 0.828, "eval_python_code_alpaca_num_pred_words": 22.6, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 11.98235503550787, "eval_python_code_alpaca_pred_num_tokens": 35.3125, "eval_python_code_alpaca_rouge_score": 0.48279035321359737, "eval_python_code_alpaca_runtime": 7.066, "eval_python_code_alpaca_samples_per_second": 70.762, "eval_python_code_alpaca_steps_per_second": 0.142, "eval_python_code_alpaca_token_set_f1": 0.5002778659188205, "eval_python_code_alpaca_token_set_f1_sem": 0.005630725813805154, "eval_python_code_alpaca_token_set_precision": 0.5206289500177254, "eval_python_code_alpaca_token_set_recall": 0.49733067164037936, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 1240 }, { "epoch": 5.46, "eval_wikibio_accuracy": 0.35940625, "eval_wikibio_bleu_score": 6.0215781318391235, "eval_wikibio_bleu_score_sem": 0.22700354104088374, "eval_wikibio_emb_cos_sim": 0.7536024451255798, "eval_wikibio_emb_cos_sim_sem": 0.005223740804270685, "eval_wikibio_emb_top1_equal": 0.20600000023841858, "eval_wikibio_emb_top1_equal_sem": 0.018104793612990725, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.4180655479431152, "eval_wikibio_n_ngrams_match_1": 9.248, "eval_wikibio_n_ngrams_match_2": 2.878, "eval_wikibio_n_ngrams_match_3": 1.05, "eval_wikibio_num_pred_words": 30.128, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 30.510337113397938, "eval_wikibio_pred_num_tokens": 53.328125, "eval_wikibio_rouge_score": 0.35471940693449555, "eval_wikibio_runtime": 7.3318, "eval_wikibio_samples_per_second": 68.196, "eval_wikibio_steps_per_second": 0.136, "eval_wikibio_token_set_f1": 0.3156909691627191, "eval_wikibio_token_set_f1_sem": 0.005623957716607399, "eval_wikibio_token_set_precision": 0.30678649564693816, "eval_wikibio_token_set_recall": 0.3410327489783421, "eval_wikibio_true_num_tokens": 61.1328125, "step": 1240 }, { "epoch": 5.46, "eval_bias-bios_accuracy": 0.51178125, "eval_bias-bios_bleu_score": 17.90672965013963, "eval_bias-bios_bleu_score_sem": 0.8619788872673458, "eval_bias-bios_emb_cos_sim": 0.8670877814292908, "eval_bias-bios_emb_cos_sim_sem": 0.0034544596353609193, "eval_bias-bios_emb_top1_equal": 0.3100000023841858, "eval_bias-bios_emb_top1_equal_sem": 0.020704040896175106, "eval_bias-bios_exact_match": 0.004, "eval_bias-bios_exact_match_sem": 0.002825591608118863, "eval_bias-bios_loss": 1.797906756401062, "eval_bias-bios_n_ngrams_match_1": 19.478, "eval_bias-bios_n_ngrams_match_2": 8.804, "eval_bias-bios_n_ngrams_match_3": 5.016, "eval_bias-bios_num_pred_words": 32.024, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 6.036997323114941, "eval_bias-bios_pred_num_tokens": 43.1640625, "eval_bias-bios_rouge_score": 0.5462596720572078, "eval_bias-bios_runtime": 7.3889, "eval_bias-bios_samples_per_second": 67.669, "eval_bias-bios_steps_per_second": 0.135, "eval_bias-bios_token_set_f1": 0.554597593898957, "eval_bias-bios_token_set_f1_sem": 0.006986531889248037, "eval_bias-bios_token_set_precision": 0.4988897214671942, "eval_bias-bios_token_set_recall": 0.6389780375672935, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 1240 }, { "epoch": 5.5, "learning_rate": 0.001, "loss": 2.0709, "step": 1248 }, { "epoch": 5.55, "learning_rate": 0.001, "loss": 2.0069, "step": 1260 }, { "epoch": 5.6, "eval_ag_news_accuracy": 0.3023125, "eval_ag_news_bleu_score": 4.57337402880596, "eval_ag_news_bleu_score_sem": 0.14544406424535272, "eval_ag_news_emb_cos_sim": 0.8080353140830994, "eval_ag_news_emb_cos_sim_sem": 0.004815184510541266, "eval_ag_news_emb_top1_equal": 0.27799999713897705, "eval_ag_news_emb_top1_equal_sem": 0.0200558347666307, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5642201900482178, "eval_ag_news_n_ngrams_match_1": 12.77, "eval_ag_news_n_ngrams_match_2": 2.6, "eval_ag_news_n_ngrams_match_3": 0.656, "eval_ag_news_num_pred_words": 37.444, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 35.31190608905573, "eval_ag_news_pred_num_tokens": 55.9453125, "eval_ag_news_rouge_score": 0.36077600319081815, "eval_ag_news_runtime": 7.4135, "eval_ag_news_samples_per_second": 67.445, "eval_ag_news_steps_per_second": 0.135, "eval_ag_news_token_set_f1": 0.34675509763818224, "eval_ag_news_token_set_f1_sem": 0.004742689533799806, "eval_ag_news_token_set_precision": 0.3147625166746689, "eval_ag_news_token_set_recall": 0.4019475532268016, "eval_ag_news_true_num_tokens": 56.09375, "step": 1271 }, { "epoch": 5.6, "eval_anthropic_toxic_prompts_accuracy": 0.106625, "eval_anthropic_toxic_prompts_bleu_score": 4.09428074697677, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.14453180124028525, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6872526407241821, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004509919579468347, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.16599999368190765, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.016656615375209204, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.0059316158294678, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.926, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.77, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.64, "eval_anthropic_toxic_prompts_num_pred_words": 34.678, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 20.205030657050173, "eval_anthropic_toxic_prompts_pred_num_tokens": 50.5, "eval_anthropic_toxic_prompts_rouge_score": 0.2663162692915284, "eval_anthropic_toxic_prompts_runtime": 7.2083, "eval_anthropic_toxic_prompts_samples_per_second": 69.364, "eval_anthropic_toxic_prompts_steps_per_second": 0.139, "eval_anthropic_toxic_prompts_token_set_f1": 0.3440817895035503, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0057425914365728204, "eval_anthropic_toxic_prompts_token_set_precision": 0.4288524547204851, "eval_anthropic_toxic_prompts_token_set_recall": 0.31069896013840936, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 1271 }, { "epoch": 5.6, "eval_arxiv_accuracy": 0.42084375, "eval_arxiv_bleu_score": 4.39097555111001, "eval_arxiv_bleu_score_sem": 0.11792424158293455, "eval_arxiv_emb_cos_sim": 0.7578997611999512, "eval_arxiv_emb_cos_sim_sem": 0.004245361370646482, "eval_arxiv_emb_top1_equal": 0.23000000417232513, "eval_arxiv_emb_top1_equal_sem": 0.018839050665941787, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.0509696006774902, "eval_arxiv_n_ngrams_match_1": 15.062, "eval_arxiv_n_ngrams_match_2": 2.924, "eval_arxiv_n_ngrams_match_3": 0.646, "eval_arxiv_num_pred_words": 36.756, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 21.13582780354345, "eval_arxiv_pred_num_tokens": 58.8359375, "eval_arxiv_rouge_score": 0.3717338045573107, "eval_arxiv_runtime": 7.611, "eval_arxiv_samples_per_second": 65.695, "eval_arxiv_steps_per_second": 0.131, "eval_arxiv_token_set_f1": 0.37075290043526415, "eval_arxiv_token_set_f1_sem": 0.004148483552312404, "eval_arxiv_token_set_precision": 0.3164356851337472, "eval_arxiv_token_set_recall": 0.4646605842101968, "eval_arxiv_true_num_tokens": 64.0, "step": 1271 }, { "epoch": 5.6, "eval_python_code_alpaca_accuracy": 0.15196875, "eval_python_code_alpaca_bleu_score": 5.519793427598636, "eval_python_code_alpaca_bleu_score_sem": 0.17277299116506056, "eval_python_code_alpaca_emb_cos_sim": 0.7638775706291199, "eval_python_code_alpaca_emb_cos_sim_sem": 0.003983409786157877, "eval_python_code_alpaca_emb_top1_equal": 0.18400000035762787, "eval_python_code_alpaca_emb_top1_equal_sem": 0.017346172969186033, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.689002513885498, "eval_python_code_alpaca_n_ngrams_match_1": 9.65, "eval_python_code_alpaca_n_ngrams_match_2": 2.62, "eval_python_code_alpaca_n_ngrams_match_3": 0.794, "eval_python_code_alpaca_num_pred_words": 33.444, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 14.716988604683149, "eval_python_code_alpaca_pred_num_tokens": 52.4375, "eval_python_code_alpaca_rouge_score": 0.4092617957255468, "eval_python_code_alpaca_runtime": 7.0203, "eval_python_code_alpaca_samples_per_second": 71.222, "eval_python_code_alpaca_steps_per_second": 0.142, "eval_python_code_alpaca_token_set_f1": 0.478976922517982, "eval_python_code_alpaca_token_set_f1_sem": 0.005203506678295278, "eval_python_code_alpaca_token_set_precision": 0.5368041712791153, "eval_python_code_alpaca_token_set_recall": 0.45078687179454685, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 1271 }, { "epoch": 5.6, "eval_wikibio_accuracy": 0.3555, "eval_wikibio_bleu_score": 5.2488485705282795, "eval_wikibio_bleu_score_sem": 0.21298491513749515, "eval_wikibio_emb_cos_sim": 0.71724534034729, "eval_wikibio_emb_cos_sim_sem": 0.0063786226583461125, "eval_wikibio_emb_top1_equal": 0.15000000596046448, "eval_wikibio_emb_top1_equal_sem": 0.01598471338779901, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.4222168922424316, "eval_wikibio_n_ngrams_match_1": 8.358, "eval_wikibio_n_ngrams_match_2": 2.702, "eval_wikibio_n_ngrams_match_3": 1.016, "eval_wikibio_num_pred_words": 30.586, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 30.637259294001566, "eval_wikibio_pred_num_tokens": 61.0390625, "eval_wikibio_rouge_score": 0.30777700457295964, "eval_wikibio_runtime": 7.2697, "eval_wikibio_samples_per_second": 68.779, "eval_wikibio_steps_per_second": 0.138, "eval_wikibio_token_set_f1": 0.2778107394683041, "eval_wikibio_token_set_f1_sem": 0.006709734336985542, "eval_wikibio_token_set_precision": 0.27087812641060205, "eval_wikibio_token_set_recall": 0.31154529694026967, "eval_wikibio_true_num_tokens": 61.1328125, "step": 1271 }, { "epoch": 5.6, "eval_bias-bios_accuracy": 0.5176875, "eval_bias-bios_bleu_score": 18.540615663972712, "eval_bias-bios_bleu_score_sem": 0.797121209731287, "eval_bias-bios_emb_cos_sim": 0.872423529624939, "eval_bias-bios_emb_cos_sim_sem": 0.0031692606803326844, "eval_bias-bios_emb_top1_equal": 0.3400000035762787, "eval_bias-bios_emb_top1_equal_sem": 0.021206117459812355, "eval_bias-bios_exact_match": 0.0, "eval_bias-bios_exact_match_sem": 0.0, "eval_bias-bios_loss": 1.7669774293899536, "eval_bias-bios_n_ngrams_match_1": 21.822, "eval_bias-bios_n_ngrams_match_2": 9.798, "eval_bias-bios_n_ngrams_match_3": 5.486, "eval_bias-bios_num_pred_words": 41.998, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 5.853135083419125, "eval_bias-bios_pred_num_tokens": 56.09375, "eval_bias-bios_rouge_score": 0.5323654174564787, "eval_bias-bios_runtime": 7.5551, "eval_bias-bios_samples_per_second": 66.181, "eval_bias-bios_steps_per_second": 0.132, "eval_bias-bios_token_set_f1": 0.5523502621363561, "eval_bias-bios_token_set_f1_sem": 0.006720164885935815, "eval_bias-bios_token_set_precision": 0.5282085906273399, "eval_bias-bios_token_set_recall": 0.5897559634360018, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 1271 }, { "epoch": 5.6, "learning_rate": 0.001, "loss": 1.897, "step": 1272 }, { "epoch": 5.66, "learning_rate": 0.001, "loss": 1.6563, "step": 1284 }, { "epoch": 5.71, "learning_rate": 0.001, "loss": 2.0459, "step": 1296 }, { "epoch": 5.74, "eval_ag_news_accuracy": 0.3018125, "eval_ag_news_bleu_score": 4.580675351931296, "eval_ag_news_bleu_score_sem": 0.1390842938196433, "eval_ag_news_emb_cos_sim": 0.8149040937423706, "eval_ag_news_emb_cos_sim_sem": 0.0040050554638469404, "eval_ag_news_emb_top1_equal": 0.2840000092983246, "eval_ag_news_emb_top1_equal_sem": 0.020186705101045338, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.540907621383667, "eval_ag_news_n_ngrams_match_1": 13.236, "eval_ag_news_n_ngrams_match_2": 2.768, "eval_ag_news_n_ngrams_match_3": 0.722, "eval_ag_news_num_pred_words": 42.296, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.498216304560465, "eval_ag_news_pred_num_tokens": 61.8515625, "eval_ag_news_rouge_score": 0.3541570127113056, "eval_ag_news_runtime": 7.5701, "eval_ag_news_samples_per_second": 66.049, "eval_ag_news_steps_per_second": 0.132, "eval_ag_news_token_set_f1": 0.3461913907982923, "eval_ag_news_token_set_f1_sem": 0.004594991147194266, "eval_ag_news_token_set_precision": 0.32251028718278496, "eval_ag_news_token_set_recall": 0.39702258975572, "eval_ag_news_true_num_tokens": 56.09375, "step": 1302 }, { "epoch": 5.74, "eval_anthropic_toxic_prompts_accuracy": 0.10628125, "eval_anthropic_toxic_prompts_bleu_score": 3.4424543830075285, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12195139246443613, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.684529185295105, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004560404391670038, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.11599999666213989, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01433523564539069, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.1139750480651855, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.188, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.886, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.73, "eval_anthropic_toxic_prompts_num_pred_words": 42.012, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 22.510346502395596, "eval_anthropic_toxic_prompts_pred_num_tokens": 60.1953125, "eval_anthropic_toxic_prompts_rouge_score": 0.23978661530396744, "eval_anthropic_toxic_prompts_runtime": 7.1684, "eval_anthropic_toxic_prompts_samples_per_second": 69.75, "eval_anthropic_toxic_prompts_steps_per_second": 0.14, "eval_anthropic_toxic_prompts_token_set_f1": 0.3326568724343772, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00553262305039052, "eval_anthropic_toxic_prompts_token_set_precision": 0.44904456927897024, "eval_anthropic_toxic_prompts_token_set_recall": 0.2878560045999485, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 1302 }, { "epoch": 5.74, "eval_arxiv_accuracy": 0.42740625, "eval_arxiv_bleu_score": 4.420526084563949, "eval_arxiv_bleu_score_sem": 0.12220117044334794, "eval_arxiv_emb_cos_sim": 0.7494902610778809, "eval_arxiv_emb_cos_sim_sem": 0.004779517106507803, "eval_arxiv_emb_top1_equal": 0.25600001215934753, "eval_arxiv_emb_top1_equal_sem": 0.019536923601457774, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.009330987930298, "eval_arxiv_n_ngrams_match_1": 15.288, "eval_arxiv_n_ngrams_match_2": 2.898, "eval_arxiv_n_ngrams_match_3": 0.646, "eval_arxiv_num_pred_words": 39.268, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 20.273831948904096, "eval_arxiv_pred_num_tokens": 62.5625, "eval_arxiv_rouge_score": 0.362846579847411, "eval_arxiv_runtime": 8.3104, "eval_arxiv_samples_per_second": 60.165, "eval_arxiv_steps_per_second": 0.12, "eval_arxiv_token_set_f1": 0.36629274140253887, "eval_arxiv_token_set_f1_sem": 0.0044614996118577475, "eval_arxiv_token_set_precision": 0.317322733552626, "eval_arxiv_token_set_recall": 0.452008425847499, "eval_arxiv_true_num_tokens": 64.0, "step": 1302 }, { "epoch": 5.74, "eval_python_code_alpaca_accuracy": 0.153, "eval_python_code_alpaca_bleu_score": 5.177949737443402, "eval_python_code_alpaca_bleu_score_sem": 0.1580344328908884, "eval_python_code_alpaca_emb_cos_sim": 0.769737720489502, "eval_python_code_alpaca_emb_cos_sim_sem": 0.0035329592450433463, "eval_python_code_alpaca_emb_top1_equal": 0.18400000035762787, "eval_python_code_alpaca_emb_top1_equal_sem": 0.017346174301986407, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.746436834335327, "eval_python_code_alpaca_n_ngrams_match_1": 9.89, "eval_python_code_alpaca_n_ngrams_match_2": 2.796, "eval_python_code_alpaca_n_ngrams_match_3": 0.946, "eval_python_code_alpaca_num_pred_words": 38.078, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 15.586993778113719, "eval_python_code_alpaca_pred_num_tokens": 59.9921875, "eval_python_code_alpaca_rouge_score": 0.391759995715714, "eval_python_code_alpaca_runtime": 7.2752, "eval_python_code_alpaca_samples_per_second": 68.726, "eval_python_code_alpaca_steps_per_second": 0.137, "eval_python_code_alpaca_token_set_f1": 0.46521206467444476, "eval_python_code_alpaca_token_set_f1_sem": 0.005278026580205972, "eval_python_code_alpaca_token_set_precision": 0.5487120124810396, "eval_python_code_alpaca_token_set_recall": 0.4198012272368391, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 1302 }, { "epoch": 5.74, "eval_wikibio_accuracy": 0.36684375, "eval_wikibio_bleu_score": 5.070507207255898, "eval_wikibio_bleu_score_sem": 0.20825270698295933, "eval_wikibio_emb_cos_sim": 0.7176789045333862, "eval_wikibio_emb_cos_sim_sem": 0.0064573131916822474, "eval_wikibio_emb_top1_equal": 0.18000000715255737, "eval_wikibio_emb_top1_equal_sem": 0.017198591983670585, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.3135361671447754, "eval_wikibio_n_ngrams_match_1": 8.63, "eval_wikibio_n_ngrams_match_2": 2.738, "eval_wikibio_n_ngrams_match_3": 0.996, "eval_wikibio_num_pred_words": 32.56, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 27.48213527320049, "eval_wikibio_pred_num_tokens": 62.9375, "eval_wikibio_rouge_score": 0.30395403219075934, "eval_wikibio_runtime": 7.4136, "eval_wikibio_samples_per_second": 67.444, "eval_wikibio_steps_per_second": 0.135, "eval_wikibio_token_set_f1": 0.2788986587618517, "eval_wikibio_token_set_f1_sem": 0.006604972211474385, "eval_wikibio_token_set_precision": 0.27549524981074697, "eval_wikibio_token_set_recall": 0.3093327647420698, "eval_wikibio_true_num_tokens": 61.1328125, "step": 1302 }, { "epoch": 5.74, "eval_bias-bios_accuracy": 0.512875, "eval_bias-bios_bleu_score": 17.587711221111313, "eval_bias-bios_bleu_score_sem": 0.7582759478456785, "eval_bias-bios_emb_cos_sim": 0.8744062781333923, "eval_bias-bios_emb_cos_sim_sem": 0.003047957520209353, "eval_bias-bios_emb_top1_equal": 0.35199999809265137, "eval_bias-bios_emb_top1_equal_sem": 0.02138004257753857, "eval_bias-bios_exact_match": 0.0, "eval_bias-bios_exact_match_sem": 0.0, "eval_bias-bios_loss": 1.7857671976089478, "eval_bias-bios_n_ngrams_match_1": 22.486, "eval_bias-bios_n_ngrams_match_2": 10.062, "eval_bias-bios_n_ngrams_match_3": 5.588, "eval_bias-bios_num_pred_words": 46.814, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 5.964153877398367, "eval_bias-bios_pred_num_tokens": 61.7265625, "eval_bias-bios_rouge_score": 0.5167177720702403, "eval_bias-bios_runtime": 7.5771, "eval_bias-bios_samples_per_second": 65.989, "eval_bias-bios_steps_per_second": 0.132, "eval_bias-bios_token_set_f1": 0.5499504893910986, "eval_bias-bios_token_set_f1_sem": 0.006585196715222386, "eval_bias-bios_token_set_precision": 0.541049468479298, "eval_bias-bios_token_set_recall": 0.5705891060146241, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 1302 }, { "epoch": 5.76, "learning_rate": 0.001, "loss": 1.9889, "step": 1308 }, { "epoch": 5.81, "learning_rate": 0.001, "loss": 1.921, "step": 1320 }, { "epoch": 5.87, "learning_rate": 0.001, "loss": 1.6839, "step": 1332 }, { "epoch": 5.87, "eval_ag_news_accuracy": 0.30190625, "eval_ag_news_bleu_score": 4.510702202082297, "eval_ag_news_bleu_score_sem": 0.1562655783341779, "eval_ag_news_emb_cos_sim": 0.8097511529922485, "eval_ag_news_emb_cos_sim_sem": 0.004043597051888928, "eval_ag_news_emb_top1_equal": 0.2800000011920929, "eval_ag_news_emb_top1_equal_sem": 0.02009995045904072, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.6359164714813232, "eval_ag_news_n_ngrams_match_1": 11.704, "eval_ag_news_n_ngrams_match_2": 2.412, "eval_ag_news_n_ngrams_match_3": 0.638, "eval_ag_news_num_pred_words": 30.68, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 37.93660478653705, "eval_ag_news_pred_num_tokens": 46.9609375, "eval_ag_news_rouge_score": 0.3586267610476457, "eval_ag_news_runtime": 7.3684, "eval_ag_news_samples_per_second": 67.858, "eval_ag_news_steps_per_second": 0.136, "eval_ag_news_token_set_f1": 0.3445078990006429, "eval_ag_news_token_set_f1_sem": 0.004640395360757867, "eval_ag_news_token_set_precision": 0.2995115498991181, "eval_ag_news_token_set_recall": 0.4238478297632044, "eval_ag_news_true_num_tokens": 56.09375, "step": 1333 }, { "epoch": 5.87, "eval_anthropic_toxic_prompts_accuracy": 0.10834375, "eval_anthropic_toxic_prompts_bleu_score": 5.335873092259973, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.200454930491489, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6999314427375793, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004881605616825935, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.15000000596046448, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01598471338779901, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 2.979020833969116, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.722, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.698, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.624, "eval_anthropic_toxic_prompts_num_pred_words": 26.368, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 19.668548438407626, "eval_anthropic_toxic_prompts_pred_num_tokens": 37.5859375, "eval_anthropic_toxic_prompts_rouge_score": 0.310660466066935, "eval_anthropic_toxic_prompts_runtime": 6.9444, "eval_anthropic_toxic_prompts_samples_per_second": 72.0, "eval_anthropic_toxic_prompts_steps_per_second": 0.144, "eval_anthropic_toxic_prompts_token_set_f1": 0.34998396653391634, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006147953719054929, "eval_anthropic_toxic_prompts_token_set_precision": 0.42293888028152654, "eval_anthropic_toxic_prompts_token_set_recall": 0.32233621200100004, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 1333 }, { "epoch": 5.87, "eval_arxiv_accuracy": 0.41125, "eval_arxiv_bleu_score": 3.840186807066469, "eval_arxiv_bleu_score_sem": 0.10961283946182737, "eval_arxiv_emb_cos_sim": 0.7524436116218567, "eval_arxiv_emb_cos_sim_sem": 0.0045383505438661535, "eval_arxiv_emb_top1_equal": 0.15399999916553497, "eval_arxiv_emb_top1_equal_sem": 0.016158283980625493, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.1682863235473633, "eval_arxiv_n_ngrams_match_1": 13.868, "eval_arxiv_n_ngrams_match_2": 2.616, "eval_arxiv_n_ngrams_match_3": 0.574, "eval_arxiv_num_pred_words": 30.132, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 23.76672096869516, "eval_arxiv_pred_num_tokens": 49.046875, "eval_arxiv_rouge_score": 0.3729047020927051, "eval_arxiv_runtime": 7.256, "eval_arxiv_samples_per_second": 68.909, "eval_arxiv_steps_per_second": 0.138, "eval_arxiv_token_set_f1": 0.37123502434387323, "eval_arxiv_token_set_f1_sem": 0.004258247789180813, "eval_arxiv_token_set_precision": 0.3095713620207935, "eval_arxiv_token_set_recall": 0.4767404887052051, "eval_arxiv_true_num_tokens": 64.0, "step": 1333 }, { "epoch": 5.87, "eval_python_code_alpaca_accuracy": 0.158, "eval_python_code_alpaca_bleu_score": 7.187326839919982, "eval_python_code_alpaca_bleu_score_sem": 0.231031260944302, "eval_python_code_alpaca_emb_cos_sim": 0.7905387282371521, "eval_python_code_alpaca_emb_cos_sim_sem": 0.0032995285889778547, "eval_python_code_alpaca_emb_top1_equal": 0.20999999344348907, "eval_python_code_alpaca_emb_top1_equal_sem": 0.0182336207644306, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.565624713897705, "eval_python_code_alpaca_n_ngrams_match_1": 9.234, "eval_python_code_alpaca_n_ngrams_match_2": 2.432, "eval_python_code_alpaca_n_ngrams_match_3": 0.782, "eval_python_code_alpaca_num_pred_words": 25.644, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 13.008782599028763, "eval_python_code_alpaca_pred_num_tokens": 38.703125, "eval_python_code_alpaca_rouge_score": 0.46299269220710015, "eval_python_code_alpaca_runtime": 6.9473, "eval_python_code_alpaca_samples_per_second": 71.971, "eval_python_code_alpaca_steps_per_second": 0.144, "eval_python_code_alpaca_token_set_f1": 0.4906910225784128, "eval_python_code_alpaca_token_set_f1_sem": 0.005492782455485606, "eval_python_code_alpaca_token_set_precision": 0.5259909652905059, "eval_python_code_alpaca_token_set_recall": 0.47641560708165553, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 1333 }, { "epoch": 5.87, "eval_wikibio_accuracy": 0.348375, "eval_wikibio_bleu_score": 6.015703013618571, "eval_wikibio_bleu_score_sem": 0.24475698963490608, "eval_wikibio_emb_cos_sim": 0.7452252507209778, "eval_wikibio_emb_cos_sim_sem": 0.005492733573140019, "eval_wikibio_emb_top1_equal": 0.18000000715255737, "eval_wikibio_emb_top1_equal_sem": 0.017198591983670585, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.548689126968384, "eval_wikibio_n_ngrams_match_1": 8.922, "eval_wikibio_n_ngrams_match_2": 2.732, "eval_wikibio_n_ngrams_match_3": 0.974, "eval_wikibio_num_pred_words": 28.69, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 34.76771154689603, "eval_wikibio_pred_num_tokens": 51.3984375, "eval_wikibio_rouge_score": 0.3529826805446006, "eval_wikibio_runtime": 7.288, "eval_wikibio_samples_per_second": 68.606, "eval_wikibio_steps_per_second": 0.137, "eval_wikibio_token_set_f1": 0.30786314677145327, "eval_wikibio_token_set_f1_sem": 0.005744181551871143, "eval_wikibio_token_set_precision": 0.2945685099531984, "eval_wikibio_token_set_recall": 0.34009161709370883, "eval_wikibio_true_num_tokens": 61.1328125, "step": 1333 }, { "epoch": 5.87, "eval_bias-bios_accuracy": 0.51409375, "eval_bias-bios_bleu_score": 18.601895243693658, "eval_bias-bios_bleu_score_sem": 0.8395960259213998, "eval_bias-bios_emb_cos_sim": 0.8698825836181641, "eval_bias-bios_emb_cos_sim_sem": 0.003303766227769929, "eval_bias-bios_emb_top1_equal": 0.3179999887943268, "eval_bias-bios_emb_top1_equal_sem": 0.02084757283415153, "eval_bias-bios_exact_match": 0.002, "eval_bias-bios_exact_match_sem": 0.002, "eval_bias-bios_loss": 1.8061383962631226, "eval_bias-bios_n_ngrams_match_1": 20.428, "eval_bias-bios_n_ngrams_match_2": 9.206, "eval_bias-bios_n_ngrams_match_3": 5.208, "eval_bias-bios_num_pred_words": 34.672, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 6.086896806450112, "eval_bias-bios_pred_num_tokens": 47.1875, "eval_bias-bios_rouge_score": 0.547141850002661, "eval_bias-bios_runtime": 7.4016, "eval_bias-bios_samples_per_second": 67.553, "eval_bias-bios_steps_per_second": 0.135, "eval_bias-bios_token_set_f1": 0.554887604686961, "eval_bias-bios_token_set_f1_sem": 0.0068598685232604895, "eval_bias-bios_token_set_precision": 0.5098140838716572, "eval_bias-bios_token_set_recall": 0.6214830682766334, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 1333 }, { "epoch": 5.92, "learning_rate": 0.001, "loss": 1.9777, "step": 1344 }, { "epoch": 5.97, "learning_rate": 0.001, "loss": 1.9223, "step": 1356 }, { "epoch": 6.01, "eval_ag_news_accuracy": 0.30090625, "eval_ag_news_bleu_score": 4.229521606856611, "eval_ag_news_bleu_score_sem": 0.16599247987237792, "eval_ag_news_emb_cos_sim": 0.793785810470581, "eval_ag_news_emb_cos_sim_sem": 0.00425375801300851, "eval_ag_news_emb_top1_equal": 0.23000000417232513, "eval_ag_news_emb_top1_equal_sem": 0.018839050665941787, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.6538124084472656, "eval_ag_news_n_ngrams_match_1": 10.808, "eval_ag_news_n_ngrams_match_2": 2.226, "eval_ag_news_n_ngrams_match_3": 0.64, "eval_ag_news_num_pred_words": 27.13, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 38.62162715075604, "eval_ag_news_pred_num_tokens": 39.921875, "eval_ag_news_rouge_score": 0.34952438531194774, "eval_ag_news_runtime": 7.4075, "eval_ag_news_samples_per_second": 67.499, "eval_ag_news_steps_per_second": 0.135, "eval_ag_news_token_set_f1": 0.33620576435916294, "eval_ag_news_token_set_f1_sem": 0.0046712207071122, "eval_ag_news_token_set_precision": 0.2825569694260224, "eval_ag_news_token_set_recall": 0.43444870854553147, "eval_ag_news_true_num_tokens": 56.09375, "step": 1364 }, { "epoch": 6.01, "eval_anthropic_toxic_prompts_accuracy": 0.11184375, "eval_anthropic_toxic_prompts_bleu_score": 6.414028894652578, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.23846858566105303, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.7042384147644043, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004670806244326763, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.15800000727176666, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.016328049428381567, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 2.9145073890686035, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.608, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.684, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.61, "eval_anthropic_toxic_prompts_num_pred_words": 21.97, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 18.43972655416395, "eval_anthropic_toxic_prompts_pred_num_tokens": 30.3046875, "eval_anthropic_toxic_prompts_rouge_score": 0.34220573788923403, "eval_anthropic_toxic_prompts_runtime": 7.0386, "eval_anthropic_toxic_prompts_samples_per_second": 71.037, "eval_anthropic_toxic_prompts_steps_per_second": 0.142, "eval_anthropic_toxic_prompts_token_set_f1": 0.3608022356241281, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006174602651222466, "eval_anthropic_toxic_prompts_token_set_precision": 0.41874715816277824, "eval_anthropic_toxic_prompts_token_set_recall": 0.3403751793504618, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 1364 }, { "epoch": 6.01, "eval_arxiv_accuracy": 0.4129375, "eval_arxiv_bleu_score": 3.5049575512352105, "eval_arxiv_bleu_score_sem": 0.10327675713543369, "eval_arxiv_emb_cos_sim": 0.746076226234436, "eval_arxiv_emb_cos_sim_sem": 0.003951010408643731, "eval_arxiv_emb_top1_equal": 0.16599999368190765, "eval_arxiv_emb_top1_equal_sem": 0.016656615375209204, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.1462719440460205, "eval_arxiv_n_ngrams_match_1": 13.094, "eval_arxiv_n_ngrams_match_2": 2.47, "eval_arxiv_n_ngrams_match_3": 0.534, "eval_arxiv_num_pred_words": 27.968, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 23.24922839209836, "eval_arxiv_pred_num_tokens": 44.546875, "eval_arxiv_rouge_score": 0.36512357189825984, "eval_arxiv_runtime": 7.3317, "eval_arxiv_samples_per_second": 68.197, "eval_arxiv_steps_per_second": 0.136, "eval_arxiv_token_set_f1": 0.36415789710214524, "eval_arxiv_token_set_f1_sem": 0.003994316825023978, "eval_arxiv_token_set_precision": 0.2976549374135428, "eval_arxiv_token_set_recall": 0.48209242048316353, "eval_arxiv_true_num_tokens": 64.0, "step": 1364 }, { "epoch": 6.01, "eval_python_code_alpaca_accuracy": 0.16503125, "eval_python_code_alpaca_bleu_score": 8.367728619471684, "eval_python_code_alpaca_bleu_score_sem": 0.28630724669936525, "eval_python_code_alpaca_emb_cos_sim": 0.7989581823348999, "eval_python_code_alpaca_emb_cos_sim_sem": 0.0036343343739607984, "eval_python_code_alpaca_emb_top1_equal": 0.23000000417232513, "eval_python_code_alpaca_emb_top1_equal_sem": 0.018839050665941787, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.4538772106170654, "eval_python_code_alpaca_n_ngrams_match_1": 8.802, "eval_python_code_alpaca_n_ngrams_match_2": 2.304, "eval_python_code_alpaca_n_ngrams_match_3": 0.732, "eval_python_code_alpaca_num_pred_words": 20.898, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 11.633364395268169, "eval_python_code_alpaca_pred_num_tokens": 31.390625, "eval_python_code_alpaca_rouge_score": 0.49621776374006266, "eval_python_code_alpaca_runtime": 7.0134, "eval_python_code_alpaca_samples_per_second": 71.292, "eval_python_code_alpaca_steps_per_second": 0.143, "eval_python_code_alpaca_token_set_f1": 0.5008596100567906, "eval_python_code_alpaca_token_set_f1_sem": 0.005744874789612027, "eval_python_code_alpaca_token_set_precision": 0.5180000103019801, "eval_python_code_alpaca_token_set_recall": 0.5024754775785634, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 1364 }, { "epoch": 6.01, "eval_wikibio_accuracy": 0.3526875, "eval_wikibio_bleu_score": 6.186516290284805, "eval_wikibio_bleu_score_sem": 0.23684423168356836, "eval_wikibio_emb_cos_sim": 0.7449456453323364, "eval_wikibio_emb_cos_sim_sem": 0.004893123011265849, "eval_wikibio_emb_top1_equal": 0.17399999499320984, "eval_wikibio_emb_top1_equal_sem": 0.016971270884523753, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.5559232234954834, "eval_wikibio_n_ngrams_match_1": 9.038, "eval_wikibio_n_ngrams_match_2": 2.814, "eval_wikibio_n_ngrams_match_3": 1.056, "eval_wikibio_num_pred_words": 28.844, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 35.02013646052084, "eval_wikibio_pred_num_tokens": 51.984375, "eval_wikibio_rouge_score": 0.3573138234309702, "eval_wikibio_runtime": 7.3573, "eval_wikibio_samples_per_second": 67.96, "eval_wikibio_steps_per_second": 0.136, "eval_wikibio_token_set_f1": 0.3157430502946985, "eval_wikibio_token_set_f1_sem": 0.0055280111079246685, "eval_wikibio_token_set_precision": 0.3016019113436564, "eval_wikibio_token_set_recall": 0.3466330422570567, "eval_wikibio_true_num_tokens": 61.1328125, "step": 1364 }, { "epoch": 6.01, "eval_bias-bios_accuracy": 0.50953125, "eval_bias-bios_bleu_score": 17.60533299468726, "eval_bias-bios_bleu_score_sem": 0.8486476305948097, "eval_bias-bios_emb_cos_sim": 0.8615785241127014, "eval_bias-bios_emb_cos_sim_sem": 0.0031654218820528065, "eval_bias-bios_emb_top1_equal": 0.2720000147819519, "eval_bias-bios_emb_top1_equal_sem": 0.019920483557355567, "eval_bias-bios_exact_match": 0.0, "eval_bias-bios_exact_match_sem": 0.0, "eval_bias-bios_loss": 1.8134593963623047, "eval_bias-bios_n_ngrams_match_1": 18.908, "eval_bias-bios_n_ngrams_match_2": 8.648, "eval_bias-bios_n_ngrams_match_3": 4.96, "eval_bias-bios_num_pred_words": 31.066, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 6.131622497203516, "eval_bias-bios_pred_num_tokens": 41.2578125, "eval_bias-bios_rouge_score": 0.5365375885706414, "eval_bias-bios_runtime": 7.4354, "eval_bias-bios_samples_per_second": 67.246, "eval_bias-bios_steps_per_second": 0.134, "eval_bias-bios_token_set_f1": 0.5439127466893935, "eval_bias-bios_token_set_f1_sem": 0.00694620487083124, "eval_bias-bios_token_set_precision": 0.48595299762233435, "eval_bias-bios_token_set_recall": 0.6326538885690199, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 1364 }, { "epoch": 6.03, "learning_rate": 0.001, "loss": 1.8676, "step": 1368 }, { "epoch": 6.08, "learning_rate": 0.001, "loss": 1.9923, "step": 1380 }, { "epoch": 6.13, "learning_rate": 0.001, "loss": 1.9533, "step": 1392 }, { "epoch": 6.15, "eval_ag_news_accuracy": 0.3015625, "eval_ag_news_bleu_score": 4.958398698029417, "eval_ag_news_bleu_score_sem": 0.163501632207498, "eval_ag_news_emb_cos_sim": 0.8198105096817017, "eval_ag_news_emb_cos_sim_sem": 0.003713689308283834, "eval_ag_news_emb_top1_equal": 0.28600001335144043, "eval_ag_news_emb_top1_equal_sem": 0.020229345383440313, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.560136079788208, "eval_ag_news_n_ngrams_match_1": 13.476, "eval_ag_news_n_ngrams_match_2": 2.88, "eval_ag_news_n_ngrams_match_3": 0.832, "eval_ag_news_num_pred_words": 41.864, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 35.16798247111252, "eval_ag_news_pred_num_tokens": 61.4453125, "eval_ag_news_rouge_score": 0.36747310775866715, "eval_ag_news_runtime": 8.0532, "eval_ag_news_samples_per_second": 62.087, "eval_ag_news_steps_per_second": 0.124, "eval_ag_news_token_set_f1": 0.35342223201297795, "eval_ag_news_token_set_f1_sem": 0.004599465585950473, "eval_ag_news_token_set_precision": 0.3303619830838599, "eval_ag_news_token_set_recall": 0.4019049295022457, "eval_ag_news_true_num_tokens": 56.09375, "step": 1395 }, { "epoch": 6.15, "eval_anthropic_toxic_prompts_accuracy": 0.1064375, "eval_anthropic_toxic_prompts_bleu_score": 3.6235744911046486, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.127329776218279, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6942067742347717, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004392782753317032, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1379999965429306, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015439843269723665, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.1174263954162598, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.19, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.928, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.742, "eval_anthropic_toxic_prompts_num_pred_words": 41.358, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 22.588171750897825, "eval_anthropic_toxic_prompts_pred_num_tokens": 58.4921875, "eval_anthropic_toxic_prompts_rouge_score": 0.24408343271962318, "eval_anthropic_toxic_prompts_runtime": 7.6259, "eval_anthropic_toxic_prompts_samples_per_second": 65.566, "eval_anthropic_toxic_prompts_steps_per_second": 0.131, "eval_anthropic_toxic_prompts_token_set_f1": 0.33171928516895843, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005368515808050288, "eval_anthropic_toxic_prompts_token_set_precision": 0.451752766617253, "eval_anthropic_toxic_prompts_token_set_recall": 0.283778100154781, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 1395 }, { "epoch": 6.15, "eval_arxiv_accuracy": 0.425, "eval_arxiv_bleu_score": 4.534491599177458, "eval_arxiv_bleu_score_sem": 0.12468185772288573, "eval_arxiv_emb_cos_sim": 0.7561259865760803, "eval_arxiv_emb_cos_sim_sem": 0.004883616146191509, "eval_arxiv_emb_top1_equal": 0.2460000067949295, "eval_arxiv_emb_top1_equal_sem": 0.019279819745132862, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.026859760284424, "eval_arxiv_n_ngrams_match_1": 15.424, "eval_arxiv_n_ngrams_match_2": 3.03, "eval_arxiv_n_ngrams_match_3": 0.692, "eval_arxiv_num_pred_words": 38.806, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 20.63234025967113, "eval_arxiv_pred_num_tokens": 62.359375, "eval_arxiv_rouge_score": 0.36722323683211977, "eval_arxiv_runtime": 7.4273, "eval_arxiv_samples_per_second": 67.319, "eval_arxiv_steps_per_second": 0.135, "eval_arxiv_token_set_f1": 0.3709845887261192, "eval_arxiv_token_set_f1_sem": 0.004405283616695815, "eval_arxiv_token_set_precision": 0.3211236842225695, "eval_arxiv_token_set_recall": 0.45768708574331657, "eval_arxiv_true_num_tokens": 64.0, "step": 1395 }, { "epoch": 6.15, "eval_python_code_alpaca_accuracy": 0.1545625, "eval_python_code_alpaca_bleu_score": 5.5736478407264585, "eval_python_code_alpaca_bleu_score_sem": 0.17406260818897018, "eval_python_code_alpaca_emb_cos_sim": 0.7750619053840637, "eval_python_code_alpaca_emb_cos_sim_sem": 0.003336053983252675, "eval_python_code_alpaca_emb_top1_equal": 0.16200000047683716, "eval_python_code_alpaca_emb_top1_equal_sem": 0.016494124351899474, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.7529311180114746, "eval_python_code_alpaca_n_ngrams_match_1": 10.01, "eval_python_code_alpaca_n_ngrams_match_2": 2.986, "eval_python_code_alpaca_n_ngrams_match_3": 1.014, "eval_python_code_alpaca_num_pred_words": 37.058, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 15.68854954641924, "eval_python_code_alpaca_pred_num_tokens": 58.078125, "eval_python_code_alpaca_rouge_score": 0.4001777824628884, "eval_python_code_alpaca_runtime": 7.5451, "eval_python_code_alpaca_samples_per_second": 66.268, "eval_python_code_alpaca_steps_per_second": 0.133, "eval_python_code_alpaca_token_set_f1": 0.4776881355635347, "eval_python_code_alpaca_token_set_f1_sem": 0.005200739185162961, "eval_python_code_alpaca_token_set_precision": 0.5604280670828278, "eval_python_code_alpaca_token_set_recall": 0.4339519068232182, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 1395 }, { "epoch": 6.15, "eval_wikibio_accuracy": 0.36215625, "eval_wikibio_bleu_score": 4.99687688158487, "eval_wikibio_bleu_score_sem": 0.21497068371007713, "eval_wikibio_emb_cos_sim": 0.7186086773872375, "eval_wikibio_emb_cos_sim_sem": 0.00626282829537244, "eval_wikibio_emb_top1_equal": 0.17000000178813934, "eval_wikibio_emb_top1_equal_sem": 0.016815633120741882, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.3449625968933105, "eval_wikibio_n_ngrams_match_1": 8.356, "eval_wikibio_n_ngrams_match_2": 2.622, "eval_wikibio_n_ngrams_match_3": 0.98, "eval_wikibio_num_pred_words": 31.684, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 28.35951491285399, "eval_wikibio_pred_num_tokens": 62.859375, "eval_wikibio_rouge_score": 0.30098109064809037, "eval_wikibio_runtime": 7.3137, "eval_wikibio_samples_per_second": 68.365, "eval_wikibio_steps_per_second": 0.137, "eval_wikibio_token_set_f1": 0.2726735406063593, "eval_wikibio_token_set_f1_sem": 0.006712610493410692, "eval_wikibio_token_set_precision": 0.26585764155408015, "eval_wikibio_token_set_recall": 0.30445040567456516, "eval_wikibio_true_num_tokens": 61.1328125, "step": 1395 }, { "epoch": 6.15, "eval_bias-bios_accuracy": 0.515375, "eval_bias-bios_bleu_score": 18.27200721878929, "eval_bias-bios_bleu_score_sem": 0.7860741507629497, "eval_bias-bios_emb_cos_sim": 0.8807288408279419, "eval_bias-bios_emb_cos_sim_sem": 0.0026622839095169744, "eval_bias-bios_emb_top1_equal": 0.3580000102519989, "eval_bias-bios_emb_top1_equal_sem": 0.021461435363634866, "eval_bias-bios_exact_match": 0.0, "eval_bias-bios_exact_match_sem": 0.0, "eval_bias-bios_loss": 1.7732970714569092, "eval_bias-bios_n_ngrams_match_1": 22.77, "eval_bias-bios_n_ngrams_match_2": 10.378, "eval_bias-bios_n_ngrams_match_3": 5.788, "eval_bias-bios_num_pred_words": 46.242, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 5.890241929611657, "eval_bias-bios_pred_num_tokens": 60.8984375, "eval_bias-bios_rouge_score": 0.5304071182171766, "eval_bias-bios_runtime": 8.3616, "eval_bias-bios_samples_per_second": 59.797, "eval_bias-bios_steps_per_second": 0.12, "eval_bias-bios_token_set_f1": 0.5578896663828681, "eval_bias-bios_token_set_f1_sem": 0.006577120420087825, "eval_bias-bios_token_set_precision": 0.5450745752720823, "eval_bias-bios_token_set_recall": 0.5807688151511985, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 1395 }, { "epoch": 6.19, "learning_rate": 0.001, "loss": 1.7614, "step": 1404 }, { "epoch": 6.24, "learning_rate": 0.001, "loss": 1.7616, "step": 1416 }, { "epoch": 6.28, "eval_ag_news_accuracy": 0.30121875, "eval_ag_news_bleu_score": 4.759509818524586, "eval_ag_news_bleu_score_sem": 0.1544282222876058, "eval_ag_news_emb_cos_sim": 0.8108397126197815, "eval_ag_news_emb_cos_sim_sem": 0.004678499834491018, "eval_ag_news_emb_top1_equal": 0.2540000081062317, "eval_ag_news_emb_top1_equal_sem": 0.019486597059300604, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.532440185546875, "eval_ag_news_n_ngrams_match_1": 13.188, "eval_ag_news_n_ngrams_match_2": 2.724, "eval_ag_news_n_ngrams_match_3": 0.712, "eval_ag_news_num_pred_words": 39.954, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.20733810569427, "eval_ag_news_pred_num_tokens": 60.5546875, "eval_ag_news_rouge_score": 0.3631419565768096, "eval_ag_news_runtime": 7.4938, "eval_ag_news_samples_per_second": 66.722, "eval_ag_news_steps_per_second": 0.133, "eval_ag_news_token_set_f1": 0.3508523169951062, "eval_ag_news_token_set_f1_sem": 0.004783773326326581, "eval_ag_news_token_set_precision": 0.32298989144493473, "eval_ag_news_token_set_recall": 0.4043442158097807, "eval_ag_news_true_num_tokens": 56.09375, "step": 1426 }, { "epoch": 6.28, "eval_anthropic_toxic_prompts_accuracy": 0.106375, "eval_anthropic_toxic_prompts_bleu_score": 3.8482111778361916, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.13929177545922003, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6884832978248596, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004413079303826577, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.12800000607967377, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.014955912783191019, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.0690646171569824, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.074, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.9, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.722, "eval_anthropic_toxic_prompts_num_pred_words": 38.91, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 21.521762169844234, "eval_anthropic_toxic_prompts_pred_num_tokens": 56.6875, "eval_anthropic_toxic_prompts_rouge_score": 0.2507626807826677, "eval_anthropic_toxic_prompts_runtime": 6.9858, "eval_anthropic_toxic_prompts_samples_per_second": 71.574, "eval_anthropic_toxic_prompts_steps_per_second": 0.143, "eval_anthropic_toxic_prompts_token_set_f1": 0.34025409496271963, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00581995043985978, "eval_anthropic_toxic_prompts_token_set_precision": 0.4401520663488164, "eval_anthropic_toxic_prompts_token_set_recall": 0.29992367690094496, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 1426 }, { "epoch": 6.28, "eval_arxiv_accuracy": 0.425625, "eval_arxiv_bleu_score": 4.292847318437543, "eval_arxiv_bleu_score_sem": 0.12571980529092805, "eval_arxiv_emb_cos_sim": 0.7370375394821167, "eval_arxiv_emb_cos_sim_sem": 0.005921472129746834, "eval_arxiv_emb_top1_equal": 0.23800000548362732, "eval_arxiv_emb_top1_equal_sem": 0.019064072684441876, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.01349139213562, "eval_arxiv_n_ngrams_match_1": 14.782, "eval_arxiv_n_ngrams_match_2": 2.812, "eval_arxiv_n_ngrams_match_3": 0.638, "eval_arxiv_num_pred_words": 36.926, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 20.35835498768817, "eval_arxiv_pred_num_tokens": 61.0078125, "eval_arxiv_rouge_score": 0.3582139659097523, "eval_arxiv_runtime": 7.4003, "eval_arxiv_samples_per_second": 67.565, "eval_arxiv_steps_per_second": 0.135, "eval_arxiv_token_set_f1": 0.3634346970856803, "eval_arxiv_token_set_f1_sem": 0.004765512360675474, "eval_arxiv_token_set_precision": 0.31038220900070124, "eval_arxiv_token_set_recall": 0.4631410541105638, "eval_arxiv_true_num_tokens": 64.0, "step": 1426 }, { "epoch": 6.28, "eval_python_code_alpaca_accuracy": 0.15415625, "eval_python_code_alpaca_bleu_score": 5.5198433610264175, "eval_python_code_alpaca_bleu_score_sem": 0.17136673825122412, "eval_python_code_alpaca_emb_cos_sim": 0.765462338924408, "eval_python_code_alpaca_emb_cos_sim_sem": 0.004074766921057206, "eval_python_code_alpaca_emb_top1_equal": 0.18199999630451202, "eval_python_code_alpaca_emb_top1_equal_sem": 0.017272772986938162, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.6881864070892334, "eval_python_code_alpaca_n_ngrams_match_1": 9.646, "eval_python_code_alpaca_n_ngrams_match_2": 2.738, "eval_python_code_alpaca_n_ngrams_match_3": 0.916, "eval_python_code_alpaca_num_pred_words": 35.164, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 14.704982869909538, "eval_python_code_alpaca_pred_num_tokens": 55.21875, "eval_python_code_alpaca_rouge_score": 0.4021857778620161, "eval_python_code_alpaca_runtime": 7.1266, "eval_python_code_alpaca_samples_per_second": 70.16, "eval_python_code_alpaca_steps_per_second": 0.14, "eval_python_code_alpaca_token_set_f1": 0.4768254189100492, "eval_python_code_alpaca_token_set_f1_sem": 0.00559437213796573, "eval_python_code_alpaca_token_set_precision": 0.5371631900196111, "eval_python_code_alpaca_token_set_recall": 0.4464693605196623, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 1426 }, { "epoch": 6.28, "eval_wikibio_accuracy": 0.37003125, "eval_wikibio_bleu_score": 5.059556992142152, "eval_wikibio_bleu_score_sem": 0.21789251518816838, "eval_wikibio_emb_cos_sim": 0.7171035408973694, "eval_wikibio_emb_cos_sim_sem": 0.006326739405351032, "eval_wikibio_emb_top1_equal": 0.15000000596046448, "eval_wikibio_emb_top1_equal_sem": 0.015984712054998636, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.315477132797241, "eval_wikibio_n_ngrams_match_1": 8.372, "eval_wikibio_n_ngrams_match_2": 2.636, "eval_wikibio_n_ngrams_match_3": 0.996, "eval_wikibio_num_pred_words": 31.736, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 27.535528954710415, "eval_wikibio_pred_num_tokens": 62.8125, "eval_wikibio_rouge_score": 0.30072556489442737, "eval_wikibio_runtime": 7.042, "eval_wikibio_samples_per_second": 71.002, "eval_wikibio_steps_per_second": 0.142, "eval_wikibio_token_set_f1": 0.2749744801258639, "eval_wikibio_token_set_f1_sem": 0.006671092272697617, "eval_wikibio_token_set_precision": 0.2685623083750441, "eval_wikibio_token_set_recall": 0.310867379805107, "eval_wikibio_true_num_tokens": 61.1328125, "step": 1426 }, { "epoch": 6.28, "eval_bias-bios_accuracy": 0.51678125, "eval_bias-bios_bleu_score": 18.45761988385277, "eval_bias-bios_bleu_score_sem": 0.7958526816092888, "eval_bias-bios_emb_cos_sim": 0.8778645992279053, "eval_bias-bios_emb_cos_sim_sem": 0.0028580947755997856, "eval_bias-bios_emb_top1_equal": 0.31200000643730164, "eval_bias-bios_emb_top1_equal_sem": 0.020740595612058172, "eval_bias-bios_exact_match": 0.0, "eval_bias-bios_exact_match_sem": 0.0, "eval_bias-bios_loss": 1.7571202516555786, "eval_bias-bios_n_ngrams_match_1": 22.41, "eval_bias-bios_n_ngrams_match_2": 10.152, "eval_bias-bios_n_ngrams_match_3": 5.648, "eval_bias-bios_num_pred_words": 44.464, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 5.795723115448467, "eval_bias-bios_pred_num_tokens": 59.8359375, "eval_bias-bios_rouge_score": 0.5313359871031016, "eval_bias-bios_runtime": 8.4445, "eval_bias-bios_samples_per_second": 59.21, "eval_bias-bios_steps_per_second": 0.118, "eval_bias-bios_token_set_f1": 0.557497262539606, "eval_bias-bios_token_set_f1_sem": 0.006626022613662489, "eval_bias-bios_token_set_precision": 0.5390052520117803, "eval_bias-bios_token_set_recall": 0.5882333649734502, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 1426 }, { "epoch": 6.29, "learning_rate": 0.001, "loss": 2.0052, "step": 1428 }, { "epoch": 6.34, "learning_rate": 0.001, "loss": 1.9724, "step": 1440 }, { "epoch": 6.4, "learning_rate": 0.001, "loss": 1.8063, "step": 1452 }, { "epoch": 6.42, "eval_ag_news_accuracy": 0.3015625, "eval_ag_news_bleu_score": 4.4805017796612825, "eval_ag_news_bleu_score_sem": 0.1517655061779348, "eval_ag_news_emb_cos_sim": 0.8106436729431152, "eval_ag_news_emb_cos_sim_sem": 0.00428181512690115, "eval_ag_news_emb_top1_equal": 0.26600000262260437, "eval_ag_news_emb_top1_equal_sem": 0.01978055817719369, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5997157096862793, "eval_ag_news_n_ngrams_match_1": 12.064, "eval_ag_news_n_ngrams_match_2": 2.416, "eval_ag_news_n_ngrams_match_3": 0.612, "eval_ag_news_num_pred_words": 32.786, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 36.58783139893886, "eval_ag_news_pred_num_tokens": 50.3671875, "eval_ag_news_rouge_score": 0.3644739897183449, "eval_ag_news_runtime": 7.339, "eval_ag_news_samples_per_second": 68.129, "eval_ag_news_steps_per_second": 0.136, "eval_ag_news_token_set_f1": 0.34682066457855015, "eval_ag_news_token_set_f1_sem": 0.004680649556318142, "eval_ag_news_token_set_precision": 0.3066558121768779, "eval_ag_news_token_set_recall": 0.4171577397900149, "eval_ag_news_true_num_tokens": 56.09375, "step": 1457 }, { "epoch": 6.42, "eval_anthropic_toxic_prompts_accuracy": 0.10828125, "eval_anthropic_toxic_prompts_bleu_score": 4.917360176354823, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.17999685495020398, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6960086822509766, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004688394211274368, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1459999978542328, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015807205702664997, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.000969171524048, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.748, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.78, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.674, "eval_anthropic_toxic_prompts_num_pred_words": 29.014, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 20.10501268977357, "eval_anthropic_toxic_prompts_pred_num_tokens": 42.0, "eval_anthropic_toxic_prompts_rouge_score": 0.29196549015552187, "eval_anthropic_toxic_prompts_runtime": 7.0438, "eval_anthropic_toxic_prompts_samples_per_second": 70.984, "eval_anthropic_toxic_prompts_steps_per_second": 0.142, "eval_anthropic_toxic_prompts_token_set_f1": 0.34287485631247483, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005876938571321538, "eval_anthropic_toxic_prompts_token_set_precision": 0.4244424668846003, "eval_anthropic_toxic_prompts_token_set_recall": 0.31100293355640496, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 1457 }, { "epoch": 6.42, "eval_arxiv_accuracy": 0.41771875, "eval_arxiv_bleu_score": 3.934917926586904, "eval_arxiv_bleu_score_sem": 0.10808345887239815, "eval_arxiv_emb_cos_sim": 0.7565560936927795, "eval_arxiv_emb_cos_sim_sem": 0.0043263203302207395, "eval_arxiv_emb_top1_equal": 0.20200000703334808, "eval_arxiv_emb_top1_equal_sem": 0.017973259543989376, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.091585874557495, "eval_arxiv_n_ngrams_match_1": 14.304, "eval_arxiv_n_ngrams_match_2": 2.642, "eval_arxiv_n_ngrams_match_3": 0.566, "eval_arxiv_num_pred_words": 32.542, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 22.011958515343032, "eval_arxiv_pred_num_tokens": 52.9609375, "eval_arxiv_rouge_score": 0.3719034083947236, "eval_arxiv_runtime": 7.3761, "eval_arxiv_samples_per_second": 67.787, "eval_arxiv_steps_per_second": 0.136, "eval_arxiv_token_set_f1": 0.3687926222207479, "eval_arxiv_token_set_f1_sem": 0.004272372260008827, "eval_arxiv_token_set_precision": 0.3122666822215735, "eval_arxiv_token_set_recall": 0.4644894310912833, "eval_arxiv_true_num_tokens": 64.0, "step": 1457 }, { "epoch": 6.42, "eval_python_code_alpaca_accuracy": 0.15671875, "eval_python_code_alpaca_bleu_score": 6.599258142542607, "eval_python_code_alpaca_bleu_score_sem": 0.20786536106594386, "eval_python_code_alpaca_emb_cos_sim": 0.7821627855300903, "eval_python_code_alpaca_emb_cos_sim_sem": 0.0037482584848083575, "eval_python_code_alpaca_emb_top1_equal": 0.19599999487400055, "eval_python_code_alpaca_emb_top1_equal_sem": 0.017770749856622144, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.6141581535339355, "eval_python_code_alpaca_n_ngrams_match_1": 9.334, "eval_python_code_alpaca_n_ngrams_match_2": 2.564, "eval_python_code_alpaca_n_ngrams_match_3": 0.826, "eval_python_code_alpaca_num_pred_words": 27.932, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 13.655715521741897, "eval_python_code_alpaca_pred_num_tokens": 44.6328125, "eval_python_code_alpaca_rouge_score": 0.4446707271822361, "eval_python_code_alpaca_runtime": 7.005, "eval_python_code_alpaca_samples_per_second": 71.378, "eval_python_code_alpaca_steps_per_second": 0.143, "eval_python_code_alpaca_token_set_f1": 0.48588868276907005, "eval_python_code_alpaca_token_set_f1_sem": 0.005428207810769336, "eval_python_code_alpaca_token_set_precision": 0.5299259184573405, "eval_python_code_alpaca_token_set_recall": 0.4649175014085176, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 1457 }, { "epoch": 6.42, "eval_wikibio_accuracy": 0.35565625, "eval_wikibio_bleu_score": 5.7748851156901, "eval_wikibio_bleu_score_sem": 0.23454135758726272, "eval_wikibio_emb_cos_sim": 0.7541660666465759, "eval_wikibio_emb_cos_sim_sem": 0.004788653453077914, "eval_wikibio_emb_top1_equal": 0.16200000047683716, "eval_wikibio_emb_top1_equal_sem": 0.016494123019099097, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.462639093399048, "eval_wikibio_n_ngrams_match_1": 9.024, "eval_wikibio_n_ngrams_match_2": 2.756, "eval_wikibio_n_ngrams_match_3": 1.002, "eval_wikibio_num_pred_words": 30.664, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 31.901055384561012, "eval_wikibio_pred_num_tokens": 56.9609375, "eval_wikibio_rouge_score": 0.3481066238213619, "eval_wikibio_runtime": 8.6821, "eval_wikibio_samples_per_second": 57.589, "eval_wikibio_steps_per_second": 0.115, "eval_wikibio_token_set_f1": 0.3042383637835181, "eval_wikibio_token_set_f1_sem": 0.005621777232315361, "eval_wikibio_token_set_precision": 0.29629385842357076, "eval_wikibio_token_set_recall": 0.32962238408366523, "eval_wikibio_true_num_tokens": 61.1328125, "step": 1457 }, { "epoch": 6.42, "eval_bias-bios_accuracy": 0.51865625, "eval_bias-bios_bleu_score": 19.25680543440157, "eval_bias-bios_bleu_score_sem": 0.8417259010557914, "eval_bias-bios_emb_cos_sim": 0.8749097585678101, "eval_bias-bios_emb_cos_sim_sem": 0.003254033114979696, "eval_bias-bios_emb_top1_equal": 0.3319999873638153, "eval_bias-bios_emb_top1_equal_sem": 0.02108176585203148, "eval_bias-bios_exact_match": 0.004, "eval_bias-bios_exact_match_sem": 0.002825591608118863, "eval_bias-bios_loss": 1.7528284788131714, "eval_bias-bios_n_ngrams_match_1": 21.16, "eval_bias-bios_n_ngrams_match_2": 9.692, "eval_bias-bios_n_ngrams_match_3": 5.494, "eval_bias-bios_num_pred_words": 37.402, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 5.77090248872356, "eval_bias-bios_pred_num_tokens": 52.0, "eval_bias-bios_rouge_score": 0.5478462449556378, "eval_bias-bios_runtime": 7.3773, "eval_bias-bios_samples_per_second": 67.775, "eval_bias-bios_steps_per_second": 0.136, "eval_bias-bios_token_set_f1": 0.5589008692099815, "eval_bias-bios_token_set_f1_sem": 0.006875126001028403, "eval_bias-bios_token_set_precision": 0.5246909824127267, "eval_bias-bios_token_set_recall": 0.609506893287974, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 1457 }, { "epoch": 6.45, "learning_rate": 0.001, "loss": 1.695, "step": 1464 }, { "epoch": 6.5, "learning_rate": 0.001, "loss": 2.0224, "step": 1476 }, { "epoch": 6.56, "learning_rate": 0.001, "loss": 1.9642, "step": 1488 }, { "epoch": 6.56, "eval_ag_news_accuracy": 0.2999375, "eval_ag_news_bleu_score": 4.732659847556908, "eval_ag_news_bleu_score_sem": 0.15382722884594552, "eval_ag_news_emb_cos_sim": 0.8149835467338562, "eval_ag_news_emb_cos_sim_sem": 0.004877806802557305, "eval_ag_news_emb_top1_equal": 0.28600001335144043, "eval_ag_news_emb_top1_equal_sem": 0.020229345383440313, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.57092547416687, "eval_ag_news_n_ngrams_match_1": 13.68, "eval_ag_news_n_ngrams_match_2": 2.836, "eval_ag_news_n_ngrams_match_3": 0.766, "eval_ag_news_num_pred_words": 42.318, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 35.54947805580302, "eval_ag_news_pred_num_tokens": 62.8828125, "eval_ag_news_rouge_score": 0.3603819369485558, "eval_ag_news_runtime": 7.7275, "eval_ag_news_samples_per_second": 64.704, "eval_ag_news_steps_per_second": 0.129, "eval_ag_news_token_set_f1": 0.3534624801060994, "eval_ag_news_token_set_f1_sem": 0.004661502713234828, "eval_ag_news_token_set_precision": 0.3299273403261311, "eval_ag_news_token_set_recall": 0.4007806565988763, "eval_ag_news_true_num_tokens": 56.09375, "step": 1488 }, { "epoch": 6.56, "eval_anthropic_toxic_prompts_accuracy": 0.10621875, "eval_anthropic_toxic_prompts_bleu_score": 3.5245039471587516, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12692739102441497, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6925877928733826, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.0043533211998162515, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1379999965429306, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015439843269723665, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.17211651802063, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.368, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.014, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.754, "eval_anthropic_toxic_prompts_num_pred_words": 43.272, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 23.857926688316812, "eval_anthropic_toxic_prompts_pred_num_tokens": 62.5625, "eval_anthropic_toxic_prompts_rouge_score": 0.23856933493684165, "eval_anthropic_toxic_prompts_runtime": 7.0117, "eval_anthropic_toxic_prompts_samples_per_second": 71.31, "eval_anthropic_toxic_prompts_steps_per_second": 0.143, "eval_anthropic_toxic_prompts_token_set_f1": 0.33716291320329317, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005488927012354413, "eval_anthropic_toxic_prompts_token_set_precision": 0.46357587025721775, "eval_anthropic_toxic_prompts_token_set_recall": 0.2879152966800727, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 1488 }, { "epoch": 6.56, "eval_arxiv_accuracy": 0.4288125, "eval_arxiv_bleu_score": 4.661399367438937, "eval_arxiv_bleu_score_sem": 0.12783894900256684, "eval_arxiv_emb_cos_sim": 0.7645853161811829, "eval_arxiv_emb_cos_sim_sem": 0.004385083831951277, "eval_arxiv_emb_top1_equal": 0.2980000078678131, "eval_arxiv_emb_top1_equal_sem": 0.020475119103777986, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.00892972946167, "eval_arxiv_n_ngrams_match_1": 15.502, "eval_arxiv_n_ngrams_match_2": 3.086, "eval_arxiv_n_ngrams_match_3": 0.706, "eval_arxiv_num_pred_words": 39.224, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 20.26569853405298, "eval_arxiv_pred_num_tokens": 62.984375, "eval_arxiv_rouge_score": 0.3712170892610218, "eval_arxiv_runtime": 7.5208, "eval_arxiv_samples_per_second": 66.482, "eval_arxiv_steps_per_second": 0.133, "eval_arxiv_token_set_f1": 0.3734715294639753, "eval_arxiv_token_set_f1_sem": 0.004153338418403119, "eval_arxiv_token_set_precision": 0.32291839735695105, "eval_arxiv_token_set_recall": 0.4580518879073966, "eval_arxiv_true_num_tokens": 64.0, "step": 1488 }, { "epoch": 6.56, "eval_python_code_alpaca_accuracy": 0.15253125, "eval_python_code_alpaca_bleu_score": 5.117748099309339, "eval_python_code_alpaca_bleu_score_sem": 0.16347555365801147, "eval_python_code_alpaca_emb_cos_sim": 0.769605278968811, "eval_python_code_alpaca_emb_cos_sim_sem": 0.0036531835032631754, "eval_python_code_alpaca_emb_top1_equal": 0.17800000309944153, "eval_python_code_alpaca_emb_top1_equal_sem": 0.01712362329538143, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8052680492401123, "eval_python_code_alpaca_n_ngrams_match_1": 9.946, "eval_python_code_alpaca_n_ngrams_match_2": 2.932, "eval_python_code_alpaca_n_ngrams_match_3": 0.966, "eval_python_code_alpaca_num_pred_words": 39.292, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 16.531506569992075, "eval_python_code_alpaca_pred_num_tokens": 62.65625, "eval_python_code_alpaca_rouge_score": 0.3836086668387766, "eval_python_code_alpaca_runtime": 7.2389, "eval_python_code_alpaca_samples_per_second": 69.071, "eval_python_code_alpaca_steps_per_second": 0.138, "eval_python_code_alpaca_token_set_f1": 0.46205300074061845, "eval_python_code_alpaca_token_set_f1_sem": 0.005207518408869205, "eval_python_code_alpaca_token_set_precision": 0.5537377665363031, "eval_python_code_alpaca_token_set_recall": 0.41235007315082095, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 1488 }, { "epoch": 6.56, "eval_wikibio_accuracy": 0.372, "eval_wikibio_bleu_score": 5.16626984542665, "eval_wikibio_bleu_score_sem": 0.2033688183142454, "eval_wikibio_emb_cos_sim": 0.7348429560661316, "eval_wikibio_emb_cos_sim_sem": 0.005632250449986981, "eval_wikibio_emb_top1_equal": 0.1720000058412552, "eval_wikibio_emb_top1_equal_sem": 0.01689386850274998, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.3192598819732666, "eval_wikibio_n_ngrams_match_1": 8.746, "eval_wikibio_n_ngrams_match_2": 2.706, "eval_wikibio_n_ngrams_match_3": 0.988, "eval_wikibio_num_pred_words": 32.386, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 27.639886208393996, "eval_wikibio_pred_num_tokens": 62.96875, "eval_wikibio_rouge_score": 0.31316939003335964, "eval_wikibio_runtime": 8.1102, "eval_wikibio_samples_per_second": 61.651, "eval_wikibio_steps_per_second": 0.123, "eval_wikibio_token_set_f1": 0.2861699744726475, "eval_wikibio_token_set_f1_sem": 0.00638652547434589, "eval_wikibio_token_set_precision": 0.2807087931884633, "eval_wikibio_token_set_recall": 0.31595880847210117, "eval_wikibio_true_num_tokens": 61.1328125, "step": 1488 }, { "epoch": 6.56, "eval_bias-bios_accuracy": 0.51503125, "eval_bias-bios_bleu_score": 17.79482156618152, "eval_bias-bios_bleu_score_sem": 0.7485445091910192, "eval_bias-bios_emb_cos_sim": 0.8782715797424316, "eval_bias-bios_emb_cos_sim_sem": 0.003119295993477308, "eval_bias-bios_emb_top1_equal": 0.33399999141693115, "eval_bias-bios_emb_top1_equal_sem": 0.021113493164956566, "eval_bias-bios_exact_match": 0.0, "eval_bias-bios_exact_match_sem": 0.0, "eval_bias-bios_loss": 1.779577374458313, "eval_bias-bios_n_ngrams_match_1": 22.808, "eval_bias-bios_n_ngrams_match_2": 10.398, "eval_bias-bios_n_ngrams_match_3": 5.816, "eval_bias-bios_num_pred_words": 46.766, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 5.927350839308307, "eval_bias-bios_pred_num_tokens": 62.484375, "eval_bias-bios_rouge_score": 0.5222122418814229, "eval_bias-bios_runtime": 7.5869, "eval_bias-bios_samples_per_second": 65.903, "eval_bias-bios_steps_per_second": 0.132, "eval_bias-bios_token_set_f1": 0.5567971157863137, "eval_bias-bios_token_set_f1_sem": 0.006579296670950696, "eval_bias-bios_token_set_precision": 0.5482021158150124, "eval_bias-bios_token_set_recall": 0.5790242704328651, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 1488 }, { "epoch": 6.61, "learning_rate": 0.001, "loss": 1.8493, "step": 1500 }, { "epoch": 6.66, "learning_rate": 0.001, "loss": 1.6162, "step": 1512 }, { "epoch": 6.69, "eval_ag_news_accuracy": 0.3058125, "eval_ag_news_bleu_score": 4.7463020709686194, "eval_ag_news_bleu_score_sem": 0.1605780732079416, "eval_ag_news_emb_cos_sim": 0.8073976039886475, "eval_ag_news_emb_cos_sim_sem": 0.005124277910691155, "eval_ag_news_emb_top1_equal": 0.2800000011920929, "eval_ag_news_emb_top1_equal_sem": 0.02009995045904072, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.532249689102173, "eval_ag_news_n_ngrams_match_1": 12.448, "eval_ag_news_n_ngrams_match_2": 2.55, "eval_ag_news_n_ngrams_match_3": 0.66, "eval_ag_news_num_pred_words": 34.98, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.200822350036255, "eval_ag_news_pred_num_tokens": 53.34375, "eval_ag_news_rouge_score": 0.3641022807574177, "eval_ag_news_runtime": 11.3731, "eval_ag_news_samples_per_second": 43.963, "eval_ag_news_steps_per_second": 0.088, "eval_ag_news_token_set_f1": 0.3476928951651583, "eval_ag_news_token_set_f1_sem": 0.004917711975399316, "eval_ag_news_token_set_precision": 0.3099224145207519, "eval_ag_news_token_set_recall": 0.4156977928068492, "eval_ag_news_true_num_tokens": 56.09375, "step": 1519 }, { "epoch": 6.69, "eval_anthropic_toxic_prompts_accuracy": 0.10853125, "eval_anthropic_toxic_prompts_bleu_score": 5.128832221923525, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.19070154322972455, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.7086655497550964, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00440713668015488, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.17000000178813934, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.016815633120741882, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.0389885902404785, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.902, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.846, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.702, "eval_anthropic_toxic_prompts_num_pred_words": 28.914, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 20.884110156961853, "eval_anthropic_toxic_prompts_pred_num_tokens": 42.640625, "eval_anthropic_toxic_prompts_rouge_score": 0.2993498581809596, "eval_anthropic_toxic_prompts_runtime": 7.1117, "eval_anthropic_toxic_prompts_samples_per_second": 70.306, "eval_anthropic_toxic_prompts_steps_per_second": 0.141, "eval_anthropic_toxic_prompts_token_set_f1": 0.35337104182115847, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005880778721392854, "eval_anthropic_toxic_prompts_token_set_precision": 0.43707066740847084, "eval_anthropic_toxic_prompts_token_set_recall": 0.3173444768330494, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 1519 }, { "epoch": 6.69, "eval_arxiv_accuracy": 0.42440625, "eval_arxiv_bleu_score": 4.118168026943256, "eval_arxiv_bleu_score_sem": 0.1250072563230236, "eval_arxiv_emb_cos_sim": 0.742168664932251, "eval_arxiv_emb_cos_sim_sem": 0.0065224204899973345, "eval_arxiv_emb_top1_equal": 0.24799999594688416, "eval_arxiv_emb_top1_equal_sem": 0.01933234274230791, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.022693395614624, "eval_arxiv_n_ngrams_match_1": 14.448, "eval_arxiv_n_ngrams_match_2": 2.698, "eval_arxiv_n_ngrams_match_3": 0.616, "eval_arxiv_num_pred_words": 33.828, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 20.546557231935324, "eval_arxiv_pred_num_tokens": 57.03125, "eval_arxiv_rouge_score": 0.362829673463994, "eval_arxiv_runtime": 7.3621, "eval_arxiv_samples_per_second": 67.915, "eval_arxiv_steps_per_second": 0.136, "eval_arxiv_token_set_f1": 0.3622128562665494, "eval_arxiv_token_set_f1_sem": 0.00485702315818566, "eval_arxiv_token_set_precision": 0.3067772119241535, "eval_arxiv_token_set_recall": 0.45555796091217454, "eval_arxiv_true_num_tokens": 64.0, "step": 1519 }, { "epoch": 6.69, "eval_python_code_alpaca_accuracy": 0.156625, "eval_python_code_alpaca_bleu_score": 7.257567396442399, "eval_python_code_alpaca_bleu_score_sem": 0.23629107675727168, "eval_python_code_alpaca_emb_cos_sim": 0.7840350866317749, "eval_python_code_alpaca_emb_cos_sim_sem": 0.003614340702336455, "eval_python_code_alpaca_emb_top1_equal": 0.20000000298023224, "eval_python_code_alpaca_emb_top1_equal_sem": 0.017906459589198134, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.5811822414398193, "eval_python_code_alpaca_n_ngrams_match_1": 9.49, "eval_python_code_alpaca_n_ngrams_match_2": 2.69, "eval_python_code_alpaca_n_ngrams_match_3": 0.898, "eval_python_code_alpaca_num_pred_words": 27.904, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 13.212749589699158, "eval_python_code_alpaca_pred_num_tokens": 44.4765625, "eval_python_code_alpaca_rouge_score": 0.45019775968067777, "eval_python_code_alpaca_runtime": 7.081, "eval_python_code_alpaca_samples_per_second": 70.611, "eval_python_code_alpaca_steps_per_second": 0.141, "eval_python_code_alpaca_token_set_f1": 0.4959035467348254, "eval_python_code_alpaca_token_set_f1_sem": 0.0055782694432414326, "eval_python_code_alpaca_token_set_precision": 0.5380365728375572, "eval_python_code_alpaca_token_set_recall": 0.47629206841719984, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 1519 }, { "epoch": 6.69, "eval_wikibio_accuracy": 0.370375, "eval_wikibio_bleu_score": 5.341609284477066, "eval_wikibio_bleu_score_sem": 0.2066184654207597, "eval_wikibio_emb_cos_sim": 0.7368956804275513, "eval_wikibio_emb_cos_sim_sem": 0.00575103694980236, "eval_wikibio_emb_top1_equal": 0.15800000727176666, "eval_wikibio_emb_top1_equal_sem": 0.01632805076118194, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.31675124168396, "eval_wikibio_n_ngrams_match_1": 8.94, "eval_wikibio_n_ngrams_match_2": 2.756, "eval_wikibio_n_ngrams_match_3": 0.99, "eval_wikibio_num_pred_words": 32.058, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 27.570634576295262, "eval_wikibio_pred_num_tokens": 60.6875, "eval_wikibio_rouge_score": 0.32612518061916174, "eval_wikibio_runtime": 8.7268, "eval_wikibio_samples_per_second": 57.294, "eval_wikibio_steps_per_second": 0.115, "eval_wikibio_token_set_f1": 0.2963580125635239, "eval_wikibio_token_set_f1_sem": 0.00629818151845586, "eval_wikibio_token_set_precision": 0.2896516821409088, "eval_wikibio_token_set_recall": 0.32416889589872927, "eval_wikibio_true_num_tokens": 61.1328125, "step": 1519 }, { "epoch": 6.69, "eval_bias-bios_accuracy": 0.51909375, "eval_bias-bios_bleu_score": 19.268902172980635, "eval_bias-bios_bleu_score_sem": 0.857516824635127, "eval_bias-bios_emb_cos_sim": 0.8769698143005371, "eval_bias-bios_emb_cos_sim_sem": 0.00319190762430341, "eval_bias-bios_emb_top1_equal": 0.36000001430511475, "eval_bias-bios_emb_top1_equal_sem": 0.021487752839838135, "eval_bias-bios_exact_match": 0.002, "eval_bias-bios_exact_match_sem": 0.002, "eval_bias-bios_loss": 1.7438139915466309, "eval_bias-bios_n_ngrams_match_1": 21.502, "eval_bias-bios_n_ngrams_match_2": 9.814, "eval_bias-bios_n_ngrams_match_3": 5.53, "eval_bias-bios_num_pred_words": 38.682, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 5.719114533349184, "eval_bias-bios_pred_num_tokens": 52.0078125, "eval_bias-bios_rouge_score": 0.5474474528418282, "eval_bias-bios_runtime": 7.4191, "eval_bias-bios_samples_per_second": 67.394, "eval_bias-bios_steps_per_second": 0.135, "eval_bias-bios_token_set_f1": 0.5632959401204308, "eval_bias-bios_token_set_f1_sem": 0.0068805471549051025, "eval_bias-bios_token_set_precision": 0.5276031872114068, "eval_bias-bios_token_set_recall": 0.6158242049632531, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 1519 }, { "epoch": 6.71, "learning_rate": 0.001, "loss": 2.0335, "step": 1524 }, { "epoch": 6.77, "learning_rate": 0.001, "loss": 1.9593, "step": 1536 }, { "epoch": 6.82, "learning_rate": 0.001, "loss": 1.8879, "step": 1548 }, { "epoch": 6.83, "eval_ag_news_accuracy": 0.30028125, "eval_ag_news_bleu_score": 4.795721070948663, "eval_ag_news_bleu_score_sem": 0.15974204528730274, "eval_ag_news_emb_cos_sim": 0.8041967749595642, "eval_ag_news_emb_cos_sim_sem": 0.0050863287524158226, "eval_ag_news_emb_top1_equal": 0.27000001072883606, "eval_ag_news_emb_top1_equal_sem": 0.019874356669179787, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5922787189483643, "eval_ag_news_n_ngrams_match_1": 12.668, "eval_ag_news_n_ngrams_match_2": 2.672, "eval_ag_news_n_ngrams_match_3": 0.782, "eval_ag_news_num_pred_words": 36.772, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 36.31673734717072, "eval_ag_news_pred_num_tokens": 55.5078125, "eval_ag_news_rouge_score": 0.3619051833548086, "eval_ag_news_runtime": 7.442, "eval_ag_news_samples_per_second": 67.187, "eval_ag_news_steps_per_second": 0.134, "eval_ag_news_token_set_f1": 0.3482497468589517, "eval_ag_news_token_set_f1_sem": 0.005093154354263263, "eval_ag_news_token_set_precision": 0.31438605603788017, "eval_ag_news_token_set_recall": 0.4080925892825803, "eval_ag_news_true_num_tokens": 56.09375, "step": 1550 }, { "epoch": 6.83, "eval_anthropic_toxic_prompts_accuracy": 0.10615625, "eval_anthropic_toxic_prompts_bleu_score": 4.083847585562941, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.15555929967558885, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6819911599159241, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004785336112944726, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.10999999940395355, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.014006868285818916, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.0443711280822754, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.884, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.74, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.646, "eval_anthropic_toxic_prompts_num_pred_words": 34.422, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 20.996822737917157, "eval_anthropic_toxic_prompts_pred_num_tokens": 49.984375, "eval_anthropic_toxic_prompts_rouge_score": 0.2612879461490797, "eval_anthropic_toxic_prompts_runtime": 7.8422, "eval_anthropic_toxic_prompts_samples_per_second": 63.757, "eval_anthropic_toxic_prompts_steps_per_second": 0.128, "eval_anthropic_toxic_prompts_token_set_f1": 0.3376565967945119, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005536374664466343, "eval_anthropic_toxic_prompts_token_set_precision": 0.42873340074689775, "eval_anthropic_toxic_prompts_token_set_recall": 0.30131964308814996, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 1550 }, { "epoch": 6.83, "eval_arxiv_accuracy": 0.4220625, "eval_arxiv_bleu_score": 4.31068497115973, "eval_arxiv_bleu_score_sem": 0.1182485148643836, "eval_arxiv_emb_cos_sim": 0.751672625541687, "eval_arxiv_emb_cos_sim_sem": 0.004438314879325501, "eval_arxiv_emb_top1_equal": 0.23600000143051147, "eval_arxiv_emb_top1_equal_sem": 0.019008700160065242, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.0553781986236572, "eval_arxiv_n_ngrams_match_1": 14.768, "eval_arxiv_n_ngrams_match_2": 2.834, "eval_arxiv_n_ngrams_match_3": 0.618, "eval_arxiv_num_pred_words": 35.84, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 21.22921286793967, "eval_arxiv_pred_num_tokens": 57.1015625, "eval_arxiv_rouge_score": 0.3674967211275739, "eval_arxiv_runtime": 7.4582, "eval_arxiv_samples_per_second": 67.04, "eval_arxiv_steps_per_second": 0.134, "eval_arxiv_token_set_f1": 0.36944767629366165, "eval_arxiv_token_set_f1_sem": 0.004118295290691409, "eval_arxiv_token_set_precision": 0.3121757890742581, "eval_arxiv_token_set_recall": 0.4675134012606021, "eval_arxiv_true_num_tokens": 64.0, "step": 1550 }, { "epoch": 6.83, "eval_python_code_alpaca_accuracy": 0.15375, "eval_python_code_alpaca_bleu_score": 6.087948865482973, "eval_python_code_alpaca_bleu_score_sem": 0.20284576134331592, "eval_python_code_alpaca_emb_cos_sim": 0.7750284671783447, "eval_python_code_alpaca_emb_cos_sim_sem": 0.003545411265746177, "eval_python_code_alpaca_emb_top1_equal": 0.17399999499320984, "eval_python_code_alpaca_emb_top1_equal_sem": 0.016971270884523753, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.67142915725708, "eval_python_code_alpaca_n_ngrams_match_1": 9.704, "eval_python_code_alpaca_n_ngrams_match_2": 2.756, "eval_python_code_alpaca_n_ngrams_match_3": 0.908, "eval_python_code_alpaca_num_pred_words": 32.86, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 14.46062093334414, "eval_python_code_alpaca_pred_num_tokens": 51.171875, "eval_python_code_alpaca_rouge_score": 0.41505979945750626, "eval_python_code_alpaca_runtime": 7.0339, "eval_python_code_alpaca_samples_per_second": 71.085, "eval_python_code_alpaca_steps_per_second": 0.142, "eval_python_code_alpaca_token_set_f1": 0.4861361656943412, "eval_python_code_alpaca_token_set_f1_sem": 0.00544849215466565, "eval_python_code_alpaca_token_set_precision": 0.546222964812534, "eval_python_code_alpaca_token_set_recall": 0.4546387188311011, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 1550 }, { "epoch": 6.83, "eval_wikibio_accuracy": 0.3648125, "eval_wikibio_bleu_score": 5.078593240932916, "eval_wikibio_bleu_score_sem": 0.22592161566311428, "eval_wikibio_emb_cos_sim": 0.7141668796539307, "eval_wikibio_emb_cos_sim_sem": 0.0065623571868317744, "eval_wikibio_emb_top1_equal": 0.1860000044107437, "eval_wikibio_emb_top1_equal_sem": 0.017418806591218323, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.3612303733825684, "eval_wikibio_n_ngrams_match_1": 8.014, "eval_wikibio_n_ngrams_match_2": 2.408, "eval_wikibio_n_ngrams_match_3": 0.934, "eval_wikibio_num_pred_words": 29.244, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 28.824634133153058, "eval_wikibio_pred_num_tokens": 60.46875, "eval_wikibio_rouge_score": 0.30091374134289106, "eval_wikibio_runtime": 7.2028, "eval_wikibio_samples_per_second": 69.418, "eval_wikibio_steps_per_second": 0.139, "eval_wikibio_token_set_f1": 0.27374934123816314, "eval_wikibio_token_set_f1_sem": 0.006816941537408769, "eval_wikibio_token_set_precision": 0.2599069063291573, "eval_wikibio_token_set_recall": 0.3172531394934058, "eval_wikibio_true_num_tokens": 61.1328125, "step": 1550 }, { "epoch": 6.83, "eval_bias-bios_accuracy": 0.51903125, "eval_bias-bios_bleu_score": 18.776008983057356, "eval_bias-bios_bleu_score_sem": 0.8303969478486409, "eval_bias-bios_emb_cos_sim": 0.8751217126846313, "eval_bias-bios_emb_cos_sim_sem": 0.0029728119026704246, "eval_bias-bios_emb_top1_equal": 0.32600000500679016, "eval_bias-bios_emb_top1_equal_sem": 0.020984011608532603, "eval_bias-bios_exact_match": 0.0, "eval_bias-bios_exact_match_sem": 0.0, "eval_bias-bios_loss": 1.747039794921875, "eval_bias-bios_n_ngrams_match_1": 21.814, "eval_bias-bios_n_ngrams_match_2": 9.83, "eval_bias-bios_n_ngrams_match_3": 5.526, "eval_bias-bios_num_pred_words": 40.97, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 5.7375930603379715, "eval_bias-bios_pred_num_tokens": 55.421875, "eval_bias-bios_rouge_score": 0.5384821581566905, "eval_bias-bios_runtime": 7.4374, "eval_bias-bios_samples_per_second": 67.228, "eval_bias-bios_steps_per_second": 0.134, "eval_bias-bios_token_set_f1": 0.557628993881636, "eval_bias-bios_token_set_f1_sem": 0.006844604271877559, "eval_bias-bios_token_set_precision": 0.530631198064721, "eval_bias-bios_token_set_recall": 0.5978989905704735, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 1550 }, { "epoch": 6.87, "learning_rate": 0.001, "loss": 1.6451, "step": 1560 }, { "epoch": 6.93, "learning_rate": 0.001, "loss": 1.9438, "step": 1572 }, { "epoch": 6.96, "eval_ag_news_accuracy": 0.29990625, "eval_ag_news_bleu_score": 4.615222668755698, "eval_ag_news_bleu_score_sem": 0.15290728039133886, "eval_ag_news_emb_cos_sim": 0.8042817115783691, "eval_ag_news_emb_cos_sim_sem": 0.0051403454856110645, "eval_ag_news_emb_top1_equal": 0.27799999713897705, "eval_ag_news_emb_top1_equal_sem": 0.0200558347666307, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5743329524993896, "eval_ag_news_n_ngrams_match_1": 12.996, "eval_ag_news_n_ngrams_match_2": 2.748, "eval_ag_news_n_ngrams_match_3": 0.726, "eval_ag_news_num_pred_words": 40.506, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 35.67081874749316, "eval_ag_news_pred_num_tokens": 61.625, "eval_ag_news_rouge_score": 0.3565626567209188, "eval_ag_news_runtime": 7.9203, "eval_ag_news_samples_per_second": 63.129, "eval_ag_news_steps_per_second": 0.126, "eval_ag_news_token_set_f1": 0.34549790329751373, "eval_ag_news_token_set_f1_sem": 0.004972836903833072, "eval_ag_news_token_set_precision": 0.3158240685548052, "eval_ag_news_token_set_recall": 0.4027427808393305, "eval_ag_news_true_num_tokens": 56.09375, "step": 1581 }, { "epoch": 6.96, "eval_anthropic_toxic_prompts_accuracy": 0.10490625, "eval_anthropic_toxic_prompts_bleu_score": 3.541700637049288, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12635207715416444, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6750056147575378, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.005035798625001384, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.13199999928474426, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015152928667412809, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.1228177547454834, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.918, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.848, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.698, "eval_anthropic_toxic_prompts_num_pred_words": 40.474, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 22.710281574587817, "eval_anthropic_toxic_prompts_pred_num_tokens": 58.1171875, "eval_anthropic_toxic_prompts_rouge_score": 0.23509872150435485, "eval_anthropic_toxic_prompts_runtime": 7.1647, "eval_anthropic_toxic_prompts_samples_per_second": 69.787, "eval_anthropic_toxic_prompts_steps_per_second": 0.14, "eval_anthropic_toxic_prompts_token_set_f1": 0.33558649257134376, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005768792814210663, "eval_anthropic_toxic_prompts_token_set_precision": 0.43283570980153097, "eval_anthropic_toxic_prompts_token_set_recall": 0.2996083086572435, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 1581 }, { "epoch": 6.96, "eval_arxiv_accuracy": 0.42675, "eval_arxiv_bleu_score": 4.320251352031537, "eval_arxiv_bleu_score_sem": 0.12188966763297118, "eval_arxiv_emb_cos_sim": 0.7463886737823486, "eval_arxiv_emb_cos_sim_sem": 0.005261778593770951, "eval_arxiv_emb_top1_equal": 0.25600001215934753, "eval_arxiv_emb_top1_equal_sem": 0.019536923601457774, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.028258800506592, "eval_arxiv_n_ngrams_match_1": 14.756, "eval_arxiv_n_ngrams_match_2": 2.816, "eval_arxiv_n_ngrams_match_3": 0.634, "eval_arxiv_num_pred_words": 38.016, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 20.66122593497113, "eval_arxiv_pred_num_tokens": 61.765625, "eval_arxiv_rouge_score": 0.354438503993915, "eval_arxiv_runtime": 8.1309, "eval_arxiv_samples_per_second": 61.494, "eval_arxiv_steps_per_second": 0.123, "eval_arxiv_token_set_f1": 0.36116556687680584, "eval_arxiv_token_set_f1_sem": 0.004720914996476666, "eval_arxiv_token_set_precision": 0.30851772412283185, "eval_arxiv_token_set_recall": 0.45619999237268627, "eval_arxiv_true_num_tokens": 64.0, "step": 1581 }, { "epoch": 6.96, "eval_python_code_alpaca_accuracy": 0.15046875, "eval_python_code_alpaca_bleu_score": 5.126805918317772, "eval_python_code_alpaca_bleu_score_sem": 0.16463181667868096, "eval_python_code_alpaca_emb_cos_sim": 0.7541212439537048, "eval_python_code_alpaca_emb_cos_sim_sem": 0.004230157117169431, "eval_python_code_alpaca_emb_top1_equal": 0.15800000727176666, "eval_python_code_alpaca_emb_top1_equal_sem": 0.016328049428381567, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.77302885055542, "eval_python_code_alpaca_n_ngrams_match_1": 9.444, "eval_python_code_alpaca_n_ngrams_match_2": 2.65, "eval_python_code_alpaca_n_ngrams_match_3": 0.896, "eval_python_code_alpaca_num_pred_words": 37.202, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 16.007043602981074, "eval_python_code_alpaca_pred_num_tokens": 58.7421875, "eval_python_code_alpaca_rouge_score": 0.37821108219102967, "eval_python_code_alpaca_runtime": 11.3494, "eval_python_code_alpaca_samples_per_second": 44.055, "eval_python_code_alpaca_steps_per_second": 0.088, "eval_python_code_alpaca_token_set_f1": 0.46395396815143314, "eval_python_code_alpaca_token_set_f1_sem": 0.005517060349556724, "eval_python_code_alpaca_token_set_precision": 0.5237061818807659, "eval_python_code_alpaca_token_set_recall": 0.4361381112417352, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 1581 }, { "epoch": 6.96, "eval_wikibio_accuracy": 0.37090625, "eval_wikibio_bleu_score": 4.52711017427634, "eval_wikibio_bleu_score_sem": 0.22225989051906928, "eval_wikibio_emb_cos_sim": 0.6868708729743958, "eval_wikibio_emb_cos_sim_sem": 0.007236773502149267, "eval_wikibio_emb_top1_equal": 0.15399999916553497, "eval_wikibio_emb_top1_equal_sem": 0.016158283980625493, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.3025898933410645, "eval_wikibio_n_ngrams_match_1": 7.266, "eval_wikibio_n_ngrams_match_2": 2.21, "eval_wikibio_n_ngrams_match_3": 0.852, "eval_wikibio_num_pred_words": 28.48, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 27.182948771864357, "eval_wikibio_pred_num_tokens": 62.8125, "eval_wikibio_rouge_score": 0.2665925355815556, "eval_wikibio_runtime": 7.4128, "eval_wikibio_samples_per_second": 67.451, "eval_wikibio_steps_per_second": 0.135, "eval_wikibio_token_set_f1": 0.24587304205342644, "eval_wikibio_token_set_f1_sem": 0.007242710929915511, "eval_wikibio_token_set_precision": 0.23366527740250978, "eval_wikibio_token_set_recall": 0.29149566163466467, "eval_wikibio_true_num_tokens": 61.1328125, "step": 1581 }, { "epoch": 6.96, "eval_bias-bios_accuracy": 0.51584375, "eval_bias-bios_bleu_score": 18.48892877899192, "eval_bias-bios_bleu_score_sem": 0.7878024060804362, "eval_bias-bios_emb_cos_sim": 0.876237154006958, "eval_bias-bios_emb_cos_sim_sem": 0.0031529775249521316, "eval_bias-bios_emb_top1_equal": 0.3540000021457672, "eval_bias-bios_emb_top1_equal_sem": 0.021407582231685648, "eval_bias-bios_exact_match": 0.0, "eval_bias-bios_exact_match_sem": 0.0, "eval_bias-bios_loss": 1.7648732662200928, "eval_bias-bios_n_ngrams_match_1": 22.492, "eval_bias-bios_n_ngrams_match_2": 10.32, "eval_bias-bios_n_ngrams_match_3": 5.81, "eval_bias-bios_num_pred_words": 45.448, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 5.840832080450535, "eval_bias-bios_pred_num_tokens": 61.015625, "eval_bias-bios_rouge_score": 0.5264991873889933, "eval_bias-bios_runtime": 7.638, "eval_bias-bios_samples_per_second": 65.462, "eval_bias-bios_steps_per_second": 0.131, "eval_bias-bios_token_set_f1": 0.5566655063326945, "eval_bias-bios_token_set_f1_sem": 0.0068319374696395666, "eval_bias-bios_token_set_precision": 0.5408795242977424, "eval_bias-bios_token_set_recall": 0.5843492224289123, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 1581 }, { "epoch": 6.98, "learning_rate": 0.001, "loss": 1.8587, "step": 1584 }, { "epoch": 7.03, "learning_rate": 0.001, "loss": 1.8517, "step": 1596 }, { "epoch": 7.08, "learning_rate": 0.001, "loss": 1.9463, "step": 1608 }, { "epoch": 7.1, "eval_ag_news_accuracy": 0.30053125, "eval_ag_news_bleu_score": 4.537715333796728, "eval_ag_news_bleu_score_sem": 0.13889492068202675, "eval_ag_news_emb_cos_sim": 0.8129717111587524, "eval_ag_news_emb_cos_sim_sem": 0.004927985070672762, "eval_ag_news_emb_top1_equal": 0.2980000078678131, "eval_ag_news_emb_top1_equal_sem": 0.020475119103777986, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5397188663482666, "eval_ag_news_n_ngrams_match_1": 13.44, "eval_ag_news_n_ngrams_match_2": 2.832, "eval_ag_news_n_ngrams_match_3": 0.712, "eval_ag_news_num_pred_words": 41.938, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.45723074193958, "eval_ag_news_pred_num_tokens": 62.4296875, "eval_ag_news_rouge_score": 0.3577707860317296, "eval_ag_news_runtime": 7.5083, "eval_ag_news_samples_per_second": 66.593, "eval_ag_news_steps_per_second": 0.133, "eval_ag_news_token_set_f1": 0.34854753140569744, "eval_ag_news_token_set_f1_sem": 0.004711246773498002, "eval_ag_news_token_set_precision": 0.32687807742299585, "eval_ag_news_token_set_recall": 0.3920744588382431, "eval_ag_news_true_num_tokens": 56.09375, "step": 1612 }, { "epoch": 7.1, "eval_anthropic_toxic_prompts_accuracy": 0.10565625, "eval_anthropic_toxic_prompts_bleu_score": 3.6555507241528207, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.13471216566876523, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.694108784198761, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00449306898432817, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.13199999928474426, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015152928667412809, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.1062278747558594, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.302, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.986, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.792, "eval_anthropic_toxic_prompts_num_pred_words": 42.614, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 22.336628726505555, "eval_anthropic_toxic_prompts_pred_num_tokens": 61.28125, "eval_anthropic_toxic_prompts_rouge_score": 0.2389581662106168, "eval_anthropic_toxic_prompts_runtime": 7.2584, "eval_anthropic_toxic_prompts_samples_per_second": 68.886, "eval_anthropic_toxic_prompts_steps_per_second": 0.138, "eval_anthropic_toxic_prompts_token_set_f1": 0.34118271658652294, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005605196304157451, "eval_anthropic_toxic_prompts_token_set_precision": 0.4604330151107107, "eval_anthropic_toxic_prompts_token_set_recall": 0.2952580469903798, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 1612 }, { "epoch": 7.1, "eval_arxiv_accuracy": 0.42734375, "eval_arxiv_bleu_score": 4.4833449799753495, "eval_arxiv_bleu_score_sem": 0.13133602727160468, "eval_arxiv_emb_cos_sim": 0.7571825385093689, "eval_arxiv_emb_cos_sim_sem": 0.004914910965394936, "eval_arxiv_emb_top1_equal": 0.2639999985694885, "eval_arxiv_emb_top1_equal_sem": 0.019732885240582997, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.0084228515625, "eval_arxiv_n_ngrams_match_1": 15.244, "eval_arxiv_n_ngrams_match_2": 2.932, "eval_arxiv_n_ngrams_match_3": 0.676, "eval_arxiv_num_pred_words": 39.102, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 20.25542890229939, "eval_arxiv_pred_num_tokens": 62.1953125, "eval_arxiv_rouge_score": 0.3632984432863542, "eval_arxiv_runtime": 9.2134, "eval_arxiv_samples_per_second": 54.269, "eval_arxiv_steps_per_second": 0.109, "eval_arxiv_token_set_f1": 0.3692277571283139, "eval_arxiv_token_set_f1_sem": 0.004558893026175048, "eval_arxiv_token_set_precision": 0.320379446657654, "eval_arxiv_token_set_recall": 0.4525783748505137, "eval_arxiv_true_num_tokens": 64.0, "step": 1612 }, { "epoch": 7.1, "eval_python_code_alpaca_accuracy": 0.15453125, "eval_python_code_alpaca_bleu_score": 5.3941285972882165, "eval_python_code_alpaca_bleu_score_sem": 0.17369563512429562, "eval_python_code_alpaca_emb_cos_sim": 0.7757654786109924, "eval_python_code_alpaca_emb_cos_sim_sem": 0.0036044649847583542, "eval_python_code_alpaca_emb_top1_equal": 0.18000000715255737, "eval_python_code_alpaca_emb_top1_equal_sem": 0.017198593316470962, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.6887404918670654, "eval_python_code_alpaca_n_ngrams_match_1": 9.99, "eval_python_code_alpaca_n_ngrams_match_2": 2.98, "eval_python_code_alpaca_n_ngrams_match_3": 1.006, "eval_python_code_alpaca_num_pred_words": 38.514, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 14.713132934780964, "eval_python_code_alpaca_pred_num_tokens": 60.03125, "eval_python_code_alpaca_rouge_score": 0.3919716483017668, "eval_python_code_alpaca_runtime": 7.151, "eval_python_code_alpaca_samples_per_second": 69.92, "eval_python_code_alpaca_steps_per_second": 0.14, "eval_python_code_alpaca_token_set_f1": 0.469544790861787, "eval_python_code_alpaca_token_set_f1_sem": 0.005210264001859052, "eval_python_code_alpaca_token_set_precision": 0.5573700982167373, "eval_python_code_alpaca_token_set_recall": 0.42329262951935054, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 1612 }, { "epoch": 7.1, "eval_wikibio_accuracy": 0.36940625, "eval_wikibio_bleu_score": 4.849451170594448, "eval_wikibio_bleu_score_sem": 0.21065823619517224, "eval_wikibio_emb_cos_sim": 0.7106419205665588, "eval_wikibio_emb_cos_sim_sem": 0.006688869930420594, "eval_wikibio_emb_top1_equal": 0.18400000035762787, "eval_wikibio_emb_top1_equal_sem": 0.017346174301986407, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.315532684326172, "eval_wikibio_n_ngrams_match_1": 8.306, "eval_wikibio_n_ngrams_match_2": 2.522, "eval_wikibio_n_ngrams_match_3": 0.952, "eval_wikibio_num_pred_words": 31.694, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 27.53705863793149, "eval_wikibio_pred_num_tokens": 62.8671875, "eval_wikibio_rouge_score": 0.29178624670056774, "eval_wikibio_runtime": 7.3455, "eval_wikibio_samples_per_second": 68.069, "eval_wikibio_steps_per_second": 0.136, "eval_wikibio_token_set_f1": 0.2695935871972429, "eval_wikibio_token_set_f1_sem": 0.006982368442738906, "eval_wikibio_token_set_precision": 0.26487933820827947, "eval_wikibio_token_set_recall": 0.30313653889791964, "eval_wikibio_true_num_tokens": 61.1328125, "step": 1612 }, { "epoch": 7.1, "eval_bias-bios_accuracy": 0.517625, "eval_bias-bios_bleu_score": 18.311460397478225, "eval_bias-bios_bleu_score_sem": 0.7992379003992439, "eval_bias-bios_emb_cos_sim": 0.8802025318145752, "eval_bias-bios_emb_cos_sim_sem": 0.003091392151627784, "eval_bias-bios_emb_top1_equal": 0.328000009059906, "eval_bias-bios_emb_top1_equal_sem": 0.02101702640661987, "eval_bias-bios_exact_match": 0.002, "eval_bias-bios_exact_match_sem": 0.002, "eval_bias-bios_loss": 1.7391340732574463, "eval_bias-bios_n_ngrams_match_1": 23.012, "eval_bias-bios_n_ngrams_match_2": 10.444, "eval_bias-bios_n_ngrams_match_3": 5.88, "eval_bias-bios_num_pred_words": 46.702, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 5.692412076041388, "eval_bias-bios_pred_num_tokens": 62.2265625, "eval_bias-bios_rouge_score": 0.5305381661552371, "eval_bias-bios_runtime": 7.6578, "eval_bias-bios_samples_per_second": 65.293, "eval_bias-bios_steps_per_second": 0.131, "eval_bias-bios_token_set_f1": 0.5614744301701937, "eval_bias-bios_token_set_f1_sem": 0.0066862192501673575, "eval_bias-bios_token_set_precision": 0.554485965307712, "eval_bias-bios_token_set_recall": 0.58068459361329, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 1612 }, { "epoch": 7.14, "learning_rate": 0.001, "loss": 1.9251, "step": 1620 }, { "epoch": 7.19, "learning_rate": 0.001, "loss": 1.7034, "step": 1632 }, { "epoch": 7.24, "eval_ag_news_accuracy": 0.3025625, "eval_ag_news_bleu_score": 4.286001981629996, "eval_ag_news_bleu_score_sem": 0.15687854341520385, "eval_ag_news_emb_cos_sim": 0.7988663911819458, "eval_ag_news_emb_cos_sim_sem": 0.004817627200428409, "eval_ag_news_emb_top1_equal": 0.23600000143051147, "eval_ag_news_emb_top1_equal_sem": 0.019008700160065242, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.6356382369995117, "eval_ag_news_n_ngrams_match_1": 11.506, "eval_ag_news_n_ngrams_match_2": 2.332, "eval_ag_news_n_ngrams_match_3": 0.646, "eval_ag_news_num_pred_words": 29.562, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 37.92605098324666, "eval_ag_news_pred_num_tokens": 44.359375, "eval_ag_news_rouge_score": 0.3550396467086911, "eval_ag_news_runtime": 7.2363, "eval_ag_news_samples_per_second": 69.096, "eval_ag_news_steps_per_second": 0.138, "eval_ag_news_token_set_f1": 0.3455860395867242, "eval_ag_news_token_set_f1_sem": 0.004866392561009078, "eval_ag_news_token_set_precision": 0.29738271452813736, "eval_ag_news_token_set_recall": 0.43171517811120513, "eval_ag_news_true_num_tokens": 56.09375, "step": 1643 }, { "epoch": 7.24, "eval_anthropic_toxic_prompts_accuracy": 0.110375, "eval_anthropic_toxic_prompts_bleu_score": 5.980303037695327, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.2250752063017299, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.7024601697921753, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004742135387992094, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1459999978542328, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015807205702664997, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 2.9766438007354736, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.558, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.69, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.606, "eval_anthropic_toxic_prompts_num_pred_words": 23.47, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 19.621851167582104, "eval_anthropic_toxic_prompts_pred_num_tokens": 33.8828125, "eval_anthropic_toxic_prompts_rouge_score": 0.3285517353029267, "eval_anthropic_toxic_prompts_runtime": 6.9949, "eval_anthropic_toxic_prompts_samples_per_second": 71.48, "eval_anthropic_toxic_prompts_steps_per_second": 0.143, "eval_anthropic_toxic_prompts_token_set_f1": 0.3533495443027637, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006121045898866037, "eval_anthropic_toxic_prompts_token_set_precision": 0.42009567292486544, "eval_anthropic_toxic_prompts_token_set_recall": 0.32878068071900995, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 1643 }, { "epoch": 7.24, "eval_arxiv_accuracy": 0.4180625, "eval_arxiv_bleu_score": 3.6374956868576898, "eval_arxiv_bleu_score_sem": 0.09995697080577728, "eval_arxiv_emb_cos_sim": 0.7477392554283142, "eval_arxiv_emb_cos_sim_sem": 0.004550795567367016, "eval_arxiv_emb_top1_equal": 0.15800000727176666, "eval_arxiv_emb_top1_equal_sem": 0.01632805076118194, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.1020619869232178, "eval_arxiv_n_ngrams_match_1": 13.456, "eval_arxiv_n_ngrams_match_2": 2.482, "eval_arxiv_n_ngrams_match_3": 0.53, "eval_arxiv_num_pred_words": 29.192, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 22.24377038967276, "eval_arxiv_pred_num_tokens": 47.0859375, "eval_arxiv_rouge_score": 0.36408465684598523, "eval_arxiv_runtime": 7.2909, "eval_arxiv_samples_per_second": 68.579, "eval_arxiv_steps_per_second": 0.137, "eval_arxiv_token_set_f1": 0.3662130619620601, "eval_arxiv_token_set_f1_sem": 0.0043163861426842275, "eval_arxiv_token_set_precision": 0.30295154357919024, "eval_arxiv_token_set_recall": 0.4763281220006056, "eval_arxiv_true_num_tokens": 64.0, "step": 1643 }, { "epoch": 7.24, "eval_python_code_alpaca_accuracy": 0.165625, "eval_python_code_alpaca_bleu_score": 7.944977507185146, "eval_python_code_alpaca_bleu_score_sem": 0.25233973473450744, "eval_python_code_alpaca_emb_cos_sim": 0.7998301982879639, "eval_python_code_alpaca_emb_cos_sim_sem": 0.003677243882031509, "eval_python_code_alpaca_emb_top1_equal": 0.21799999475479126, "eval_python_code_alpaca_emb_top1_equal_sem": 0.018483376892288548, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.479875326156616, "eval_python_code_alpaca_n_ngrams_match_1": 9.134, "eval_python_code_alpaca_n_ngrams_match_2": 2.376, "eval_python_code_alpaca_n_ngrams_match_3": 0.756, "eval_python_code_alpaca_num_pred_words": 22.662, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 11.939775747320335, "eval_python_code_alpaca_pred_num_tokens": 34.484375, "eval_python_code_alpaca_rouge_score": 0.48839579648916653, "eval_python_code_alpaca_runtime": 6.9291, "eval_python_code_alpaca_samples_per_second": 72.159, "eval_python_code_alpaca_steps_per_second": 0.144, "eval_python_code_alpaca_token_set_f1": 0.5011354467022529, "eval_python_code_alpaca_token_set_f1_sem": 0.005571922672352036, "eval_python_code_alpaca_token_set_precision": 0.5274275229077607, "eval_python_code_alpaca_token_set_recall": 0.4925627715073534, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 1643 }, { "epoch": 7.24, "eval_wikibio_accuracy": 0.36325, "eval_wikibio_bleu_score": 5.50352870668292, "eval_wikibio_bleu_score_sem": 0.22781839034644985, "eval_wikibio_emb_cos_sim": 0.7406373620033264, "eval_wikibio_emb_cos_sim_sem": 0.005716939917810654, "eval_wikibio_emb_top1_equal": 0.1979999989271164, "eval_wikibio_emb_top1_equal_sem": 0.017838958581409683, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.441089391708374, "eval_wikibio_n_ngrams_match_1": 8.706, "eval_wikibio_n_ngrams_match_2": 2.6, "eval_wikibio_n_ngrams_match_3": 0.922, "eval_wikibio_num_pred_words": 29.67, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 31.220951494609466, "eval_wikibio_pred_num_tokens": 55.625, "eval_wikibio_rouge_score": 0.3350927900544991, "eval_wikibio_runtime": 7.1601, "eval_wikibio_samples_per_second": 69.831, "eval_wikibio_steps_per_second": 0.14, "eval_wikibio_token_set_f1": 0.29396633540713607, "eval_wikibio_token_set_f1_sem": 0.006017030654109439, "eval_wikibio_token_set_precision": 0.28518570292078954, "eval_wikibio_token_set_recall": 0.3219513805606757, "eval_wikibio_true_num_tokens": 61.1328125, "step": 1643 }, { "epoch": 7.24, "eval_bias-bios_accuracy": 0.51740625, "eval_bias-bios_bleu_score": 18.565207501173806, "eval_bias-bios_bleu_score_sem": 0.8562128078552043, "eval_bias-bios_emb_cos_sim": 0.8686398267745972, "eval_bias-bios_emb_cos_sim_sem": 0.0033610366598804567, "eval_bias-bios_emb_top1_equal": 0.2980000078678131, "eval_bias-bios_emb_top1_equal_sem": 0.020475119103777986, "eval_bias-bios_exact_match": 0.002, "eval_bias-bios_exact_match_sem": 0.002, "eval_bias-bios_loss": 1.760959506034851, "eval_bias-bios_n_ngrams_match_1": 19.984, "eval_bias-bios_n_ngrams_match_2": 9.108, "eval_bias-bios_n_ngrams_match_3": 5.208, "eval_bias-bios_num_pred_words": 33.212, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 5.818017139631238, "eval_bias-bios_pred_num_tokens": 45.890625, "eval_bias-bios_rouge_score": 0.5452637587141534, "eval_bias-bios_runtime": 7.3863, "eval_bias-bios_samples_per_second": 67.693, "eval_bias-bios_steps_per_second": 0.135, "eval_bias-bios_token_set_f1": 0.5557605735112274, "eval_bias-bios_token_set_f1_sem": 0.00695490868771064, "eval_bias-bios_token_set_precision": 0.5039103965895205, "eval_bias-bios_token_set_recall": 0.6343332942788436, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 1643 }, { "epoch": 7.24, "learning_rate": 0.001, "loss": 1.7573, "step": 1644 }, { "epoch": 7.3, "learning_rate": 0.001, "loss": 1.9469, "step": 1656 }, { "epoch": 7.35, "learning_rate": 0.001, "loss": 1.926, "step": 1668 }, { "epoch": 7.37, "eval_ag_news_accuracy": 0.300375, "eval_ag_news_bleu_score": 4.650876326075321, "eval_ag_news_bleu_score_sem": 0.15522337420778148, "eval_ag_news_emb_cos_sim": 0.801328182220459, "eval_ag_news_emb_cos_sim_sem": 0.0050656023737853636, "eval_ag_news_emb_top1_equal": 0.27799999713897705, "eval_ag_news_emb_top1_equal_sem": 0.0200558347666307, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5955419540405273, "eval_ag_news_n_ngrams_match_1": 12.808, "eval_ag_news_n_ngrams_match_2": 2.664, "eval_ag_news_n_ngrams_match_3": 0.696, "eval_ag_news_num_pred_words": 38.928, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 36.43544097249612, "eval_ag_news_pred_num_tokens": 58.90625, "eval_ag_news_rouge_score": 0.35424002923158404, "eval_ag_news_runtime": 9.3311, "eval_ag_news_samples_per_second": 53.584, "eval_ag_news_steps_per_second": 0.107, "eval_ag_news_token_set_f1": 0.34464839824272214, "eval_ag_news_token_set_f1_sem": 0.004853024209760347, "eval_ag_news_token_set_precision": 0.3142035266838974, "eval_ag_news_token_set_recall": 0.4011208877740866, "eval_ag_news_true_num_tokens": 56.09375, "step": 1674 }, { "epoch": 7.37, "eval_anthropic_toxic_prompts_accuracy": 0.10515625, "eval_anthropic_toxic_prompts_bleu_score": 3.7182273069339726, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.13410780348787632, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6839096546173096, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004770681972822426, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.16200000047683716, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.016494123019099097, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.111764430999756, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.906, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.812, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.66, "eval_anthropic_toxic_prompts_num_pred_words": 37.902, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 22.46063970785518, "eval_anthropic_toxic_prompts_pred_num_tokens": 54.546875, "eval_anthropic_toxic_prompts_rouge_score": 0.24805704357619135, "eval_anthropic_toxic_prompts_runtime": 7.8945, "eval_anthropic_toxic_prompts_samples_per_second": 63.336, "eval_anthropic_toxic_prompts_steps_per_second": 0.127, "eval_anthropic_toxic_prompts_token_set_f1": 0.3378408216519142, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005726325781171044, "eval_anthropic_toxic_prompts_token_set_precision": 0.436426569901928, "eval_anthropic_toxic_prompts_token_set_recall": 0.2997263766470801, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 1674 }, { "epoch": 7.37, "eval_arxiv_accuracy": 0.42275, "eval_arxiv_bleu_score": 4.503449524355907, "eval_arxiv_bleu_score_sem": 0.12322119158722054, "eval_arxiv_emb_cos_sim": 0.7579543590545654, "eval_arxiv_emb_cos_sim_sem": 0.004369948550893629, "eval_arxiv_emb_top1_equal": 0.23999999463558197, "eval_arxiv_emb_top1_equal_sem": 0.019118866773455794, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.0502963066101074, "eval_arxiv_n_ngrams_match_1": 15.374, "eval_arxiv_n_ngrams_match_2": 2.934, "eval_arxiv_n_ngrams_match_3": 0.662, "eval_arxiv_num_pred_words": 37.8, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 21.12160196569762, "eval_arxiv_pred_num_tokens": 60.6640625, "eval_arxiv_rouge_score": 0.3747585887734518, "eval_arxiv_runtime": 7.6589, "eval_arxiv_samples_per_second": 65.283, "eval_arxiv_steps_per_second": 0.131, "eval_arxiv_token_set_f1": 0.37588382467047504, "eval_arxiv_token_set_f1_sem": 0.004298183217087768, "eval_arxiv_token_set_precision": 0.3236729986617741, "eval_arxiv_token_set_recall": 0.4642341614634383, "eval_arxiv_true_num_tokens": 64.0, "step": 1674 }, { "epoch": 7.37, "eval_python_code_alpaca_accuracy": 0.1518125, "eval_python_code_alpaca_bleu_score": 5.5349821219571975, "eval_python_code_alpaca_bleu_score_sem": 0.18336546287590194, "eval_python_code_alpaca_emb_cos_sim": 0.7575463652610779, "eval_python_code_alpaca_emb_cos_sim_sem": 0.004372302276355746, "eval_python_code_alpaca_emb_top1_equal": 0.19599999487400055, "eval_python_code_alpaca_emb_top1_equal_sem": 0.01777075118942252, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.7344582080841064, "eval_python_code_alpaca_n_ngrams_match_1": 9.622, "eval_python_code_alpaca_n_ngrams_match_2": 2.688, "eval_python_code_alpaca_n_ngrams_match_3": 0.892, "eval_python_code_alpaca_num_pred_words": 35.136, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 15.401396822777677, "eval_python_code_alpaca_pred_num_tokens": 54.4921875, "eval_python_code_alpaca_rouge_score": 0.3955248696967041, "eval_python_code_alpaca_runtime": 7.2636, "eval_python_code_alpaca_samples_per_second": 68.836, "eval_python_code_alpaca_steps_per_second": 0.138, "eval_python_code_alpaca_token_set_f1": 0.4712736813612866, "eval_python_code_alpaca_token_set_f1_sem": 0.005484467071758197, "eval_python_code_alpaca_token_set_precision": 0.5345270958066641, "eval_python_code_alpaca_token_set_recall": 0.4378345832325755, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 1674 }, { "epoch": 7.37, "eval_wikibio_accuracy": 0.35821875, "eval_wikibio_bleu_score": 4.254597470202276, "eval_wikibio_bleu_score_sem": 0.19247988367918872, "eval_wikibio_emb_cos_sim": 0.6875274777412415, "eval_wikibio_emb_cos_sim_sem": 0.007395272788336099, "eval_wikibio_emb_top1_equal": 0.14399999380111694, "eval_wikibio_emb_top1_equal_sem": 0.01571693380047095, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.3458445072174072, "eval_wikibio_n_ngrams_match_1": 7.11, "eval_wikibio_n_ngrams_match_2": 2.1, "eval_wikibio_n_ngrams_match_3": 0.726, "eval_wikibio_num_pred_words": 27.93, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 28.384536493615464, "eval_wikibio_pred_num_tokens": 62.28125, "eval_wikibio_rouge_score": 0.26586508728125835, "eval_wikibio_runtime": 7.381, "eval_wikibio_samples_per_second": 67.741, "eval_wikibio_steps_per_second": 0.135, "eval_wikibio_token_set_f1": 0.24263158653288092, "eval_wikibio_token_set_f1_sem": 0.007017116461473505, "eval_wikibio_token_set_precision": 0.22963411681555207, "eval_wikibio_token_set_recall": 0.2874055442065437, "eval_wikibio_true_num_tokens": 61.1328125, "step": 1674 }, { "epoch": 7.37, "eval_bias-bios_accuracy": 0.51721875, "eval_bias-bios_bleu_score": 18.74315096320846, "eval_bias-bios_bleu_score_sem": 0.8052132527421368, "eval_bias-bios_emb_cos_sim": 0.8761293888092041, "eval_bias-bios_emb_cos_sim_sem": 0.002969192683252316, "eval_bias-bios_emb_top1_equal": 0.3199999928474426, "eval_bias-bios_emb_top1_equal_sem": 0.0208823415975322, "eval_bias-bios_exact_match": 0.002, "eval_bias-bios_exact_match_sem": 0.002, "eval_bias-bios_loss": 1.7489280700683594, "eval_bias-bios_n_ngrams_match_1": 22.304, "eval_bias-bios_n_ngrams_match_2": 10.202, "eval_bias-bios_n_ngrams_match_3": 5.706, "eval_bias-bios_num_pred_words": 43.782, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 5.748437450088057, "eval_bias-bios_pred_num_tokens": 59.0859375, "eval_bias-bios_rouge_score": 0.5344921446826842, "eval_bias-bios_runtime": 7.6401, "eval_bias-bios_samples_per_second": 65.444, "eval_bias-bios_steps_per_second": 0.131, "eval_bias-bios_token_set_f1": 0.559229653634193, "eval_bias-bios_token_set_f1_sem": 0.006654143164760632, "eval_bias-bios_token_set_precision": 0.5387286363785764, "eval_bias-bios_token_set_recall": 0.5919811309672199, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 1674 }, { "epoch": 7.4, "learning_rate": 0.001, "loss": 1.7584, "step": 1680 }, { "epoch": 7.45, "learning_rate": 0.001, "loss": 1.6854, "step": 1692 }, { "epoch": 7.51, "learning_rate": 0.001, "loss": 1.9765, "step": 1704 }, { "epoch": 7.51, "eval_ag_news_accuracy": 0.30271875, "eval_ag_news_bleu_score": 4.851582613767712, "eval_ag_news_bleu_score_sem": 0.1634093529875337, "eval_ag_news_emb_cos_sim": 0.810145914554596, "eval_ag_news_emb_cos_sim_sem": 0.004267049697947543, "eval_ag_news_emb_top1_equal": 0.2800000011920929, "eval_ag_news_emb_top1_equal_sem": 0.02009995045904072, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.540452718734741, "eval_ag_news_n_ngrams_match_1": 13.362, "eval_ag_news_n_ngrams_match_2": 2.876, "eval_ag_news_n_ngrams_match_3": 0.8, "eval_ag_news_num_pred_words": 41.826, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.4825265435078, "eval_ag_news_pred_num_tokens": 62.125, "eval_ag_news_rouge_score": 0.35383916590173115, "eval_ag_news_runtime": 7.4242, "eval_ag_news_samples_per_second": 67.347, "eval_ag_news_steps_per_second": 0.135, "eval_ag_news_token_set_f1": 0.3506777530746214, "eval_ag_news_token_set_f1_sem": 0.00456835211840307, "eval_ag_news_token_set_precision": 0.3269254507482778, "eval_ag_news_token_set_recall": 0.39850577337557036, "eval_ag_news_true_num_tokens": 56.09375, "step": 1705 }, { "epoch": 7.51, "eval_anthropic_toxic_prompts_accuracy": 0.10628125, "eval_anthropic_toxic_prompts_bleu_score": 3.7370627662482057, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1285166203043838, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6878765225410461, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004509487085746684, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1459999978542328, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015807205702664997, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.137979030609131, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.262, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.056, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.79, "eval_anthropic_toxic_prompts_num_pred_words": 41.578, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 23.057221800983054, "eval_anthropic_toxic_prompts_pred_num_tokens": 60.0859375, "eval_anthropic_toxic_prompts_rouge_score": 0.24340618440616155, "eval_anthropic_toxic_prompts_runtime": 7.1278, "eval_anthropic_toxic_prompts_samples_per_second": 70.148, "eval_anthropic_toxic_prompts_steps_per_second": 0.14, "eval_anthropic_toxic_prompts_token_set_f1": 0.34041683902321007, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005818452190683891, "eval_anthropic_toxic_prompts_token_set_precision": 0.45550169072220287, "eval_anthropic_toxic_prompts_token_set_recall": 0.2965704042505173, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 1705 }, { "epoch": 7.51, "eval_arxiv_accuracy": 0.42359375, "eval_arxiv_bleu_score": 4.506231542920421, "eval_arxiv_bleu_score_sem": 0.12085288715182764, "eval_arxiv_emb_cos_sim": 0.7582602500915527, "eval_arxiv_emb_cos_sim_sem": 0.004840408090836815, "eval_arxiv_emb_top1_equal": 0.2879999876022339, "eval_arxiv_emb_top1_equal_sem": 0.020271503192099565, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.0391695499420166, "eval_arxiv_n_ngrams_match_1": 15.622, "eval_arxiv_n_ngrams_match_2": 2.95, "eval_arxiv_n_ngrams_match_3": 0.67, "eval_arxiv_num_pred_words": 39.402, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 20.88788968126323, "eval_arxiv_pred_num_tokens": 61.8359375, "eval_arxiv_rouge_score": 0.3665985989173553, "eval_arxiv_runtime": 8.027, "eval_arxiv_samples_per_second": 62.29, "eval_arxiv_steps_per_second": 0.125, "eval_arxiv_token_set_f1": 0.3723423502046456, "eval_arxiv_token_set_f1_sem": 0.004292290646390943, "eval_arxiv_token_set_precision": 0.3257064091259291, "eval_arxiv_token_set_recall": 0.4477068202273733, "eval_arxiv_true_num_tokens": 64.0, "step": 1705 }, { "epoch": 7.51, "eval_python_code_alpaca_accuracy": 0.152625, "eval_python_code_alpaca_bleu_score": 5.337657457190803, "eval_python_code_alpaca_bleu_score_sem": 0.1613635908932767, "eval_python_code_alpaca_emb_cos_sim": 0.7676838040351868, "eval_python_code_alpaca_emb_cos_sim_sem": 0.0037802830122170557, "eval_python_code_alpaca_emb_top1_equal": 0.1720000058412552, "eval_python_code_alpaca_emb_top1_equal_sem": 0.01689386850274998, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.7153306007385254, "eval_python_code_alpaca_n_ngrams_match_1": 9.8, "eval_python_code_alpaca_n_ngrams_match_2": 2.85, "eval_python_code_alpaca_n_ngrams_match_3": 0.902, "eval_python_code_alpaca_num_pred_words": 37.092, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 15.109604492674418, "eval_python_code_alpaca_pred_num_tokens": 59.109375, "eval_python_code_alpaca_rouge_score": 0.38660030871861584, "eval_python_code_alpaca_runtime": 7.2323, "eval_python_code_alpaca_samples_per_second": 69.134, "eval_python_code_alpaca_steps_per_second": 0.138, "eval_python_code_alpaca_token_set_f1": 0.464222601369294, "eval_python_code_alpaca_token_set_f1_sem": 0.005151611944424277, "eval_python_code_alpaca_token_set_precision": 0.5458459467726668, "eval_python_code_alpaca_token_set_recall": 0.4191006191223907, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 1705 }, { "epoch": 7.51, "eval_wikibio_accuracy": 0.3705625, "eval_wikibio_bleu_score": 4.9118433121021585, "eval_wikibio_bleu_score_sem": 0.19560003433430423, "eval_wikibio_emb_cos_sim": 0.7209903001785278, "eval_wikibio_emb_cos_sim_sem": 0.00636168676398194, "eval_wikibio_emb_top1_equal": 0.1720000058412552, "eval_wikibio_emb_top1_equal_sem": 0.01689386850274998, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.3367502689361572, "eval_wikibio_n_ngrams_match_1": 8.672, "eval_wikibio_n_ngrams_match_2": 2.642, "eval_wikibio_n_ngrams_match_3": 0.964, "eval_wikibio_num_pred_words": 32.832, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 28.127570979078257, "eval_wikibio_pred_num_tokens": 62.8671875, "eval_wikibio_rouge_score": 0.30578451335993057, "eval_wikibio_runtime": 7.2607, "eval_wikibio_samples_per_second": 68.864, "eval_wikibio_steps_per_second": 0.138, "eval_wikibio_token_set_f1": 0.28067397282174583, "eval_wikibio_token_set_f1_sem": 0.006536335756085939, "eval_wikibio_token_set_precision": 0.27792502257742313, "eval_wikibio_token_set_recall": 0.30646776239879864, "eval_wikibio_true_num_tokens": 61.1328125, "step": 1705 }, { "epoch": 7.51, "eval_bias-bios_accuracy": 0.5183125, "eval_bias-bios_bleu_score": 18.417151799164678, "eval_bias-bios_bleu_score_sem": 0.8026136252860484, "eval_bias-bios_emb_cos_sim": 0.878588080406189, "eval_bias-bios_emb_cos_sim_sem": 0.0028087918241305595, "eval_bias-bios_emb_top1_equal": 0.36000001430511475, "eval_bias-bios_emb_top1_equal_sem": 0.021487751507037762, "eval_bias-bios_exact_match": 0.002, "eval_bias-bios_exact_match_sem": 0.002, "eval_bias-bios_loss": 1.7430189847946167, "eval_bias-bios_n_ngrams_match_1": 22.904, "eval_bias-bios_n_ngrams_match_2": 10.322, "eval_bias-bios_n_ngrams_match_3": 5.762, "eval_bias-bios_num_pred_words": 46.048, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 5.714569605543155, "eval_bias-bios_pred_num_tokens": 61.546875, "eval_bias-bios_rouge_score": 0.5311399669704344, "eval_bias-bios_runtime": 7.5917, "eval_bias-bios_samples_per_second": 65.862, "eval_bias-bios_steps_per_second": 0.132, "eval_bias-bios_token_set_f1": 0.5585373917793387, "eval_bias-bios_token_set_f1_sem": 0.006650315588312056, "eval_bias-bios_token_set_precision": 0.5507608675345065, "eval_bias-bios_token_set_recall": 0.5772023876296363, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 1705 }, { "epoch": 7.56, "learning_rate": 0.001, "loss": 1.9347, "step": 1716 }, { "epoch": 7.61, "learning_rate": 0.001, "loss": 1.8096, "step": 1728 }, { "epoch": 7.65, "eval_ag_news_accuracy": 0.30046875, "eval_ag_news_bleu_score": 4.6886501818700195, "eval_ag_news_bleu_score_sem": 0.17742679602856962, "eval_ag_news_emb_cos_sim": 0.8065664172172546, "eval_ag_news_emb_cos_sim_sem": 0.004748064350459948, "eval_ag_news_emb_top1_equal": 0.28200000524520874, "eval_ag_news_emb_top1_equal_sem": 0.02014357168251164, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.650249481201172, "eval_ag_news_n_ngrams_match_1": 11.962, "eval_ag_news_n_ngrams_match_2": 2.542, "eval_ag_news_n_ngrams_match_3": 0.694, "eval_ag_news_num_pred_words": 31.874, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 38.48426595238064, "eval_ag_news_pred_num_tokens": 49.3828125, "eval_ag_news_rouge_score": 0.3670479319056318, "eval_ag_news_runtime": 7.829, "eval_ag_news_samples_per_second": 63.865, "eval_ag_news_steps_per_second": 0.128, "eval_ag_news_token_set_f1": 0.3471564789221244, "eval_ag_news_token_set_f1_sem": 0.004798216383031354, "eval_ag_news_token_set_precision": 0.30620342693026115, "eval_ag_news_token_set_recall": 0.4203662546331646, "eval_ag_news_true_num_tokens": 56.09375, "step": 1736 }, { "epoch": 7.65, "eval_anthropic_toxic_prompts_accuracy": 0.1083125, "eval_anthropic_toxic_prompts_bleu_score": 5.104559842212071, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.19071219001558767, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.699446976184845, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004793991984979677, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.12999999523162842, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015055010489467818, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 2.9995689392089844, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.656, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.686, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.626, "eval_anthropic_toxic_prompts_num_pred_words": 27.27, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 20.07688070156649, "eval_anthropic_toxic_prompts_pred_num_tokens": 39.6015625, "eval_anthropic_toxic_prompts_rouge_score": 0.3026076267347825, "eval_anthropic_toxic_prompts_runtime": 7.8439, "eval_anthropic_toxic_prompts_samples_per_second": 63.743, "eval_anthropic_toxic_prompts_steps_per_second": 0.127, "eval_anthropic_toxic_prompts_token_set_f1": 0.34725671483100823, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0058297886251437794, "eval_anthropic_toxic_prompts_token_set_precision": 0.4244084345903105, "eval_anthropic_toxic_prompts_token_set_recall": 0.31743953495929456, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 1736 }, { "epoch": 7.65, "eval_arxiv_accuracy": 0.41196875, "eval_arxiv_bleu_score": 3.842853544571142, "eval_arxiv_bleu_score_sem": 0.10663368564080987, "eval_arxiv_emb_cos_sim": 0.7558121681213379, "eval_arxiv_emb_cos_sim_sem": 0.00432601311973432, "eval_arxiv_emb_top1_equal": 0.20600000023841858, "eval_arxiv_emb_top1_equal_sem": 0.018104793612990725, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.1280531883239746, "eval_arxiv_n_ngrams_match_1": 14.044, "eval_arxiv_n_ngrams_match_2": 2.544, "eval_arxiv_n_ngrams_match_3": 0.524, "eval_arxiv_num_pred_words": 31.56, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 22.829491530931623, "eval_arxiv_pred_num_tokens": 50.46875, "eval_arxiv_rouge_score": 0.3695066100625418, "eval_arxiv_runtime": 7.7611, "eval_arxiv_samples_per_second": 64.424, "eval_arxiv_steps_per_second": 0.129, "eval_arxiv_token_set_f1": 0.3668300551628085, "eval_arxiv_token_set_f1_sem": 0.004175076327889789, "eval_arxiv_token_set_precision": 0.30894036995626606, "eval_arxiv_token_set_recall": 0.46467032010595094, "eval_arxiv_true_num_tokens": 64.0, "step": 1736 }, { "epoch": 7.65, "eval_python_code_alpaca_accuracy": 0.1555, "eval_python_code_alpaca_bleu_score": 6.828216796264504, "eval_python_code_alpaca_bleu_score_sem": 0.22378378594816778, "eval_python_code_alpaca_emb_cos_sim": 0.7818934321403503, "eval_python_code_alpaca_emb_cos_sim_sem": 0.004398725043788634, "eval_python_code_alpaca_emb_top1_equal": 0.19200000166893005, "eval_python_code_alpaca_emb_top1_equal_sem": 0.01763218126724194, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.5831587314605713, "eval_python_code_alpaca_n_ngrams_match_1": 9.108, "eval_python_code_alpaca_n_ngrams_match_2": 2.44, "eval_python_code_alpaca_n_ngrams_match_3": 0.732, "eval_python_code_alpaca_num_pred_words": 25.85, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 13.238890282309018, "eval_python_code_alpaca_pred_num_tokens": 41.40625, "eval_python_code_alpaca_rouge_score": 0.4523808554731888, "eval_python_code_alpaca_runtime": 7.6902, "eval_python_code_alpaca_samples_per_second": 65.018, "eval_python_code_alpaca_steps_per_second": 0.13, "eval_python_code_alpaca_token_set_f1": 0.481507138765683, "eval_python_code_alpaca_token_set_f1_sem": 0.005669716196561488, "eval_python_code_alpaca_token_set_precision": 0.5192525976348585, "eval_python_code_alpaca_token_set_recall": 0.46509787489139526, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 1736 }, { "epoch": 7.65, "eval_wikibio_accuracy": 0.35484375, "eval_wikibio_bleu_score": 5.543856789536708, "eval_wikibio_bleu_score_sem": 0.21142369496818994, "eval_wikibio_emb_cos_sim": 0.7370650768280029, "eval_wikibio_emb_cos_sim_sem": 0.005716528082494802, "eval_wikibio_emb_top1_equal": 0.18199999630451202, "eval_wikibio_emb_top1_equal_sem": 0.017272772986938162, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.5122337341308594, "eval_wikibio_n_ngrams_match_1": 8.5, "eval_wikibio_n_ngrams_match_2": 2.59, "eval_wikibio_n_ngrams_match_3": 0.9, "eval_wikibio_num_pred_words": 28.85, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 33.52306582968505, "eval_wikibio_pred_num_tokens": 54.8515625, "eval_wikibio_rouge_score": 0.33258407318876426, "eval_wikibio_runtime": 7.8711, "eval_wikibio_samples_per_second": 63.523, "eval_wikibio_steps_per_second": 0.127, "eval_wikibio_token_set_f1": 0.29700381328772274, "eval_wikibio_token_set_f1_sem": 0.00600601802755354, "eval_wikibio_token_set_precision": 0.28324158592970555, "eval_wikibio_token_set_recall": 0.3295388378188429, "eval_wikibio_true_num_tokens": 61.1328125, "step": 1736 }, { "epoch": 7.65, "eval_bias-bios_accuracy": 0.5184375, "eval_bias-bios_bleu_score": 19.40074143169447, "eval_bias-bios_bleu_score_sem": 0.8524070756221019, "eval_bias-bios_emb_cos_sim": 0.8761026859283447, "eval_bias-bios_emb_cos_sim_sem": 0.003328032190996095, "eval_bias-bios_emb_top1_equal": 0.30000001192092896, "eval_bias-bios_emb_top1_equal_sem": 0.020514426052435274, "eval_bias-bios_exact_match": 0.002, "eval_bias-bios_exact_match_sem": 0.002, "eval_bias-bios_loss": 1.7550889253616333, "eval_bias-bios_n_ngrams_match_1": 21.11, "eval_bias-bios_n_ngrams_match_2": 9.712, "eval_bias-bios_n_ngrams_match_3": 5.496, "eval_bias-bios_num_pred_words": 35.97, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 5.783962060006336, "eval_bias-bios_pred_num_tokens": 48.75, "eval_bias-bios_rouge_score": 0.5571982311817295, "eval_bias-bios_runtime": 7.6404, "eval_bias-bios_samples_per_second": 65.442, "eval_bias-bios_steps_per_second": 0.131, "eval_bias-bios_token_set_f1": 0.5653724034379729, "eval_bias-bios_token_set_f1_sem": 0.00690907806658598, "eval_bias-bios_token_set_precision": 0.5237722994373154, "eval_bias-bios_token_set_recall": 0.6273984808734474, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 1736 }, { "epoch": 7.67, "learning_rate": 0.001, "loss": 1.6229, "step": 1740 }, { "epoch": 7.72, "learning_rate": 0.001, "loss": 2.0186, "step": 1752 }, { "epoch": 7.77, "learning_rate": 0.001, "loss": 1.9309, "step": 1764 }, { "epoch": 7.78, "eval_ag_news_accuracy": 0.3005, "eval_ag_news_bleu_score": 4.722855480011007, "eval_ag_news_bleu_score_sem": 0.15753943975856985, "eval_ag_news_emb_cos_sim": 0.811168372631073, "eval_ag_news_emb_cos_sim_sem": 0.0049495894315502035, "eval_ag_news_emb_top1_equal": 0.27000001072883606, "eval_ag_news_emb_top1_equal_sem": 0.019874356669179787, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.563847541809082, "eval_ag_news_n_ngrams_match_1": 13.504, "eval_ag_news_n_ngrams_match_2": 2.902, "eval_ag_news_n_ngrams_match_3": 0.776, "eval_ag_news_num_pred_words": 41.78, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 35.29874962095071, "eval_ag_news_pred_num_tokens": 62.8203125, "eval_ag_news_rouge_score": 0.3594130357291111, "eval_ag_news_runtime": 7.5908, "eval_ag_news_samples_per_second": 65.869, "eval_ag_news_steps_per_second": 0.132, "eval_ag_news_token_set_f1": 0.3522306876614356, "eval_ag_news_token_set_f1_sem": 0.00471422098838425, "eval_ag_news_token_set_precision": 0.32937037502803546, "eval_ag_news_token_set_recall": 0.3987290004470333, "eval_ag_news_true_num_tokens": 56.09375, "step": 1767 }, { "epoch": 7.78, "eval_anthropic_toxic_prompts_accuracy": 0.105375, "eval_anthropic_toxic_prompts_bleu_score": 3.4987759769154145, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12297900893527906, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6914503574371338, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004275769877600584, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.14800000190734863, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015896458012572223, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.161133289337158, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.134, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.974, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.786, "eval_anthropic_toxic_prompts_num_pred_words": 43.256, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 23.597323376209825, "eval_anthropic_toxic_prompts_pred_num_tokens": 62.515625, "eval_anthropic_toxic_prompts_rouge_score": 0.23212696161321655, "eval_anthropic_toxic_prompts_runtime": 10.2195, "eval_anthropic_toxic_prompts_samples_per_second": 48.926, "eval_anthropic_toxic_prompts_steps_per_second": 0.098, "eval_anthropic_toxic_prompts_token_set_f1": 0.3285741928131714, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005470398784443636, "eval_anthropic_toxic_prompts_token_set_precision": 0.4484121061331258, "eval_anthropic_toxic_prompts_token_set_recall": 0.2823611547664964, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 1767 }, { "epoch": 7.78, "eval_arxiv_accuracy": 0.42471875, "eval_arxiv_bleu_score": 4.434731531662221, "eval_arxiv_bleu_score_sem": 0.12335882016089646, "eval_arxiv_emb_cos_sim": 0.7598865032196045, "eval_arxiv_emb_cos_sim_sem": 0.005009810683690815, "eval_arxiv_emb_top1_equal": 0.32600000500679016, "eval_arxiv_emb_top1_equal_sem": 0.020984011608532603, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.030510187149048, "eval_arxiv_n_ngrams_match_1": 15.518, "eval_arxiv_n_ngrams_match_2": 2.902, "eval_arxiv_n_ngrams_match_3": 0.636, "eval_arxiv_num_pred_words": 39.462, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 20.70779474558654, "eval_arxiv_pred_num_tokens": 62.90625, "eval_arxiv_rouge_score": 0.3665031141829219, "eval_arxiv_runtime": 8.1516, "eval_arxiv_samples_per_second": 61.338, "eval_arxiv_steps_per_second": 0.123, "eval_arxiv_token_set_f1": 0.3706754086012643, "eval_arxiv_token_set_f1_sem": 0.004476124291976172, "eval_arxiv_token_set_precision": 0.32358896466381126, "eval_arxiv_token_set_recall": 0.45072060730764696, "eval_arxiv_true_num_tokens": 64.0, "step": 1767 }, { "epoch": 7.78, "eval_python_code_alpaca_accuracy": 0.15021875, "eval_python_code_alpaca_bleu_score": 4.789435602398936, "eval_python_code_alpaca_bleu_score_sem": 0.15382870656988126, "eval_python_code_alpaca_emb_cos_sim": 0.7624420523643494, "eval_python_code_alpaca_emb_cos_sim_sem": 0.0036925334347325555, "eval_python_code_alpaca_emb_top1_equal": 0.1599999964237213, "eval_python_code_alpaca_emb_top1_equal_sem": 0.016411540042267993, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.7904624938964844, "eval_python_code_alpaca_n_ngrams_match_1": 9.702, "eval_python_code_alpaca_n_ngrams_match_2": 2.786, "eval_python_code_alpaca_n_ngrams_match_3": 0.878, "eval_python_code_alpaca_num_pred_words": 39.734, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 16.288551415603756, "eval_python_code_alpaca_pred_num_tokens": 62.8828125, "eval_python_code_alpaca_rouge_score": 0.37263424824477864, "eval_python_code_alpaca_runtime": 7.7315, "eval_python_code_alpaca_samples_per_second": 64.67, "eval_python_code_alpaca_steps_per_second": 0.129, "eval_python_code_alpaca_token_set_f1": 0.4545399548124749, "eval_python_code_alpaca_token_set_f1_sem": 0.005183398897931823, "eval_python_code_alpaca_token_set_precision": 0.5375639518729065, "eval_python_code_alpaca_token_set_recall": 0.41141154370575833, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 1767 }, { "epoch": 7.78, "eval_wikibio_accuracy": 0.36715625, "eval_wikibio_bleu_score": 4.919477452581796, "eval_wikibio_bleu_score_sem": 0.2047184635454884, "eval_wikibio_emb_cos_sim": 0.7149428725242615, "eval_wikibio_emb_cos_sim_sem": 0.006690116098771149, "eval_wikibio_emb_top1_equal": 0.17800000309944153, "eval_wikibio_emb_top1_equal_sem": 0.017123621962581055, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.3364834785461426, "eval_wikibio_n_ngrams_match_1": 8.498, "eval_wikibio_n_ngrams_match_2": 2.632, "eval_wikibio_n_ngrams_match_3": 0.99, "eval_wikibio_num_pred_words": 32.026, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 28.120067814377208, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.30135222420067076, "eval_wikibio_runtime": 7.4611, "eval_wikibio_samples_per_second": 67.014, "eval_wikibio_steps_per_second": 0.134, "eval_wikibio_token_set_f1": 0.27669579538740924, "eval_wikibio_token_set_f1_sem": 0.006812996857810089, "eval_wikibio_token_set_precision": 0.27239784751259927, "eval_wikibio_token_set_recall": 0.30446375248856655, "eval_wikibio_true_num_tokens": 61.1328125, "step": 1767 }, { "epoch": 7.78, "eval_bias-bios_accuracy": 0.5174375, "eval_bias-bios_bleu_score": 18.54502948244028, "eval_bias-bios_bleu_score_sem": 0.776228581133173, "eval_bias-bios_emb_cos_sim": 0.8806796073913574, "eval_bias-bios_emb_cos_sim_sem": 0.0027210159238386543, "eval_bias-bios_emb_top1_equal": 0.34599998593330383, "eval_bias-bios_emb_top1_equal_sem": 0.021294951270401857, "eval_bias-bios_exact_match": 0.0, "eval_bias-bios_exact_match_sem": 0.0, "eval_bias-bios_loss": 1.756240725517273, "eval_bias-bios_n_ngrams_match_1": 23.022, "eval_bias-bios_n_ngrams_match_2": 10.602, "eval_bias-bios_n_ngrams_match_3": 5.974, "eval_bias-bios_num_pred_words": 46.896, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 5.790627866508819, "eval_bias-bios_pred_num_tokens": 62.3984375, "eval_bias-bios_rouge_score": 0.5295545960920665, "eval_bias-bios_runtime": 7.7836, "eval_bias-bios_samples_per_second": 64.238, "eval_bias-bios_steps_per_second": 0.128, "eval_bias-bios_token_set_f1": 0.5596962553749173, "eval_bias-bios_token_set_f1_sem": 0.0065499820854619285, "eval_bias-bios_token_set_precision": 0.552492983592854, "eval_bias-bios_token_set_recall": 0.5769811603574992, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 1767 }, { "epoch": 7.82, "learning_rate": 0.001, "loss": 1.8319, "step": 1776 }, { "epoch": 7.88, "learning_rate": 0.001, "loss": 1.6043, "step": 1788 }, { "epoch": 7.92, "eval_ag_news_accuracy": 0.30328125, "eval_ag_news_bleu_score": 4.792043355736725, "eval_ag_news_bleu_score_sem": 0.16482900313217344, "eval_ag_news_emb_cos_sim": 0.8007201552391052, "eval_ag_news_emb_cos_sim_sem": 0.005518706853646463, "eval_ag_news_emb_top1_equal": 0.28600001335144043, "eval_ag_news_emb_top1_equal_sem": 0.020229345383440313, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5438921451568604, "eval_ag_news_n_ngrams_match_1": 12.572, "eval_ag_news_n_ngrams_match_2": 2.682, "eval_ag_news_n_ngrams_match_3": 0.744, "eval_ag_news_num_pred_words": 36.492, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.601330848618204, "eval_ag_news_pred_num_tokens": 56.8984375, "eval_ag_news_rouge_score": 0.3590280838098541, "eval_ag_news_runtime": 8.658, "eval_ag_news_samples_per_second": 57.75, "eval_ag_news_steps_per_second": 0.115, "eval_ag_news_token_set_f1": 0.3456240580243733, "eval_ag_news_token_set_f1_sem": 0.004942295341404688, "eval_ag_news_token_set_precision": 0.30991615070020745, "eval_ag_news_token_set_recall": 0.4120273375000977, "eval_ag_news_true_num_tokens": 56.09375, "step": 1798 }, { "epoch": 7.92, "eval_anthropic_toxic_prompts_accuracy": 0.10675, "eval_anthropic_toxic_prompts_bleu_score": 4.810195451329484, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1836726785355343, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6906068921089172, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.0052169098691491, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1459999978542328, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01580720436986462, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.0305025577545166, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.75, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.796, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.658, "eval_anthropic_toxic_prompts_num_pred_words": 30.982, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 20.70763675825323, "eval_anthropic_toxic_prompts_pred_num_tokens": 45.0234375, "eval_anthropic_toxic_prompts_rouge_score": 0.28588224786399713, "eval_anthropic_toxic_prompts_runtime": 6.9727, "eval_anthropic_toxic_prompts_samples_per_second": 71.708, "eval_anthropic_toxic_prompts_steps_per_second": 0.143, "eval_anthropic_toxic_prompts_token_set_f1": 0.35198053648732547, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005893928358486648, "eval_anthropic_toxic_prompts_token_set_precision": 0.4273047679068406, "eval_anthropic_toxic_prompts_token_set_recall": 0.32545975191429377, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 1798 }, { "epoch": 7.92, "eval_arxiv_accuracy": 0.42528125, "eval_arxiv_bleu_score": 4.221191763826797, "eval_arxiv_bleu_score_sem": 0.12013763585677406, "eval_arxiv_emb_cos_sim": 0.7373228669166565, "eval_arxiv_emb_cos_sim_sem": 0.006062924899138375, "eval_arxiv_emb_top1_equal": 0.25999999046325684, "eval_arxiv_emb_top1_equal_sem": 0.0196359666629192, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.028066873550415, "eval_arxiv_n_ngrams_match_1": 14.23, "eval_arxiv_n_ngrams_match_2": 2.724, "eval_arxiv_n_ngrams_match_3": 0.65, "eval_arxiv_num_pred_words": 35.354, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 20.65726086928022, "eval_arxiv_pred_num_tokens": 58.640625, "eval_arxiv_rouge_score": 0.3524750488192354, "eval_arxiv_runtime": 8.5405, "eval_arxiv_samples_per_second": 58.545, "eval_arxiv_steps_per_second": 0.117, "eval_arxiv_token_set_f1": 0.354716902698285, "eval_arxiv_token_set_f1_sem": 0.004857799242708296, "eval_arxiv_token_set_precision": 0.29925603684239627, "eval_arxiv_token_set_recall": 0.45414328256592784, "eval_arxiv_true_num_tokens": 64.0, "step": 1798 }, { "epoch": 7.92, "eval_python_code_alpaca_accuracy": 0.15634375, "eval_python_code_alpaca_bleu_score": 6.6354189861223025, "eval_python_code_alpaca_bleu_score_sem": 0.23023821468117778, "eval_python_code_alpaca_emb_cos_sim": 0.7703977823257446, "eval_python_code_alpaca_emb_cos_sim_sem": 0.004220384358420294, "eval_python_code_alpaca_emb_top1_equal": 0.20399999618530273, "eval_python_code_alpaca_emb_top1_equal_sem": 0.018039369108186407, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.540879011154175, "eval_python_code_alpaca_n_ngrams_match_1": 9.11, "eval_python_code_alpaca_n_ngrams_match_2": 2.612, "eval_python_code_alpaca_n_ngrams_match_3": 0.858, "eval_python_code_alpaca_num_pred_words": 29.414, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 12.69082144302526, "eval_python_code_alpaca_pred_num_tokens": 48.5703125, "eval_python_code_alpaca_rouge_score": 0.42557610046948885, "eval_python_code_alpaca_runtime": 8.5016, "eval_python_code_alpaca_samples_per_second": 58.812, "eval_python_code_alpaca_steps_per_second": 0.118, "eval_python_code_alpaca_token_set_f1": 0.47824295657687516, "eval_python_code_alpaca_token_set_f1_sem": 0.0058007310055928065, "eval_python_code_alpaca_token_set_precision": 0.5116566636117332, "eval_python_code_alpaca_token_set_recall": 0.46656534883311057, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 1798 }, { "epoch": 7.92, "eval_wikibio_accuracy": 0.3696875, "eval_wikibio_bleu_score": 4.370823640419664, "eval_wikibio_bleu_score_sem": 0.19076798738180825, "eval_wikibio_emb_cos_sim": 0.6826354265213013, "eval_wikibio_emb_cos_sim_sem": 0.00763070997536604, "eval_wikibio_emb_top1_equal": 0.15600000321865082, "eval_wikibio_emb_top1_equal_sem": 0.016243635183835314, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.3066444396972656, "eval_wikibio_n_ngrams_match_1": 7.458, "eval_wikibio_n_ngrams_match_2": 2.3, "eval_wikibio_n_ngrams_match_3": 0.826, "eval_wikibio_num_pred_words": 29.314, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 27.29338703499244, "eval_wikibio_pred_num_tokens": 61.484375, "eval_wikibio_rouge_score": 0.2675907815883483, "eval_wikibio_runtime": 9.3728, "eval_wikibio_samples_per_second": 53.346, "eval_wikibio_steps_per_second": 0.107, "eval_wikibio_token_set_f1": 0.24419770753332024, "eval_wikibio_token_set_f1_sem": 0.007244638990825622, "eval_wikibio_token_set_precision": 0.23626165051127185, "eval_wikibio_token_set_recall": 0.2800556622176949, "eval_wikibio_true_num_tokens": 61.1328125, "step": 1798 }, { "epoch": 7.92, "eval_bias-bios_accuracy": 0.520875, "eval_bias-bios_bleu_score": 19.41641229356281, "eval_bias-bios_bleu_score_sem": 0.857620968734896, "eval_bias-bios_emb_cos_sim": 0.8749354481697083, "eval_bias-bios_emb_cos_sim_sem": 0.0032761149515914533, "eval_bias-bios_emb_top1_equal": 0.35199999809265137, "eval_bias-bios_emb_top1_equal_sem": 0.02138004257753857, "eval_bias-bios_exact_match": 0.002, "eval_bias-bios_exact_match_sem": 0.002, "eval_bias-bios_loss": 1.7302442789077759, "eval_bias-bios_n_ngrams_match_1": 21.662, "eval_bias-bios_n_ngrams_match_2": 9.86, "eval_bias-bios_n_ngrams_match_3": 5.568, "eval_bias-bios_num_pred_words": 39.64, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 5.642031969512989, "eval_bias-bios_pred_num_tokens": 53.828125, "eval_bias-bios_rouge_score": 0.5458043549089255, "eval_bias-bios_runtime": 8.2889, "eval_bias-bios_samples_per_second": 60.322, "eval_bias-bios_steps_per_second": 0.121, "eval_bias-bios_token_set_f1": 0.5622363262827373, "eval_bias-bios_token_set_f1_sem": 0.006909230874498535, "eval_bias-bios_token_set_precision": 0.5260818559910302, "eval_bias-bios_token_set_recall": 0.6163827708499481, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 1798 }, { "epoch": 7.93, "learning_rate": 0.001, "loss": 1.9572, "step": 1800 }, { "epoch": 7.98, "learning_rate": 0.001, "loss": 1.8099, "step": 1812 }, { "epoch": 8.04, "learning_rate": 0.001, "loss": 1.8423, "step": 1824 }, { "epoch": 8.06, "eval_ag_news_accuracy": 0.30134375, "eval_ag_news_bleu_score": 4.780837595873983, "eval_ag_news_bleu_score_sem": 0.15800340186528505, "eval_ag_news_emb_cos_sim": 0.8033591508865356, "eval_ag_news_emb_cos_sim_sem": 0.0052791523130582855, "eval_ag_news_emb_top1_equal": 0.28600001335144043, "eval_ag_news_emb_top1_equal_sem": 0.020229345383440313, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5662569999694824, "eval_ag_news_n_ngrams_match_1": 12.928, "eval_ag_news_n_ngrams_match_2": 2.704, "eval_ag_news_n_ngrams_match_3": 0.73, "eval_ag_news_num_pred_words": 38.418, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 35.3839030268648, "eval_ag_news_pred_num_tokens": 59.2109375, "eval_ag_news_rouge_score": 0.3585844720770892, "eval_ag_news_runtime": 8.5983, "eval_ag_news_samples_per_second": 58.151, "eval_ag_news_steps_per_second": 0.116, "eval_ag_news_token_set_f1": 0.3478374542683173, "eval_ag_news_token_set_f1_sem": 0.0050479052066528095, "eval_ag_news_token_set_precision": 0.31776503427402997, "eval_ag_news_token_set_recall": 0.40573543588464517, "eval_ag_news_true_num_tokens": 56.09375, "step": 1829 }, { "epoch": 8.06, "eval_anthropic_toxic_prompts_accuracy": 0.1070625, "eval_anthropic_toxic_prompts_bleu_score": 4.9483724168872705, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.18687237219876157, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6995450854301453, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.0047627024969777495, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.14000000059604645, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015533271243205533, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.037666082382202, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.838, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.832, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.676, "eval_anthropic_toxic_prompts_num_pred_words": 30.12, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 20.85650901255428, "eval_anthropic_toxic_prompts_pred_num_tokens": 43.6484375, "eval_anthropic_toxic_prompts_rouge_score": 0.29494632328980463, "eval_anthropic_toxic_prompts_runtime": 8.6405, "eval_anthropic_toxic_prompts_samples_per_second": 57.867, "eval_anthropic_toxic_prompts_steps_per_second": 0.116, "eval_anthropic_toxic_prompts_token_set_f1": 0.34456781435249784, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005781889435940958, "eval_anthropic_toxic_prompts_token_set_precision": 0.42904865190258323, "eval_anthropic_toxic_prompts_token_set_recall": 0.3116079242511146, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 1829 }, { "epoch": 8.06, "eval_arxiv_accuracy": 0.42596875, "eval_arxiv_bleu_score": 4.432236788900172, "eval_arxiv_bleu_score_sem": 0.1304993760904918, "eval_arxiv_emb_cos_sim": 0.7465862035751343, "eval_arxiv_emb_cos_sim_sem": 0.005250071608477703, "eval_arxiv_emb_top1_equal": 0.2800000011920929, "eval_arxiv_emb_top1_equal_sem": 0.020099949126240343, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.0305418968200684, "eval_arxiv_n_ngrams_match_1": 14.788, "eval_arxiv_n_ngrams_match_2": 2.836, "eval_arxiv_n_ngrams_match_3": 0.684, "eval_arxiv_num_pred_words": 36.79, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 20.70845139335647, "eval_arxiv_pred_num_tokens": 60.890625, "eval_arxiv_rouge_score": 0.36099883081969086, "eval_arxiv_runtime": 9.4935, "eval_arxiv_samples_per_second": 52.668, "eval_arxiv_steps_per_second": 0.105, "eval_arxiv_token_set_f1": 0.36457213548998924, "eval_arxiv_token_set_f1_sem": 0.0045752990200083506, "eval_arxiv_token_set_precision": 0.3117065680056879, "eval_arxiv_token_set_recall": 0.45586883738956696, "eval_arxiv_true_num_tokens": 64.0, "step": 1829 }, { "epoch": 8.06, "eval_python_code_alpaca_accuracy": 0.15471875, "eval_python_code_alpaca_bleu_score": 6.5590124981329225, "eval_python_code_alpaca_bleu_score_sem": 0.2234983443166712, "eval_python_code_alpaca_emb_cos_sim": 0.7730389833450317, "eval_python_code_alpaca_emb_cos_sim_sem": 0.0038631195543194887, "eval_python_code_alpaca_emb_top1_equal": 0.20000000298023224, "eval_python_code_alpaca_emb_top1_equal_sem": 0.017906459589198134, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.5823779106140137, "eval_python_code_alpaca_n_ngrams_match_1": 9.404, "eval_python_code_alpaca_n_ngrams_match_2": 2.632, "eval_python_code_alpaca_n_ngrams_match_3": 0.848, "eval_python_code_alpaca_num_pred_words": 30.27, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 13.228557115492329, "eval_python_code_alpaca_pred_num_tokens": 49.7421875, "eval_python_code_alpaca_rouge_score": 0.4327032429294919, "eval_python_code_alpaca_runtime": 8.2947, "eval_python_code_alpaca_samples_per_second": 60.28, "eval_python_code_alpaca_steps_per_second": 0.121, "eval_python_code_alpaca_token_set_f1": 0.4822270748001864, "eval_python_code_alpaca_token_set_f1_sem": 0.0055473955401953, "eval_python_code_alpaca_token_set_precision": 0.5275970090814532, "eval_python_code_alpaca_token_set_recall": 0.46006825227663467, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 1829 }, { "epoch": 8.06, "eval_wikibio_accuracy": 0.37275, "eval_wikibio_bleu_score": 4.983038179149817, "eval_wikibio_bleu_score_sem": 0.20608286310648566, "eval_wikibio_emb_cos_sim": 0.7166478037834167, "eval_wikibio_emb_cos_sim_sem": 0.006287153235015197, "eval_wikibio_emb_top1_equal": 0.18400000035762787, "eval_wikibio_emb_top1_equal_sem": 0.017346174301986407, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.313105583190918, "eval_wikibio_n_ngrams_match_1": 8.406, "eval_wikibio_n_ngrams_match_2": 2.586, "eval_wikibio_n_ngrams_match_3": 0.962, "eval_wikibio_num_pred_words": 31.31, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 27.470304453997354, "eval_wikibio_pred_num_tokens": 62.1953125, "eval_wikibio_rouge_score": 0.29782203869979695, "eval_wikibio_runtime": 8.5123, "eval_wikibio_samples_per_second": 58.739, "eval_wikibio_steps_per_second": 0.117, "eval_wikibio_token_set_f1": 0.27353970378616643, "eval_wikibio_token_set_f1_sem": 0.00668621847617472, "eval_wikibio_token_set_precision": 0.26680747758540296, "eval_wikibio_token_set_recall": 0.3051204144974896, "eval_wikibio_true_num_tokens": 61.1328125, "step": 1829 }, { "epoch": 8.06, "eval_bias-bios_accuracy": 0.52309375, "eval_bias-bios_bleu_score": 19.697531118947712, "eval_bias-bios_bleu_score_sem": 0.8502093943718262, "eval_bias-bios_emb_cos_sim": 0.8757075071334839, "eval_bias-bios_emb_cos_sim_sem": 0.0033073384659748216, "eval_bias-bios_emb_top1_equal": 0.3319999873638153, "eval_bias-bios_emb_top1_equal_sem": 0.02108176585203148, "eval_bias-bios_exact_match": 0.002, "eval_bias-bios_exact_match_sem": 0.002, "eval_bias-bios_loss": 1.7177313566207886, "eval_bias-bios_n_ngrams_match_1": 22.114, "eval_bias-bios_n_ngrams_match_2": 10.124, "eval_bias-bios_n_ngrams_match_3": 5.774, "eval_bias-bios_num_pred_words": 41.354, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 5.571873520953892, "eval_bias-bios_pred_num_tokens": 56.984375, "eval_bias-bios_rouge_score": 0.542823255202964, "eval_bias-bios_runtime": 9.2763, "eval_bias-bios_samples_per_second": 53.901, "eval_bias-bios_steps_per_second": 0.108, "eval_bias-bios_token_set_f1": 0.5630881889770337, "eval_bias-bios_token_set_f1_sem": 0.0068162888586790924, "eval_bias-bios_token_set_precision": 0.533611840371098, "eval_bias-bios_token_set_recall": 0.6075041270934662, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 1829 }, { "epoch": 8.09, "learning_rate": 0.001, "loss": 1.928, "step": 1836 }, { "epoch": 8.14, "learning_rate": 0.001, "loss": 1.8933, "step": 1848 }, { "epoch": 8.19, "learning_rate": 0.001, "loss": 1.6677, "step": 1860 }, { "epoch": 8.19, "eval_ag_news_accuracy": 0.30309375, "eval_ag_news_bleu_score": 4.741384672804814, "eval_ag_news_bleu_score_sem": 0.15837203198791805, "eval_ag_news_emb_cos_sim": 0.8121864199638367, "eval_ag_news_emb_cos_sim_sem": 0.0046788633557932795, "eval_ag_news_emb_top1_equal": 0.27399998903274536, "eval_ag_news_emb_top1_equal_sem": 0.0199661026485885, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.63670015335083, "eval_ag_news_n_ngrams_match_1": 12.424, "eval_ag_news_n_ngrams_match_2": 2.622, "eval_ag_news_n_ngrams_match_3": 0.692, "eval_ag_news_num_pred_words": 34.722, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 37.966346668463544, "eval_ag_news_pred_num_tokens": 52.328125, "eval_ag_news_rouge_score": 0.3665288936984439, "eval_ag_news_runtime": 7.2875, "eval_ag_news_samples_per_second": 68.611, "eval_ag_news_steps_per_second": 0.137, "eval_ag_news_token_set_f1": 0.3479838698728074, "eval_ag_news_token_set_f1_sem": 0.0048582863561366565, "eval_ag_news_token_set_precision": 0.3123052399394728, "eval_ag_news_token_set_recall": 0.4094756649000542, "eval_ag_news_true_num_tokens": 56.09375, "step": 1860 }, { "epoch": 8.19, "eval_anthropic_toxic_prompts_accuracy": 0.10640625, "eval_anthropic_toxic_prompts_bleu_score": 4.9816386277532265, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.18112198763560855, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.7021182179450989, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004715369424462663, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.15800000727176666, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01632805076118194, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.0633704662323, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.938, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.822, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.692, "eval_anthropic_toxic_prompts_num_pred_words": 29.768, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 21.399562250453446, "eval_anthropic_toxic_prompts_pred_num_tokens": 43.046875, "eval_anthropic_toxic_prompts_rouge_score": 0.2986432883196827, "eval_anthropic_toxic_prompts_runtime": 7.076, "eval_anthropic_toxic_prompts_samples_per_second": 70.661, "eval_anthropic_toxic_prompts_steps_per_second": 0.141, "eval_anthropic_toxic_prompts_token_set_f1": 0.3476961677437007, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005938682229453897, "eval_anthropic_toxic_prompts_token_set_precision": 0.4367535744857751, "eval_anthropic_toxic_prompts_token_set_recall": 0.31075569132235875, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 1860 }, { "epoch": 8.19, "eval_arxiv_accuracy": 0.41928125, "eval_arxiv_bleu_score": 4.424283023632561, "eval_arxiv_bleu_score_sem": 0.11617275039162875, "eval_arxiv_emb_cos_sim": 0.7651264071464539, "eval_arxiv_emb_cos_sim_sem": 0.004366971074856047, "eval_arxiv_emb_top1_equal": 0.23000000417232513, "eval_arxiv_emb_top1_equal_sem": 0.018839050665941787, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.0793185234069824, "eval_arxiv_n_ngrams_match_1": 15.038, "eval_arxiv_n_ngrams_match_2": 2.916, "eval_arxiv_n_ngrams_match_3": 0.668, "eval_arxiv_num_pred_words": 34.756, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 21.74357960552871, "eval_arxiv_pred_num_tokens": 55.7734375, "eval_arxiv_rouge_score": 0.3787988872723498, "eval_arxiv_runtime": 9.7779, "eval_arxiv_samples_per_second": 51.136, "eval_arxiv_steps_per_second": 0.102, "eval_arxiv_token_set_f1": 0.3769822097015867, "eval_arxiv_token_set_f1_sem": 0.004083903196924782, "eval_arxiv_token_set_precision": 0.3253013010979465, "eval_arxiv_token_set_recall": 0.4601533774146747, "eval_arxiv_true_num_tokens": 64.0, "step": 1860 }, { "epoch": 8.19, "eval_python_code_alpaca_accuracy": 0.15565625, "eval_python_code_alpaca_bleu_score": 6.6369636954019295, "eval_python_code_alpaca_bleu_score_sem": 0.20906494831471414, "eval_python_code_alpaca_emb_cos_sim": 0.7860262989997864, "eval_python_code_alpaca_emb_cos_sim_sem": 0.0037672392281477584, "eval_python_code_alpaca_emb_top1_equal": 0.18199999630451202, "eval_python_code_alpaca_emb_top1_equal_sem": 0.017272772986938162, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.638681173324585, "eval_python_code_alpaca_n_ngrams_match_1": 9.666, "eval_python_code_alpaca_n_ngrams_match_2": 2.728, "eval_python_code_alpaca_n_ngrams_match_3": 0.888, "eval_python_code_alpaca_num_pred_words": 29.712, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 13.994734802261277, "eval_python_code_alpaca_pred_num_tokens": 47.40625, "eval_python_code_alpaca_rouge_score": 0.4400602798695769, "eval_python_code_alpaca_runtime": 7.9726, "eval_python_code_alpaca_samples_per_second": 62.715, "eval_python_code_alpaca_steps_per_second": 0.125, "eval_python_code_alpaca_token_set_f1": 0.4884306011296119, "eval_python_code_alpaca_token_set_f1_sem": 0.0055162166038522165, "eval_python_code_alpaca_token_set_precision": 0.5446404584245398, "eval_python_code_alpaca_token_set_recall": 0.4568444762878828, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 1860 }, { "epoch": 8.19, "eval_wikibio_accuracy": 0.35609375, "eval_wikibio_bleu_score": 5.359000016975188, "eval_wikibio_bleu_score_sem": 0.19773247550237727, "eval_wikibio_emb_cos_sim": 0.7486798167228699, "eval_wikibio_emb_cos_sim_sem": 0.005239392212273522, "eval_wikibio_emb_top1_equal": 0.20000000298023224, "eval_wikibio_emb_top1_equal_sem": 0.017906459589198134, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.4573657512664795, "eval_wikibio_n_ngrams_match_1": 8.894, "eval_wikibio_n_ngrams_match_2": 2.644, "eval_wikibio_n_ngrams_match_3": 0.95, "eval_wikibio_num_pred_words": 31.156, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 31.733272980946744, "eval_wikibio_pred_num_tokens": 58.828125, "eval_wikibio_rouge_score": 0.33704755730063046, "eval_wikibio_runtime": 8.2679, "eval_wikibio_samples_per_second": 60.475, "eval_wikibio_steps_per_second": 0.121, "eval_wikibio_token_set_f1": 0.29998801830909244, "eval_wikibio_token_set_f1_sem": 0.005805959306225305, "eval_wikibio_token_set_precision": 0.2915927252931224, "eval_wikibio_token_set_recall": 0.3265489991928534, "eval_wikibio_true_num_tokens": 61.1328125, "step": 1860 }, { "epoch": 8.19, "eval_bias-bios_accuracy": 0.523875, "eval_bias-bios_bleu_score": 19.84644091800536, "eval_bias-bios_bleu_score_sem": 0.8744952223323816, "eval_bias-bios_emb_cos_sim": 0.8809575438499451, "eval_bias-bios_emb_cos_sim_sem": 0.0029165990476574, "eval_bias-bios_emb_top1_equal": 0.3440000116825104, "eval_bias-bios_emb_top1_equal_sem": 0.021265758943789875, "eval_bias-bios_exact_match": 0.002, "eval_bias-bios_exact_match_sem": 0.002, "eval_bias-bios_loss": 1.7271181344985962, "eval_bias-bios_n_ngrams_match_1": 21.742, "eval_bias-bios_n_ngrams_match_2": 9.928, "eval_bias-bios_n_ngrams_match_3": 5.668, "eval_bias-bios_num_pred_words": 38.868, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 5.624421703274523, "eval_bias-bios_pred_num_tokens": 52.421875, "eval_bias-bios_rouge_score": 0.5528489807404857, "eval_bias-bios_runtime": 13.0908, "eval_bias-bios_samples_per_second": 38.195, "eval_bias-bios_steps_per_second": 0.076, "eval_bias-bios_token_set_f1": 0.5651409833769688, "eval_bias-bios_token_set_f1_sem": 0.0069194692707168675, "eval_bias-bios_token_set_precision": 0.5349813822732767, "eval_bias-bios_token_set_recall": 0.6088791750502968, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 1860 }, { "epoch": 8.25, "learning_rate": 0.001, "loss": 1.7665, "step": 1872 }, { "epoch": 8.3, "learning_rate": 0.001, "loss": 1.9331, "step": 1884 }, { "epoch": 8.33, "eval_ag_news_accuracy": 0.3009375, "eval_ag_news_bleu_score": 4.783602500552586, "eval_ag_news_bleu_score_sem": 0.15494989020321834, "eval_ag_news_emb_cos_sim": 0.8162067532539368, "eval_ag_news_emb_cos_sim_sem": 0.004301302001182929, "eval_ag_news_emb_top1_equal": 0.2639999985694885, "eval_ag_news_emb_top1_equal_sem": 0.019732885240582997, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5908281803131104, "eval_ag_news_n_ngrams_match_1": 13.826, "eval_ag_news_n_ngrams_match_2": 2.882, "eval_ag_news_n_ngrams_match_3": 0.814, "eval_ag_news_num_pred_words": 43.092, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 36.264096704415266, "eval_ag_news_pred_num_tokens": 62.875, "eval_ag_news_rouge_score": 0.3612734905729005, "eval_ag_news_runtime": 8.5439, "eval_ag_news_samples_per_second": 58.521, "eval_ag_news_steps_per_second": 0.117, "eval_ag_news_token_set_f1": 0.35453410878160624, "eval_ag_news_token_set_f1_sem": 0.004645656214615403, "eval_ag_news_token_set_precision": 0.33472180012828634, "eval_ag_news_token_set_recall": 0.39579148585435375, "eval_ag_news_true_num_tokens": 56.09375, "step": 1891 }, { "epoch": 8.33, "eval_anthropic_toxic_prompts_accuracy": 0.1051875, "eval_anthropic_toxic_prompts_bleu_score": 3.494749302388673, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12794921935087805, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.694394052028656, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004276470264197607, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1459999978542328, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015807205702664997, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2136523723602295, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.266, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.974, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.746, "eval_anthropic_toxic_prompts_num_pred_words": 43.598, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.869754149947237, "eval_anthropic_toxic_prompts_pred_num_tokens": 61.5546875, "eval_anthropic_toxic_prompts_rouge_score": 0.23461670239543642, "eval_anthropic_toxic_prompts_runtime": 8.6056, "eval_anthropic_toxic_prompts_samples_per_second": 58.102, "eval_anthropic_toxic_prompts_steps_per_second": 0.116, "eval_anthropic_toxic_prompts_token_set_f1": 0.33095488094061143, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0053161893127592836, "eval_anthropic_toxic_prompts_token_set_precision": 0.45590834800070323, "eval_anthropic_toxic_prompts_token_set_recall": 0.28048851738633934, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 1891 }, { "epoch": 8.33, "eval_arxiv_accuracy": 0.42571875, "eval_arxiv_bleu_score": 4.591204061401259, "eval_arxiv_bleu_score_sem": 0.1265891653412925, "eval_arxiv_emb_cos_sim": 0.7642773985862732, "eval_arxiv_emb_cos_sim_sem": 0.004905883908455568, "eval_arxiv_emb_top1_equal": 0.29600000381469727, "eval_arxiv_emb_top1_equal_sem": 0.020435341676588347, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.0380170345306396, "eval_arxiv_n_ngrams_match_1": 15.618, "eval_arxiv_n_ngrams_match_2": 3.012, "eval_arxiv_n_ngrams_match_3": 0.692, "eval_arxiv_num_pred_words": 39.924, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 20.863829933772514, "eval_arxiv_pred_num_tokens": 62.9453125, "eval_arxiv_rouge_score": 0.3659808540317445, "eval_arxiv_runtime": 9.2214, "eval_arxiv_samples_per_second": 54.222, "eval_arxiv_steps_per_second": 0.108, "eval_arxiv_token_set_f1": 0.3706836572969632, "eval_arxiv_token_set_f1_sem": 0.004433309146267008, "eval_arxiv_token_set_precision": 0.32536084685593664, "eval_arxiv_token_set_recall": 0.44598999972836184, "eval_arxiv_true_num_tokens": 64.0, "step": 1891 }, { "epoch": 8.33, "eval_python_code_alpaca_accuracy": 0.1533125, "eval_python_code_alpaca_bleu_score": 5.021076645768737, "eval_python_code_alpaca_bleu_score_sem": 0.15326960251605942, "eval_python_code_alpaca_emb_cos_sim": 0.7768791317939758, "eval_python_code_alpaca_emb_cos_sim_sem": 0.0033942160588140272, "eval_python_code_alpaca_emb_top1_equal": 0.17399999499320984, "eval_python_code_alpaca_emb_top1_equal_sem": 0.016971269551723376, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.7844736576080322, "eval_python_code_alpaca_n_ngrams_match_1": 9.976, "eval_python_code_alpaca_n_ngrams_match_2": 2.918, "eval_python_code_alpaca_n_ngrams_match_3": 0.996, "eval_python_code_alpaca_num_pred_words": 40.982, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 16.191293469447224, "eval_python_code_alpaca_pred_num_tokens": 62.4140625, "eval_python_code_alpaca_rouge_score": 0.37388306065756094, "eval_python_code_alpaca_runtime": 8.516, "eval_python_code_alpaca_samples_per_second": 58.713, "eval_python_code_alpaca_steps_per_second": 0.117, "eval_python_code_alpaca_token_set_f1": 0.45995671587715675, "eval_python_code_alpaca_token_set_f1_sem": 0.004843210631367747, "eval_python_code_alpaca_token_set_precision": 0.5544483613954407, "eval_python_code_alpaca_token_set_recall": 0.4095854452444462, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 1891 }, { "epoch": 8.33, "eval_wikibio_accuracy": 0.36975, "eval_wikibio_bleu_score": 4.851922607844777, "eval_wikibio_bleu_score_sem": 0.1873093904634118, "eval_wikibio_emb_cos_sim": 0.7219175100326538, "eval_wikibio_emb_cos_sim_sem": 0.006202877601707938, "eval_wikibio_emb_top1_equal": 0.16599999368190765, "eval_wikibio_emb_top1_equal_sem": 0.016656615375209204, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.3165245056152344, "eval_wikibio_n_ngrams_match_1": 8.46, "eval_wikibio_n_ngrams_match_2": 2.57, "eval_wikibio_n_ngrams_match_3": 0.95, "eval_wikibio_num_pred_words": 32.034, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 27.564384027638354, "eval_wikibio_pred_num_tokens": 62.9609375, "eval_wikibio_rouge_score": 0.3016254823114396, "eval_wikibio_runtime": 8.6373, "eval_wikibio_samples_per_second": 57.888, "eval_wikibio_steps_per_second": 0.116, "eval_wikibio_token_set_f1": 0.27968945656360833, "eval_wikibio_token_set_f1_sem": 0.006463263382407512, "eval_wikibio_token_set_precision": 0.27151554578125564, "eval_wikibio_token_set_recall": 0.3125748784752346, "eval_wikibio_true_num_tokens": 61.1328125, "step": 1891 }, { "epoch": 8.33, "eval_bias-bios_accuracy": 0.51821875, "eval_bias-bios_bleu_score": 18.91106026065125, "eval_bias-bios_bleu_score_sem": 0.8012562185192902, "eval_bias-bios_emb_cos_sim": 0.8836072683334351, "eval_bias-bios_emb_cos_sim_sem": 0.0027063216331050595, "eval_bias-bios_emb_top1_equal": 0.34599998593330383, "eval_bias-bios_emb_top1_equal_sem": 0.021294949937601483, "eval_bias-bios_exact_match": 0.0, "eval_bias-bios_exact_match_sem": 0.0, "eval_bias-bios_loss": 1.7455099821090698, "eval_bias-bios_n_ngrams_match_1": 23.232, "eval_bias-bios_n_ngrams_match_2": 10.734, "eval_bias-bios_n_ngrams_match_3": 6.094, "eval_bias-bios_num_pred_words": 46.834, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 5.728822327459904, "eval_bias-bios_pred_num_tokens": 61.984375, "eval_bias-bios_rouge_score": 0.5344024868808166, "eval_bias-bios_runtime": 8.4511, "eval_bias-bios_samples_per_second": 59.164, "eval_bias-bios_steps_per_second": 0.118, "eval_bias-bios_token_set_f1": 0.5663459254895272, "eval_bias-bios_token_set_f1_sem": 0.006602976981687319, "eval_bias-bios_token_set_precision": 0.5589301696435766, "eval_bias-bios_token_set_recall": 0.5836912180039152, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 1891 }, { "epoch": 8.35, "learning_rate": 0.001, "loss": 1.9073, "step": 1896 }, { "epoch": 8.41, "learning_rate": 0.001, "loss": 1.688, "step": 1908 }, { "epoch": 8.46, "learning_rate": 0.001, "loss": 1.6889, "step": 1920 }, { "epoch": 8.47, "eval_ag_news_accuracy": 0.3001875, "eval_ag_news_bleu_score": 4.435449766158028, "eval_ag_news_bleu_score_sem": 0.14973093498983295, "eval_ag_news_emb_cos_sim": 0.8028610944747925, "eval_ag_news_emb_cos_sim_sem": 0.004656717211563431, "eval_ag_news_emb_top1_equal": 0.25600001215934753, "eval_ag_news_emb_top1_equal_sem": 0.019536923601457774, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.6249423027038574, "eval_ag_news_n_ngrams_match_1": 11.832, "eval_ag_news_n_ngrams_match_2": 2.458, "eval_ag_news_n_ngrams_match_3": 0.65, "eval_ag_news_num_pred_words": 32.41, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 37.52255814699448, "eval_ag_news_pred_num_tokens": 48.4765625, "eval_ag_news_rouge_score": 0.35709356513331103, "eval_ag_news_runtime": 8.655, "eval_ag_news_samples_per_second": 57.77, "eval_ag_news_steps_per_second": 0.116, "eval_ag_news_token_set_f1": 0.3414618889785419, "eval_ag_news_token_set_f1_sem": 0.004810199967445401, "eval_ag_news_token_set_precision": 0.29624824561399704, "eval_ag_news_token_set_recall": 0.4208496232263841, "eval_ag_news_true_num_tokens": 56.09375, "step": 1922 }, { "epoch": 8.47, "eval_anthropic_toxic_prompts_accuracy": 0.10909375, "eval_anthropic_toxic_prompts_bleu_score": 5.894206921427712, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.22053411846492368, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.7000393271446228, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004913296610940808, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1459999978542328, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01580720436986462, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.011507511138916, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.452, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.652, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.592, "eval_anthropic_toxic_prompts_num_pred_words": 23.66, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 20.31800647066561, "eval_anthropic_toxic_prompts_pred_num_tokens": 33.6484375, "eval_anthropic_toxic_prompts_rouge_score": 0.32193702696635396, "eval_anthropic_toxic_prompts_runtime": 6.9678, "eval_anthropic_toxic_prompts_samples_per_second": 71.758, "eval_anthropic_toxic_prompts_steps_per_second": 0.144, "eval_anthropic_toxic_prompts_token_set_f1": 0.35768379223275326, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005947411729849731, "eval_anthropic_toxic_prompts_token_set_precision": 0.409905716617632, "eval_anthropic_toxic_prompts_token_set_recall": 0.34156957135932925, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 1922 }, { "epoch": 8.47, "eval_arxiv_accuracy": 0.4183125, "eval_arxiv_bleu_score": 3.843480783594084, "eval_arxiv_bleu_score_sem": 0.11168850679535348, "eval_arxiv_emb_cos_sim": 0.7463698983192444, "eval_arxiv_emb_cos_sim_sem": 0.005271770598181763, "eval_arxiv_emb_top1_equal": 0.21199999749660492, "eval_arxiv_emb_top1_equal_sem": 0.01829703673906991, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.0738461017608643, "eval_arxiv_n_ngrams_match_1": 13.604, "eval_arxiv_n_ngrams_match_2": 2.6, "eval_arxiv_n_ngrams_match_3": 0.596, "eval_arxiv_num_pred_words": 31.144, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 21.624914558559464, "eval_arxiv_pred_num_tokens": 50.8828125, "eval_arxiv_rouge_score": 0.3602952189935648, "eval_arxiv_runtime": 7.506, "eval_arxiv_samples_per_second": 66.614, "eval_arxiv_steps_per_second": 0.133, "eval_arxiv_token_set_f1": 0.3620667627381011, "eval_arxiv_token_set_f1_sem": 0.004667772539252971, "eval_arxiv_token_set_precision": 0.29690661507058724, "eval_arxiv_token_set_recall": 0.47944050208988725, "eval_arxiv_true_num_tokens": 64.0, "step": 1922 }, { "epoch": 8.47, "eval_python_code_alpaca_accuracy": 0.164125, "eval_python_code_alpaca_bleu_score": 8.149238938682629, "eval_python_code_alpaca_bleu_score_sem": 0.27088847698794644, "eval_python_code_alpaca_emb_cos_sim": 0.7918772101402283, "eval_python_code_alpaca_emb_cos_sim_sem": 0.003844238771008207, "eval_python_code_alpaca_emb_top1_equal": 0.18799999356269836, "eval_python_code_alpaca_emb_top1_equal_sem": 0.017490679184236527, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.4667320251464844, "eval_python_code_alpaca_n_ngrams_match_1": 8.97, "eval_python_code_alpaca_n_ngrams_match_2": 2.46, "eval_python_code_alpaca_n_ngrams_match_3": 0.804, "eval_python_code_alpaca_num_pred_words": 22.936, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 11.78387445376964, "eval_python_code_alpaca_pred_num_tokens": 35.703125, "eval_python_code_alpaca_rouge_score": 0.4751503363768772, "eval_python_code_alpaca_runtime": 7.1271, "eval_python_code_alpaca_samples_per_second": 70.155, "eval_python_code_alpaca_steps_per_second": 0.14, "eval_python_code_alpaca_token_set_f1": 0.5075156538927706, "eval_python_code_alpaca_token_set_f1_sem": 0.005494481157661636, "eval_python_code_alpaca_token_set_precision": 0.5162122177661042, "eval_python_code_alpaca_token_set_recall": 0.5168435949387356, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 1922 }, { "epoch": 8.47, "eval_wikibio_accuracy": 0.36375, "eval_wikibio_bleu_score": 5.502713975837069, "eval_wikibio_bleu_score_sem": 0.20994709566725045, "eval_wikibio_emb_cos_sim": 0.733432412147522, "eval_wikibio_emb_cos_sim_sem": 0.006024464945119351, "eval_wikibio_emb_top1_equal": 0.20600000023841858, "eval_wikibio_emb_top1_equal_sem": 0.0181047949457911, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.3645694255828857, "eval_wikibio_n_ngrams_match_1": 8.712, "eval_wikibio_n_ngrams_match_2": 2.632, "eval_wikibio_n_ngrams_match_3": 0.954, "eval_wikibio_num_pred_words": 30.35, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 28.9210419569838, "eval_wikibio_pred_num_tokens": 55.71875, "eval_wikibio_rouge_score": 0.3255887283809268, "eval_wikibio_runtime": 7.2562, "eval_wikibio_samples_per_second": 68.906, "eval_wikibio_steps_per_second": 0.138, "eval_wikibio_token_set_f1": 0.2944355952466261, "eval_wikibio_token_set_f1_sem": 0.006328439981856485, "eval_wikibio_token_set_precision": 0.28416654885615683, "eval_wikibio_token_set_recall": 0.32345250615831, "eval_wikibio_true_num_tokens": 61.1328125, "step": 1922 }, { "epoch": 8.47, "eval_bias-bios_accuracy": 0.514625, "eval_bias-bios_bleu_score": 18.49435751172742, "eval_bias-bios_bleu_score_sem": 0.8705296846533569, "eval_bias-bios_emb_cos_sim": 0.8680934309959412, "eval_bias-bios_emb_cos_sim_sem": 0.0035307687876271567, "eval_bias-bios_emb_top1_equal": 0.32199999690055847, "eval_bias-bios_emb_top1_equal_sem": 0.02091666653838802, "eval_bias-bios_exact_match": 0.006, "eval_bias-bios_exact_match_sem": 0.003457152557758369, "eval_bias-bios_loss": 1.7615852355957031, "eval_bias-bios_n_ngrams_match_1": 19.848, "eval_bias-bios_n_ngrams_match_2": 9.216, "eval_bias-bios_n_ngrams_match_3": 5.312, "eval_bias-bios_num_pred_words": 32.898, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 5.821658784164547, "eval_bias-bios_pred_num_tokens": 44.765625, "eval_bias-bios_rouge_score": 0.5446137362717307, "eval_bias-bios_runtime": 7.3911, "eval_bias-bios_samples_per_second": 67.649, "eval_bias-bios_steps_per_second": 0.135, "eval_bias-bios_token_set_f1": 0.5594082568477496, "eval_bias-bios_token_set_f1_sem": 0.007063404257871856, "eval_bias-bios_token_set_precision": 0.5013370129474001, "eval_bias-bios_token_set_recall": 0.6512309406286945, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 1922 }, { "epoch": 8.51, "learning_rate": 0.001, "loss": 1.9388, "step": 1932 }, { "epoch": 8.56, "learning_rate": 0.001, "loss": 1.9097, "step": 1944 }, { "epoch": 8.6, "eval_ag_news_accuracy": 0.3004375, "eval_ag_news_bleu_score": 4.6384322948771315, "eval_ag_news_bleu_score_sem": 0.15318901483073602, "eval_ag_news_emb_cos_sim": 0.8059445023536682, "eval_ag_news_emb_cos_sim_sem": 0.005206606322650648, "eval_ag_news_emb_top1_equal": 0.26600000262260437, "eval_ag_news_emb_top1_equal_sem": 0.01978055817719369, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5975077152252197, "eval_ag_news_n_ngrams_match_1": 13.012, "eval_ag_news_n_ngrams_match_2": 2.656, "eval_ag_news_n_ngrams_match_3": 0.694, "eval_ag_news_num_pred_words": 39.108, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 36.50713479148357, "eval_ag_news_pred_num_tokens": 59.328125, "eval_ag_news_rouge_score": 0.35917363408294, "eval_ag_news_runtime": 9.0027, "eval_ag_news_samples_per_second": 55.539, "eval_ag_news_steps_per_second": 0.111, "eval_ag_news_token_set_f1": 0.34629940683072663, "eval_ag_news_token_set_f1_sem": 0.004761672140250987, "eval_ag_news_token_set_precision": 0.31872649692720056, "eval_ag_news_token_set_recall": 0.3968739193028862, "eval_ag_news_true_num_tokens": 56.09375, "step": 1953 }, { "epoch": 8.6, "eval_anthropic_toxic_prompts_accuracy": 0.10528125, "eval_anthropic_toxic_prompts_bleu_score": 4.006997604254403, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.14432842993601536, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6876260042190552, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004934488470102209, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.16200000047683716, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.016494123019099097, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.146822452545166, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.988, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.858, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.72, "eval_anthropic_toxic_prompts_num_pred_words": 36.364, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 23.262030813524362, "eval_anthropic_toxic_prompts_pred_num_tokens": 53.046875, "eval_anthropic_toxic_prompts_rouge_score": 0.2569124557583795, "eval_anthropic_toxic_prompts_runtime": 8.5843, "eval_anthropic_toxic_prompts_samples_per_second": 58.246, "eval_anthropic_toxic_prompts_steps_per_second": 0.116, "eval_anthropic_toxic_prompts_token_set_f1": 0.3357822926624641, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005603619060244223, "eval_anthropic_toxic_prompts_token_set_precision": 0.43718307712451415, "eval_anthropic_toxic_prompts_token_set_recall": 0.29577861821149204, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 1953 }, { "epoch": 8.6, "eval_arxiv_accuracy": 0.42340625, "eval_arxiv_bleu_score": 4.469706868664844, "eval_arxiv_bleu_score_sem": 0.12689372985136127, "eval_arxiv_emb_cos_sim": 0.7521007657051086, "eval_arxiv_emb_cos_sim_sem": 0.004824712034021417, "eval_arxiv_emb_top1_equal": 0.2280000001192093, "eval_arxiv_emb_top1_equal_sem": 0.018781307089698163, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.0552661418914795, "eval_arxiv_n_ngrams_match_1": 15.32, "eval_arxiv_n_ngrams_match_2": 2.954, "eval_arxiv_n_ngrams_match_3": 0.672, "eval_arxiv_num_pred_words": 37.658, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 21.226834124998554, "eval_arxiv_pred_num_tokens": 60.109375, "eval_arxiv_rouge_score": 0.3712890829953238, "eval_arxiv_runtime": 8.9036, "eval_arxiv_samples_per_second": 56.157, "eval_arxiv_steps_per_second": 0.112, "eval_arxiv_token_set_f1": 0.36978878421186867, "eval_arxiv_token_set_f1_sem": 0.004399200540521649, "eval_arxiv_token_set_precision": 0.3202345342560703, "eval_arxiv_token_set_recall": 0.45431508454570024, "eval_arxiv_true_num_tokens": 64.0, "step": 1953 }, { "epoch": 8.6, "eval_python_code_alpaca_accuracy": 0.15228125, "eval_python_code_alpaca_bleu_score": 5.802510448307412, "eval_python_code_alpaca_bleu_score_sem": 0.19450827928089162, "eval_python_code_alpaca_emb_cos_sim": 0.7649307250976562, "eval_python_code_alpaca_emb_cos_sim_sem": 0.004133352827137784, "eval_python_code_alpaca_emb_top1_equal": 0.16599999368190765, "eval_python_code_alpaca_emb_top1_equal_sem": 0.01665661404240883, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.7348744869232178, "eval_python_code_alpaca_n_ngrams_match_1": 9.686, "eval_python_code_alpaca_n_ngrams_match_2": 2.746, "eval_python_code_alpaca_n_ngrams_match_3": 0.916, "eval_python_code_alpaca_num_pred_words": 34.002, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 15.40780943299212, "eval_python_code_alpaca_pred_num_tokens": 53.484375, "eval_python_code_alpaca_rouge_score": 0.40480305406942174, "eval_python_code_alpaca_runtime": 11.2595, "eval_python_code_alpaca_samples_per_second": 44.407, "eval_python_code_alpaca_steps_per_second": 0.089, "eval_python_code_alpaca_token_set_f1": 0.4745207619123493, "eval_python_code_alpaca_token_set_f1_sem": 0.005881419094198025, "eval_python_code_alpaca_token_set_precision": 0.5407649612484423, "eval_python_code_alpaca_token_set_recall": 0.43923169222918024, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 1953 }, { "epoch": 8.6, "eval_wikibio_accuracy": 0.35765625, "eval_wikibio_bleu_score": 4.715955204214835, "eval_wikibio_bleu_score_sem": 0.19740124186338762, "eval_wikibio_emb_cos_sim": 0.7062340378761292, "eval_wikibio_emb_cos_sim_sem": 0.006724205133960606, "eval_wikibio_emb_top1_equal": 0.17000000178813934, "eval_wikibio_emb_top1_equal_sem": 0.016815633120741882, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.3458058834075928, "eval_wikibio_n_ngrams_match_1": 7.826, "eval_wikibio_n_ngrams_match_2": 2.344, "eval_wikibio_n_ngrams_match_3": 0.886, "eval_wikibio_num_pred_words": 29.11, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 28.383440195847996, "eval_wikibio_pred_num_tokens": 61.578125, "eval_wikibio_rouge_score": 0.2898479176930867, "eval_wikibio_runtime": 8.5106, "eval_wikibio_samples_per_second": 58.751, "eval_wikibio_steps_per_second": 0.118, "eval_wikibio_token_set_f1": 0.26292399366093444, "eval_wikibio_token_set_f1_sem": 0.0069327366813689865, "eval_wikibio_token_set_precision": 0.25331391833809414, "eval_wikibio_token_set_recall": 0.29977228313329735, "eval_wikibio_true_num_tokens": 61.1328125, "step": 1953 }, { "epoch": 8.6, "eval_bias-bios_accuracy": 0.5214375, "eval_bias-bios_bleu_score": 19.096584666724308, "eval_bias-bios_bleu_score_sem": 0.8134426930137699, "eval_bias-bios_emb_cos_sim": 0.8831885457038879, "eval_bias-bios_emb_cos_sim_sem": 0.0026979201593416726, "eval_bias-bios_emb_top1_equal": 0.328000009059906, "eval_bias-bios_emb_top1_equal_sem": 0.02101702640661987, "eval_bias-bios_exact_match": 0.0, "eval_bias-bios_exact_match_sem": 0.0, "eval_bias-bios_loss": 1.730255365371704, "eval_bias-bios_n_ngrams_match_1": 22.402, "eval_bias-bios_n_ngrams_match_2": 10.26, "eval_bias-bios_n_ngrams_match_3": 5.776, "eval_bias-bios_num_pred_words": 43.138, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 5.642094520043632, "eval_bias-bios_pred_num_tokens": 57.28125, "eval_bias-bios_rouge_score": 0.5385177520846558, "eval_bias-bios_runtime": 10.0512, "eval_bias-bios_samples_per_second": 49.745, "eval_bias-bios_steps_per_second": 0.099, "eval_bias-bios_token_set_f1": 0.561331969332709, "eval_bias-bios_token_set_f1_sem": 0.006692673176916008, "eval_bias-bios_token_set_precision": 0.5433084362152154, "eval_bias-bios_token_set_recall": 0.5896360367294446, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 1953 }, { "epoch": 8.62, "learning_rate": 0.001, "loss": 1.7706, "step": 1956 }, { "epoch": 8.67, "learning_rate": 0.001, "loss": 1.6184, "step": 1968 }, { "epoch": 8.72, "learning_rate": 0.001, "loss": 1.9561, "step": 1980 }, { "epoch": 8.74, "eval_ag_news_accuracy": 0.2984375, "eval_ag_news_bleu_score": 4.711518363848454, "eval_ag_news_bleu_score_sem": 0.1532454630156928, "eval_ag_news_emb_cos_sim": 0.8103645443916321, "eval_ag_news_emb_cos_sim_sem": 0.004439122889552799, "eval_ag_news_emb_top1_equal": 0.2540000081062317, "eval_ag_news_emb_top1_equal_sem": 0.019486597059300604, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5887601375579834, "eval_ag_news_n_ngrams_match_1": 13.628, "eval_ag_news_n_ngrams_match_2": 2.846, "eval_ag_news_n_ngrams_match_3": 0.774, "eval_ag_news_num_pred_words": 42.722, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 36.18917849568472, "eval_ag_news_pred_num_tokens": 62.578125, "eval_ag_news_rouge_score": 0.3583171385544124, "eval_ag_news_runtime": 8.9128, "eval_ag_news_samples_per_second": 56.099, "eval_ag_news_steps_per_second": 0.112, "eval_ag_news_token_set_f1": 0.3518442756676556, "eval_ag_news_token_set_f1_sem": 0.004538612728904295, "eval_ag_news_token_set_precision": 0.33050553019565915, "eval_ag_news_token_set_recall": 0.39305225223561946, "eval_ag_news_true_num_tokens": 56.09375, "step": 1984 }, { "epoch": 8.74, "eval_anthropic_toxic_prompts_accuracy": 0.10534375, "eval_anthropic_toxic_prompts_bleu_score": 3.4700986196870534, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12543073827880039, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6862208843231201, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004841854845643796, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.13600000739097595, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015345323732734733, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.1650032997131348, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.18, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.874, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.682, "eval_anthropic_toxic_prompts_num_pred_words": 41.556, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 23.688822199019885, "eval_anthropic_toxic_prompts_pred_num_tokens": 60.3515625, "eval_anthropic_toxic_prompts_rouge_score": 0.24235614608709247, "eval_anthropic_toxic_prompts_runtime": 13.8753, "eval_anthropic_toxic_prompts_samples_per_second": 36.035, "eval_anthropic_toxic_prompts_steps_per_second": 0.072, "eval_anthropic_toxic_prompts_token_set_f1": 0.324157927386135, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005188058466257896, "eval_anthropic_toxic_prompts_token_set_precision": 0.45317186235301055, "eval_anthropic_toxic_prompts_token_set_recall": 0.27235036851456745, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 1984 }, { "epoch": 8.74, "eval_arxiv_accuracy": 0.4249375, "eval_arxiv_bleu_score": 4.5550681043846515, "eval_arxiv_bleu_score_sem": 0.12490632330266863, "eval_arxiv_emb_cos_sim": 0.7511861324310303, "eval_arxiv_emb_cos_sim_sem": 0.005204222942380189, "eval_arxiv_emb_top1_equal": 0.2720000147819519, "eval_arxiv_emb_top1_equal_sem": 0.019920483557355567, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.0397071838378906, "eval_arxiv_n_ngrams_match_1": 15.462, "eval_arxiv_n_ngrams_match_2": 3.074, "eval_arxiv_n_ngrams_match_3": 0.686, "eval_arxiv_num_pred_words": 39.26, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 20.89912273813464, "eval_arxiv_pred_num_tokens": 62.1875, "eval_arxiv_rouge_score": 0.3649035759644474, "eval_arxiv_runtime": 9.0376, "eval_arxiv_samples_per_second": 55.324, "eval_arxiv_steps_per_second": 0.111, "eval_arxiv_token_set_f1": 0.37007363238592483, "eval_arxiv_token_set_f1_sem": 0.004455040383833329, "eval_arxiv_token_set_precision": 0.3223344413721305, "eval_arxiv_token_set_recall": 0.45127944355291727, "eval_arxiv_true_num_tokens": 64.0, "step": 1984 }, { "epoch": 8.74, "eval_python_code_alpaca_accuracy": 0.15215625, "eval_python_code_alpaca_bleu_score": 5.232190399641075, "eval_python_code_alpaca_bleu_score_sem": 0.17118683274174132, "eval_python_code_alpaca_emb_cos_sim": 0.7645835280418396, "eval_python_code_alpaca_emb_cos_sim_sem": 0.0038300940938292218, "eval_python_code_alpaca_emb_top1_equal": 0.1720000058412552, "eval_python_code_alpaca_emb_top1_equal_sem": 0.01689386850274998, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.7257423400878906, "eval_python_code_alpaca_n_ngrams_match_1": 9.856, "eval_python_code_alpaca_n_ngrams_match_2": 2.808, "eval_python_code_alpaca_n_ngrams_match_3": 0.948, "eval_python_code_alpaca_num_pred_words": 38.478, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 15.267743579221799, "eval_python_code_alpaca_pred_num_tokens": 59.984375, "eval_python_code_alpaca_rouge_score": 0.38401352446211545, "eval_python_code_alpaca_runtime": 8.9935, "eval_python_code_alpaca_samples_per_second": 55.596, "eval_python_code_alpaca_steps_per_second": 0.111, "eval_python_code_alpaca_token_set_f1": 0.4612661815110851, "eval_python_code_alpaca_token_set_f1_sem": 0.005052258479426189, "eval_python_code_alpaca_token_set_precision": 0.5531760300949725, "eval_python_code_alpaca_token_set_recall": 0.41410391470065155, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 1984 }, { "epoch": 8.74, "eval_wikibio_accuracy": 0.36490625, "eval_wikibio_bleu_score": 5.001279621214367, "eval_wikibio_bleu_score_sem": 0.1826826365219488, "eval_wikibio_emb_cos_sim": 0.7376046776771545, "eval_wikibio_emb_cos_sim_sem": 0.005935431880873798, "eval_wikibio_emb_top1_equal": 0.1720000058412552, "eval_wikibio_emb_top1_equal_sem": 0.016893869835550357, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.3209877014160156, "eval_wikibio_n_ngrams_match_1": 9.054, "eval_wikibio_n_ngrams_match_2": 2.696, "eval_wikibio_n_ngrams_match_3": 0.962, "eval_wikibio_num_pred_words": 33.432, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 27.687684222458053, "eval_wikibio_pred_num_tokens": 62.8203125, "eval_wikibio_rouge_score": 0.3192760141503558, "eval_wikibio_runtime": 9.256, "eval_wikibio_samples_per_second": 54.019, "eval_wikibio_steps_per_second": 0.108, "eval_wikibio_token_set_f1": 0.29227717249567686, "eval_wikibio_token_set_f1_sem": 0.006152932662747309, "eval_wikibio_token_set_precision": 0.2899212240419065, "eval_wikibio_token_set_recall": 0.31631044337697983, "eval_wikibio_true_num_tokens": 61.1328125, "step": 1984 }, { "epoch": 8.74, "eval_bias-bios_accuracy": 0.51871875, "eval_bias-bios_bleu_score": 18.589985258771534, "eval_bias-bios_bleu_score_sem": 0.7979796988758581, "eval_bias-bios_emb_cos_sim": 0.87850421667099, "eval_bias-bios_emb_cos_sim_sem": 0.00308360126703616, "eval_bias-bios_emb_top1_equal": 0.36399999260902405, "eval_bias-bios_emb_top1_equal_sem": 0.021539170945502367, "eval_bias-bios_exact_match": 0.002, "eval_bias-bios_exact_match_sem": 0.002, "eval_bias-bios_loss": 1.7309093475341797, "eval_bias-bios_n_ngrams_match_1": 22.936, "eval_bias-bios_n_ngrams_match_2": 10.564, "eval_bias-bios_n_ngrams_match_3": 5.958, "eval_bias-bios_num_pred_words": 46.596, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 5.645785556023036, "eval_bias-bios_pred_num_tokens": 61.7265625, "eval_bias-bios_rouge_score": 0.5292722044570275, "eval_bias-bios_runtime": 8.9433, "eval_bias-bios_samples_per_second": 55.908, "eval_bias-bios_steps_per_second": 0.112, "eval_bias-bios_token_set_f1": 0.5576777941607378, "eval_bias-bios_token_set_f1_sem": 0.006638163225364711, "eval_bias-bios_token_set_precision": 0.5529639838978428, "eval_bias-bios_token_set_recall": 0.5721815762275477, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 1984 }, { "epoch": 8.78, "learning_rate": 0.001, "loss": 1.9147, "step": 1992 }, { "epoch": 8.83, "learning_rate": 0.001, "loss": 1.7913, "step": 2004 }, { "epoch": 8.88, "eval_ag_news_accuracy": 0.2978125, "eval_ag_news_bleu_score": 4.281196440555999, "eval_ag_news_bleu_score_sem": 0.14904247872139, "eval_ag_news_emb_cos_sim": 0.79570472240448, "eval_ag_news_emb_cos_sim_sem": 0.005238732809288028, "eval_ag_news_emb_top1_equal": 0.2879999876022339, "eval_ag_news_emb_top1_equal_sem": 0.020271503192099565, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.697861909866333, "eval_ag_news_n_ngrams_match_1": 11.452, "eval_ag_news_n_ngrams_match_2": 2.336, "eval_ag_news_n_ngrams_match_3": 0.616, "eval_ag_news_num_pred_words": 30.118, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 40.36091676282594, "eval_ag_news_pred_num_tokens": 46.9296875, "eval_ag_news_rouge_score": 0.35785020490253794, "eval_ag_news_runtime": 8.5687, "eval_ag_news_samples_per_second": 58.352, "eval_ag_news_steps_per_second": 0.117, "eval_ag_news_token_set_f1": 0.33766916509866857, "eval_ag_news_token_set_f1_sem": 0.00485462271797604, "eval_ag_news_token_set_precision": 0.29302357484649894, "eval_ag_news_token_set_recall": 0.4163851421960052, "eval_ag_news_true_num_tokens": 56.09375, "step": 2015 }, { "epoch": 8.88, "eval_anthropic_toxic_prompts_accuracy": 0.10834375, "eval_anthropic_toxic_prompts_bleu_score": 5.503328095222172, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.2081873652662082, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6974567770957947, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.005060578715972177, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.15399999916553497, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.016158283980625493, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.032620668411255, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.684, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.742, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.648, "eval_anthropic_toxic_prompts_num_pred_words": 25.888, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 20.751544308456207, "eval_anthropic_toxic_prompts_pred_num_tokens": 36.640625, "eval_anthropic_toxic_prompts_rouge_score": 0.31427257546174336, "eval_anthropic_toxic_prompts_runtime": 8.3584, "eval_anthropic_toxic_prompts_samples_per_second": 59.82, "eval_anthropic_toxic_prompts_steps_per_second": 0.12, "eval_anthropic_toxic_prompts_token_set_f1": 0.34967349844245615, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005804893266715575, "eval_anthropic_toxic_prompts_token_set_precision": 0.4261249444076511, "eval_anthropic_toxic_prompts_token_set_recall": 0.32047917547306215, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 2015 }, { "epoch": 8.88, "eval_arxiv_accuracy": 0.41240625, "eval_arxiv_bleu_score": 3.8906961921992056, "eval_arxiv_bleu_score_sem": 0.11585900533149257, "eval_arxiv_emb_cos_sim": 0.7511284947395325, "eval_arxiv_emb_cos_sim_sem": 0.004930338129734693, "eval_arxiv_emb_top1_equal": 0.20399999618530273, "eval_arxiv_emb_top1_equal_sem": 0.018039369108186407, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.1257741451263428, "eval_arxiv_n_ngrams_match_1": 13.67, "eval_arxiv_n_ngrams_match_2": 2.592, "eval_arxiv_n_ngrams_match_3": 0.59, "eval_arxiv_num_pred_words": 30.658, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 22.777521377159953, "eval_arxiv_pred_num_tokens": 50.5703125, "eval_arxiv_rouge_score": 0.3649159407180931, "eval_arxiv_runtime": 9.1547, "eval_arxiv_samples_per_second": 54.617, "eval_arxiv_steps_per_second": 0.109, "eval_arxiv_token_set_f1": 0.3608202993888602, "eval_arxiv_token_set_f1_sem": 0.004479796341346729, "eval_arxiv_token_set_precision": 0.30272902785643907, "eval_arxiv_token_set_recall": 0.46174145285100326, "eval_arxiv_true_num_tokens": 64.0, "step": 2015 }, { "epoch": 8.88, "eval_python_code_alpaca_accuracy": 0.15665625, "eval_python_code_alpaca_bleu_score": 7.371272378741536, "eval_python_code_alpaca_bleu_score_sem": 0.23850916870772282, "eval_python_code_alpaca_emb_cos_sim": 0.7900562882423401, "eval_python_code_alpaca_emb_cos_sim_sem": 0.0038222738876293495, "eval_python_code_alpaca_emb_top1_equal": 0.21400000154972076, "eval_python_code_alpaca_emb_top1_equal_sem": 0.018359796975924752, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.597472906112671, "eval_python_code_alpaca_n_ngrams_match_1": 9.35, "eval_python_code_alpaca_n_ngrams_match_2": 2.518, "eval_python_code_alpaca_n_ngrams_match_3": 0.796, "eval_python_code_alpaca_num_pred_words": 25.446, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 13.429756859807119, "eval_python_code_alpaca_pred_num_tokens": 39.7109375, "eval_python_code_alpaca_rouge_score": 0.46722847470855877, "eval_python_code_alpaca_runtime": 8.8159, "eval_python_code_alpaca_samples_per_second": 56.715, "eval_python_code_alpaca_steps_per_second": 0.113, "eval_python_code_alpaca_token_set_f1": 0.49105008465089256, "eval_python_code_alpaca_token_set_f1_sem": 0.005520499061597566, "eval_python_code_alpaca_token_set_precision": 0.5310244980548657, "eval_python_code_alpaca_token_set_recall": 0.47358586937672026, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 2015 }, { "epoch": 8.88, "eval_wikibio_accuracy": 0.35096875, "eval_wikibio_bleu_score": 5.4778327942374, "eval_wikibio_bleu_score_sem": 0.21824048592552908, "eval_wikibio_emb_cos_sim": 0.729748547077179, "eval_wikibio_emb_cos_sim_sem": 0.006024370316292732, "eval_wikibio_emb_top1_equal": 0.16599999368190765, "eval_wikibio_emb_top1_equal_sem": 0.01665661670800958, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.4867711067199707, "eval_wikibio_n_ngrams_match_1": 8.13, "eval_wikibio_n_ngrams_match_2": 2.482, "eval_wikibio_n_ngrams_match_3": 0.888, "eval_wikibio_num_pred_words": 27.738, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 32.68025610528006, "eval_wikibio_pred_num_tokens": 53.921875, "eval_wikibio_rouge_score": 0.3262775647584429, "eval_wikibio_runtime": 8.7759, "eval_wikibio_samples_per_second": 56.974, "eval_wikibio_steps_per_second": 0.114, "eval_wikibio_token_set_f1": 0.28553711803580656, "eval_wikibio_token_set_f1_sem": 0.006105552369558703, "eval_wikibio_token_set_precision": 0.2709517628431276, "eval_wikibio_token_set_recall": 0.3200790729630114, "eval_wikibio_true_num_tokens": 61.1328125, "step": 2015 }, { "epoch": 8.88, "eval_bias-bios_accuracy": 0.5211875, "eval_bias-bios_bleu_score": 19.690638801615393, "eval_bias-bios_bleu_score_sem": 0.8750117263932384, "eval_bias-bios_emb_cos_sim": 0.8750105500221252, "eval_bias-bios_emb_cos_sim_sem": 0.0031565084463454018, "eval_bias-bios_emb_top1_equal": 0.3140000104904175, "eval_bias-bios_emb_top1_equal_sem": 0.020776702507015268, "eval_bias-bios_exact_match": 0.004, "eval_bias-bios_exact_match_sem": 0.002825591608118863, "eval_bias-bios_loss": 1.7455610036849976, "eval_bias-bios_n_ngrams_match_1": 20.688, "eval_bias-bios_n_ngrams_match_2": 9.66, "eval_bias-bios_n_ngrams_match_3": 5.574, "eval_bias-bios_num_pred_words": 34.442, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 5.729114628460026, "eval_bias-bios_pred_num_tokens": 46.53125, "eval_bias-bios_rouge_score": 0.556540741298637, "eval_bias-bios_runtime": 8.9271, "eval_bias-bios_samples_per_second": 56.009, "eval_bias-bios_steps_per_second": 0.112, "eval_bias-bios_token_set_f1": 0.564996792506389, "eval_bias-bios_token_set_f1_sem": 0.006915302425054524, "eval_bias-bios_token_set_precision": 0.519362727301722, "eval_bias-bios_token_set_recall": 0.6305880276758675, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 2015 }, { "epoch": 8.88, "learning_rate": 0.001, "loss": 1.5533, "step": 2016 }, { "epoch": 8.93, "learning_rate": 0.001, "loss": 1.9502, "step": 2028 }, { "epoch": 8.99, "learning_rate": 0.001, "loss": 1.7723, "step": 2040 }, { "epoch": 9.01, "eval_ag_news_accuracy": 0.2991875, "eval_ag_news_bleu_score": 4.30447564205776, "eval_ag_news_bleu_score_sem": 0.1472288899842663, "eval_ag_news_emb_cos_sim": 0.7993799448013306, "eval_ag_news_emb_cos_sim_sem": 0.00450738725875599, "eval_ag_news_emb_top1_equal": 0.2639999985694885, "eval_ag_news_emb_top1_equal_sem": 0.019732885240582997, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.6647632122039795, "eval_ag_news_n_ngrams_match_1": 11.316, "eval_ag_news_n_ngrams_match_2": 2.278, "eval_ag_news_n_ngrams_match_3": 0.614, "eval_ag_news_num_pred_words": 31.038, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 39.046889241518514, "eval_ag_news_pred_num_tokens": 45.71875, "eval_ag_news_rouge_score": 0.34739004258080053, "eval_ag_news_runtime": 8.7567, "eval_ag_news_samples_per_second": 57.099, "eval_ag_news_steps_per_second": 0.114, "eval_ag_news_token_set_f1": 0.3335748310879103, "eval_ag_news_token_set_f1_sem": 0.004691210695222084, "eval_ag_news_token_set_precision": 0.2911811558728018, "eval_ag_news_token_set_recall": 0.40806454549989735, "eval_ag_news_true_num_tokens": 56.09375, "step": 2046 }, { "epoch": 9.01, "eval_anthropic_toxic_prompts_accuracy": 0.111, "eval_anthropic_toxic_prompts_bleu_score": 5.890646812535753, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.22763323642991268, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6962231993675232, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.005150838289762755, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1679999977350235, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.016736554076096456, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.001394510269165, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.268, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.514, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.542, "eval_anthropic_toxic_prompts_num_pred_words": 22.254, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 20.11356594952904, "eval_anthropic_toxic_prompts_pred_num_tokens": 31.28125, "eval_anthropic_toxic_prompts_rouge_score": 0.3268142602299797, "eval_anthropic_toxic_prompts_runtime": 7.0964, "eval_anthropic_toxic_prompts_samples_per_second": 70.459, "eval_anthropic_toxic_prompts_steps_per_second": 0.141, "eval_anthropic_toxic_prompts_token_set_f1": 0.3395884278036088, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0062451772266664705, "eval_anthropic_toxic_prompts_token_set_precision": 0.4012655734234844, "eval_anthropic_toxic_prompts_token_set_recall": 0.32073276005169205, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 2046 }, { "epoch": 9.01, "eval_arxiv_accuracy": 0.41378125, "eval_arxiv_bleu_score": 3.6087119904118774, "eval_arxiv_bleu_score_sem": 0.0994488717730809, "eval_arxiv_emb_cos_sim": 0.7521159052848816, "eval_arxiv_emb_cos_sim_sem": 0.004161717817917032, "eval_arxiv_emb_top1_equal": 0.20000000298023224, "eval_arxiv_emb_top1_equal_sem": 0.017906459589198134, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.1329307556152344, "eval_arxiv_n_ngrams_match_1": 13.584, "eval_arxiv_n_ngrams_match_2": 2.43, "eval_arxiv_n_ngrams_match_3": 0.492, "eval_arxiv_num_pred_words": 30.74, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 22.941115918528485, "eval_arxiv_pred_num_tokens": 49.3515625, "eval_arxiv_rouge_score": 0.36391134537924674, "eval_arxiv_runtime": 7.4784, "eval_arxiv_samples_per_second": 66.86, "eval_arxiv_steps_per_second": 0.134, "eval_arxiv_token_set_f1": 0.3633853973300188, "eval_arxiv_token_set_f1_sem": 0.004066752835657384, "eval_arxiv_token_set_precision": 0.30449794792905727, "eval_arxiv_token_set_recall": 0.4635120505169177, "eval_arxiv_true_num_tokens": 64.0, "step": 2046 }, { "epoch": 9.01, "eval_python_code_alpaca_accuracy": 0.1635625, "eval_python_code_alpaca_bleu_score": 7.749931235668791, "eval_python_code_alpaca_bleu_score_sem": 0.25897742129416856, "eval_python_code_alpaca_emb_cos_sim": 0.7957867980003357, "eval_python_code_alpaca_emb_cos_sim_sem": 0.0036585930067849024, "eval_python_code_alpaca_emb_top1_equal": 0.18400000035762787, "eval_python_code_alpaca_emb_top1_equal_sem": 0.017346174301986407, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.4820594787597656, "eval_python_code_alpaca_n_ngrams_match_1": 8.966, "eval_python_code_alpaca_n_ngrams_match_2": 2.316, "eval_python_code_alpaca_n_ngrams_match_3": 0.736, "eval_python_code_alpaca_num_pred_words": 23.268, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 11.96588253983069, "eval_python_code_alpaca_pred_num_tokens": 36.1875, "eval_python_code_alpaca_rouge_score": 0.4742782948523391, "eval_python_code_alpaca_runtime": 8.4943, "eval_python_code_alpaca_samples_per_second": 58.863, "eval_python_code_alpaca_steps_per_second": 0.118, "eval_python_code_alpaca_token_set_f1": 0.4923373640218391, "eval_python_code_alpaca_token_set_f1_sem": 0.005667700385172624, "eval_python_code_alpaca_token_set_precision": 0.5204886057719729, "eval_python_code_alpaca_token_set_recall": 0.4850051527947253, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 2046 }, { "epoch": 9.01, "eval_wikibio_accuracy": 0.35090625, "eval_wikibio_bleu_score": 5.393127917759537, "eval_wikibio_bleu_score_sem": 0.1942594604313065, "eval_wikibio_emb_cos_sim": 0.7504561543464661, "eval_wikibio_emb_cos_sim_sem": 0.004878221969874094, "eval_wikibio_emb_top1_equal": 0.15399999916553497, "eval_wikibio_emb_top1_equal_sem": 0.016158283980625493, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.490276575088501, "eval_wikibio_n_ngrams_match_1": 9.23, "eval_wikibio_n_ngrams_match_2": 2.8, "eval_wikibio_n_ngrams_match_3": 0.996, "eval_wikibio_num_pred_words": 33.05, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 32.79501673669647, "eval_wikibio_pred_num_tokens": 59.5625, "eval_wikibio_rouge_score": 0.3395944756830169, "eval_wikibio_runtime": 9.6425, "eval_wikibio_samples_per_second": 51.854, "eval_wikibio_steps_per_second": 0.104, "eval_wikibio_token_set_f1": 0.30709651202586963, "eval_wikibio_token_set_f1_sem": 0.005262512950887301, "eval_wikibio_token_set_precision": 0.3053592113623277, "eval_wikibio_token_set_recall": 0.3229125854108825, "eval_wikibio_true_num_tokens": 61.1328125, "step": 2046 }, { "epoch": 9.01, "eval_bias-bios_accuracy": 0.51809375, "eval_bias-bios_bleu_score": 18.31660780382149, "eval_bias-bios_bleu_score_sem": 0.8767916065522813, "eval_bias-bios_emb_cos_sim": 0.8683634400367737, "eval_bias-bios_emb_cos_sim_sem": 0.003222134537206428, "eval_bias-bios_emb_top1_equal": 0.3400000035762787, "eval_bias-bios_emb_top1_equal_sem": 0.021206117459812355, "eval_bias-bios_exact_match": 0.004, "eval_bias-bios_exact_match_sem": 0.002825591608118863, "eval_bias-bios_loss": 1.7671860456466675, "eval_bias-bios_n_ngrams_match_1": 19.582, "eval_bias-bios_n_ngrams_match_2": 8.978, "eval_bias-bios_n_ngrams_match_3": 5.232, "eval_bias-bios_num_pred_words": 32.48, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 5.8543562699255185, "eval_bias-bios_pred_num_tokens": 44.546875, "eval_bias-bios_rouge_score": 0.5402457226413455, "eval_bias-bios_runtime": 10.5114, "eval_bias-bios_samples_per_second": 47.568, "eval_bias-bios_steps_per_second": 0.095, "eval_bias-bios_token_set_f1": 0.5531321383277782, "eval_bias-bios_token_set_f1_sem": 0.007013550276523037, "eval_bias-bios_token_set_precision": 0.501690836944203, "eval_bias-bios_token_set_recall": 0.6315241644945384, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 2046 }, { "epoch": 9.04, "learning_rate": 0.001, "loss": 1.8543, "step": 2052 }, { "epoch": 9.09, "learning_rate": 0.001, "loss": 1.8932, "step": 2064 }, { "epoch": 9.15, "learning_rate": 0.001, "loss": 1.8602, "step": 2076 }, { "epoch": 9.15, "eval_ag_news_accuracy": 0.29896875, "eval_ag_news_bleu_score": 4.784236208697559, "eval_ag_news_bleu_score_sem": 0.15460864144388536, "eval_ag_news_emb_cos_sim": 0.8137236833572388, "eval_ag_news_emb_cos_sim_sem": 0.004588369542336808, "eval_ag_news_emb_top1_equal": 0.28600001335144043, "eval_ag_news_emb_top1_equal_sem": 0.020229345383440313, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.6021766662597656, "eval_ag_news_n_ngrams_match_1": 13.402, "eval_ag_news_n_ngrams_match_2": 2.744, "eval_ag_news_n_ngrams_match_3": 0.744, "eval_ag_news_num_pred_words": 41.766, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 36.6779833476461, "eval_ag_news_pred_num_tokens": 60.9921875, "eval_ag_news_rouge_score": 0.3599304555911331, "eval_ag_news_runtime": 7.6467, "eval_ag_news_samples_per_second": 65.388, "eval_ag_news_steps_per_second": 0.131, "eval_ag_news_token_set_f1": 0.34965679144064016, "eval_ag_news_token_set_f1_sem": 0.004684602734156572, "eval_ag_news_token_set_precision": 0.3302881209684665, "eval_ag_news_token_set_recall": 0.38649755707507244, "eval_ag_news_true_num_tokens": 56.09375, "step": 2077 }, { "epoch": 9.15, "eval_anthropic_toxic_prompts_accuracy": 0.1051875, "eval_anthropic_toxic_prompts_bleu_score": 3.964390298136568, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.14222773439203992, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6894749999046326, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004599546073080835, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1599999964237213, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01641154137506837, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.1385340690612793, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.052, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.904, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.714, "eval_anthropic_toxic_prompts_num_pred_words": 37.216, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 23.070022997931947, "eval_anthropic_toxic_prompts_pred_num_tokens": 53.09375, "eval_anthropic_toxic_prompts_rouge_score": 0.2580413938262478, "eval_anthropic_toxic_prompts_runtime": 9.432, "eval_anthropic_toxic_prompts_samples_per_second": 53.011, "eval_anthropic_toxic_prompts_steps_per_second": 0.106, "eval_anthropic_toxic_prompts_token_set_f1": 0.337508001789652, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005664451580114572, "eval_anthropic_toxic_prompts_token_set_precision": 0.44401354128900383, "eval_anthropic_toxic_prompts_token_set_recall": 0.2945522601233305, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 2077 }, { "epoch": 9.15, "eval_arxiv_accuracy": 0.4218125, "eval_arxiv_bleu_score": 4.489080872238089, "eval_arxiv_bleu_score_sem": 0.11303639098293934, "eval_arxiv_emb_cos_sim": 0.758951723575592, "eval_arxiv_emb_cos_sim_sem": 0.004843240958033718, "eval_arxiv_emb_top1_equal": 0.2680000066757202, "eval_arxiv_emb_top1_equal_sem": 0.019827715320059287, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.0513577461242676, "eval_arxiv_n_ngrams_match_1": 15.63, "eval_arxiv_n_ngrams_match_2": 3.016, "eval_arxiv_n_ngrams_match_3": 0.674, "eval_arxiv_num_pred_words": 39.66, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 21.1440331712043, "eval_arxiv_pred_num_tokens": 61.859375, "eval_arxiv_rouge_score": 0.3685153218719503, "eval_arxiv_runtime": 9.1478, "eval_arxiv_samples_per_second": 54.658, "eval_arxiv_steps_per_second": 0.109, "eval_arxiv_token_set_f1": 0.37061277220553285, "eval_arxiv_token_set_f1_sem": 0.004351648539318885, "eval_arxiv_token_set_precision": 0.32616954864681463, "eval_arxiv_token_set_recall": 0.44792024372594574, "eval_arxiv_true_num_tokens": 64.0, "step": 2077 }, { "epoch": 9.15, "eval_python_code_alpaca_accuracy": 0.15096875, "eval_python_code_alpaca_bleu_score": 5.555247370237761, "eval_python_code_alpaca_bleu_score_sem": 0.177653537319343, "eval_python_code_alpaca_emb_cos_sim": 0.7676288485527039, "eval_python_code_alpaca_emb_cos_sim_sem": 0.0038828899818810327, "eval_python_code_alpaca_emb_top1_equal": 0.19200000166893005, "eval_python_code_alpaca_emb_top1_equal_sem": 0.01763218126724194, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.7803382873535156, "eval_python_code_alpaca_n_ngrams_match_1": 9.9, "eval_python_code_alpaca_n_ngrams_match_2": 2.82, "eval_python_code_alpaca_n_ngrams_match_3": 0.944, "eval_python_code_alpaca_num_pred_words": 36.2, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 16.124474731386275, "eval_python_code_alpaca_pred_num_tokens": 56.4921875, "eval_python_code_alpaca_rouge_score": 0.3968651941156845, "eval_python_code_alpaca_runtime": 9.8269, "eval_python_code_alpaca_samples_per_second": 50.881, "eval_python_code_alpaca_steps_per_second": 0.102, "eval_python_code_alpaca_token_set_f1": 0.4758023187041246, "eval_python_code_alpaca_token_set_f1_sem": 0.005450858689322465, "eval_python_code_alpaca_token_set_precision": 0.5504250236468058, "eval_python_code_alpaca_token_set_recall": 0.4366312452197343, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 2077 }, { "epoch": 9.15, "eval_wikibio_accuracy": 0.36121875, "eval_wikibio_bleu_score": 4.974402129355878, "eval_wikibio_bleu_score_sem": 0.20370528008271044, "eval_wikibio_emb_cos_sim": 0.7261828780174255, "eval_wikibio_emb_cos_sim_sem": 0.005940349247857084, "eval_wikibio_emb_top1_equal": 0.15399999916553497, "eval_wikibio_emb_top1_equal_sem": 0.016158283980625493, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.302112340927124, "eval_wikibio_n_ngrams_match_1": 8.336, "eval_wikibio_n_ngrams_match_2": 2.586, "eval_wikibio_n_ngrams_match_3": 0.944, "eval_wikibio_num_pred_words": 30.934, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 27.169970588188452, "eval_wikibio_pred_num_tokens": 62.8515625, "eval_wikibio_rouge_score": 0.3019658582188841, "eval_wikibio_runtime": 10.5935, "eval_wikibio_samples_per_second": 47.199, "eval_wikibio_steps_per_second": 0.094, "eval_wikibio_token_set_f1": 0.27333695218102066, "eval_wikibio_token_set_f1_sem": 0.00662283612815362, "eval_wikibio_token_set_precision": 0.26494879182019215, "eval_wikibio_token_set_recall": 0.30625239575202884, "eval_wikibio_true_num_tokens": 61.1328125, "step": 2077 }, { "epoch": 9.15, "eval_bias-bios_accuracy": 0.52196875, "eval_bias-bios_bleu_score": 19.35869072751502, "eval_bias-bios_bleu_score_sem": 0.8266302646813208, "eval_bias-bios_emb_cos_sim": 0.8836058974266052, "eval_bias-bios_emb_cos_sim_sem": 0.0028305407938486794, "eval_bias-bios_emb_top1_equal": 0.33799999952316284, "eval_bias-bios_emb_top1_equal_sem": 0.02117566563684607, "eval_bias-bios_exact_match": 0.0, "eval_bias-bios_exact_match_sem": 0.0, "eval_bias-bios_loss": 1.7248351573944092, "eval_bias-bios_n_ngrams_match_1": 22.862, "eval_bias-bios_n_ngrams_match_2": 10.558, "eval_bias-bios_n_ngrams_match_3": 5.998, "eval_bias-bios_num_pred_words": 44.866, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 5.611595923353254, "eval_bias-bios_pred_num_tokens": 59.171875, "eval_bias-bios_rouge_score": 0.5393844436690122, "eval_bias-bios_runtime": 9.8175, "eval_bias-bios_samples_per_second": 50.93, "eval_bias-bios_steps_per_second": 0.102, "eval_bias-bios_token_set_f1": 0.5655314980286692, "eval_bias-bios_token_set_f1_sem": 0.00664708407861911, "eval_bias-bios_token_set_precision": 0.5518918963501629, "eval_bias-bios_token_set_recall": 0.5894865301239679, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 2077 }, { "epoch": 9.2, "learning_rate": 0.001, "loss": 1.5989, "step": 2088 }, { "epoch": 9.25, "learning_rate": 0.001, "loss": 1.7685, "step": 2100 }, { "epoch": 9.29, "eval_ag_news_accuracy": 0.2963125, "eval_ag_news_bleu_score": 4.742361383468992, "eval_ag_news_bleu_score_sem": 0.16001156054221022, "eval_ag_news_emb_cos_sim": 0.8078240156173706, "eval_ag_news_emb_cos_sim_sem": 0.004800064223487835, "eval_ag_news_emb_top1_equal": 0.2680000066757202, "eval_ag_news_emb_top1_equal_sem": 0.019827715320059287, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.606048822402954, "eval_ag_news_n_ngrams_match_1": 13.284, "eval_ag_news_n_ngrams_match_2": 2.808, "eval_ag_news_n_ngrams_match_3": 0.822, "eval_ag_news_num_pred_words": 42.118, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 36.82028154881562, "eval_ag_news_pred_num_tokens": 62.4296875, "eval_ag_news_rouge_score": 0.3538727301681347, "eval_ag_news_runtime": 10.1283, "eval_ag_news_samples_per_second": 49.366, "eval_ag_news_steps_per_second": 0.099, "eval_ag_news_token_set_f1": 0.3470712549807732, "eval_ag_news_token_set_f1_sem": 0.004668509288448099, "eval_ag_news_token_set_precision": 0.32390128066439394, "eval_ag_news_token_set_recall": 0.3932473776896412, "eval_ag_news_true_num_tokens": 56.09375, "step": 2108 }, { "epoch": 9.29, "eval_anthropic_toxic_prompts_accuracy": 0.104625, "eval_anthropic_toxic_prompts_bleu_score": 3.816677653825337, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.14619119461894714, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6866650581359863, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004563866340643905, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.12999999523162842, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015055010489467818, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.0957064628601074, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.12, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.908, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.744, "eval_anthropic_toxic_prompts_num_pred_words": 40.54, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 22.102847864310338, "eval_anthropic_toxic_prompts_pred_num_tokens": 57.0703125, "eval_anthropic_toxic_prompts_rouge_score": 0.24599181752231808, "eval_anthropic_toxic_prompts_runtime": 9.553, "eval_anthropic_toxic_prompts_samples_per_second": 52.339, "eval_anthropic_toxic_prompts_steps_per_second": 0.105, "eval_anthropic_toxic_prompts_token_set_f1": 0.3365827504338421, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005591265270104003, "eval_anthropic_toxic_prompts_token_set_precision": 0.4504800028610584, "eval_anthropic_toxic_prompts_token_set_recall": 0.29152372627313916, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 2108 }, { "epoch": 9.29, "eval_arxiv_accuracy": 0.42315625, "eval_arxiv_bleu_score": 4.544794268192885, "eval_arxiv_bleu_score_sem": 0.12591766464787646, "eval_arxiv_emb_cos_sim": 0.7467482089996338, "eval_arxiv_emb_cos_sim_sem": 0.005067803826804646, "eval_arxiv_emb_top1_equal": 0.2639999985694885, "eval_arxiv_emb_top1_equal_sem": 0.019732885240582997, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.0610029697418213, "eval_arxiv_n_ngrams_match_1": 15.25, "eval_arxiv_n_ngrams_match_2": 2.944, "eval_arxiv_n_ngrams_match_3": 0.72, "eval_arxiv_num_pred_words": 39.27, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 21.3489587873246, "eval_arxiv_pred_num_tokens": 62.5234375, "eval_arxiv_rouge_score": 0.3583765104716006, "eval_arxiv_runtime": 9.2107, "eval_arxiv_samples_per_second": 54.285, "eval_arxiv_steps_per_second": 0.109, "eval_arxiv_token_set_f1": 0.36628400169248776, "eval_arxiv_token_set_f1_sem": 0.004341865017628588, "eval_arxiv_token_set_precision": 0.3198325113572577, "eval_arxiv_token_set_recall": 0.44349729752138284, "eval_arxiv_true_num_tokens": 64.0, "step": 2108 }, { "epoch": 9.29, "eval_python_code_alpaca_accuracy": 0.1504375, "eval_python_code_alpaca_bleu_score": 5.339483500068607, "eval_python_code_alpaca_bleu_score_sem": 0.17147023537882844, "eval_python_code_alpaca_emb_cos_sim": 0.7684195637702942, "eval_python_code_alpaca_emb_cos_sim_sem": 0.0035968480306156577, "eval_python_code_alpaca_emb_top1_equal": 0.16599999368190765, "eval_python_code_alpaca_emb_top1_equal_sem": 0.016656615375209204, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.650757074356079, "eval_python_code_alpaca_n_ngrams_match_1": 9.746, "eval_python_code_alpaca_n_ngrams_match_2": 2.816, "eval_python_code_alpaca_n_ngrams_match_3": 0.892, "eval_python_code_alpaca_num_pred_words": 37.3, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 14.164758362368579, "eval_python_code_alpaca_pred_num_tokens": 56.8671875, "eval_python_code_alpaca_rouge_score": 0.3932239250461642, "eval_python_code_alpaca_runtime": 9.2918, "eval_python_code_alpaca_samples_per_second": 53.811, "eval_python_code_alpaca_steps_per_second": 0.108, "eval_python_code_alpaca_token_set_f1": 0.4701602719431107, "eval_python_code_alpaca_token_set_f1_sem": 0.00509986315936943, "eval_python_code_alpaca_token_set_precision": 0.5467301722042317, "eval_python_code_alpaca_token_set_recall": 0.4302604505118198, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 2108 }, { "epoch": 9.29, "eval_wikibio_accuracy": 0.36921875, "eval_wikibio_bleu_score": 4.925348408283992, "eval_wikibio_bleu_score_sem": 0.18491607742008886, "eval_wikibio_emb_cos_sim": 0.7267881035804749, "eval_wikibio_emb_cos_sim_sem": 0.006228084188798708, "eval_wikibio_emb_top1_equal": 0.17599999904632568, "eval_wikibio_emb_top1_equal_sem": 0.017047853594066943, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.313004732131958, "eval_wikibio_n_ngrams_match_1": 8.734, "eval_wikibio_n_ngrams_match_2": 2.648, "eval_wikibio_n_ngrams_match_3": 0.934, "eval_wikibio_num_pred_words": 32.936, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 27.467534184397877, "eval_wikibio_pred_num_tokens": 62.8828125, "eval_wikibio_rouge_score": 0.3109781321220584, "eval_wikibio_runtime": 8.1563, "eval_wikibio_samples_per_second": 61.302, "eval_wikibio_steps_per_second": 0.123, "eval_wikibio_token_set_f1": 0.28620409585063805, "eval_wikibio_token_set_f1_sem": 0.006271836547776463, "eval_wikibio_token_set_precision": 0.27991496187706066, "eval_wikibio_token_set_recall": 0.31658886375293016, "eval_wikibio_true_num_tokens": 61.1328125, "step": 2108 }, { "epoch": 9.29, "eval_bias-bios_accuracy": 0.519375, "eval_bias-bios_bleu_score": 18.704773391130743, "eval_bias-bios_bleu_score_sem": 0.7916581464984886, "eval_bias-bios_emb_cos_sim": 0.8804818391799927, "eval_bias-bios_emb_cos_sim_sem": 0.00273221327958854, "eval_bias-bios_emb_top1_equal": 0.3540000021457672, "eval_bias-bios_emb_top1_equal_sem": 0.021407582231685648, "eval_bias-bios_exact_match": 0.0, "eval_bias-bios_exact_match_sem": 0.0, "eval_bias-bios_loss": 1.734951138496399, "eval_bias-bios_n_ngrams_match_1": 22.898, "eval_bias-bios_n_ngrams_match_2": 10.586, "eval_bias-bios_n_ngrams_match_3": 5.936, "eval_bias-bios_num_pred_words": 46.224, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 5.668650818235701, "eval_bias-bios_pred_num_tokens": 61.2734375, "eval_bias-bios_rouge_score": 0.5303838349796598, "eval_bias-bios_runtime": 9.2802, "eval_bias-bios_samples_per_second": 53.878, "eval_bias-bios_steps_per_second": 0.108, "eval_bias-bios_token_set_f1": 0.5603213543459662, "eval_bias-bios_token_set_f1_sem": 0.006540485270971654, "eval_bias-bios_token_set_precision": 0.547956585093052, "eval_bias-bios_token_set_recall": 0.5836179991680309, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 2108 }, { "epoch": 9.3, "learning_rate": 0.001, "loss": 1.9071, "step": 2112 }, { "epoch": 9.36, "learning_rate": 0.001, "loss": 1.8768, "step": 2124 }, { "epoch": 9.41, "learning_rate": 0.001, "loss": 1.656, "step": 2136 }, { "epoch": 9.42, "eval_ag_news_accuracy": 0.29778125, "eval_ag_news_bleu_score": 4.602006773557938, "eval_ag_news_bleu_score_sem": 0.14925154576540084, "eval_ag_news_emb_cos_sim": 0.8055858612060547, "eval_ag_news_emb_cos_sim_sem": 0.00523150236725406, "eval_ag_news_emb_top1_equal": 0.2800000011920929, "eval_ag_news_emb_top1_equal_sem": 0.02009995045904072, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.692465305328369, "eval_ag_news_n_ngrams_match_1": 12.23, "eval_ag_news_n_ngrams_match_2": 2.478, "eval_ag_news_n_ngrams_match_3": 0.658, "eval_ag_news_num_pred_words": 33.72, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 40.14369152281874, "eval_ag_news_pred_num_tokens": 50.9296875, "eval_ag_news_rouge_score": 0.36367578767071507, "eval_ag_news_runtime": 8.1497, "eval_ag_news_samples_per_second": 61.352, "eval_ag_news_steps_per_second": 0.123, "eval_ag_news_token_set_f1": 0.3436143762432177, "eval_ag_news_token_set_f1_sem": 0.00480620750082883, "eval_ag_news_token_set_precision": 0.3081705922040173, "eval_ag_news_token_set_recall": 0.4031165490812298, "eval_ag_news_true_num_tokens": 56.09375, "step": 2139 }, { "epoch": 9.42, "eval_anthropic_toxic_prompts_accuracy": 0.10625, "eval_anthropic_toxic_prompts_bleu_score": 5.066131426147549, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.18945199564446616, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6970410346984863, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004803729424518879, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.15800000727176666, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01632805076118194, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.0718159675598145, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.762, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.792, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.712, "eval_anthropic_toxic_prompts_num_pred_words": 28.788, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 21.58105761272503, "eval_anthropic_toxic_prompts_pred_num_tokens": 41.046875, "eval_anthropic_toxic_prompts_rouge_score": 0.29877063955843924, "eval_anthropic_toxic_prompts_runtime": 9.0343, "eval_anthropic_toxic_prompts_samples_per_second": 55.344, "eval_anthropic_toxic_prompts_steps_per_second": 0.111, "eval_anthropic_toxic_prompts_token_set_f1": 0.3459221599188793, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005996514735593316, "eval_anthropic_toxic_prompts_token_set_precision": 0.4299008213906788, "eval_anthropic_toxic_prompts_token_set_recall": 0.31319329335249113, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 2139 }, { "epoch": 9.42, "eval_arxiv_accuracy": 0.41684375, "eval_arxiv_bleu_score": 4.282260926566434, "eval_arxiv_bleu_score_sem": 0.11797367084225056, "eval_arxiv_emb_cos_sim": 0.764333188533783, "eval_arxiv_emb_cos_sim_sem": 0.004310273080506548, "eval_arxiv_emb_top1_equal": 0.22200000286102295, "eval_arxiv_emb_top1_equal_sem": 0.01860441382553699, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.1021311283111572, "eval_arxiv_n_ngrams_match_1": 14.612, "eval_arxiv_n_ngrams_match_2": 2.786, "eval_arxiv_n_ngrams_match_3": 0.66, "eval_arxiv_num_pred_words": 33.678, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 22.245308408000255, "eval_arxiv_pred_num_tokens": 53.53125, "eval_arxiv_rouge_score": 0.37436243044694695, "eval_arxiv_runtime": 7.9047, "eval_arxiv_samples_per_second": 63.254, "eval_arxiv_steps_per_second": 0.127, "eval_arxiv_token_set_f1": 0.3727697548259072, "eval_arxiv_token_set_f1_sem": 0.0039577237427073746, "eval_arxiv_token_set_precision": 0.3203496820674592, "eval_arxiv_token_set_recall": 0.4578201411971948, "eval_arxiv_true_num_tokens": 64.0, "step": 2139 }, { "epoch": 9.42, "eval_python_code_alpaca_accuracy": 0.1565625, "eval_python_code_alpaca_bleu_score": 6.878507736852894, "eval_python_code_alpaca_bleu_score_sem": 0.22342270711564582, "eval_python_code_alpaca_emb_cos_sim": 0.7866730093955994, "eval_python_code_alpaca_emb_cos_sim_sem": 0.0038430362518699314, "eval_python_code_alpaca_emb_top1_equal": 0.21400000154972076, "eval_python_code_alpaca_emb_top1_equal_sem": 0.018359796975924752, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.614943742752075, "eval_python_code_alpaca_n_ngrams_match_1": 9.52, "eval_python_code_alpaca_n_ngrams_match_2": 2.618, "eval_python_code_alpaca_n_ngrams_match_3": 0.838, "eval_python_code_alpaca_num_pred_words": 27.932, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 13.666447519540702, "eval_python_code_alpaca_pred_num_tokens": 43.03125, "eval_python_code_alpaca_rouge_score": 0.44403574820208813, "eval_python_code_alpaca_runtime": 7.8167, "eval_python_code_alpaca_samples_per_second": 63.965, "eval_python_code_alpaca_steps_per_second": 0.128, "eval_python_code_alpaca_token_set_f1": 0.4872379721570455, "eval_python_code_alpaca_token_set_f1_sem": 0.0053259176733194856, "eval_python_code_alpaca_token_set_precision": 0.5347430179351469, "eval_python_code_alpaca_token_set_recall": 0.46333195077411227, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 2139 }, { "epoch": 9.42, "eval_wikibio_accuracy": 0.35528125, "eval_wikibio_bleu_score": 5.5707561638950605, "eval_wikibio_bleu_score_sem": 0.20852715862239565, "eval_wikibio_emb_cos_sim": 0.7497373223304749, "eval_wikibio_emb_cos_sim_sem": 0.005253118056934688, "eval_wikibio_emb_top1_equal": 0.1679999977350235, "eval_wikibio_emb_top1_equal_sem": 0.016736554076096456, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.4478862285614014, "eval_wikibio_n_ngrams_match_1": 8.882, "eval_wikibio_n_ngrams_match_2": 2.648, "eval_wikibio_n_ngrams_match_3": 0.968, "eval_wikibio_num_pred_words": 30.796, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 31.43387800196767, "eval_wikibio_pred_num_tokens": 56.734375, "eval_wikibio_rouge_score": 0.3351312538709482, "eval_wikibio_runtime": 8.6543, "eval_wikibio_samples_per_second": 57.775, "eval_wikibio_steps_per_second": 0.116, "eval_wikibio_token_set_f1": 0.30050983298174566, "eval_wikibio_token_set_f1_sem": 0.005767539340825794, "eval_wikibio_token_set_precision": 0.29309326972903244, "eval_wikibio_token_set_recall": 0.32473088203684336, "eval_wikibio_true_num_tokens": 61.1328125, "step": 2139 }, { "epoch": 9.42, "eval_bias-bios_accuracy": 0.524625, "eval_bias-bios_bleu_score": 19.932481798405632, "eval_bias-bios_bleu_score_sem": 0.8520770726409266, "eval_bias-bios_emb_cos_sim": 0.8823494911193848, "eval_bias-bios_emb_cos_sim_sem": 0.002854128028483915, "eval_bias-bios_emb_top1_equal": 0.3440000116825104, "eval_bias-bios_emb_top1_equal_sem": 0.021265758943789875, "eval_bias-bios_exact_match": 0.0, "eval_bias-bios_exact_match_sem": 0.0, "eval_bias-bios_loss": 1.7244471311569214, "eval_bias-bios_n_ngrams_match_1": 21.46, "eval_bias-bios_n_ngrams_match_2": 9.906, "eval_bias-bios_n_ngrams_match_3": 5.628, "eval_bias-bios_num_pred_words": 37.35, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 5.609418899299354, "eval_bias-bios_pred_num_tokens": 50.25, "eval_bias-bios_rouge_score": 0.5569036405399674, "eval_bias-bios_runtime": 8.2143, "eval_bias-bios_samples_per_second": 60.869, "eval_bias-bios_steps_per_second": 0.122, "eval_bias-bios_token_set_f1": 0.5652699740770379, "eval_bias-bios_token_set_f1_sem": 0.006749920522407458, "eval_bias-bios_token_set_precision": 0.5302845452267541, "eval_bias-bios_token_set_recall": 0.6149457522776463, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 2139 }, { "epoch": 9.46, "learning_rate": 0.001, "loss": 1.704, "step": 2148 }, { "epoch": 9.52, "learning_rate": 0.001, "loss": 1.914, "step": 2160 }, { "epoch": 9.56, "eval_ag_news_accuracy": 0.2976875, "eval_ag_news_bleu_score": 4.662313290914308, "eval_ag_news_bleu_score_sem": 0.14449223368201164, "eval_ag_news_emb_cos_sim": 0.8208157420158386, "eval_ag_news_emb_cos_sim_sem": 0.004164131852596115, "eval_ag_news_emb_top1_equal": 0.3179999887943268, "eval_ag_news_emb_top1_equal_sem": 0.02084757283415153, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.6028475761413574, "eval_ag_news_n_ngrams_match_1": 13.692, "eval_ag_news_n_ngrams_match_2": 2.838, "eval_ag_news_n_ngrams_match_3": 0.758, "eval_ag_news_num_pred_words": 42.6, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 36.70259922570546, "eval_ag_news_pred_num_tokens": 62.671875, "eval_ag_news_rouge_score": 0.3604608500886415, "eval_ag_news_runtime": 8.2458, "eval_ag_news_samples_per_second": 60.637, "eval_ag_news_steps_per_second": 0.121, "eval_ag_news_token_set_f1": 0.3520480981442656, "eval_ag_news_token_set_f1_sem": 0.004477106239124451, "eval_ag_news_token_set_precision": 0.33254875465414585, "eval_ag_news_token_set_recall": 0.39049022931431326, "eval_ag_news_true_num_tokens": 56.09375, "step": 2170 }, { "epoch": 9.56, "eval_anthropic_toxic_prompts_accuracy": 0.10446875, "eval_anthropic_toxic_prompts_bleu_score": 3.4787612684393774, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12096052765266524, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6876007318496704, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004465850535071452, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1379999965429306, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01543984193692329, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2157742977142334, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.258, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.948, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.752, "eval_anthropic_toxic_prompts_num_pred_words": 43.164, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.922581940318025, "eval_anthropic_toxic_prompts_pred_num_tokens": 62.0078125, "eval_anthropic_toxic_prompts_rouge_score": 0.23859345959584935, "eval_anthropic_toxic_prompts_runtime": 7.188, "eval_anthropic_toxic_prompts_samples_per_second": 69.56, "eval_anthropic_toxic_prompts_steps_per_second": 0.139, "eval_anthropic_toxic_prompts_token_set_f1": 0.3262175803043801, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005114010427873442, "eval_anthropic_toxic_prompts_token_set_precision": 0.4622129056213488, "eval_anthropic_toxic_prompts_token_set_recall": 0.27428680446357634, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 2170 }, { "epoch": 9.56, "eval_arxiv_accuracy": 0.42478125, "eval_arxiv_bleu_score": 4.5261234443477205, "eval_arxiv_bleu_score_sem": 0.12124453371716433, "eval_arxiv_emb_cos_sim": 0.760948657989502, "eval_arxiv_emb_cos_sim_sem": 0.004511617567146002, "eval_arxiv_emb_top1_equal": 0.3059999942779541, "eval_arxiv_emb_top1_equal_sem": 0.02062957067522617, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.0417191982269287, "eval_arxiv_n_ngrams_match_1": 15.466, "eval_arxiv_n_ngrams_match_2": 3.018, "eval_arxiv_n_ngrams_match_3": 0.668, "eval_arxiv_num_pred_words": 39.328, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 20.941214404121222, "eval_arxiv_pred_num_tokens": 62.7734375, "eval_arxiv_rouge_score": 0.3653128340776683, "eval_arxiv_runtime": 8.437, "eval_arxiv_samples_per_second": 59.263, "eval_arxiv_steps_per_second": 0.119, "eval_arxiv_token_set_f1": 0.37110011938398535, "eval_arxiv_token_set_f1_sem": 0.004186995057375558, "eval_arxiv_token_set_precision": 0.3248995450752447, "eval_arxiv_token_set_recall": 0.44557839135742766, "eval_arxiv_true_num_tokens": 64.0, "step": 2170 }, { "epoch": 9.56, "eval_python_code_alpaca_accuracy": 0.15040625, "eval_python_code_alpaca_bleu_score": 4.897251390417854, "eval_python_code_alpaca_bleu_score_sem": 0.15472477985283284, "eval_python_code_alpaca_emb_cos_sim": 0.7662152051925659, "eval_python_code_alpaca_emb_cos_sim_sem": 0.003257823266045889, "eval_python_code_alpaca_emb_top1_equal": 0.15600000321865082, "eval_python_code_alpaca_emb_top1_equal_sem": 0.01624363651663569, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.789936065673828, "eval_python_code_alpaca_n_ngrams_match_1": 9.924, "eval_python_code_alpaca_n_ngrams_match_2": 2.752, "eval_python_code_alpaca_n_ngrams_match_3": 0.896, "eval_python_code_alpaca_num_pred_words": 40.184, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 16.279978919032434, "eval_python_code_alpaca_pred_num_tokens": 61.9296875, "eval_python_code_alpaca_rouge_score": 0.3780064651438805, "eval_python_code_alpaca_runtime": 7.1858, "eval_python_code_alpaca_samples_per_second": 69.581, "eval_python_code_alpaca_steps_per_second": 0.139, "eval_python_code_alpaca_token_set_f1": 0.4557865078169055, "eval_python_code_alpaca_token_set_f1_sem": 0.004864382445439913, "eval_python_code_alpaca_token_set_precision": 0.5547829884131548, "eval_python_code_alpaca_token_set_recall": 0.40366635664875294, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 2170 }, { "epoch": 9.56, "eval_wikibio_accuracy": 0.36859375, "eval_wikibio_bleu_score": 5.140089255979534, "eval_wikibio_bleu_score_sem": 0.19882621034566597, "eval_wikibio_emb_cos_sim": 0.7363477945327759, "eval_wikibio_emb_cos_sim_sem": 0.005824229015191742, "eval_wikibio_emb_top1_equal": 0.17599999904632568, "eval_wikibio_emb_top1_equal_sem": 0.017047853594066943, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.316140651702881, "eval_wikibio_n_ngrams_match_1": 8.868, "eval_wikibio_n_ngrams_match_2": 2.662, "eval_wikibio_n_ngrams_match_3": 1.014, "eval_wikibio_num_pred_words": 32.484, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 27.553805361448823, "eval_wikibio_pred_num_tokens": 62.96875, "eval_wikibio_rouge_score": 0.3147196406028563, "eval_wikibio_runtime": 7.1483, "eval_wikibio_samples_per_second": 69.947, "eval_wikibio_steps_per_second": 0.14, "eval_wikibio_token_set_f1": 0.288499220535135, "eval_wikibio_token_set_f1_sem": 0.0062498790382535625, "eval_wikibio_token_set_precision": 0.2838371780069014, "eval_wikibio_token_set_recall": 0.31298672925789045, "eval_wikibio_true_num_tokens": 61.1328125, "step": 2170 }, { "epoch": 9.56, "eval_bias-bios_accuracy": 0.52065625, "eval_bias-bios_bleu_score": 18.525149542142778, "eval_bias-bios_bleu_score_sem": 0.7829670022152398, "eval_bias-bios_emb_cos_sim": 0.8845528960227966, "eval_bias-bios_emb_cos_sim_sem": 0.002710936454403237, "eval_bias-bios_emb_top1_equal": 0.3400000035762787, "eval_bias-bios_emb_top1_equal_sem": 0.02120611612701198, "eval_bias-bios_exact_match": 0.002, "eval_bias-bios_exact_match_sem": 0.002, "eval_bias-bios_loss": 1.7260913848876953, "eval_bias-bios_n_ngrams_match_1": 23.064, "eval_bias-bios_n_ngrams_match_2": 10.62, "eval_bias-bios_n_ngrams_match_3": 5.954, "eval_bias-bios_num_pred_words": 46.594, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 5.618649794138943, "eval_bias-bios_pred_num_tokens": 61.9375, "eval_bias-bios_rouge_score": 0.5330248094854184, "eval_bias-bios_runtime": 7.5812, "eval_bias-bios_samples_per_second": 65.952, "eval_bias-bios_steps_per_second": 0.132, "eval_bias-bios_token_set_f1": 0.5622923685230573, "eval_bias-bios_token_set_f1_sem": 0.006591145208419194, "eval_bias-bios_token_set_precision": 0.5571964713315781, "eval_bias-bios_token_set_recall": 0.5784252887077622, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 2170 }, { "epoch": 9.57, "learning_rate": 0.001, "loss": 1.8839, "step": 2172 }, { "epoch": 9.62, "learning_rate": 0.001, "loss": 1.7105, "step": 2184 }, { "epoch": 9.67, "learning_rate": 0.001, "loss": 1.618, "step": 2196 }, { "epoch": 9.7, "eval_ag_news_accuracy": 0.30134375, "eval_ag_news_bleu_score": 4.71286424512986, "eval_ag_news_bleu_score_sem": 0.15146065256691588, "eval_ag_news_emb_cos_sim": 0.8077859282493591, "eval_ag_news_emb_cos_sim_sem": 0.004614233200012415, "eval_ag_news_emb_top1_equal": 0.2980000078678131, "eval_ag_news_emb_top1_equal_sem": 0.020475119103777986, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.58840274810791, "eval_ag_news_n_ngrams_match_1": 12.806, "eval_ag_news_n_ngrams_match_2": 2.646, "eval_ag_news_n_ngrams_match_3": 0.714, "eval_ag_news_num_pred_words": 37.05, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 36.17624717597981, "eval_ag_news_pred_num_tokens": 55.4375, "eval_ag_news_rouge_score": 0.3616265811172079, "eval_ag_news_runtime": 8.2263, "eval_ag_news_samples_per_second": 60.781, "eval_ag_news_steps_per_second": 0.122, "eval_ag_news_token_set_f1": 0.34669352755643745, "eval_ag_news_token_set_f1_sem": 0.004696687693622286, "eval_ag_news_token_set_precision": 0.31449899140963855, "eval_ag_news_token_set_recall": 0.4041313425662957, "eval_ag_news_true_num_tokens": 56.09375, "step": 2201 }, { "epoch": 9.7, "eval_anthropic_toxic_prompts_accuracy": 0.105625, "eval_anthropic_toxic_prompts_bleu_score": 5.247525795504296, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.18949880274254094, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6975520849227905, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004827754150877183, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.15600000321865082, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01624363651663569, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.0993876457214355, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.908, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.898, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.724, "eval_anthropic_toxic_prompts_num_pred_words": 29.092, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 22.184362432018744, "eval_anthropic_toxic_prompts_pred_num_tokens": 40.7421875, "eval_anthropic_toxic_prompts_rouge_score": 0.2998625990799938, "eval_anthropic_toxic_prompts_runtime": 7.1556, "eval_anthropic_toxic_prompts_samples_per_second": 69.876, "eval_anthropic_toxic_prompts_steps_per_second": 0.14, "eval_anthropic_toxic_prompts_token_set_f1": 0.351969265526759, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005811501239388595, "eval_anthropic_toxic_prompts_token_set_precision": 0.436701861568256, "eval_anthropic_toxic_prompts_token_set_recall": 0.31632956198925377, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 2201 }, { "epoch": 9.7, "eval_arxiv_accuracy": 0.41996875, "eval_arxiv_bleu_score": 4.343432537348339, "eval_arxiv_bleu_score_sem": 0.12501379181401626, "eval_arxiv_emb_cos_sim": 0.7525537610054016, "eval_arxiv_emb_cos_sim_sem": 0.0055605577850193905, "eval_arxiv_emb_top1_equal": 0.23000000417232513, "eval_arxiv_emb_top1_equal_sem": 0.018839050665941787, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.061976909637451, "eval_arxiv_n_ngrams_match_1": 14.962, "eval_arxiv_n_ngrams_match_2": 2.88, "eval_arxiv_n_ngrams_match_3": 0.634, "eval_arxiv_num_pred_words": 35.798, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 21.369761518678352, "eval_arxiv_pred_num_tokens": 57.390625, "eval_arxiv_rouge_score": 0.3687784695393248, "eval_arxiv_runtime": 7.4289, "eval_arxiv_samples_per_second": 67.305, "eval_arxiv_steps_per_second": 0.135, "eval_arxiv_token_set_f1": 0.3704525693635287, "eval_arxiv_token_set_f1_sem": 0.004514654454041064, "eval_arxiv_token_set_precision": 0.31772398129801444, "eval_arxiv_token_set_recall": 0.4578272797297293, "eval_arxiv_true_num_tokens": 64.0, "step": 2201 }, { "epoch": 9.7, "eval_python_code_alpaca_accuracy": 0.15540625, "eval_python_code_alpaca_bleu_score": 6.813732587814551, "eval_python_code_alpaca_bleu_score_sem": 0.2352598823468727, "eval_python_code_alpaca_emb_cos_sim": 0.784164309501648, "eval_python_code_alpaca_emb_cos_sim_sem": 0.0037966154812114805, "eval_python_code_alpaca_emb_top1_equal": 0.18799999356269836, "eval_python_code_alpaca_emb_top1_equal_sem": 0.017490679184236527, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.620197296142578, "eval_python_code_alpaca_n_ngrams_match_1": 9.592, "eval_python_code_alpaca_n_ngrams_match_2": 2.604, "eval_python_code_alpaca_n_ngrams_match_3": 0.836, "eval_python_code_alpaca_num_pred_words": 28.782, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 13.738433857711156, "eval_python_code_alpaca_pred_num_tokens": 45.59375, "eval_python_code_alpaca_rouge_score": 0.44568837350944746, "eval_python_code_alpaca_runtime": 7.0229, "eval_python_code_alpaca_samples_per_second": 71.196, "eval_python_code_alpaca_steps_per_second": 0.142, "eval_python_code_alpaca_token_set_f1": 0.49671498346282555, "eval_python_code_alpaca_token_set_f1_sem": 0.005645253796668827, "eval_python_code_alpaca_token_set_precision": 0.5401766662770758, "eval_python_code_alpaca_token_set_recall": 0.4764558513237111, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 2201 }, { "epoch": 9.7, "eval_wikibio_accuracy": 0.3660625, "eval_wikibio_bleu_score": 5.173119518560294, "eval_wikibio_bleu_score_sem": 0.20925532304214553, "eval_wikibio_emb_cos_sim": 0.7356558442115784, "eval_wikibio_emb_cos_sim_sem": 0.005945926351025957, "eval_wikibio_emb_top1_equal": 0.1679999977350235, "eval_wikibio_emb_top1_equal_sem": 0.016736554076096456, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.33961820602417, "eval_wikibio_n_ngrams_match_1": 8.662, "eval_wikibio_n_ngrams_match_2": 2.558, "eval_wikibio_n_ngrams_match_3": 0.926, "eval_wikibio_num_pred_words": 31.312, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 28.208354869271222, "eval_wikibio_pred_num_tokens": 60.8671875, "eval_wikibio_rouge_score": 0.3186093278020351, "eval_wikibio_runtime": 7.6126, "eval_wikibio_samples_per_second": 65.681, "eval_wikibio_steps_per_second": 0.131, "eval_wikibio_token_set_f1": 0.2882805023245171, "eval_wikibio_token_set_f1_sem": 0.0063211158691996346, "eval_wikibio_token_set_precision": 0.27934300035101545, "eval_wikibio_token_set_recall": 0.3159991648885542, "eval_wikibio_true_num_tokens": 61.1328125, "step": 2201 }, { "epoch": 9.7, "eval_bias-bios_accuracy": 0.5226875, "eval_bias-bios_bleu_score": 19.813183783213244, "eval_bias-bios_bleu_score_sem": 0.876898543066567, "eval_bias-bios_emb_cos_sim": 0.8768750429153442, "eval_bias-bios_emb_cos_sim_sem": 0.0033438308734403625, "eval_bias-bios_emb_top1_equal": 0.3400000035762787, "eval_bias-bios_emb_top1_equal_sem": 0.021206117459812355, "eval_bias-bios_exact_match": 0.002, "eval_bias-bios_exact_match_sem": 0.002, "eval_bias-bios_loss": 1.7114582061767578, "eval_bias-bios_n_ngrams_match_1": 21.73, "eval_bias-bios_n_ngrams_match_2": 9.894, "eval_bias-bios_n_ngrams_match_3": 5.61, "eval_bias-bios_num_pred_words": 38.77, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 5.5370297245558175, "eval_bias-bios_pred_num_tokens": 52.84375, "eval_bias-bios_rouge_score": 0.5543715036682836, "eval_bias-bios_runtime": 7.8561, "eval_bias-bios_samples_per_second": 63.645, "eval_bias-bios_steps_per_second": 0.127, "eval_bias-bios_token_set_f1": 0.5675258217455457, "eval_bias-bios_token_set_f1_sem": 0.006914693327907323, "eval_bias-bios_token_set_precision": 0.5323334251485022, "eval_bias-bios_token_set_recall": 0.6194974352714948, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 2201 }, { "epoch": 9.73, "learning_rate": 0.001, "loss": 1.9294, "step": 2208 }, { "epoch": 9.78, "learning_rate": 0.001, "loss": 1.8799, "step": 2220 }, { "epoch": 9.83, "learning_rate": 0.001, "loss": 1.7465, "step": 2232 }, { "epoch": 9.83, "eval_ag_news_accuracy": 0.2993125, "eval_ag_news_bleu_score": 4.552771903506971, "eval_ag_news_bleu_score_sem": 0.1476329454929656, "eval_ag_news_emb_cos_sim": 0.8088976144790649, "eval_ag_news_emb_cos_sim_sem": 0.004970004601293117, "eval_ag_news_emb_top1_equal": 0.2720000147819519, "eval_ag_news_emb_top1_equal_sem": 0.019920483557355567, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.615370512008667, "eval_ag_news_n_ngrams_match_1": 12.698, "eval_ag_news_n_ngrams_match_2": 2.48, "eval_ag_news_n_ngrams_match_3": 0.602, "eval_ag_news_num_pred_words": 37.68, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 37.16511349581766, "eval_ag_news_pred_num_tokens": 55.8203125, "eval_ag_news_rouge_score": 0.35895464585649733, "eval_ag_news_runtime": 7.5991, "eval_ag_news_samples_per_second": 65.798, "eval_ag_news_steps_per_second": 0.132, "eval_ag_news_token_set_f1": 0.34484115121913256, "eval_ag_news_token_set_f1_sem": 0.004831306493464716, "eval_ag_news_token_set_precision": 0.3160684717490578, "eval_ag_news_token_set_recall": 0.39293489358284867, "eval_ag_news_true_num_tokens": 56.09375, "step": 2232 }, { "epoch": 9.83, "eval_anthropic_toxic_prompts_accuracy": 0.10490625, "eval_anthropic_toxic_prompts_bleu_score": 4.10741998542506, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.14748491850601175, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.692173421382904, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004653590795083951, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.17000000178813934, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.016815633120741882, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.105919361114502, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.944, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.78, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.616, "eval_anthropic_toxic_prompts_num_pred_words": 33.656, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 22.329738634739794, "eval_anthropic_toxic_prompts_pred_num_tokens": 48.140625, "eval_anthropic_toxic_prompts_rouge_score": 0.27032811691109593, "eval_anthropic_toxic_prompts_runtime": 8.4776, "eval_anthropic_toxic_prompts_samples_per_second": 58.979, "eval_anthropic_toxic_prompts_steps_per_second": 0.118, "eval_anthropic_toxic_prompts_token_set_f1": 0.33804776599997466, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005822921546573262, "eval_anthropic_toxic_prompts_token_set_precision": 0.43360700078282016, "eval_anthropic_toxic_prompts_token_set_recall": 0.3003765734989987, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 2232 }, { "epoch": 9.83, "eval_arxiv_accuracy": 0.42009375, "eval_arxiv_bleu_score": 4.3336030432396875, "eval_arxiv_bleu_score_sem": 0.11746066880184015, "eval_arxiv_emb_cos_sim": 0.7599811553955078, "eval_arxiv_emb_cos_sim_sem": 0.0044633248783609684, "eval_arxiv_emb_top1_equal": 0.24400000274181366, "eval_arxiv_emb_top1_equal_sem": 0.0192267343061996, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.081174373626709, "eval_arxiv_n_ngrams_match_1": 15.182, "eval_arxiv_n_ngrams_match_2": 2.782, "eval_arxiv_n_ngrams_match_3": 0.62, "eval_arxiv_num_pred_words": 36.522, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 21.783969900093172, "eval_arxiv_pred_num_tokens": 58.0078125, "eval_arxiv_rouge_score": 0.3726513634142933, "eval_arxiv_runtime": 7.4, "eval_arxiv_samples_per_second": 67.568, "eval_arxiv_steps_per_second": 0.135, "eval_arxiv_token_set_f1": 0.37323648339023796, "eval_arxiv_token_set_f1_sem": 0.004144558142027552, "eval_arxiv_token_set_precision": 0.3254934337714673, "eval_arxiv_token_set_recall": 0.4520853257568568, "eval_arxiv_true_num_tokens": 64.0, "step": 2232 }, { "epoch": 9.83, "eval_python_code_alpaca_accuracy": 0.1516875, "eval_python_code_alpaca_bleu_score": 5.6993663718319345, "eval_python_code_alpaca_bleu_score_sem": 0.1847532408187973, "eval_python_code_alpaca_emb_cos_sim": 0.7660696506500244, "eval_python_code_alpaca_emb_cos_sim_sem": 0.003645446597086736, "eval_python_code_alpaca_emb_top1_equal": 0.17800000309944153, "eval_python_code_alpaca_emb_top1_equal_sem": 0.01712362329538143, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.730116367340088, "eval_python_code_alpaca_n_ngrams_match_1": 9.632, "eval_python_code_alpaca_n_ngrams_match_2": 2.586, "eval_python_code_alpaca_n_ngrams_match_3": 0.846, "eval_python_code_alpaca_num_pred_words": 33.144, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 15.334671371003653, "eval_python_code_alpaca_pred_num_tokens": 53.25, "eval_python_code_alpaca_rouge_score": 0.412255658616845, "eval_python_code_alpaca_runtime": 7.191, "eval_python_code_alpaca_samples_per_second": 69.531, "eval_python_code_alpaca_steps_per_second": 0.139, "eval_python_code_alpaca_token_set_f1": 0.47325705061843437, "eval_python_code_alpaca_token_set_f1_sem": 0.005397233971667218, "eval_python_code_alpaca_token_set_precision": 0.5404911472732544, "eval_python_code_alpaca_token_set_recall": 0.43638181985541336, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 2232 }, { "epoch": 9.83, "eval_wikibio_accuracy": 0.36203125, "eval_wikibio_bleu_score": 5.0191180628988965, "eval_wikibio_bleu_score_sem": 0.19563339003404953, "eval_wikibio_emb_cos_sim": 0.7325459718704224, "eval_wikibio_emb_cos_sim_sem": 0.005758308708647951, "eval_wikibio_emb_top1_equal": 0.164000004529953, "eval_wikibio_emb_top1_equal_sem": 0.016575810354078253, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.3490078449249268, "eval_wikibio_n_ngrams_match_1": 8.456, "eval_wikibio_n_ngrams_match_2": 2.51, "eval_wikibio_n_ngrams_match_3": 0.916, "eval_wikibio_num_pred_words": 31.194, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 28.47446853593286, "eval_wikibio_pred_num_tokens": 61.1171875, "eval_wikibio_rouge_score": 0.31073899515076675, "eval_wikibio_runtime": 7.1404, "eval_wikibio_samples_per_second": 70.024, "eval_wikibio_steps_per_second": 0.14, "eval_wikibio_token_set_f1": 0.28317539584992235, "eval_wikibio_token_set_f1_sem": 0.006279021614211278, "eval_wikibio_token_set_precision": 0.27439931371489285, "eval_wikibio_token_set_recall": 0.31139234908194147, "eval_wikibio_true_num_tokens": 61.1328125, "step": 2232 }, { "epoch": 9.83, "eval_bias-bios_accuracy": 0.52453125, "eval_bias-bios_bleu_score": 19.547165413925843, "eval_bias-bios_bleu_score_sem": 0.8244631545646848, "eval_bias-bios_emb_cos_sim": 0.8838831186294556, "eval_bias-bios_emb_cos_sim_sem": 0.002780134283669016, "eval_bias-bios_emb_top1_equal": 0.3479999899864197, "eval_bias-bios_emb_top1_equal_sem": 0.021323727763296864, "eval_bias-bios_exact_match": 0.002, "eval_bias-bios_exact_match_sem": 0.002, "eval_bias-bios_loss": 1.7094721794128418, "eval_bias-bios_n_ngrams_match_1": 22.43, "eval_bias-bios_n_ngrams_match_2": 10.338, "eval_bias-bios_n_ngrams_match_3": 5.81, "eval_bias-bios_num_pred_words": 41.41, "eval_bias-bios_num_true_words": 40.706, "eval_bias-bios_perplexity": 5.526043947964353, "eval_bias-bios_pred_num_tokens": 57.0390625, "eval_bias-bios_rouge_score": 0.5483517989330391, "eval_bias-bios_runtime": 8.0059, "eval_bias-bios_samples_per_second": 62.454, "eval_bias-bios_steps_per_second": 0.125, "eval_bias-bios_token_set_f1": 0.5685466671620878, "eval_bias-bios_token_set_f1_sem": 0.006663139402344389, "eval_bias-bios_token_set_precision": 0.547655818008668, "eval_bias-bios_token_set_recall": 0.60030696946573, "eval_bias-bios_true_num_tokens": 55.1796875, "step": 2232 } ], "logging_steps": 12, "max_steps": 2270, "num_train_epochs": 10, "save_steps": 62, "total_flos": 1.1894837433768346e+17, "trial_name": null, "trial_params": null }