diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,14095 @@ +{ + "best_metric": 1.7094721794128418, + "best_model_checkpoint": "/home/nlp/matan_avitan/git/vec2text/saves/train_on_bios/output-checkpoin-259966/checkpoint-2232", + "epoch": 9.832599118942731, + "eval_steps": 31, + "global_step": 2232, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.05, + "learning_rate": 0.001, + "loss": 2.7429, + "step": 12 + }, + { + "epoch": 0.11, + "learning_rate": 0.001, + "loss": 2.5522, + "step": 24 + }, + { + "epoch": 0.14, + "eval_ag_news_accuracy": 0.31696875, + "eval_ag_news_bleu_score": 4.693673734147203, + "eval_ag_news_bleu_score_sem": 0.15842407029281247, + "eval_ag_news_emb_cos_sim": 0.8161767721176147, + "eval_ag_news_emb_cos_sim_sem": 0.004149520028885719, + "eval_ag_news_emb_top1_equal": 0.2919999957084656, + "eval_ag_news_emb_top1_equal_sem": 0.020354375386612028, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.4767088890075684, + "eval_ag_news_n_ngrams_match_1": 13.35, + "eval_ag_news_n_ngrams_match_2": 2.99, + "eval_ag_news_n_ngrams_match_3": 0.828, + "eval_ag_news_num_pred_words": 40.846, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 32.35306912734109, + "eval_ag_news_pred_num_tokens": 63.0, + "eval_ag_news_rouge_score": 0.3556909202875407, + "eval_ag_news_runtime": 10.7445, + "eval_ag_news_samples_per_second": 46.536, + "eval_ag_news_steps_per_second": 0.093, + "eval_ag_news_token_set_f1": 0.3507447087858559, + "eval_ag_news_token_set_f1_sem": 0.00481906834298769, + "eval_ag_news_token_set_precision": 0.32442194612796943, + "eval_ag_news_token_set_recall": 0.40673198714311515, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 31 + }, + { + "epoch": 0.14, + "eval_anthropic_toxic_prompts_accuracy": 0.11178125, + "eval_anthropic_toxic_prompts_bleu_score": 3.5514153333761613, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1376082181977126, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6868857741355896, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004651350024453609, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.12800000607967377, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.014955912783191019, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 3.0755512714385986, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.14, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.952, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.734, + "eval_anthropic_toxic_prompts_num_pred_words": 41.272, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 21.661820163365203, + "eval_anthropic_toxic_prompts_pred_num_tokens": 62.9296875, + "eval_anthropic_toxic_prompts_rouge_score": 0.24930881983630065, + "eval_anthropic_toxic_prompts_runtime": 6.6738, + "eval_anthropic_toxic_prompts_samples_per_second": 74.92, + "eval_anthropic_toxic_prompts_steps_per_second": 0.15, + "eval_anthropic_toxic_prompts_token_set_f1": 0.34382614982619525, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006466915071716929, + "eval_anthropic_toxic_prompts_token_set_precision": 0.4350558077494649, + "eval_anthropic_toxic_prompts_token_set_recall": 0.3121233769279072, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 31 + }, + { + "epoch": 0.14, + "eval_arxiv_accuracy": 0.40021875, + "eval_arxiv_bleu_score": 4.158222701612027, + "eval_arxiv_bleu_score_sem": 0.13321196613584635, + "eval_arxiv_emb_cos_sim": 0.7486079335212708, + "eval_arxiv_emb_cos_sim_sem": 0.006440433274933822, + "eval_arxiv_emb_top1_equal": 0.2460000067949295, + "eval_arxiv_emb_top1_equal_sem": 0.01927982107793324, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.041785717010498, + "eval_arxiv_n_ngrams_match_1": 14.34, + "eval_arxiv_n_ngrams_match_2": 2.772, + "eval_arxiv_n_ngrams_match_3": 0.628, + "eval_arxiv_num_pred_words": 36.496, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 20.94260743456068, + "eval_arxiv_pred_num_tokens": 63.0, + "eval_arxiv_rouge_score": 0.3550765439293301, + "eval_arxiv_runtime": 7.8486, + "eval_arxiv_samples_per_second": 63.705, + "eval_arxiv_steps_per_second": 0.127, + "eval_arxiv_token_set_f1": 0.35341010605025447, + "eval_arxiv_token_set_f1_sem": 0.005413458965286179, + "eval_arxiv_token_set_precision": 0.3008597523478743, + "eval_arxiv_token_set_recall": 0.46438469321665715, + "eval_arxiv_true_num_tokens": 64.0, + "step": 31 + }, + { + "epoch": 0.14, + "eval_python_code_alpaca_accuracy": 0.1539375, + "eval_python_code_alpaca_bleu_score": 4.375912111669264, + "eval_python_code_alpaca_bleu_score_sem": 0.14286387437922407, + "eval_python_code_alpaca_emb_cos_sim": 0.733892560005188, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.006095502538702667, + "eval_python_code_alpaca_emb_top1_equal": 0.16200000047683716, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.016494123019099097, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.7376487255096436, + "eval_python_code_alpaca_n_ngrams_match_1": 8.704, + "eval_python_code_alpaca_n_ngrams_match_2": 2.286, + "eval_python_code_alpaca_n_ngrams_match_3": 0.656, + "eval_python_code_alpaca_num_pred_words": 35.038, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 15.450613719652043, + "eval_python_code_alpaca_pred_num_tokens": 62.984375, + "eval_python_code_alpaca_rouge_score": 0.3670313706522858, + "eval_python_code_alpaca_runtime": 6.6362, + "eval_python_code_alpaca_samples_per_second": 75.344, + "eval_python_code_alpaca_steps_per_second": 0.151, + "eval_python_code_alpaca_token_set_f1": 0.43887632040645924, + "eval_python_code_alpaca_token_set_f1_sem": 0.00656663675900497, + "eval_python_code_alpaca_token_set_precision": 0.4829332550075322, + "eval_python_code_alpaca_token_set_recall": 0.4324919006662028, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 31 + }, + { + "epoch": 0.14, + "eval_wikibio_accuracy": 0.35665625, + "eval_wikibio_bleu_score": 5.6953724260148775, + "eval_wikibio_bleu_score_sem": 0.2116118501994311, + "eval_wikibio_emb_cos_sim": 0.7341107130050659, + "eval_wikibio_emb_cos_sim_sem": 0.005868252744375891, + "eval_wikibio_emb_top1_equal": 0.18400000035762787, + "eval_wikibio_emb_top1_equal_sem": 0.017346172969186033, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.472095489501953, + "eval_wikibio_n_ngrams_match_1": 9.376, + "eval_wikibio_n_ngrams_match_2": 3.148, + "eval_wikibio_n_ngrams_match_3": 1.21, + "eval_wikibio_num_pred_words": 34.388, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 32.20415525792636, + "eval_wikibio_pred_num_tokens": 63.0, + "eval_wikibio_rouge_score": 0.3396970245008327, + "eval_wikibio_runtime": 7.8165, + "eval_wikibio_samples_per_second": 63.967, + "eval_wikibio_steps_per_second": 0.128, + "eval_wikibio_token_set_f1": 0.30196565654624985, + "eval_wikibio_token_set_f1_sem": 0.006037672926570891, + "eval_wikibio_token_set_precision": 0.30395106669379673, + "eval_wikibio_token_set_recall": 0.3225063022698392, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 31 + }, + { + "epoch": 0.14, + "eval_bias-bios_accuracy": 0.43903125, + "eval_bias-bios_bleu_score": 9.97709652864065, + "eval_bias-bios_bleu_score_sem": 0.3171072933989879, + "eval_bias-bios_emb_cos_sim": 0.846991240978241, + "eval_bias-bios_emb_cos_sim_sem": 0.0033278412673423867, + "eval_bias-bios_emb_top1_equal": 0.257999986410141, + "eval_bias-bios_emb_top1_equal_sem": 0.019586710359463095, + "eval_bias-bios_exact_match": 0.0, + "eval_bias-bios_exact_match_sem": 0.0, + "eval_bias-bios_loss": 2.272756814956665, + "eval_bias-bios_n_ngrams_match_1": 19.672, + "eval_bias-bios_n_ngrams_match_2": 6.802, + "eval_bias-bios_n_ngrams_match_3": 2.788, + "eval_bias-bios_num_pred_words": 46.306, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 9.706121946694221, + "eval_bias-bios_pred_num_tokens": 62.9921875, + "eval_bias-bios_rouge_score": 0.45800074150132053, + "eval_bias-bios_runtime": 7.6527, + "eval_bias-bios_samples_per_second": 65.336, + "eval_bias-bios_steps_per_second": 0.131, + "eval_bias-bios_token_set_f1": 0.48702588662994584, + "eval_bias-bios_token_set_f1_sem": 0.004756417593574572, + "eval_bias-bios_token_set_precision": 0.4642910225543334, + "eval_bias-bios_token_set_recall": 0.5280679233880424, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 31 + }, + { + "epoch": 0.16, + "learning_rate": 0.001, + "loss": 2.5171, + "step": 36 + }, + { + "epoch": 0.21, + "learning_rate": 0.001, + "loss": 2.2642, + "step": 48 + }, + { + "epoch": 0.26, + "learning_rate": 0.001, + "loss": 2.8679, + "step": 60 + }, + { + "epoch": 0.27, + "eval_ag_news_accuracy": 0.304875, + "eval_ag_news_bleu_score": 5.060497511269132, + "eval_ag_news_bleu_score_sem": 0.1588487365663699, + "eval_ag_news_emb_cos_sim": 0.8225099444389343, + "eval_ag_news_emb_cos_sim_sem": 0.0037236120070751506, + "eval_ag_news_emb_top1_equal": 0.31200000643730164, + "eval_ag_news_emb_top1_equal_sem": 0.020740595612058172, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.4802517890930176, + "eval_ag_news_n_ngrams_match_1": 13.956, + "eval_ag_news_n_ngrams_match_2": 3.176, + "eval_ag_news_n_ngrams_match_3": 0.898, + "eval_ag_news_num_pred_words": 42.868, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 32.467896108867876, + "eval_ag_news_pred_num_tokens": 62.4140625, + "eval_ag_news_rouge_score": 0.3692136347481344, + "eval_ag_news_runtime": 7.1641, + "eval_ag_news_samples_per_second": 69.793, + "eval_ag_news_steps_per_second": 0.14, + "eval_ag_news_token_set_f1": 0.36540067573157586, + "eval_ag_news_token_set_f1_sem": 0.004698136906103602, + "eval_ag_news_token_set_precision": 0.3389609317064429, + "eval_ag_news_token_set_recall": 0.42054779375425144, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 62 + }, + { + "epoch": 0.27, + "eval_anthropic_toxic_prompts_accuracy": 0.10921875, + "eval_anthropic_toxic_prompts_bleu_score": 3.514336801925988, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1289825795751676, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.7021579742431641, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004256657187024065, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1340000033378601, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015249692640233114, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 2.930422306060791, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.578, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.156, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.834, + "eval_anthropic_toxic_prompts_num_pred_words": 45.428, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 18.735540958736387, + "eval_anthropic_toxic_prompts_pred_num_tokens": 62.0078125, + "eval_anthropic_toxic_prompts_rouge_score": 0.23798547473021991, + "eval_anthropic_toxic_prompts_runtime": 6.8257, + "eval_anthropic_toxic_prompts_samples_per_second": 73.252, + "eval_anthropic_toxic_prompts_steps_per_second": 0.147, + "eval_anthropic_toxic_prompts_token_set_f1": 0.3503551059292943, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0060456741682062545, + "eval_anthropic_toxic_prompts_token_set_precision": 0.4663104252542219, + "eval_anthropic_toxic_prompts_token_set_recall": 0.3055933594976192, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 62 + }, + { + "epoch": 0.27, + "eval_arxiv_accuracy": 0.40415625, + "eval_arxiv_bleu_score": 4.32018626719132, + "eval_arxiv_bleu_score_sem": 0.12063743691178247, + "eval_arxiv_emb_cos_sim": 0.7646932601928711, + "eval_arxiv_emb_cos_sim_sem": 0.004794754346794133, + "eval_arxiv_emb_top1_equal": 0.23199999332427979, + "eval_arxiv_emb_top1_equal_sem": 0.018896193149216322, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.0367958545684814, + "eval_arxiv_n_ngrams_match_1": 14.982, + "eval_arxiv_n_ngrams_match_2": 2.936, + "eval_arxiv_n_ngrams_match_3": 0.628, + "eval_arxiv_num_pred_words": 39.184, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 20.83836699330422, + "eval_arxiv_pred_num_tokens": 61.8125, + "eval_arxiv_rouge_score": 0.36987343880561163, + "eval_arxiv_runtime": 7.1902, + "eval_arxiv_samples_per_second": 69.539, + "eval_arxiv_steps_per_second": 0.139, + "eval_arxiv_token_set_f1": 0.37145593312975383, + "eval_arxiv_token_set_f1_sem": 0.004854050823526144, + "eval_arxiv_token_set_precision": 0.31398716496476536, + "eval_arxiv_token_set_recall": 0.4893776089225949, + "eval_arxiv_true_num_tokens": 64.0, + "step": 62 + }, + { + "epoch": 0.27, + "eval_python_code_alpaca_accuracy": 0.153875, + "eval_python_code_alpaca_bleu_score": 5.218403720836803, + "eval_python_code_alpaca_bleu_score_sem": 0.16420659278980215, + "eval_python_code_alpaca_emb_cos_sim": 0.7770482301712036, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.004089995498141099, + "eval_python_code_alpaca_emb_top1_equal": 0.20000000298023224, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.017906459589198134, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.5997347831726074, + "eval_python_code_alpaca_n_ngrams_match_1": 10.17, + "eval_python_code_alpaca_n_ngrams_match_2": 3.094, + "eval_python_code_alpaca_n_ngrams_match_3": 1.046, + "eval_python_code_alpaca_num_pred_words": 39.394, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 13.460167698592779, + "eval_python_code_alpaca_pred_num_tokens": 61.890625, + "eval_python_code_alpaca_rouge_score": 0.3838419247498926, + "eval_python_code_alpaca_runtime": 6.9264, + "eval_python_code_alpaca_samples_per_second": 72.187, + "eval_python_code_alpaca_steps_per_second": 0.144, + "eval_python_code_alpaca_token_set_f1": 0.4857091392562985, + "eval_python_code_alpaca_token_set_f1_sem": 0.005462245820208245, + "eval_python_code_alpaca_token_set_precision": 0.5659594722840119, + "eval_python_code_alpaca_token_set_recall": 0.44529530817081914, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 62 + }, + { + "epoch": 0.27, + "eval_wikibio_accuracy": 0.34871875, + "eval_wikibio_bleu_score": 5.131162518306524, + "eval_wikibio_bleu_score_sem": 0.20245051705727532, + "eval_wikibio_emb_cos_sim": 0.7161974310874939, + "eval_wikibio_emb_cos_sim_sem": 0.006247003956520955, + "eval_wikibio_emb_top1_equal": 0.1420000046491623, + "eval_wikibio_emb_top1_equal_sem": 0.015625630310786714, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.4194374084472656, + "eval_wikibio_n_ngrams_match_1": 8.386, + "eval_wikibio_n_ngrams_match_2": 2.642, + "eval_wikibio_n_ngrams_match_3": 0.982, + "eval_wikibio_num_pred_words": 31.434, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 30.55222176323163, + "eval_wikibio_pred_num_tokens": 62.9140625, + "eval_wikibio_rouge_score": 0.31856747306238054, + "eval_wikibio_runtime": 6.9661, + "eval_wikibio_samples_per_second": 71.777, + "eval_wikibio_steps_per_second": 0.144, + "eval_wikibio_token_set_f1": 0.28387075920802823, + "eval_wikibio_token_set_f1_sem": 0.006648036834838477, + "eval_wikibio_token_set_precision": 0.27314929855068715, + "eval_wikibio_token_set_recall": 0.32361977068815584, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 62 + }, + { + "epoch": 0.27, + "eval_bias-bios_accuracy": 0.44571875, + "eval_bias-bios_bleu_score": 10.748684882655063, + "eval_bias-bios_bleu_score_sem": 0.33633789418984067, + "eval_bias-bios_emb_cos_sim": 0.8556249141693115, + "eval_bias-bios_emb_cos_sim_sem": 0.0028325299984082552, + "eval_bias-bios_emb_top1_equal": 0.30799999833106995, + "eval_bias-bios_emb_top1_equal_sem": 0.020667033028164562, + "eval_bias-bios_exact_match": 0.0, + "eval_bias-bios_exact_match_sem": 0.0, + "eval_bias-bios_loss": 2.164238214492798, + "eval_bias-bios_n_ngrams_match_1": 20.344, + "eval_bias-bios_n_ngrams_match_2": 7.47, + "eval_bias-bios_n_ngrams_match_3": 3.198, + "eval_bias-bios_num_pred_words": 48.886, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 8.707965787384495, + "eval_bias-bios_pred_num_tokens": 62.3828125, + "eval_bias-bios_rouge_score": 0.46276773387866293, + "eval_bias-bios_runtime": 7.4252, + "eval_bias-bios_samples_per_second": 67.338, + "eval_bias-bios_steps_per_second": 0.135, + "eval_bias-bios_token_set_f1": 0.5012030916495526, + "eval_bias-bios_token_set_f1_sem": 0.004882132968376436, + "eval_bias-bios_token_set_precision": 0.4824323129514482, + "eval_bias-bios_token_set_recall": 0.535152442471092, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 62 + }, + { + "epoch": 0.32, + "learning_rate": 0.001, + "loss": 2.4842, + "step": 72 + }, + { + "epoch": 0.37, + "learning_rate": 0.001, + "loss": 2.3402, + "step": 84 + }, + { + "epoch": 0.41, + "eval_ag_news_accuracy": 0.31328125, + "eval_ag_news_bleu_score": 4.516750644571753, + "eval_ag_news_bleu_score_sem": 0.17149583918765143, + "eval_ag_news_emb_cos_sim": 0.793439507484436, + "eval_ag_news_emb_cos_sim_sem": 0.004841141464243117, + "eval_ag_news_emb_top1_equal": 0.24400000274181366, + "eval_ag_news_emb_top1_equal_sem": 0.0192267343061996, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.4803214073181152, + "eval_ag_news_n_ngrams_match_1": 11.478, + "eval_ag_news_n_ngrams_match_2": 2.472, + "eval_ag_news_n_ngrams_match_3": 0.702, + "eval_ag_news_num_pred_words": 31.22, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 32.47015654485049, + "eval_ag_news_pred_num_tokens": 49.0, + "eval_ag_news_rouge_score": 0.35275194042143654, + "eval_ag_news_runtime": 30.8963, + "eval_ag_news_samples_per_second": 16.183, + "eval_ag_news_steps_per_second": 0.032, + "eval_ag_news_token_set_f1": 0.3386501404441052, + "eval_ag_news_token_set_f1_sem": 0.005318713349884382, + "eval_ag_news_token_set_precision": 0.2896306380985742, + "eval_ag_news_token_set_recall": 0.4466289829368145, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 93 + }, + { + "epoch": 0.41, + "eval_anthropic_toxic_prompts_accuracy": 0.11359375, + "eval_anthropic_toxic_prompts_bleu_score": 5.140854055881747, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.21195092740800459, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6991554498672485, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004861811532057735, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.17399999499320984, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.016971269551723376, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 2.741424322128296, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.802, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.944, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.756, + "eval_anthropic_toxic_prompts_num_pred_words": 29.662, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 15.5090592685474, + "eval_anthropic_toxic_prompts_pred_num_tokens": 45.1328125, + "eval_anthropic_toxic_prompts_rouge_score": 0.3009768237903324, + "eval_anthropic_toxic_prompts_runtime": 6.6478, + "eval_anthropic_toxic_prompts_samples_per_second": 75.213, + "eval_anthropic_toxic_prompts_steps_per_second": 0.15, + "eval_anthropic_toxic_prompts_token_set_f1": 0.3644166633832432, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0067735124312273635, + "eval_anthropic_toxic_prompts_token_set_precision": 0.4294945619168725, + "eval_anthropic_toxic_prompts_token_set_recall": 0.34899513455575765, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 93 + }, + { + "epoch": 0.41, + "eval_arxiv_accuracy": 0.40621875, + "eval_arxiv_bleu_score": 3.486266642764337, + "eval_arxiv_bleu_score_sem": 0.11682124642354311, + "eval_arxiv_emb_cos_sim": 0.7276380062103271, + "eval_arxiv_emb_cos_sim_sem": 0.006449194438198392, + "eval_arxiv_emb_top1_equal": 0.17000000178813934, + "eval_arxiv_emb_top1_equal_sem": 0.016815633120741882, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.0975985527038574, + "eval_arxiv_n_ngrams_match_1": 12.438, + "eval_arxiv_n_ngrams_match_2": 2.302, + "eval_arxiv_n_ngrams_match_3": 0.498, + "eval_arxiv_num_pred_words": 29.636, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 22.144708027378755, + "eval_arxiv_pred_num_tokens": 50.4609375, + "eval_arxiv_rouge_score": 0.34343007600768327, + "eval_arxiv_runtime": 6.9219, + "eval_arxiv_samples_per_second": 72.235, + "eval_arxiv_steps_per_second": 0.144, + "eval_arxiv_token_set_f1": 0.34406688703760474, + "eval_arxiv_token_set_f1_sem": 0.0052970795722565266, + "eval_arxiv_token_set_precision": 0.2720488131413779, + "eval_arxiv_token_set_recall": 0.5071973334549789, + "eval_arxiv_true_num_tokens": 64.0, + "step": 93 + }, + { + "epoch": 0.41, + "eval_python_code_alpaca_accuracy": 0.15865625, + "eval_python_code_alpaca_bleu_score": 5.943762510754415, + "eval_python_code_alpaca_bleu_score_sem": 0.2175054353006214, + "eval_python_code_alpaca_emb_cos_sim": 0.7461953163146973, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.00596813547007352, + "eval_python_code_alpaca_emb_top1_equal": 0.19200000166893005, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.01763218126724194, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.4123122692108154, + "eval_python_code_alpaca_n_ngrams_match_1": 8.432, + "eval_python_code_alpaca_n_ngrams_match_2": 2.242, + "eval_python_code_alpaca_n_ngrams_match_3": 0.706, + "eval_python_code_alpaca_num_pred_words": 26.672, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 11.159735647926345, + "eval_python_code_alpaca_pred_num_tokens": 47.8203125, + "eval_python_code_alpaca_rouge_score": 0.40986058218769317, + "eval_python_code_alpaca_runtime": 6.8363, + "eval_python_code_alpaca_samples_per_second": 73.139, + "eval_python_code_alpaca_steps_per_second": 0.146, + "eval_python_code_alpaca_token_set_f1": 0.4641263193534667, + "eval_python_code_alpaca_token_set_f1_sem": 0.006759693397650342, + "eval_python_code_alpaca_token_set_precision": 0.4751880021278726, + "eval_python_code_alpaca_token_set_recall": 0.4834618991897721, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 93 + }, + { + "epoch": 0.41, + "eval_wikibio_accuracy": 0.3475, + "eval_wikibio_bleu_score": 5.7450436332065165, + "eval_wikibio_bleu_score_sem": 0.23459594560767927, + "eval_wikibio_emb_cos_sim": 0.7174302339553833, + "eval_wikibio_emb_cos_sim_sem": 0.006967267271935492, + "eval_wikibio_emb_top1_equal": 0.16599999368190765, + "eval_wikibio_emb_top1_equal_sem": 0.01665661404240883, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.5982017517089844, + "eval_wikibio_n_ngrams_match_1": 8.528, + "eval_wikibio_n_ngrams_match_2": 2.744, + "eval_wikibio_n_ngrams_match_3": 1.016, + "eval_wikibio_num_pred_words": 29.094, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 36.532480869480594, + "eval_wikibio_pred_num_tokens": 53.8671875, + "eval_wikibio_rouge_score": 0.33783075470658863, + "eval_wikibio_runtime": 6.9323, + "eval_wikibio_samples_per_second": 72.126, + "eval_wikibio_steps_per_second": 0.144, + "eval_wikibio_token_set_f1": 0.2926915338296796, + "eval_wikibio_token_set_f1_sem": 0.006339384709369148, + "eval_wikibio_token_set_precision": 0.28480366224074966, + "eval_wikibio_token_set_recall": 0.3280943627476054, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 93 + }, + { + "epoch": 0.41, + "eval_bias-bios_accuracy": 0.47609375, + "eval_bias-bios_bleu_score": 14.143628030554162, + "eval_bias-bios_bleu_score_sem": 0.5952833016066833, + "eval_bias-bios_emb_cos_sim": 0.8480692505836487, + "eval_bias-bios_emb_cos_sim_sem": 0.0034921289055575185, + "eval_bias-bios_emb_top1_equal": 0.2840000092983246, + "eval_bias-bios_emb_top1_equal_sem": 0.020186705101045338, + "eval_bias-bios_exact_match": 0.0, + "eval_bias-bios_exact_match_sem": 0.0, + "eval_bias-bios_loss": 2.0621674060821533, + "eval_bias-bios_n_ngrams_match_1": 18.828, + "eval_bias-bios_n_ngrams_match_2": 7.64, + "eval_bias-bios_n_ngrams_match_3": 3.826, + "eval_bias-bios_num_pred_words": 36.126, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 7.862993655133785, + "eval_bias-bios_pred_num_tokens": 47.9921875, + "eval_bias-bios_rouge_score": 0.5012418979790247, + "eval_bias-bios_runtime": 7.4397, + "eval_bias-bios_samples_per_second": 67.207, + "eval_bias-bios_steps_per_second": 0.134, + "eval_bias-bios_token_set_f1": 0.5158033024363126, + "eval_bias-bios_token_set_f1_sem": 0.006050981474164487, + "eval_bias-bios_token_set_precision": 0.46195844368781785, + "eval_bias-bios_token_set_recall": 0.6017904464338891, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 93 + }, + { + "epoch": 0.42, + "learning_rate": 0.001, + "loss": 2.1097, + "step": 96 + }, + { + "epoch": 0.48, + "learning_rate": 0.001, + "loss": 2.3791, + "step": 108 + }, + { + "epoch": 0.53, + "learning_rate": 0.001, + "loss": 2.3587, + "step": 120 + }, + { + "epoch": 0.55, + "eval_ag_news_accuracy": 0.31, + "eval_ag_news_bleu_score": 4.7225283432858465, + "eval_ag_news_bleu_score_sem": 0.15513744356970763, + "eval_ag_news_emb_cos_sim": 0.8204448819160461, + "eval_ag_news_emb_cos_sim_sem": 0.004084354753754322, + "eval_ag_news_emb_top1_equal": 0.28200000524520874, + "eval_ag_news_emb_top1_equal_sem": 0.02014357434811239, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.450395345687866, + "eval_ag_news_n_ngrams_match_1": 13.582, + "eval_ag_news_n_ngrams_match_2": 2.918, + "eval_ag_news_n_ngrams_match_3": 0.774, + "eval_ag_news_num_pred_words": 42.082, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 31.512848315065245, + "eval_ag_news_pred_num_tokens": 62.9140625, + "eval_ag_news_rouge_score": 0.3611285789834328, + "eval_ag_news_runtime": 7.7072, + "eval_ag_news_samples_per_second": 64.874, + "eval_ag_news_steps_per_second": 0.13, + "eval_ag_news_token_set_f1": 0.35262006963334624, + "eval_ag_news_token_set_f1_sem": 0.004891780010880941, + "eval_ag_news_token_set_precision": 0.32770115783501474, + "eval_ag_news_token_set_recall": 0.40682450130250397, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 124 + }, + { + "epoch": 0.55, + "eval_anthropic_toxic_prompts_accuracy": 0.11134375, + "eval_anthropic_toxic_prompts_bleu_score": 3.588870597966171, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12529714132983094, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.7048087120056152, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004388768025387664, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.12200000137090683, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.014651325247908655, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 2.969221353530884, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.566, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.188, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.804, + "eval_anthropic_toxic_prompts_num_pred_words": 44.512, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 19.476748189009147, + "eval_anthropic_toxic_prompts_pred_num_tokens": 62.921875, + "eval_anthropic_toxic_prompts_rouge_score": 0.24404335146473338, + "eval_anthropic_toxic_prompts_runtime": 6.7594, + "eval_anthropic_toxic_prompts_samples_per_second": 73.971, + "eval_anthropic_toxic_prompts_steps_per_second": 0.148, + "eval_anthropic_toxic_prompts_token_set_f1": 0.35608243439319104, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006070834817090331, + "eval_anthropic_toxic_prompts_token_set_precision": 0.47290911056237195, + "eval_anthropic_toxic_prompts_token_set_recall": 0.3119626371016283, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 124 + }, + { + "epoch": 0.55, + "eval_arxiv_accuracy": 0.41953125, + "eval_arxiv_bleu_score": 4.034091269472269, + "eval_arxiv_bleu_score_sem": 0.12793967624084907, + "eval_arxiv_emb_cos_sim": 0.7384451031684875, + "eval_arxiv_emb_cos_sim_sem": 0.0063196562377584925, + "eval_arxiv_emb_top1_equal": 0.24799999594688416, + "eval_arxiv_emb_top1_equal_sem": 0.01933234140950753, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 2.965681314468384, + "eval_arxiv_n_ngrams_match_1": 13.97, + "eval_arxiv_n_ngrams_match_2": 2.662, + "eval_arxiv_n_ngrams_match_3": 0.574, + "eval_arxiv_num_pred_words": 36.738, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 19.407921635830117, + "eval_arxiv_pred_num_tokens": 62.9140625, + "eval_arxiv_rouge_score": 0.3471889053815287, + "eval_arxiv_runtime": 7.1394, + "eval_arxiv_samples_per_second": 70.034, + "eval_arxiv_steps_per_second": 0.14, + "eval_arxiv_token_set_f1": 0.3505383309264076, + "eval_arxiv_token_set_f1_sem": 0.0053464458822839395, + "eval_arxiv_token_set_precision": 0.2922403618830216, + "eval_arxiv_token_set_recall": 0.4868777158351232, + "eval_arxiv_true_num_tokens": 64.0, + "step": 124 + }, + { + "epoch": 0.55, + "eval_python_code_alpaca_accuracy": 0.1574375, + "eval_python_code_alpaca_bleu_score": 4.901041283031989, + "eval_python_code_alpaca_bleu_score_sem": 0.15128683929670117, + "eval_python_code_alpaca_emb_cos_sim": 0.7697573900222778, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.0049162044481588, + "eval_python_code_alpaca_emb_top1_equal": 0.14800000190734863, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.015896458012572223, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.595602512359619, + "eval_python_code_alpaca_n_ngrams_match_1": 9.89, + "eval_python_code_alpaca_n_ngrams_match_2": 2.874, + "eval_python_code_alpaca_n_ngrams_match_3": 0.936, + "eval_python_code_alpaca_num_pred_words": 39.19, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 13.404661402980476, + "eval_python_code_alpaca_pred_num_tokens": 62.8203125, + "eval_python_code_alpaca_rouge_score": 0.37669615985353927, + "eval_python_code_alpaca_runtime": 6.8719, + "eval_python_code_alpaca_samples_per_second": 72.76, + "eval_python_code_alpaca_steps_per_second": 0.146, + "eval_python_code_alpaca_token_set_f1": 0.4669983325014964, + "eval_python_code_alpaca_token_set_f1_sem": 0.006102331677873762, + "eval_python_code_alpaca_token_set_precision": 0.5525666056613069, + "eval_python_code_alpaca_token_set_recall": 0.42905431288408613, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 124 + }, + { + "epoch": 0.55, + "eval_wikibio_accuracy": 0.36175, + "eval_wikibio_bleu_score": 4.969333362935701, + "eval_wikibio_bleu_score_sem": 0.20690804856768155, + "eval_wikibio_emb_cos_sim": 0.7222297191619873, + "eval_wikibio_emb_cos_sim_sem": 0.00648223522589297, + "eval_wikibio_emb_top1_equal": 0.17000000178813934, + "eval_wikibio_emb_top1_equal_sem": 0.016815633120741882, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.389568567276001, + "eval_wikibio_n_ngrams_match_1": 8.624, + "eval_wikibio_n_ngrams_match_2": 2.76, + "eval_wikibio_n_ngrams_match_3": 1.06, + "eval_wikibio_num_pred_words": 33.248, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 29.65315616829789, + "eval_wikibio_pred_num_tokens": 62.9921875, + "eval_wikibio_rouge_score": 0.30751147754995894, + "eval_wikibio_runtime": 6.8812, + "eval_wikibio_samples_per_second": 72.661, + "eval_wikibio_steps_per_second": 0.145, + "eval_wikibio_token_set_f1": 0.2744272465841197, + "eval_wikibio_token_set_f1_sem": 0.006671840977191013, + "eval_wikibio_token_set_precision": 0.27600638412423095, + "eval_wikibio_token_set_recall": 0.2994770253871302, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 124 + }, + { + "epoch": 0.55, + "eval_bias-bios_accuracy": 0.47809375, + "eval_bias-bios_bleu_score": 13.904461612933037, + "eval_bias-bios_bleu_score_sem": 0.5622423795520898, + "eval_bias-bios_emb_cos_sim": 0.8658474683761597, + "eval_bias-bios_emb_cos_sim_sem": 0.0030276376456932464, + "eval_bias-bios_emb_top1_equal": 0.3160000145435333, + "eval_bias-bios_emb_top1_equal_sem": 0.020812358915445636, + "eval_bias-bios_exact_match": 0.0, + "eval_bias-bios_exact_match_sem": 0.0, + "eval_bias-bios_loss": 2.0254554748535156, + "eval_bias-bios_n_ngrams_match_1": 21.246, + "eval_bias-bios_n_ngrams_match_2": 8.81, + "eval_bias-bios_n_ngrams_match_3": 4.386, + "eval_bias-bios_num_pred_words": 47.536, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 7.5795624586388195, + "eval_bias-bios_pred_num_tokens": 62.890625, + "eval_bias-bios_rouge_score": 0.4883049409995084, + "eval_bias-bios_runtime": 7.322, + "eval_bias-bios_samples_per_second": 68.287, + "eval_bias-bios_steps_per_second": 0.137, + "eval_bias-bios_token_set_f1": 0.522675177087755, + "eval_bias-bios_token_set_f1_sem": 0.0059232907683261175, + "eval_bias-bios_token_set_precision": 0.5037827360959235, + "eval_bias-bios_token_set_recall": 0.559121364032907, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 124 + }, + { + "epoch": 0.58, + "learning_rate": 0.001, + "loss": 2.2958, + "step": 132 + }, + { + "epoch": 0.63, + "learning_rate": 0.001, + "loss": 2.0545, + "step": 144 + }, + { + "epoch": 0.68, + "eval_ag_news_accuracy": 0.311, + "eval_ag_news_bleu_score": 5.003358972744811, + "eval_ag_news_bleu_score_sem": 0.1684136330974139, + "eval_ag_news_emb_cos_sim": 0.8219054341316223, + "eval_ag_news_emb_cos_sim_sem": 0.003758947543815256, + "eval_ag_news_emb_top1_equal": 0.28999999165534973, + "eval_ag_news_emb_top1_equal_sem": 0.02031317985982347, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.4661929607391357, + "eval_ag_news_n_ngrams_match_1": 12.61, + "eval_ag_news_n_ngrams_match_2": 2.782, + "eval_ag_news_n_ngrams_match_3": 0.79, + "eval_ag_news_num_pred_words": 34.54, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 32.01462919700174, + "eval_ag_news_pred_num_tokens": 49.7265625, + "eval_ag_news_rouge_score": 0.3713894017865863, + "eval_ag_news_runtime": 6.9815, + "eval_ag_news_samples_per_second": 71.618, + "eval_ag_news_steps_per_second": 0.143, + "eval_ag_news_token_set_f1": 0.3567973665596035, + "eval_ag_news_token_set_f1_sem": 0.004808474411403699, + "eval_ag_news_token_set_precision": 0.3171467469913889, + "eval_ag_news_token_set_recall": 0.4300849194020247, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 155 + }, + { + "epoch": 0.68, + "eval_anthropic_toxic_prompts_accuracy": 0.1133125, + "eval_anthropic_toxic_prompts_bleu_score": 4.991091639968256, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.18676981446968294, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.7204309701919556, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004466604566883565, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.17000000178813934, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.016815633120741882, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 2.8461811542510986, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.34, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.166, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.842, + "eval_anthropic_toxic_prompts_num_pred_words": 33.19, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 17.221888366963853, + "eval_anthropic_toxic_prompts_pred_num_tokens": 46.6640625, + "eval_anthropic_toxic_prompts_rouge_score": 0.29269005770970935, + "eval_anthropic_toxic_prompts_runtime": 6.7114, + "eval_anthropic_toxic_prompts_samples_per_second": 74.5, + "eval_anthropic_toxic_prompts_steps_per_second": 0.149, + "eval_anthropic_toxic_prompts_token_set_f1": 0.3702121335287552, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0065356531821970775, + "eval_anthropic_toxic_prompts_token_set_precision": 0.46274202329252623, + "eval_anthropic_toxic_prompts_token_set_recall": 0.33499300377237795, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 155 + }, + { + "epoch": 0.68, + "eval_arxiv_accuracy": 0.4100625, + "eval_arxiv_bleu_score": 3.9930895970733804, + "eval_arxiv_bleu_score_sem": 0.12444908662542796, + "eval_arxiv_emb_cos_sim": 0.7626603841781616, + "eval_arxiv_emb_cos_sim_sem": 0.005201230139138295, + "eval_arxiv_emb_top1_equal": 0.22599999606609344, + "eval_arxiv_emb_top1_equal_sem": 0.018722957089283943, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.02276873588562, + "eval_arxiv_n_ngrams_match_1": 13.88, + "eval_arxiv_n_ngrams_match_2": 2.696, + "eval_arxiv_n_ngrams_match_3": 0.612, + "eval_arxiv_num_pred_words": 31.28, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 20.548105273439415, + "eval_arxiv_pred_num_tokens": 50.9765625, + "eval_arxiv_rouge_score": 0.37042341511355137, + "eval_arxiv_runtime": 7.071, + "eval_arxiv_samples_per_second": 70.712, + "eval_arxiv_steps_per_second": 0.141, + "eval_arxiv_token_set_f1": 0.3713592970804077, + "eval_arxiv_token_set_f1_sem": 0.0049161836131566845, + "eval_arxiv_token_set_precision": 0.30512914849944406, + "eval_arxiv_token_set_recall": 0.5025705117271739, + "eval_arxiv_true_num_tokens": 64.0, + "step": 155 + }, + { + "epoch": 0.68, + "eval_python_code_alpaca_accuracy": 0.16328125, + "eval_python_code_alpaca_bleu_score": 7.516714220519165, + "eval_python_code_alpaca_bleu_score_sem": 0.2405822858298543, + "eval_python_code_alpaca_emb_cos_sim": 0.8032925128936768, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.00434053064781819, + "eval_python_code_alpaca_emb_top1_equal": 0.2160000056028366, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.018421909471797383, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.440660238265991, + "eval_python_code_alpaca_n_ngrams_match_1": 9.808, + "eval_python_code_alpaca_n_ngrams_match_2": 3.082, + "eval_python_code_alpaca_n_ngrams_match_3": 1.114, + "eval_python_code_alpaca_num_pred_words": 28.448, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 11.48061818450373, + "eval_python_code_alpaca_pred_num_tokens": 44.390625, + "eval_python_code_alpaca_rouge_score": 0.45539690730460003, + "eval_python_code_alpaca_runtime": 6.7768, + "eval_python_code_alpaca_samples_per_second": 73.781, + "eval_python_code_alpaca_steps_per_second": 0.148, + "eval_python_code_alpaca_token_set_f1": 0.5142372467494214, + "eval_python_code_alpaca_token_set_f1_sem": 0.005921915329945734, + "eval_python_code_alpaca_token_set_precision": 0.557454862667378, + "eval_python_code_alpaca_token_set_recall": 0.49660464965394135, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 155 + }, + { + "epoch": 0.68, + "eval_wikibio_accuracy": 0.35134375, + "eval_wikibio_bleu_score": 5.640416639205569, + "eval_wikibio_bleu_score_sem": 0.22432806451151702, + "eval_wikibio_emb_cos_sim": 0.7272863984107971, + "eval_wikibio_emb_cos_sim_sem": 0.006542096621932352, + "eval_wikibio_emb_top1_equal": 0.1720000058412552, + "eval_wikibio_emb_top1_equal_sem": 0.01689386850274998, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.401550769805908, + "eval_wikibio_n_ngrams_match_1": 8.604, + "eval_wikibio_n_ngrams_match_2": 2.838, + "eval_wikibio_n_ngrams_match_3": 1.082, + "eval_wikibio_num_pred_words": 29.89, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 30.010603517780968, + "eval_wikibio_pred_num_tokens": 56.3046875, + "eval_wikibio_rouge_score": 0.3404123434832165, + "eval_wikibio_runtime": 8.3562, + "eval_wikibio_samples_per_second": 59.835, + "eval_wikibio_steps_per_second": 0.12, + "eval_wikibio_token_set_f1": 0.29415853483245885, + "eval_wikibio_token_set_f1_sem": 0.006476544753051358, + "eval_wikibio_token_set_precision": 0.28791783614238897, + "eval_wikibio_token_set_recall": 0.33035313035858804, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 155 + }, + { + "epoch": 0.68, + "eval_bias-bios_accuracy": 0.4783125, + "eval_bias-bios_bleu_score": 15.409677713634471, + "eval_bias-bios_bleu_score_sem": 0.6793448382828893, + "eval_bias-bios_emb_cos_sim": 0.8709923624992371, + "eval_bias-bios_emb_cos_sim_sem": 0.0027106197477141454, + "eval_bias-bios_emb_top1_equal": 0.31200000643730164, + "eval_bias-bios_emb_top1_equal_sem": 0.0207405942792578, + "eval_bias-bios_exact_match": 0.0, + "eval_bias-bios_exact_match_sem": 0.0, + "eval_bias-bios_loss": 1.9928849935531616, + "eval_bias-bios_n_ngrams_match_1": 20.156, + "eval_bias-bios_n_ngrams_match_2": 8.406, + "eval_bias-bios_n_ngrams_match_3": 4.272, + "eval_bias-bios_num_pred_words": 38.084, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 7.336669503630765, + "eval_bias-bios_pred_num_tokens": 49.7421875, + "eval_bias-bios_rouge_score": 0.5228778033893537, + "eval_bias-bios_runtime": 56.2212, + "eval_bias-bios_samples_per_second": 8.893, + "eval_bias-bios_steps_per_second": 0.018, + "eval_bias-bios_token_set_f1": 0.5380957539166968, + "eval_bias-bios_token_set_f1_sem": 0.006057211310424036, + "eval_bias-bios_token_set_precision": 0.5007483299431186, + "eval_bias-bios_token_set_recall": 0.5938445162067718, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 155 + }, + { + "epoch": 0.69, + "learning_rate": 0.001, + "loss": 2.2312, + "step": 156 + }, + { + "epoch": 0.74, + "learning_rate": 0.001, + "loss": 2.303, + "step": 168 + }, + { + "epoch": 0.79, + "learning_rate": 0.001, + "loss": 2.2642, + "step": 180 + }, + { + "epoch": 0.82, + "eval_ag_news_accuracy": 0.3155, + "eval_ag_news_bleu_score": 4.6238494026674, + "eval_ag_news_bleu_score_sem": 0.16422759660282926, + "eval_ag_news_emb_cos_sim": 0.7977774739265442, + "eval_ag_news_emb_cos_sim_sem": 0.004886535312212688, + "eval_ag_news_emb_top1_equal": 0.2619999945163727, + "eval_ag_news_emb_top1_equal_sem": 0.019684691179025708, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.440540075302124, + "eval_ag_news_n_ngrams_match_1": 12.182, + "eval_ag_news_n_ngrams_match_2": 2.652, + "eval_ag_news_n_ngrams_match_3": 0.724, + "eval_ag_news_num_pred_words": 34.344, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 31.203806023310214, + "eval_ag_news_pred_num_tokens": 51.265625, + "eval_ag_news_rouge_score": 0.3575247504591853, + "eval_ag_news_runtime": 7.0458, + "eval_ag_news_samples_per_second": 70.964, + "eval_ag_news_steps_per_second": 0.142, + "eval_ag_news_token_set_f1": 0.3476905400092977, + "eval_ag_news_token_set_f1_sem": 0.0050474775376901995, + "eval_ag_news_token_set_precision": 0.30283073407725536, + "eval_ag_news_token_set_recall": 0.43549270766889003, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 186 + }, + { + "epoch": 0.82, + "eval_anthropic_toxic_prompts_accuracy": 0.1125625, + "eval_anthropic_toxic_prompts_bleu_score": 4.581247855788949, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.18571587878474477, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6939055323600769, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004781795528748738, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1340000033378601, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015249692640233114, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 2.7806220054626465, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.726, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.83, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.678, + "eval_anthropic_toxic_prompts_num_pred_words": 32.324, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 16.129050185906753, + "eval_anthropic_toxic_prompts_pred_num_tokens": 47.3125, + "eval_anthropic_toxic_prompts_rouge_score": 0.27780555495384585, + "eval_anthropic_toxic_prompts_runtime": 6.8387, + "eval_anthropic_toxic_prompts_samples_per_second": 73.113, + "eval_anthropic_toxic_prompts_steps_per_second": 0.146, + "eval_anthropic_toxic_prompts_token_set_f1": 0.35234555947824386, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006273855655020789, + "eval_anthropic_toxic_prompts_token_set_precision": 0.4208779132955721, + "eval_anthropic_toxic_prompts_token_set_recall": 0.33397278331070135, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 186 + }, + { + "epoch": 0.82, + "eval_arxiv_accuracy": 0.41828125, + "eval_arxiv_bleu_score": 3.7199294246295924, + "eval_arxiv_bleu_score_sem": 0.12395209188721983, + "eval_arxiv_emb_cos_sim": 0.7246992588043213, + "eval_arxiv_emb_cos_sim_sem": 0.00647072716105568, + "eval_arxiv_emb_top1_equal": 0.1860000044107437, + "eval_arxiv_emb_top1_equal_sem": 0.017418806591218323, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.0102925300598145, + "eval_arxiv_n_ngrams_match_1": 13.088, + "eval_arxiv_n_ngrams_match_2": 2.46, + "eval_arxiv_n_ngrams_match_3": 0.548, + "eval_arxiv_num_pred_words": 31.562, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 20.293335467674506, + "eval_arxiv_pred_num_tokens": 54.765625, + "eval_arxiv_rouge_score": 0.35168978112341753, + "eval_arxiv_runtime": 7.2086, + "eval_arxiv_samples_per_second": 69.362, + "eval_arxiv_steps_per_second": 0.139, + "eval_arxiv_token_set_f1": 0.3489796464277637, + "eval_arxiv_token_set_f1_sem": 0.005302434445957346, + "eval_arxiv_token_set_precision": 0.2804699900490891, + "eval_arxiv_token_set_recall": 0.5019397954958496, + "eval_arxiv_true_num_tokens": 64.0, + "step": 186 + }, + { + "epoch": 0.82, + "eval_python_code_alpaca_accuracy": 0.1608125, + "eval_python_code_alpaca_bleu_score": 6.1128485412798765, + "eval_python_code_alpaca_bleu_score_sem": 0.2066307880111669, + "eval_python_code_alpaca_emb_cos_sim": 0.7717223763465881, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.0050387301194260315, + "eval_python_code_alpaca_emb_top1_equal": 0.21400000154972076, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.018359796975924752, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.430375814437866, + "eval_python_code_alpaca_n_ngrams_match_1": 9.032, + "eval_python_code_alpaca_n_ngrams_match_2": 2.526, + "eval_python_code_alpaca_n_ngrams_match_3": 0.822, + "eval_python_code_alpaca_num_pred_words": 29.698, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 11.363151714129962, + "eval_python_code_alpaca_pred_num_tokens": 48.4140625, + "eval_python_code_alpaca_rouge_score": 0.4158035294654079, + "eval_python_code_alpaca_runtime": 7.3079, + "eval_python_code_alpaca_samples_per_second": 68.419, + "eval_python_code_alpaca_steps_per_second": 0.137, + "eval_python_code_alpaca_token_set_f1": 0.48281370346698166, + "eval_python_code_alpaca_token_set_f1_sem": 0.0060842657633341036, + "eval_python_code_alpaca_token_set_precision": 0.5093156211832727, + "eval_python_code_alpaca_token_set_recall": 0.4823383370687486, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 186 + }, + { + "epoch": 0.82, + "eval_wikibio_accuracy": 0.3616875, + "eval_wikibio_bleu_score": 5.537482915642417, + "eval_wikibio_bleu_score_sem": 0.23603302250627642, + "eval_wikibio_emb_cos_sim": 0.699407160282135, + "eval_wikibio_emb_cos_sim_sem": 0.007551687574736549, + "eval_wikibio_emb_top1_equal": 0.15000000596046448, + "eval_wikibio_emb_top1_equal_sem": 0.01598471338779901, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.424071788787842, + "eval_wikibio_n_ngrams_match_1": 8.252, + "eval_wikibio_n_ngrams_match_2": 2.666, + "eval_wikibio_n_ngrams_match_3": 1.03, + "eval_wikibio_num_pred_words": 29.136, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 30.694140978938165, + "eval_wikibio_pred_num_tokens": 57.2421875, + "eval_wikibio_rouge_score": 0.3120185963272658, + "eval_wikibio_runtime": 7.0014, + "eval_wikibio_samples_per_second": 71.415, + "eval_wikibio_steps_per_second": 0.143, + "eval_wikibio_token_set_f1": 0.27915879560872614, + "eval_wikibio_token_set_f1_sem": 0.006843179479202318, + "eval_wikibio_token_set_precision": 0.2695843454340866, + "eval_wikibio_token_set_recall": 0.31745052828855647, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 186 + }, + { + "epoch": 0.82, + "eval_bias-bios_accuracy": 0.49115625, + "eval_bias-bios_bleu_score": 15.202837676179207, + "eval_bias-bios_bleu_score_sem": 0.6997835059838426, + "eval_bias-bios_emb_cos_sim": 0.855296790599823, + "eval_bias-bios_emb_cos_sim_sem": 0.003214230031382842, + "eval_bias-bios_emb_top1_equal": 0.26600000262260437, + "eval_bias-bios_emb_top1_equal_sem": 0.01978055817719369, + "eval_bias-bios_exact_match": 0.0, + "eval_bias-bios_exact_match_sem": 0.0, + "eval_bias-bios_loss": 1.9403151273727417, + "eval_bias-bios_n_ngrams_match_1": 19.532, + "eval_bias-bios_n_ngrams_match_2": 8.136, + "eval_bias-bios_n_ngrams_match_3": 4.216, + "eval_bias-bios_num_pred_words": 36.87, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 6.960944209104801, + "eval_bias-bios_pred_num_tokens": 50.6484375, + "eval_bias-bios_rouge_score": 0.5109369957489318, + "eval_bias-bios_runtime": 7.1885, + "eval_bias-bios_samples_per_second": 69.556, + "eval_bias-bios_steps_per_second": 0.139, + "eval_bias-bios_token_set_f1": 0.5280290804881462, + "eval_bias-bios_token_set_f1_sem": 0.006173417362359757, + "eval_bias-bios_token_set_precision": 0.4782727189071511, + "eval_bias-bios_token_set_recall": 0.6061540264575703, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 186 + }, + { + "epoch": 0.85, + "learning_rate": 0.001, + "loss": 2.0709, + "step": 192 + }, + { + "epoch": 0.9, + "learning_rate": 0.001, + "loss": 2.117, + "step": 204 + }, + { + "epoch": 0.95, + "learning_rate": 0.001, + "loss": 2.2981, + "step": 216 + }, + { + "epoch": 0.96, + "eval_ag_news_accuracy": 0.30978125, + "eval_ag_news_bleu_score": 4.457379225242967, + "eval_ag_news_bleu_score_sem": 0.14593041092341294, + "eval_ag_news_emb_cos_sim": 0.8129716515541077, + "eval_ag_news_emb_cos_sim_sem": 0.004289844915959979, + "eval_ag_news_emb_top1_equal": 0.2540000081062317, + "eval_ag_news_emb_top1_equal_sem": 0.019486597059300604, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.4657886028289795, + "eval_ag_news_n_ngrams_match_1": 13.404, + "eval_ag_news_n_ngrams_match_2": 2.836, + "eval_ag_news_n_ngrams_match_3": 0.748, + "eval_ag_news_num_pred_words": 42.124, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 32.001686445373565, + "eval_ag_news_pred_num_tokens": 62.734375, + "eval_ag_news_rouge_score": 0.357087203201098, + "eval_ag_news_runtime": 7.1579, + "eval_ag_news_samples_per_second": 69.853, + "eval_ag_news_steps_per_second": 0.14, + "eval_ag_news_token_set_f1": 0.3502126568889519, + "eval_ag_news_token_set_f1_sem": 0.004873482067898881, + "eval_ag_news_token_set_precision": 0.3262434355520826, + "eval_ag_news_token_set_recall": 0.4070500176528214, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 217 + }, + { + "epoch": 0.96, + "eval_anthropic_toxic_prompts_accuracy": 0.10990625, + "eval_anthropic_toxic_prompts_bleu_score": 3.6506017226905993, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.13345084858914963, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.7062422633171082, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004333991929178808, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1420000046491623, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015625630310786714, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 2.999067783355713, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.474, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.128, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.834, + "eval_anthropic_toxic_prompts_num_pred_words": 44.49, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 20.066821576092806, + "eval_anthropic_toxic_prompts_pred_num_tokens": 62.2265625, + "eval_anthropic_toxic_prompts_rouge_score": 0.24249253007085042, + "eval_anthropic_toxic_prompts_runtime": 7.1104, + "eval_anthropic_toxic_prompts_samples_per_second": 70.32, + "eval_anthropic_toxic_prompts_steps_per_second": 0.141, + "eval_anthropic_toxic_prompts_token_set_f1": 0.3484126557045319, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00591580762460389, + "eval_anthropic_toxic_prompts_token_set_precision": 0.4671694321444695, + "eval_anthropic_toxic_prompts_token_set_recall": 0.3032982618338858, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 217 + }, + { + "epoch": 0.96, + "eval_arxiv_accuracy": 0.4244375, + "eval_arxiv_bleu_score": 4.147123947233683, + "eval_arxiv_bleu_score_sem": 0.1266643362812505, + "eval_arxiv_emb_cos_sim": 0.7450116276741028, + "eval_arxiv_emb_cos_sim_sem": 0.005653072123844248, + "eval_arxiv_emb_top1_equal": 0.24400000274181366, + "eval_arxiv_emb_top1_equal_sem": 0.0192267343061996, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 2.9606425762176514, + "eval_arxiv_n_ngrams_match_1": 14.408, + "eval_arxiv_n_ngrams_match_2": 2.74, + "eval_arxiv_n_ngrams_match_3": 0.596, + "eval_arxiv_num_pred_words": 37.866, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 19.310376158162004, + "eval_arxiv_pred_num_tokens": 62.9375, + "eval_arxiv_rouge_score": 0.3543904040906273, + "eval_arxiv_runtime": 7.2195, + "eval_arxiv_samples_per_second": 69.257, + "eval_arxiv_steps_per_second": 0.139, + "eval_arxiv_token_set_f1": 0.35833270808713374, + "eval_arxiv_token_set_f1_sem": 0.004897445521183248, + "eval_arxiv_token_set_precision": 0.30320829802083726, + "eval_arxiv_token_set_recall": 0.4739172424026966, + "eval_arxiv_true_num_tokens": 64.0, + "step": 217 + }, + { + "epoch": 0.96, + "eval_python_code_alpaca_accuracy": 0.1595625, + "eval_python_code_alpaca_bleu_score": 5.186692098353156, + "eval_python_code_alpaca_bleu_score_sem": 0.15960643693191615, + "eval_python_code_alpaca_emb_cos_sim": 0.7835527658462524, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.003771730432211162, + "eval_python_code_alpaca_emb_top1_equal": 0.18400000035762787, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.017346174301986407, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.6080775260925293, + "eval_python_code_alpaca_n_ngrams_match_1": 10.23, + "eval_python_code_alpaca_n_ngrams_match_2": 3.044, + "eval_python_code_alpaca_n_ngrams_match_3": 1.038, + "eval_python_code_alpaca_num_pred_words": 40.37, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 13.572932145711775, + "eval_python_code_alpaca_pred_num_tokens": 62.0703125, + "eval_python_code_alpaca_rouge_score": 0.37938408557690495, + "eval_python_code_alpaca_runtime": 7.0219, + "eval_python_code_alpaca_samples_per_second": 71.206, + "eval_python_code_alpaca_steps_per_second": 0.142, + "eval_python_code_alpaca_token_set_f1": 0.4818092806192162, + "eval_python_code_alpaca_token_set_f1_sem": 0.005350169286529162, + "eval_python_code_alpaca_token_set_precision": 0.5706438293206109, + "eval_python_code_alpaca_token_set_recall": 0.4376943035920839, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 217 + }, + { + "epoch": 0.96, + "eval_wikibio_accuracy": 0.36484375, + "eval_wikibio_bleu_score": 5.0133655577159395, + "eval_wikibio_bleu_score_sem": 0.21321050674285638, + "eval_wikibio_emb_cos_sim": 0.7070615291595459, + "eval_wikibio_emb_cos_sim_sem": 0.006583151538281355, + "eval_wikibio_emb_top1_equal": 0.16599999368190765, + "eval_wikibio_emb_top1_equal_sem": 0.01665661404240883, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.3749570846557617, + "eval_wikibio_n_ngrams_match_1": 8.646, + "eval_wikibio_n_ngrams_match_2": 2.724, + "eval_wikibio_n_ngrams_match_3": 1.032, + "eval_wikibio_num_pred_words": 32.776, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 29.22302963794754, + "eval_wikibio_pred_num_tokens": 62.9453125, + "eval_wikibio_rouge_score": 0.302933650392108, + "eval_wikibio_runtime": 7.0614, + "eval_wikibio_samples_per_second": 70.807, + "eval_wikibio_steps_per_second": 0.142, + "eval_wikibio_token_set_f1": 0.2754425983911362, + "eval_wikibio_token_set_f1_sem": 0.006761952543645421, + "eval_wikibio_token_set_precision": 0.27522435313018684, + "eval_wikibio_token_set_recall": 0.30822619478242524, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 217 + }, + { + "epoch": 0.96, + "eval_bias-bios_accuracy": 0.48771875, + "eval_bias-bios_bleu_score": 15.05513983287074, + "eval_bias-bios_bleu_score_sem": 0.5941410638440162, + "eval_bias-bios_emb_cos_sim": 0.869806706905365, + "eval_bias-bios_emb_cos_sim_sem": 0.0030029188635396996, + "eval_bias-bios_emb_top1_equal": 0.33000001311302185, + "eval_bias-bios_emb_top1_equal_sem": 0.021049612042986412, + "eval_bias-bios_exact_match": 0.0, + "eval_bias-bios_exact_match_sem": 0.0, + "eval_bias-bios_loss": 1.9613577127456665, + "eval_bias-bios_n_ngrams_match_1": 21.756, + "eval_bias-bios_n_ngrams_match_2": 9.298, + "eval_bias-bios_n_ngrams_match_3": 4.902, + "eval_bias-bios_num_pred_words": 47.754, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 7.108972458353611, + "eval_bias-bios_pred_num_tokens": 62.640625, + "eval_bias-bios_rouge_score": 0.4947836024474072, + "eval_bias-bios_runtime": 8.4086, + "eval_bias-bios_samples_per_second": 59.463, + "eval_bias-bios_steps_per_second": 0.119, + "eval_bias-bios_token_set_f1": 0.5331869447306742, + "eval_bias-bios_token_set_f1_sem": 0.0060534508998137575, + "eval_bias-bios_token_set_precision": 0.519431249760555, + "eval_bias-bios_token_set_recall": 0.5606207070026014, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 217 + }, + { + "epoch": 1.0, + "learning_rate": 0.001, + "loss": 2.0476, + "step": 228 + }, + { + "epoch": 1.06, + "learning_rate": 0.001, + "loss": 2.2821, + "step": 240 + }, + { + "epoch": 1.09, + "eval_ag_news_accuracy": 0.30928125, + "eval_ag_news_bleu_score": 4.791953510451809, + "eval_ag_news_bleu_score_sem": 0.15930662519567532, + "eval_ag_news_emb_cos_sim": 0.8136026263237, + "eval_ag_news_emb_cos_sim_sem": 0.004870167857208421, + "eval_ag_news_emb_top1_equal": 0.2800000011920929, + "eval_ag_news_emb_top1_equal_sem": 0.02009995045904072, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.455613851547241, + "eval_ag_news_n_ngrams_match_1": 13.48, + "eval_ag_news_n_ngrams_match_2": 2.948, + "eval_ag_news_n_ngrams_match_3": 0.85, + "eval_ag_news_num_pred_words": 41.558, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 31.67772813762492, + "eval_ag_news_pred_num_tokens": 62.875, + "eval_ag_news_rouge_score": 0.36344000370285456, + "eval_ag_news_runtime": 7.2817, + "eval_ag_news_samples_per_second": 68.665, + "eval_ag_news_steps_per_second": 0.137, + "eval_ag_news_token_set_f1": 0.3561913692182338, + "eval_ag_news_token_set_f1_sem": 0.005130586740939777, + "eval_ag_news_token_set_precision": 0.3272258207777092, + "eval_ag_news_token_set_recall": 0.42120484031248273, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 248 + }, + { + "epoch": 1.09, + "eval_anthropic_toxic_prompts_accuracy": 0.10884375, + "eval_anthropic_toxic_prompts_bleu_score": 3.65233656559296, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12890462494559987, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.698917031288147, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004542096712519974, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1459999978542328, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015807205702664997, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 2.9917824268341064, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.254, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.144, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.786, + "eval_anthropic_toxic_prompts_num_pred_words": 43.646, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 19.921158873287087, + "eval_anthropic_toxic_prompts_pred_num_tokens": 62.75, + "eval_anthropic_toxic_prompts_rouge_score": 0.2387839420169841, + "eval_anthropic_toxic_prompts_runtime": 53.6519, + "eval_anthropic_toxic_prompts_samples_per_second": 9.319, + "eval_anthropic_toxic_prompts_steps_per_second": 0.019, + "eval_anthropic_toxic_prompts_token_set_f1": 0.34714249758448196, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006222371501576447, + "eval_anthropic_toxic_prompts_token_set_precision": 0.45275869291025683, + "eval_anthropic_toxic_prompts_token_set_recall": 0.31389262373602134, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 248 + }, + { + "epoch": 1.09, + "eval_arxiv_accuracy": 0.423125, + "eval_arxiv_bleu_score": 4.093781013555646, + "eval_arxiv_bleu_score_sem": 0.12570296310978762, + "eval_arxiv_emb_cos_sim": 0.7403872609138489, + "eval_arxiv_emb_cos_sim_sem": 0.00561281955332095, + "eval_arxiv_emb_top1_equal": 0.23999999463558197, + "eval_arxiv_emb_top1_equal_sem": 0.019118866773455794, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 2.958312749862671, + "eval_arxiv_n_ngrams_match_1": 13.988, + "eval_arxiv_n_ngrams_match_2": 2.676, + "eval_arxiv_n_ngrams_match_3": 0.594, + "eval_arxiv_num_pred_words": 36.748, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 19.265438703424365, + "eval_arxiv_pred_num_tokens": 62.96875, + "eval_arxiv_rouge_score": 0.3475298145049057, + "eval_arxiv_runtime": 7.4776, + "eval_arxiv_samples_per_second": 66.866, + "eval_arxiv_steps_per_second": 0.134, + "eval_arxiv_token_set_f1": 0.3533324229050544, + "eval_arxiv_token_set_f1_sem": 0.004971204765366318, + "eval_arxiv_token_set_precision": 0.2929452599792598, + "eval_arxiv_token_set_recall": 0.4866153026712255, + "eval_arxiv_true_num_tokens": 64.0, + "step": 248 + }, + { + "epoch": 1.09, + "eval_python_code_alpaca_accuracy": 0.15734375, + "eval_python_code_alpaca_bleu_score": 5.042946138157769, + "eval_python_code_alpaca_bleu_score_sem": 0.160790946633052, + "eval_python_code_alpaca_emb_cos_sim": 0.7753879427909851, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.004412812077351404, + "eval_python_code_alpaca_emb_top1_equal": 0.17399999499320984, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.016971269551723376, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.589085102081299, + "eval_python_code_alpaca_n_ngrams_match_1": 9.824, + "eval_python_code_alpaca_n_ngrams_match_2": 2.964, + "eval_python_code_alpaca_n_ngrams_match_3": 0.972, + "eval_python_code_alpaca_num_pred_words": 39.166, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 13.31758179996233, + "eval_python_code_alpaca_pred_num_tokens": 62.5546875, + "eval_python_code_alpaca_rouge_score": 0.3776958727600159, + "eval_python_code_alpaca_runtime": 7.7405, + "eval_python_code_alpaca_samples_per_second": 64.595, + "eval_python_code_alpaca_steps_per_second": 0.129, + "eval_python_code_alpaca_token_set_f1": 0.47429047711353645, + "eval_python_code_alpaca_token_set_f1_sem": 0.005526526442278936, + "eval_python_code_alpaca_token_set_precision": 0.546630656850991, + "eval_python_code_alpaca_token_set_recall": 0.44059115919441477, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 248 + }, + { + "epoch": 1.09, + "eval_wikibio_accuracy": 0.36778125, + "eval_wikibio_bleu_score": 4.660533381235605, + "eval_wikibio_bleu_score_sem": 0.2076411844668876, + "eval_wikibio_emb_cos_sim": 0.6815629005432129, + "eval_wikibio_emb_cos_sim_sem": 0.007799395854818424, + "eval_wikibio_emb_top1_equal": 0.15399999916553497, + "eval_wikibio_emb_top1_equal_sem": 0.016158283980625493, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.345656156539917, + "eval_wikibio_n_ngrams_match_1": 7.7, + "eval_wikibio_n_ngrams_match_2": 2.462, + "eval_wikibio_n_ngrams_match_3": 0.938, + "eval_wikibio_num_pred_words": 30.724, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 28.37919075038963, + "eval_wikibio_pred_num_tokens": 63.0, + "eval_wikibio_rouge_score": 0.2744365265889154, + "eval_wikibio_runtime": 7.0186, + "eval_wikibio_samples_per_second": 71.24, + "eval_wikibio_steps_per_second": 0.142, + "eval_wikibio_token_set_f1": 0.25223998792231406, + "eval_wikibio_token_set_f1_sem": 0.007338838842555383, + "eval_wikibio_token_set_precision": 0.24746688907945488, + "eval_wikibio_token_set_recall": 0.2920593541010084, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 248 + }, + { + "epoch": 1.09, + "eval_bias-bios_accuracy": 0.49, + "eval_bias-bios_bleu_score": 15.187181809196383, + "eval_bias-bios_bleu_score_sem": 0.6108940925547736, + "eval_bias-bios_emb_cos_sim": 0.8737805485725403, + "eval_bias-bios_emb_cos_sim_sem": 0.002709689119852354, + "eval_bias-bios_emb_top1_equal": 0.30799999833106995, + "eval_bias-bios_emb_top1_equal_sem": 0.020667033028164562, + "eval_bias-bios_exact_match": 0.0, + "eval_bias-bios_exact_match_sem": 0.0, + "eval_bias-bios_loss": 1.9598242044448853, + "eval_bias-bios_n_ngrams_match_1": 21.678, + "eval_bias-bios_n_ngrams_match_2": 9.286, + "eval_bias-bios_n_ngrams_match_3": 4.858, + "eval_bias-bios_num_pred_words": 46.952, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 7.0980791447069205, + "eval_bias-bios_pred_num_tokens": 62.9296875, + "eval_bias-bios_rouge_score": 0.5001935759082592, + "eval_bias-bios_runtime": 7.964, + "eval_bias-bios_samples_per_second": 62.783, + "eval_bias-bios_steps_per_second": 0.126, + "eval_bias-bios_token_set_f1": 0.5352723142499153, + "eval_bias-bios_token_set_f1_sem": 0.006176020693455797, + "eval_bias-bios_token_set_precision": 0.5163630183545604, + "eval_bias-bios_token_set_recall": 0.569456990893491, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 248 + }, + { + "epoch": 1.11, + "learning_rate": 0.001, + "loss": 2.2284, + "step": 252 + }, + { + "epoch": 1.16, + "learning_rate": 0.001, + "loss": 2.1384, + "step": 264 + }, + { + "epoch": 1.22, + "learning_rate": 0.001, + "loss": 1.9197, + "step": 276 + }, + { + "epoch": 1.23, + "eval_ag_news_accuracy": 0.3075625, + "eval_ag_news_bleu_score": 4.12785289110093, + "eval_ag_news_bleu_score_sem": 0.15900310936221984, + "eval_ag_news_emb_cos_sim": 0.7948484420776367, + "eval_ag_news_emb_cos_sim_sem": 0.004320750557453928, + "eval_ag_news_emb_top1_equal": 0.25200000405311584, + "eval_ag_news_emb_top1_equal_sem": 0.019435728067390842, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.555957794189453, + "eval_ag_news_n_ngrams_match_1": 10.78, + "eval_ag_news_n_ngrams_match_2": 2.354, + "eval_ag_news_n_ngrams_match_3": 0.628, + "eval_ag_news_num_pred_words": 25.994, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 35.02134715186829, + "eval_ag_news_pred_num_tokens": 38.1875, + "eval_ag_news_rouge_score": 0.35369982193370303, + "eval_ag_news_runtime": 6.9396, + "eval_ag_news_samples_per_second": 72.05, + "eval_ag_news_steps_per_second": 0.144, + "eval_ag_news_token_set_f1": 0.34152207043374166, + "eval_ag_news_token_set_f1_sem": 0.004979529346488383, + "eval_ag_news_token_set_precision": 0.2837138845906317, + "eval_ag_news_token_set_recall": 0.4584907965605788, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 279 + }, + { + "epoch": 1.23, + "eval_anthropic_toxic_prompts_accuracy": 0.11478125, + "eval_anthropic_toxic_prompts_bleu_score": 6.3220699630707715, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.25242026056820677, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.7130110263824463, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004697909073150958, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.17599999904632568, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.017047853594066943, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 2.773521900177002, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.608, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.842, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.734, + "eval_anthropic_toxic_prompts_num_pred_words": 23.556, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 16.014937815731553, + "eval_anthropic_toxic_prompts_pred_num_tokens": 32.7109375, + "eval_anthropic_toxic_prompts_rouge_score": 0.3300720544583728, + "eval_anthropic_toxic_prompts_runtime": 6.6988, + "eval_anthropic_toxic_prompts_samples_per_second": 74.64, + "eval_anthropic_toxic_prompts_steps_per_second": 0.149, + "eval_anthropic_toxic_prompts_token_set_f1": 0.3603427310230735, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0064971793770156085, + "eval_anthropic_toxic_prompts_token_set_precision": 0.4225042332482681, + "eval_anthropic_toxic_prompts_token_set_recall": 0.34628301551963964, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 279 + }, + { + "epoch": 1.23, + "eval_arxiv_accuracy": 0.40384375, + "eval_arxiv_bleu_score": 3.082662931867365, + "eval_arxiv_bleu_score_sem": 0.09880748146822564, + "eval_arxiv_emb_cos_sim": 0.7372804284095764, + "eval_arxiv_emb_cos_sim_sem": 0.005092747519021461, + "eval_arxiv_emb_top1_equal": 0.11599999666213989, + "eval_arxiv_emb_top1_equal_sem": 0.014335236978191066, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.1328988075256348, + "eval_arxiv_n_ngrams_match_1": 11.846, + "eval_arxiv_n_ngrams_match_2": 2.19, + "eval_arxiv_n_ngrams_match_3": 0.468, + "eval_arxiv_num_pred_words": 25.296, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 22.940383005409256, + "eval_arxiv_pred_num_tokens": 41.46875, + "eval_arxiv_rouge_score": 0.34774279797636015, + "eval_arxiv_runtime": 7.0058, + "eval_arxiv_samples_per_second": 71.369, + "eval_arxiv_steps_per_second": 0.143, + "eval_arxiv_token_set_f1": 0.35113133933307145, + "eval_arxiv_token_set_f1_sem": 0.004619234430328476, + "eval_arxiv_token_set_precision": 0.2752955816735799, + "eval_arxiv_token_set_recall": 0.5065531056992818, + "eval_arxiv_true_num_tokens": 64.0, + "step": 279 + }, + { + "epoch": 1.23, + "eval_python_code_alpaca_accuracy": 0.1686875, + "eval_python_code_alpaca_bleu_score": 8.305728890617798, + "eval_python_code_alpaca_bleu_score_sem": 0.2805659938677739, + "eval_python_code_alpaca_emb_cos_sim": 0.8004173040390015, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.004346704512354936, + "eval_python_code_alpaca_emb_top1_equal": 0.2199999988079071, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.01854420989980125, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.3644516468048096, + "eval_python_code_alpaca_n_ngrams_match_1": 9.022, + "eval_python_code_alpaca_n_ngrams_match_2": 2.61, + "eval_python_code_alpaca_n_ngrams_match_3": 0.888, + "eval_python_code_alpaca_num_pred_words": 22.57, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 10.638203724806774, + "eval_python_code_alpaca_pred_num_tokens": 35.8203125, + "eval_python_code_alpaca_rouge_score": 0.47919721432489615, + "eval_python_code_alpaca_runtime": 6.8787, + "eval_python_code_alpaca_samples_per_second": 72.689, + "eval_python_code_alpaca_steps_per_second": 0.145, + "eval_python_code_alpaca_token_set_f1": 0.5057425160250362, + "eval_python_code_alpaca_token_set_f1_sem": 0.005917123864345868, + "eval_python_code_alpaca_token_set_precision": 0.5222648491118239, + "eval_python_code_alpaca_token_set_recall": 0.5106662898011187, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 279 + }, + { + "epoch": 1.23, + "eval_wikibio_accuracy": 0.35221875, + "eval_wikibio_bleu_score": 6.087351841555892, + "eval_wikibio_bleu_score_sem": 0.24588335047545587, + "eval_wikibio_emb_cos_sim": 0.7169853448867798, + "eval_wikibio_emb_cos_sim_sem": 0.0066086913254658555, + "eval_wikibio_emb_top1_equal": 0.15600000321865082, + "eval_wikibio_emb_top1_equal_sem": 0.01624363651663569, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.538841485977173, + "eval_wikibio_n_ngrams_match_1": 8.42, + "eval_wikibio_n_ngrams_match_2": 2.744, + "eval_wikibio_n_ngrams_match_3": 1.036, + "eval_wikibio_num_pred_words": 26.898, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 34.427011902691085, + "eval_wikibio_pred_num_tokens": 47.8671875, + "eval_wikibio_rouge_score": 0.3474562861357006, + "eval_wikibio_runtime": 7.8621, + "eval_wikibio_samples_per_second": 63.597, + "eval_wikibio_steps_per_second": 0.127, + "eval_wikibio_token_set_f1": 0.29907487029588065, + "eval_wikibio_token_set_f1_sem": 0.0062630922566897064, + "eval_wikibio_token_set_precision": 0.2846087659111068, + "eval_wikibio_token_set_recall": 0.33428339505042515, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 279 + }, + { + "epoch": 1.23, + "eval_bias-bios_accuracy": 0.48678125, + "eval_bias-bios_bleu_score": 15.07724608098222, + "eval_bias-bios_bleu_score_sem": 0.731810536667803, + "eval_bias-bios_emb_cos_sim": 0.8541937470436096, + "eval_bias-bios_emb_cos_sim_sem": 0.003292646007841742, + "eval_bias-bios_emb_top1_equal": 0.2540000081062317, + "eval_bias-bios_emb_top1_equal_sem": 0.019486597059300604, + "eval_bias-bios_exact_match": 0.0, + "eval_bias-bios_exact_match_sem": 0.0, + "eval_bias-bios_loss": 1.9884752035140991, + "eval_bias-bios_n_ngrams_match_1": 18.086, + "eval_bias-bios_n_ngrams_match_2": 7.742, + "eval_bias-bios_n_ngrams_match_3": 4.148, + "eval_bias-bios_num_pred_words": 30.046, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 7.304387562139313, + "eval_bias-bios_pred_num_tokens": 39.796875, + "eval_bias-bios_rouge_score": 0.5196057757221662, + "eval_bias-bios_runtime": 7.1371, + "eval_bias-bios_samples_per_second": 70.056, + "eval_bias-bios_steps_per_second": 0.14, + "eval_bias-bios_token_set_f1": 0.5309677565984916, + "eval_bias-bios_token_set_f1_sem": 0.00656729720405154, + "eval_bias-bios_token_set_precision": 0.4679258027534948, + "eval_bias-bios_token_set_recall": 0.6306590632503222, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 279 + }, + { + "epoch": 1.27, + "learning_rate": 0.001, + "loss": 2.2776, + "step": 288 + }, + { + "epoch": 1.32, + "learning_rate": 0.001, + "loss": 2.2042, + "step": 300 + }, + { + "epoch": 1.37, + "eval_ag_news_accuracy": 0.31025, + "eval_ag_news_bleu_score": 4.693319706150456, + "eval_ag_news_bleu_score_sem": 0.15935382004813128, + "eval_ag_news_emb_cos_sim": 0.8091204166412354, + "eval_ag_news_emb_cos_sim_sem": 0.00475525047688145, + "eval_ag_news_emb_top1_equal": 0.2680000066757202, + "eval_ag_news_emb_top1_equal_sem": 0.019827715320059287, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.466686487197876, + "eval_ag_news_n_ngrams_match_1": 13.084, + "eval_ag_news_n_ngrams_match_2": 2.798, + "eval_ag_news_n_ngrams_match_3": 0.768, + "eval_ag_news_num_pred_words": 39.898, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 32.03043316309415, + "eval_ag_news_pred_num_tokens": 61.5390625, + "eval_ag_news_rouge_score": 0.3608276579045019, + "eval_ag_news_runtime": 8.1216, + "eval_ag_news_samples_per_second": 61.564, + "eval_ag_news_steps_per_second": 0.123, + "eval_ag_news_token_set_f1": 0.3483842640025073, + "eval_ag_news_token_set_f1_sem": 0.005037946090827277, + "eval_ag_news_token_set_precision": 0.31860558977002634, + "eval_ag_news_token_set_recall": 0.414745433715474, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 310 + }, + { + "epoch": 1.37, + "eval_anthropic_toxic_prompts_accuracy": 0.11003125, + "eval_anthropic_toxic_prompts_bleu_score": 3.6853009424635483, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1363010756669311, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6960632801055908, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004544997885736092, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.13600000739097595, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015345322399934358, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 2.927776575088501, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.02, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.948, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.72, + "eval_anthropic_toxic_prompts_num_pred_words": 40.326, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 18.68603727333281, + "eval_anthropic_toxic_prompts_pred_num_tokens": 59.15625, + "eval_anthropic_toxic_prompts_rouge_score": 0.24376940297754532, + "eval_anthropic_toxic_prompts_runtime": 7.3659, + "eval_anthropic_toxic_prompts_samples_per_second": 67.881, + "eval_anthropic_toxic_prompts_steps_per_second": 0.136, + "eval_anthropic_toxic_prompts_token_set_f1": 0.33859596445362705, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006132379569659659, + "eval_anthropic_toxic_prompts_token_set_precision": 0.4414920439613121, + "eval_anthropic_toxic_prompts_token_set_recall": 0.30141584905289376, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 310 + }, + { + "epoch": 1.37, + "eval_arxiv_accuracy": 0.4235, + "eval_arxiv_bleu_score": 4.218543068302892, + "eval_arxiv_bleu_score_sem": 0.13150743222363528, + "eval_arxiv_emb_cos_sim": 0.7446539998054504, + "eval_arxiv_emb_cos_sim_sem": 0.005929241023132271, + "eval_arxiv_emb_top1_equal": 0.25600001215934753, + "eval_arxiv_emb_top1_equal_sem": 0.019536923601457774, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 2.970463275909424, + "eval_arxiv_n_ngrams_match_1": 14.362, + "eval_arxiv_n_ngrams_match_2": 2.72, + "eval_arxiv_n_ngrams_match_3": 0.63, + "eval_arxiv_num_pred_words": 36.08, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 19.50095182485385, + "eval_arxiv_pred_num_tokens": 62.0390625, + "eval_arxiv_rouge_score": 0.3580856168544577, + "eval_arxiv_runtime": 7.3514, + "eval_arxiv_samples_per_second": 68.014, + "eval_arxiv_steps_per_second": 0.136, + "eval_arxiv_token_set_f1": 0.3597396879590065, + "eval_arxiv_token_set_f1_sem": 0.004979600128843339, + "eval_arxiv_token_set_precision": 0.30250953382052703, + "eval_arxiv_token_set_recall": 0.47400110874266405, + "eval_arxiv_true_num_tokens": 64.0, + "step": 310 + }, + { + "epoch": 1.37, + "eval_python_code_alpaca_accuracy": 0.15559375, + "eval_python_code_alpaca_bleu_score": 5.160248775704173, + "eval_python_code_alpaca_bleu_score_sem": 0.16995230777879264, + "eval_python_code_alpaca_emb_cos_sim": 0.7683752775192261, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.004745081210020772, + "eval_python_code_alpaca_emb_top1_equal": 0.18400000035762787, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.017346174301986407, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.561814069747925, + "eval_python_code_alpaca_n_ngrams_match_1": 9.544, + "eval_python_code_alpaca_n_ngrams_match_2": 2.728, + "eval_python_code_alpaca_n_ngrams_match_3": 0.898, + "eval_python_code_alpaca_num_pred_words": 36.88, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 12.959305088187559, + "eval_python_code_alpaca_pred_num_tokens": 59.703125, + "eval_python_code_alpaca_rouge_score": 0.37555207515750494, + "eval_python_code_alpaca_runtime": 7.0304, + "eval_python_code_alpaca_samples_per_second": 71.12, + "eval_python_code_alpaca_steps_per_second": 0.142, + "eval_python_code_alpaca_token_set_f1": 0.4665177670255288, + "eval_python_code_alpaca_token_set_f1_sem": 0.0058097335617844715, + "eval_python_code_alpaca_token_set_precision": 0.5313029679560264, + "eval_python_code_alpaca_token_set_recall": 0.4370481057831364, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 310 + }, + { + "epoch": 1.37, + "eval_wikibio_accuracy": 0.36859375, + "eval_wikibio_bleu_score": 4.81258814988866, + "eval_wikibio_bleu_score_sem": 0.22299112466528667, + "eval_wikibio_emb_cos_sim": 0.6779038906097412, + "eval_wikibio_emb_cos_sim_sem": 0.008168129739345551, + "eval_wikibio_emb_top1_equal": 0.12399999797344208, + "eval_wikibio_emb_top1_equal_sem": 0.014754096152018748, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.3508071899414062, + "eval_wikibio_n_ngrams_match_1": 7.686, + "eval_wikibio_n_ngrams_match_2": 2.39, + "eval_wikibio_n_ngrams_match_3": 0.942, + "eval_wikibio_num_pred_words": 30.312, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 28.525750051723897, + "eval_wikibio_pred_num_tokens": 62.8125, + "eval_wikibio_rouge_score": 0.2758010943062876, + "eval_wikibio_runtime": 8.08, + "eval_wikibio_samples_per_second": 61.881, + "eval_wikibio_steps_per_second": 0.124, + "eval_wikibio_token_set_f1": 0.252609745548634, + "eval_wikibio_token_set_f1_sem": 0.0070662065239258855, + "eval_wikibio_token_set_precision": 0.2461256238356199, + "eval_wikibio_token_set_recall": 0.2928348544405085, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 310 + }, + { + "epoch": 1.37, + "eval_bias-bios_accuracy": 0.4968125, + "eval_bias-bios_bleu_score": 16.103834432664616, + "eval_bias-bios_bleu_score_sem": 0.6847879152877013, + "eval_bias-bios_emb_cos_sim": 0.8709802627563477, + "eval_bias-bios_emb_cos_sim_sem": 0.0028683356804806155, + "eval_bias-bios_emb_top1_equal": 0.31200000643730164, + "eval_bias-bios_emb_top1_equal_sem": 0.0207405942792578, + "eval_bias-bios_exact_match": 0.0, + "eval_bias-bios_exact_match_sem": 0.0, + "eval_bias-bios_loss": 1.8962446451187134, + "eval_bias-bios_n_ngrams_match_1": 21.612, + "eval_bias-bios_n_ngrams_match_2": 9.308, + "eval_bias-bios_n_ngrams_match_3": 4.906, + "eval_bias-bios_num_pred_words": 45.066, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 6.660833621539512, + "eval_bias-bios_pred_num_tokens": 60.7890625, + "eval_bias-bios_rouge_score": 0.5095092930898859, + "eval_bias-bios_runtime": 8.1234, + "eval_bias-bios_samples_per_second": 61.55, + "eval_bias-bios_steps_per_second": 0.123, + "eval_bias-bios_token_set_f1": 0.5380991233090037, + "eval_bias-bios_token_set_f1_sem": 0.006292962348520924, + "eval_bias-bios_token_set_precision": 0.5172965980264513, + "eval_bias-bios_token_set_recall": 0.5718470133513807, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 310 + }, + { + "epoch": 1.37, + "learning_rate": 0.001, + "loss": 2.1261, + "step": 312 + }, + { + "epoch": 1.43, + "learning_rate": 0.001, + "loss": 1.9135, + "step": 324 + }, + { + "epoch": 1.48, + "learning_rate": 0.001, + "loss": 2.2064, + "step": 336 + }, + { + "epoch": 1.5, + "eval_ag_news_accuracy": 0.30609375, + "eval_ag_news_bleu_score": 4.719656086935481, + "eval_ag_news_bleu_score_sem": 0.15740510117641943, + "eval_ag_news_emb_cos_sim": 0.804071307182312, + "eval_ag_news_emb_cos_sim_sem": 0.005197311039635828, + "eval_ag_news_emb_top1_equal": 0.25600001215934753, + "eval_ag_news_emb_top1_equal_sem": 0.019536923601457774, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.493675470352173, + "eval_ag_news_n_ngrams_match_1": 13.14, + "eval_ag_news_n_ngrams_match_2": 2.93, + "eval_ag_news_n_ngrams_match_3": 0.822, + "eval_ag_news_num_pred_words": 42.324, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 32.90667321059251, + "eval_ag_news_pred_num_tokens": 62.5078125, + "eval_ag_news_rouge_score": 0.3529883145442065, + "eval_ag_news_runtime": 19.2463, + "eval_ag_news_samples_per_second": 25.979, + "eval_ag_news_steps_per_second": 0.052, + "eval_ag_news_token_set_f1": 0.34624435301840834, + "eval_ag_news_token_set_f1_sem": 0.005172152165157513, + "eval_ag_news_token_set_precision": 0.31946456007584323, + "eval_ag_news_token_set_recall": 0.4099159459965073, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 341 + }, + { + "epoch": 1.5, + "eval_anthropic_toxic_prompts_accuracy": 0.10834375, + "eval_anthropic_toxic_prompts_bleu_score": 3.655266926251835, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1304988346023646, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6967235207557678, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004537156687930315, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.12999999523162842, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015055009156667442, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 3.007629871368408, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.31, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.136, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.8, + "eval_anthropic_toxic_prompts_num_pred_words": 43.484, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 20.23937311515898, + "eval_anthropic_toxic_prompts_pred_num_tokens": 61.6015625, + "eval_anthropic_toxic_prompts_rouge_score": 0.2394587760880788, + "eval_anthropic_toxic_prompts_runtime": 8.2771, + "eval_anthropic_toxic_prompts_samples_per_second": 60.408, + "eval_anthropic_toxic_prompts_steps_per_second": 0.121, + "eval_anthropic_toxic_prompts_token_set_f1": 0.3446042686958433, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005849076849865448, + "eval_anthropic_toxic_prompts_token_set_precision": 0.4542920684406606, + "eval_anthropic_toxic_prompts_token_set_recall": 0.3043220484487612, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 341 + }, + { + "epoch": 1.5, + "eval_arxiv_accuracy": 0.42690625, + "eval_arxiv_bleu_score": 4.013162696446355, + "eval_arxiv_bleu_score_sem": 0.12361634310105885, + "eval_arxiv_emb_cos_sim": 0.7171130180358887, + "eval_arxiv_emb_cos_sim_sem": 0.006713028936416672, + "eval_arxiv_emb_top1_equal": 0.20000000298023224, + "eval_arxiv_emb_top1_equal_sem": 0.017906459589198134, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 2.9682939052581787, + "eval_arxiv_n_ngrams_match_1": 13.592, + "eval_arxiv_n_ngrams_match_2": 2.658, + "eval_arxiv_n_ngrams_match_3": 0.58, + "eval_arxiv_num_pred_words": 37.424, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 19.4586928865171, + "eval_arxiv_pred_num_tokens": 62.859375, + "eval_arxiv_rouge_score": 0.3330393479935816, + "eval_arxiv_runtime": 11.536, + "eval_arxiv_samples_per_second": 43.343, + "eval_arxiv_steps_per_second": 0.087, + "eval_arxiv_token_set_f1": 0.3419093272156702, + "eval_arxiv_token_set_f1_sem": 0.00535918406194554, + "eval_arxiv_token_set_precision": 0.28198100534330306, + "eval_arxiv_token_set_recall": 0.4941405864834983, + "eval_arxiv_true_num_tokens": 64.0, + "step": 341 + }, + { + "epoch": 1.5, + "eval_python_code_alpaca_accuracy": 0.15521875, + "eval_python_code_alpaca_bleu_score": 5.032228005406816, + "eval_python_code_alpaca_bleu_score_sem": 0.16508121574472945, + "eval_python_code_alpaca_emb_cos_sim": 0.7669004201889038, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.004512636826232725, + "eval_python_code_alpaca_emb_top1_equal": 0.18799999356269836, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.017490679184236527, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.653953790664673, + "eval_python_code_alpaca_n_ngrams_match_1": 9.894, + "eval_python_code_alpaca_n_ngrams_match_2": 2.998, + "eval_python_code_alpaca_n_ngrams_match_3": 0.95, + "eval_python_code_alpaca_num_pred_words": 39.462, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 14.210111528413647, + "eval_python_code_alpaca_pred_num_tokens": 61.59375, + "eval_python_code_alpaca_rouge_score": 0.37453041601046916, + "eval_python_code_alpaca_runtime": 11.8851, + "eval_python_code_alpaca_samples_per_second": 42.07, + "eval_python_code_alpaca_steps_per_second": 0.084, + "eval_python_code_alpaca_token_set_f1": 0.4676837883490022, + "eval_python_code_alpaca_token_set_f1_sem": 0.005546046178802121, + "eval_python_code_alpaca_token_set_precision": 0.5459422436754255, + "eval_python_code_alpaca_token_set_recall": 0.4287897598195947, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 341 + }, + { + "epoch": 1.5, + "eval_wikibio_accuracy": 0.37015625, + "eval_wikibio_bleu_score": 4.113717211494571, + "eval_wikibio_bleu_score_sem": 0.2014863036543364, + "eval_wikibio_emb_cos_sim": 0.6409357190132141, + "eval_wikibio_emb_cos_sim_sem": 0.008766130611566907, + "eval_wikibio_emb_top1_equal": 0.16200000047683716, + "eval_wikibio_emb_top1_equal_sem": 0.016494123019099097, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.347160577774048, + "eval_wikibio_n_ngrams_match_1": 6.912, + "eval_wikibio_n_ngrams_match_2": 2.12, + "eval_wikibio_n_ngrams_match_3": 0.782, + "eval_wikibio_num_pred_words": 28.338, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 28.421917138746423, + "eval_wikibio_pred_num_tokens": 62.96875, + "eval_wikibio_rouge_score": 0.24506871939674832, + "eval_wikibio_runtime": 8.7393, + "eval_wikibio_samples_per_second": 57.213, + "eval_wikibio_steps_per_second": 0.114, + "eval_wikibio_token_set_f1": 0.225897167864661, + "eval_wikibio_token_set_f1_sem": 0.0075924411709069875, + "eval_wikibio_token_set_precision": 0.2193463805245025, + "eval_wikibio_token_set_recall": 0.2687629784354538, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 341 + }, + { + "epoch": 1.5, + "eval_bias-bios_accuracy": 0.49153125, + "eval_bias-bios_bleu_score": 15.395719862226972, + "eval_bias-bios_bleu_score_sem": 0.6157561000113725, + "eval_bias-bios_emb_cos_sim": 0.8640764951705933, + "eval_bias-bios_emb_cos_sim_sem": 0.0031932264302743985, + "eval_bias-bios_emb_top1_equal": 0.2980000078678131, + "eval_bias-bios_emb_top1_equal_sem": 0.020475119103777986, + "eval_bias-bios_exact_match": 0.0, + "eval_bias-bios_exact_match_sem": 0.0, + "eval_bias-bios_loss": 1.94288969039917, + "eval_bias-bios_n_ngrams_match_1": 21.756, + "eval_bias-bios_n_ngrams_match_2": 9.39, + "eval_bias-bios_n_ngrams_match_3": 4.956, + "eval_bias-bios_num_pred_words": 47.264, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 6.978888688379133, + "eval_bias-bios_pred_num_tokens": 62.4921875, + "eval_bias-bios_rouge_score": 0.49805101735783913, + "eval_bias-bios_runtime": 8.6552, + "eval_bias-bios_samples_per_second": 57.769, + "eval_bias-bios_steps_per_second": 0.116, + "eval_bias-bios_token_set_f1": 0.5376376099546539, + "eval_bias-bios_token_set_f1_sem": 0.006270653343759874, + "eval_bias-bios_token_set_precision": 0.5173410887209161, + "eval_bias-bios_token_set_recall": 0.5767708356294744, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 341 + }, + { + "epoch": 1.53, + "learning_rate": 0.001, + "loss": 2.2102, + "step": 348 + }, + { + "epoch": 1.59, + "learning_rate": 0.001, + "loss": 2.1441, + "step": 360 + }, + { + "epoch": 1.64, + "learning_rate": 0.001, + "loss": 1.9089, + "step": 372 + }, + { + "epoch": 1.64, + "eval_ag_news_accuracy": 0.30925, + "eval_ag_news_bleu_score": 4.688042448833678, + "eval_ag_news_bleu_score_sem": 0.16248013966675176, + "eval_ag_news_emb_cos_sim": 0.7975764870643616, + "eval_ag_news_emb_cos_sim_sem": 0.005275002305890863, + "eval_ag_news_emb_top1_equal": 0.257999986410141, + "eval_ag_news_emb_top1_equal_sem": 0.019586711692263472, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.494887113571167, + "eval_ag_news_n_ngrams_match_1": 11.944, + "eval_ag_news_n_ngrams_match_2": 2.586, + "eval_ag_news_n_ngrams_match_3": 0.704, + "eval_ag_news_num_pred_words": 32.084, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 32.94656852260913, + "eval_ag_news_pred_num_tokens": 48.0078125, + "eval_ag_news_rouge_score": 0.36139502901690423, + "eval_ag_news_runtime": 7.5346, + "eval_ag_news_samples_per_second": 66.361, + "eval_ag_news_steps_per_second": 0.133, + "eval_ag_news_token_set_f1": 0.34298495738014895, + "eval_ag_news_token_set_f1_sem": 0.004847096265450112, + "eval_ag_news_token_set_precision": 0.29882186799241595, + "eval_ag_news_token_set_recall": 0.42795943849577756, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 372 + }, + { + "epoch": 1.64, + "eval_anthropic_toxic_prompts_accuracy": 0.111375, + "eval_anthropic_toxic_prompts_bleu_score": 5.018590065002315, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.18062605094910428, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.7002917528152466, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.005111374737061413, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1420000046491623, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015625630310786714, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 2.8211557865142822, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.738, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.876, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.718, + "eval_anthropic_toxic_prompts_num_pred_words": 29.236, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 16.796252339831998, + "eval_anthropic_toxic_prompts_pred_num_tokens": 43.2734375, + "eval_anthropic_toxic_prompts_rouge_score": 0.2933406224694952, + "eval_anthropic_toxic_prompts_runtime": 8.6745, + "eval_anthropic_toxic_prompts_samples_per_second": 57.64, + "eval_anthropic_toxic_prompts_steps_per_second": 0.115, + "eval_anthropic_toxic_prompts_token_set_f1": 0.35145152234689003, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0065480762736281565, + "eval_anthropic_toxic_prompts_token_set_precision": 0.4267148235801868, + "eval_anthropic_toxic_prompts_token_set_recall": 0.32579150192546075, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 372 + }, + { + "epoch": 1.64, + "eval_arxiv_accuracy": 0.415125, + "eval_arxiv_bleu_score": 3.8688483882338414, + "eval_arxiv_bleu_score_sem": 0.12425377464639385, + "eval_arxiv_emb_cos_sim": 0.7446158528327942, + "eval_arxiv_emb_cos_sim_sem": 0.00546308976040103, + "eval_arxiv_emb_top1_equal": 0.1940000057220459, + "eval_arxiv_emb_top1_equal_sem": 0.017701828083634023, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.0469841957092285, + "eval_arxiv_n_ngrams_match_1": 13.512, + "eval_arxiv_n_ngrams_match_2": 2.482, + "eval_arxiv_n_ngrams_match_3": 0.548, + "eval_arxiv_num_pred_words": 31.494, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 21.051760602600126, + "eval_arxiv_pred_num_tokens": 53.2578125, + "eval_arxiv_rouge_score": 0.359901260786132, + "eval_arxiv_runtime": 7.5017, + "eval_arxiv_samples_per_second": 66.652, + "eval_arxiv_steps_per_second": 0.133, + "eval_arxiv_token_set_f1": 0.35944233136816656, + "eval_arxiv_token_set_f1_sem": 0.004761192847849154, + "eval_arxiv_token_set_precision": 0.2945148751601931, + "eval_arxiv_token_set_recall": 0.4859506595277603, + "eval_arxiv_true_num_tokens": 64.0, + "step": 372 + }, + { + "epoch": 1.64, + "eval_python_code_alpaca_accuracy": 0.16, + "eval_python_code_alpaca_bleu_score": 6.502313122815586, + "eval_python_code_alpaca_bleu_score_sem": 0.2160554941968861, + "eval_python_code_alpaca_emb_cos_sim": 0.7811670303344727, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.004759169576383917, + "eval_python_code_alpaca_emb_top1_equal": 0.20399999618530273, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.018039369108186407, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.4665610790252686, + "eval_python_code_alpaca_n_ngrams_match_1": 9.412, + "eval_python_code_alpaca_n_ngrams_match_2": 2.678, + "eval_python_code_alpaca_n_ngrams_match_3": 0.86, + "eval_python_code_alpaca_num_pred_words": 28.818, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 11.78186021830665, + "eval_python_code_alpaca_pred_num_tokens": 46.53125, + "eval_python_code_alpaca_rouge_score": 0.43334761445749104, + "eval_python_code_alpaca_runtime": 26.2085, + "eval_python_code_alpaca_samples_per_second": 19.078, + "eval_python_code_alpaca_steps_per_second": 0.038, + "eval_python_code_alpaca_token_set_f1": 0.48901485926774935, + "eval_python_code_alpaca_token_set_f1_sem": 0.00599859770187912, + "eval_python_code_alpaca_token_set_precision": 0.5291827441521135, + "eval_python_code_alpaca_token_set_recall": 0.4760439727772603, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 372 + }, + { + "epoch": 1.64, + "eval_wikibio_accuracy": 0.357375, + "eval_wikibio_bleu_score": 5.612141235113202, + "eval_wikibio_bleu_score_sem": 0.23968984385259356, + "eval_wikibio_emb_cos_sim": 0.705348789691925, + "eval_wikibio_emb_cos_sim_sem": 0.0076665762998554595, + "eval_wikibio_emb_top1_equal": 0.17599999904632568, + "eval_wikibio_emb_top1_equal_sem": 0.017047853594066943, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.4384877681732178, + "eval_wikibio_n_ngrams_match_1": 8.348, + "eval_wikibio_n_ngrams_match_2": 2.648, + "eval_wikibio_n_ngrams_match_3": 0.996, + "eval_wikibio_num_pred_words": 28.77, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 31.139831899489486, + "eval_wikibio_pred_num_tokens": 55.4453125, + "eval_wikibio_rouge_score": 0.3224364790302755, + "eval_wikibio_runtime": 171.104, + "eval_wikibio_samples_per_second": 2.922, + "eval_wikibio_steps_per_second": 0.006, + "eval_wikibio_token_set_f1": 0.28131254068170125, + "eval_wikibio_token_set_f1_sem": 0.006813958761638849, + "eval_wikibio_token_set_precision": 0.2743595630282444, + "eval_wikibio_token_set_recall": 0.31132292902546804, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 372 + }, + { + "epoch": 1.64, + "eval_bias-bios_accuracy": 0.500125, + "eval_bias-bios_bleu_score": 16.492712649226394, + "eval_bias-bios_bleu_score_sem": 0.7277856036871401, + "eval_bias-bios_emb_cos_sim": 0.8637259006500244, + "eval_bias-bios_emb_cos_sim_sem": 0.0030929461968649467, + "eval_bias-bios_emb_top1_equal": 0.2759999930858612, + "eval_bias-bios_emb_top1_equal_sem": 0.02001121794127971, + "eval_bias-bios_exact_match": 0.0, + "eval_bias-bios_exact_match_sem": 0.0, + "eval_bias-bios_loss": 1.886657953262329, + "eval_bias-bios_n_ngrams_match_1": 20.186, + "eval_bias-bios_n_ngrams_match_2": 8.578, + "eval_bias-bios_n_ngrams_match_3": 4.562, + "eval_bias-bios_num_pred_words": 36.648, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 6.597283367169158, + "eval_bias-bios_pred_num_tokens": 48.828125, + "eval_bias-bios_rouge_score": 0.5275492861718887, + "eval_bias-bios_runtime": 38.8094, + "eval_bias-bios_samples_per_second": 12.883, + "eval_bias-bios_steps_per_second": 0.026, + "eval_bias-bios_token_set_f1": 0.5402486356387385, + "eval_bias-bios_token_set_f1_sem": 0.0064675749666265585, + "eval_bias-bios_token_set_precision": 0.4971851498629837, + "eval_bias-bios_token_set_recall": 0.6049556338933638, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 372 + }, + { + "epoch": 1.69, + "learning_rate": 0.001, + "loss": 2.135, + "step": 384 + }, + { + "epoch": 1.74, + "learning_rate": 0.001, + "loss": 2.1987, + "step": 396 + }, + { + "epoch": 1.78, + "eval_ag_news_accuracy": 0.30578125, + "eval_ag_news_bleu_score": 4.625886016349093, + "eval_ag_news_bleu_score_sem": 0.14406997539717725, + "eval_ag_news_emb_cos_sim": 0.8163206577301025, + "eval_ag_news_emb_cos_sim_sem": 0.004300577290979063, + "eval_ag_news_emb_top1_equal": 0.25600001215934753, + "eval_ag_news_emb_top1_equal_sem": 0.019536923601457774, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.4820914268493652, + "eval_ag_news_n_ngrams_match_1": 13.566, + "eval_ag_news_n_ngrams_match_2": 2.89, + "eval_ag_news_n_ngrams_match_3": 0.756, + "eval_ag_news_num_pred_words": 42.24, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 32.52768025014022, + "eval_ag_news_pred_num_tokens": 62.8984375, + "eval_ag_news_rouge_score": 0.36302604683503525, + "eval_ag_news_runtime": 18.2763, + "eval_ag_news_samples_per_second": 27.358, + "eval_ag_news_steps_per_second": 0.055, + "eval_ag_news_token_set_f1": 0.3577522859271455, + "eval_ag_news_token_set_f1_sem": 0.004878435934676312, + "eval_ag_news_token_set_precision": 0.3290160449978668, + "eval_ag_news_token_set_recall": 0.4168787106703573, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 403 + }, + { + "epoch": 1.78, + "eval_anthropic_toxic_prompts_accuracy": 0.1088125, + "eval_anthropic_toxic_prompts_bleu_score": 3.650061382857713, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1321186844449668, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6943516731262207, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.0044313876493768285, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.12800000607967377, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.014955914115991394, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 3.054664134979248, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.188, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.1, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.83, + "eval_anthropic_toxic_prompts_num_pred_words": 43.168, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 21.214059269791896, + "eval_anthropic_toxic_prompts_pred_num_tokens": 62.828125, + "eval_anthropic_toxic_prompts_rouge_score": 0.23684138554996986, + "eval_anthropic_toxic_prompts_runtime": 8.2401, + "eval_anthropic_toxic_prompts_samples_per_second": 60.679, + "eval_anthropic_toxic_prompts_steps_per_second": 0.121, + "eval_anthropic_toxic_prompts_token_set_f1": 0.34346371837251183, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006149349002418812, + "eval_anthropic_toxic_prompts_token_set_precision": 0.45335623185190155, + "eval_anthropic_toxic_prompts_token_set_recall": 0.30478903824776704, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 403 + }, + { + "epoch": 1.78, + "eval_arxiv_accuracy": 0.42478125, + "eval_arxiv_bleu_score": 4.28136821937714, + "eval_arxiv_bleu_score_sem": 0.13025702484443386, + "eval_arxiv_emb_cos_sim": 0.7463322877883911, + "eval_arxiv_emb_cos_sim_sem": 0.005858995112971661, + "eval_arxiv_emb_top1_equal": 0.23399999737739563, + "eval_arxiv_emb_top1_equal_sem": 0.01895274120352364, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 2.958116292953491, + "eval_arxiv_n_ngrams_match_1": 14.482, + "eval_arxiv_n_ngrams_match_2": 2.812, + "eval_arxiv_n_ngrams_match_3": 0.646, + "eval_arxiv_num_pred_words": 37.338, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 19.261654246636216, + "eval_arxiv_pred_num_tokens": 62.984375, + "eval_arxiv_rouge_score": 0.3559682047719085, + "eval_arxiv_runtime": 8.7138, + "eval_arxiv_samples_per_second": 57.38, + "eval_arxiv_steps_per_second": 0.115, + "eval_arxiv_token_set_f1": 0.3626915967922936, + "eval_arxiv_token_set_f1_sem": 0.0051081697883503824, + "eval_arxiv_token_set_precision": 0.3045233229125568, + "eval_arxiv_token_set_recall": 0.48386160444507925, + "eval_arxiv_true_num_tokens": 64.0, + "step": 403 + }, + { + "epoch": 1.78, + "eval_python_code_alpaca_accuracy": 0.15565625, + "eval_python_code_alpaca_bleu_score": 5.16488546776206, + "eval_python_code_alpaca_bleu_score_sem": 0.17072322807655255, + "eval_python_code_alpaca_emb_cos_sim": 0.774861752986908, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.004186832108581838, + "eval_python_code_alpaca_emb_top1_equal": 0.1860000044107437, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.017418806591218323, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.666151762008667, + "eval_python_code_alpaca_n_ngrams_match_1": 9.89, + "eval_python_code_alpaca_n_ngrams_match_2": 3.0, + "eval_python_code_alpaca_n_ngrams_match_3": 1.044, + "eval_python_code_alpaca_num_pred_words": 39.832, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 14.384507538028931, + "eval_python_code_alpaca_pred_num_tokens": 62.6953125, + "eval_python_code_alpaca_rouge_score": 0.3792787066426493, + "eval_python_code_alpaca_runtime": 7.7257, + "eval_python_code_alpaca_samples_per_second": 64.719, + "eval_python_code_alpaca_steps_per_second": 0.129, + "eval_python_code_alpaca_token_set_f1": 0.46963115529803334, + "eval_python_code_alpaca_token_set_f1_sem": 0.005420621063290702, + "eval_python_code_alpaca_token_set_precision": 0.5468159527558938, + "eval_python_code_alpaca_token_set_recall": 0.4311605987905754, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 403 + }, + { + "epoch": 1.78, + "eval_wikibio_accuracy": 0.36740625, + "eval_wikibio_bleu_score": 4.623217624073544, + "eval_wikibio_bleu_score_sem": 0.2091302090800041, + "eval_wikibio_emb_cos_sim": 0.6995702385902405, + "eval_wikibio_emb_cos_sim_sem": 0.006969743615032104, + "eval_wikibio_emb_top1_equal": 0.15199999511241913, + "eval_wikibio_emb_top1_equal_sem": 0.01607198249074835, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.3406922817230225, + "eval_wikibio_n_ngrams_match_1": 7.92, + "eval_wikibio_n_ngrams_match_2": 2.5, + "eval_wikibio_n_ngrams_match_3": 0.946, + "eval_wikibio_num_pred_words": 30.96, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 28.23866905469156, + "eval_wikibio_pred_num_tokens": 62.984375, + "eval_wikibio_rouge_score": 0.27730911242226863, + "eval_wikibio_runtime": 169.5553, + "eval_wikibio_samples_per_second": 2.949, + "eval_wikibio_steps_per_second": 0.006, + "eval_wikibio_token_set_f1": 0.2573279276017233, + "eval_wikibio_token_set_f1_sem": 0.007053153351734212, + "eval_wikibio_token_set_precision": 0.2515329644167605, + "eval_wikibio_token_set_recall": 0.2959943478405537, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 403 + }, + { + "epoch": 1.78, + "eval_bias-bios_accuracy": 0.49528125, + "eval_bias-bios_bleu_score": 15.562289011586145, + "eval_bias-bios_bleu_score_sem": 0.6052003609209405, + "eval_bias-bios_emb_cos_sim": 0.8724682331085205, + "eval_bias-bios_emb_cos_sim_sem": 0.0028584383048964224, + "eval_bias-bios_emb_top1_equal": 0.30000001192092896, + "eval_bias-bios_emb_top1_equal_sem": 0.020514426052435274, + "eval_bias-bios_exact_match": 0.0, + "eval_bias-bios_exact_match_sem": 0.0, + "eval_bias-bios_loss": 1.929211139678955, + "eval_bias-bios_n_ngrams_match_1": 21.876, + "eval_bias-bios_n_ngrams_match_2": 9.522, + "eval_bias-bios_n_ngrams_match_3": 5.054, + "eval_bias-bios_num_pred_words": 46.974, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 6.884077523429902, + "eval_bias-bios_pred_num_tokens": 62.8671875, + "eval_bias-bios_rouge_score": 0.5015844383406751, + "eval_bias-bios_runtime": 43.9646, + "eval_bias-bios_samples_per_second": 11.373, + "eval_bias-bios_steps_per_second": 0.023, + "eval_bias-bios_token_set_f1": 0.5400765163098435, + "eval_bias-bios_token_set_f1_sem": 0.006176592482609455, + "eval_bias-bios_token_set_precision": 0.5211310372476144, + "eval_bias-bios_token_set_recall": 0.5756587114679573, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 403 + }, + { + "epoch": 1.8, + "learning_rate": 0.001, + "loss": 2.1609, + "step": 408 + }, + { + "epoch": 1.85, + "learning_rate": 0.001, + "loss": 1.9403, + "step": 420 + }, + { + "epoch": 1.9, + "learning_rate": 0.001, + "loss": 2.0764, + "step": 432 + }, + { + "epoch": 1.91, + "eval_ag_news_accuracy": 0.3091875, + "eval_ag_news_bleu_score": 4.55914803176902, + "eval_ag_news_bleu_score_sem": 0.16542642739054694, + "eval_ag_news_emb_cos_sim": 0.8069360852241516, + "eval_ag_news_emb_cos_sim_sem": 0.004155177433277181, + "eval_ag_news_emb_top1_equal": 0.28200000524520874, + "eval_ag_news_emb_top1_equal_sem": 0.020143573015312013, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.504622220993042, + "eval_ag_news_n_ngrams_match_1": 11.554, + "eval_ag_news_n_ngrams_match_2": 2.572, + "eval_ag_news_n_ngrams_match_3": 0.704, + "eval_ag_news_num_pred_words": 30.168, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 33.268873196209576, + "eval_ag_news_pred_num_tokens": 43.34375, + "eval_ag_news_rouge_score": 0.3611896576013237, + "eval_ag_news_runtime": 65.9447, + "eval_ag_news_samples_per_second": 7.582, + "eval_ag_news_steps_per_second": 0.015, + "eval_ag_news_token_set_f1": 0.34635594938922654, + "eval_ag_news_token_set_f1_sem": 0.00483248319105664, + "eval_ag_news_token_set_precision": 0.29541295491834085, + "eval_ag_news_token_set_recall": 0.4391054759993797, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 434 + }, + { + "epoch": 1.91, + "eval_anthropic_toxic_prompts_accuracy": 0.11271875, + "eval_anthropic_toxic_prompts_bleu_score": 5.6427665524664095, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1959700760747935, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.7147530913352966, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00433352245024674, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1459999978542328, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015807205702664997, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 2.8229117393493652, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.834, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.922, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.752, + "eval_anthropic_toxic_prompts_num_pred_words": 27.112, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 16.82577167644338, + "eval_anthropic_toxic_prompts_pred_num_tokens": 37.3125, + "eval_anthropic_toxic_prompts_rouge_score": 0.31049158171250457, + "eval_anthropic_toxic_prompts_runtime": 188.6449, + "eval_anthropic_toxic_prompts_samples_per_second": 2.65, + "eval_anthropic_toxic_prompts_steps_per_second": 0.005, + "eval_anthropic_toxic_prompts_token_set_f1": 0.36958399302570577, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006043389481903698, + "eval_anthropic_toxic_prompts_token_set_precision": 0.43776933009113417, + "eval_anthropic_toxic_prompts_token_set_recall": 0.34707368376528736, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 434 + }, + { + "epoch": 1.91, + "eval_arxiv_accuracy": 0.40725, + "eval_arxiv_bleu_score": 3.6418646525368423, + "eval_arxiv_bleu_score_sem": 0.11629012868979755, + "eval_arxiv_emb_cos_sim": 0.7531729340553284, + "eval_arxiv_emb_cos_sim_sem": 0.00491539443873094, + "eval_arxiv_emb_top1_equal": 0.15199999511241913, + "eval_arxiv_emb_top1_equal_sem": 0.01607198249074835, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.079040288925171, + "eval_arxiv_n_ngrams_match_1": 13.058, + "eval_arxiv_n_ngrams_match_2": 2.494, + "eval_arxiv_n_ngrams_match_3": 0.576, + "eval_arxiv_num_pred_words": 28.4, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 21.73753063347976, + "eval_arxiv_pred_num_tokens": 45.5703125, + "eval_arxiv_rouge_score": 0.3647748498973116, + "eval_arxiv_runtime": 56.9032, + "eval_arxiv_samples_per_second": 8.787, + "eval_arxiv_steps_per_second": 0.018, + "eval_arxiv_token_set_f1": 0.3648733612451334, + "eval_arxiv_token_set_f1_sem": 0.0045413721621260245, + "eval_arxiv_token_set_precision": 0.2944474299834195, + "eval_arxiv_token_set_recall": 0.4991810318663169, + "eval_arxiv_true_num_tokens": 64.0, + "step": 434 + }, + { + "epoch": 1.91, + "eval_python_code_alpaca_accuracy": 0.1664375, + "eval_python_code_alpaca_bleu_score": 8.43929602287766, + "eval_python_code_alpaca_bleu_score_sem": 0.26301217317333, + "eval_python_code_alpaca_emb_cos_sim": 0.8045483231544495, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.004009448707082787, + "eval_python_code_alpaca_emb_top1_equal": 0.20200000703334808, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.017973259543989376, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.4012229442596436, + "eval_python_code_alpaca_n_ngrams_match_1": 9.258, + "eval_python_code_alpaca_n_ngrams_match_2": 2.752, + "eval_python_code_alpaca_n_ngrams_match_3": 0.984, + "eval_python_code_alpaca_num_pred_words": 23.718, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 11.036665357371508, + "eval_python_code_alpaca_pred_num_tokens": 36.3515625, + "eval_python_code_alpaca_rouge_score": 0.4748925266615377, + "eval_python_code_alpaca_runtime": 7.87, + "eval_python_code_alpaca_samples_per_second": 63.532, + "eval_python_code_alpaca_steps_per_second": 0.127, + "eval_python_code_alpaca_token_set_f1": 0.5204629328352092, + "eval_python_code_alpaca_token_set_f1_sem": 0.005860754538010466, + "eval_python_code_alpaca_token_set_precision": 0.5424322102606044, + "eval_python_code_alpaca_token_set_recall": 0.5191744318018764, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 434 + }, + { + "epoch": 1.91, + "eval_wikibio_accuracy": 0.35740625, + "eval_wikibio_bleu_score": 5.9326789113657705, + "eval_wikibio_bleu_score_sem": 0.22446540672582094, + "eval_wikibio_emb_cos_sim": 0.7443342208862305, + "eval_wikibio_emb_cos_sim_sem": 0.005850383889749271, + "eval_wikibio_emb_top1_equal": 0.1720000058412552, + "eval_wikibio_emb_top1_equal_sem": 0.01689386850274998, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.4056010246276855, + "eval_wikibio_n_ngrams_match_1": 9.216, + "eval_wikibio_n_ngrams_match_2": 2.928, + "eval_wikibio_n_ngrams_match_3": 1.076, + "eval_wikibio_num_pred_words": 31.292, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 30.132400597485695, + "eval_wikibio_pred_num_tokens": 53.8203125, + "eval_wikibio_rouge_score": 0.357777507241817, + "eval_wikibio_runtime": 8.583, + "eval_wikibio_samples_per_second": 58.254, + "eval_wikibio_steps_per_second": 0.117, + "eval_wikibio_token_set_f1": 0.30733041663159916, + "eval_wikibio_token_set_f1_sem": 0.0060036693992158545, + "eval_wikibio_token_set_precision": 0.3026905663203406, + "eval_wikibio_token_set_recall": 0.3309501986443075, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 434 + }, + { + "epoch": 1.91, + "eval_bias-bios_accuracy": 0.48434375, + "eval_bias-bios_bleu_score": 16.377499841333293, + "eval_bias-bios_bleu_score_sem": 0.7691216166778587, + "eval_bias-bios_emb_cos_sim": 0.8604341149330139, + "eval_bias-bios_emb_cos_sim_sem": 0.003229498259277889, + "eval_bias-bios_emb_top1_equal": 0.28200000524520874, + "eval_bias-bios_emb_top1_equal_sem": 0.020143573015312013, + "eval_bias-bios_exact_match": 0.0, + "eval_bias-bios_exact_match_sem": 0.0, + "eval_bias-bios_loss": 1.9417322874069214, + "eval_bias-bios_n_ngrams_match_1": 18.712, + "eval_bias-bios_n_ngrams_match_2": 8.296, + "eval_bias-bios_n_ngrams_match_3": 4.604, + "eval_bias-bios_num_pred_words": 32.288, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 6.970815974321508, + "eval_bias-bios_pred_num_tokens": 42.203125, + "eval_bias-bios_rouge_score": 0.5305062820350761, + "eval_bias-bios_runtime": 55.3521, + "eval_bias-bios_samples_per_second": 9.033, + "eval_bias-bios_steps_per_second": 0.018, + "eval_bias-bios_token_set_f1": 0.5359290413191452, + "eval_bias-bios_token_set_f1_sem": 0.006735846184355515, + "eval_bias-bios_token_set_precision": 0.4766454705041298, + "eval_bias-bios_token_set_recall": 0.627384497190948, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 434 + }, + { + "epoch": 1.96, + "learning_rate": 0.001, + "loss": 2.2159, + "step": 444 + }, + { + "epoch": 2.01, + "learning_rate": 0.001, + "loss": 1.9549, + "step": 456 + }, + { + "epoch": 2.05, + "eval_ag_news_accuracy": 0.30590625, + "eval_ag_news_bleu_score": 4.822678096872545, + "eval_ag_news_bleu_score_sem": 0.16083087908591537, + "eval_ag_news_emb_cos_sim": 0.8099291920661926, + "eval_ag_news_emb_cos_sim_sem": 0.00472055568392159, + "eval_ag_news_emb_top1_equal": 0.28200000524520874, + "eval_ag_news_emb_top1_equal_sem": 0.020143573015312013, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.4794719219207764, + "eval_ag_news_n_ngrams_match_1": 13.164, + "eval_ag_news_n_ngrams_match_2": 2.948, + "eval_ag_news_n_ngrams_match_3": 0.802, + "eval_ag_news_num_pred_words": 39.948, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 32.442585333345114, + "eval_ag_news_pred_num_tokens": 60.078125, + "eval_ag_news_rouge_score": 0.36153641474600484, + "eval_ag_news_runtime": 46.6624, + "eval_ag_news_samples_per_second": 10.715, + "eval_ag_news_steps_per_second": 0.021, + "eval_ag_news_token_set_f1": 0.3532317796867009, + "eval_ag_news_token_set_f1_sem": 0.005142214646342723, + "eval_ag_news_token_set_precision": 0.32060392153292905, + "eval_ag_news_token_set_recall": 0.4227661607319599, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 465 + }, + { + "epoch": 2.05, + "eval_anthropic_toxic_prompts_accuracy": 0.10884375, + "eval_anthropic_toxic_prompts_bleu_score": 3.959874623385117, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.13816720459798965, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.699434220790863, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004483873661341469, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.13600000739097595, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015345323732734733, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 2.9180328845977783, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.084, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.054, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.8, + "eval_anthropic_toxic_prompts_num_pred_words": 38.804, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 18.504850457251738, + "eval_anthropic_toxic_prompts_pred_num_tokens": 56.421875, + "eval_anthropic_toxic_prompts_rouge_score": 0.25094133174243116, + "eval_anthropic_toxic_prompts_runtime": 7.2708, + "eval_anthropic_toxic_prompts_samples_per_second": 68.768, + "eval_anthropic_toxic_prompts_steps_per_second": 0.138, + "eval_anthropic_toxic_prompts_token_set_f1": 0.3514441073423798, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0060101823762325176, + "eval_anthropic_toxic_prompts_token_set_precision": 0.44442348087365346, + "eval_anthropic_toxic_prompts_token_set_recall": 0.3166909629988077, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 465 + }, + { + "epoch": 2.05, + "eval_arxiv_accuracy": 0.4220625, + "eval_arxiv_bleu_score": 4.262612265285608, + "eval_arxiv_bleu_score_sem": 0.1264903806145487, + "eval_arxiv_emb_cos_sim": 0.7446135878562927, + "eval_arxiv_emb_cos_sim_sem": 0.006035515859428042, + "eval_arxiv_emb_top1_equal": 0.2240000069141388, + "eval_arxiv_emb_top1_equal_sem": 0.01866399400069726, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 2.983703374862671, + "eval_arxiv_n_ngrams_match_1": 14.304, + "eval_arxiv_n_ngrams_match_2": 2.81, + "eval_arxiv_n_ngrams_match_3": 0.616, + "eval_arxiv_num_pred_words": 36.052, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 19.760863185901616, + "eval_arxiv_pred_num_tokens": 61.8125, + "eval_arxiv_rouge_score": 0.3556709302874719, + "eval_arxiv_runtime": 95.7762, + "eval_arxiv_samples_per_second": 5.221, + "eval_arxiv_steps_per_second": 0.01, + "eval_arxiv_token_set_f1": 0.35673938740964367, + "eval_arxiv_token_set_f1_sem": 0.005106620682017066, + "eval_arxiv_token_set_precision": 0.29837823430756255, + "eval_arxiv_token_set_recall": 0.4749938707041415, + "eval_arxiv_true_num_tokens": 64.0, + "step": 465 + }, + { + "epoch": 2.05, + "eval_python_code_alpaca_accuracy": 0.15734375, + "eval_python_code_alpaca_bleu_score": 6.366092347760931, + "eval_python_code_alpaca_bleu_score_sem": 0.2046752995963428, + "eval_python_code_alpaca_emb_cos_sim": 0.7874159216880798, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.00428374135664301, + "eval_python_code_alpaca_emb_top1_equal": 0.21199999749660492, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.01829703673906991, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.538551092147827, + "eval_python_code_alpaca_n_ngrams_match_1": 9.946, + "eval_python_code_alpaca_n_ngrams_match_2": 3.158, + "eval_python_code_alpaca_n_ngrams_match_3": 1.128, + "eval_python_code_alpaca_num_pred_words": 34.552, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 12.661312599007509, + "eval_python_code_alpaca_pred_num_tokens": 54.9609375, + "eval_python_code_alpaca_rouge_score": 0.4099865377696851, + "eval_python_code_alpaca_runtime": 10.0581, + "eval_python_code_alpaca_samples_per_second": 49.711, + "eval_python_code_alpaca_steps_per_second": 0.099, + "eval_python_code_alpaca_token_set_f1": 0.4949877378470005, + "eval_python_code_alpaca_token_set_f1_sem": 0.0058422653141193045, + "eval_python_code_alpaca_token_set_precision": 0.5573490328307835, + "eval_python_code_alpaca_token_set_recall": 0.461735096324448, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 465 + }, + { + "epoch": 2.05, + "eval_wikibio_accuracy": 0.37009375, + "eval_wikibio_bleu_score": 4.919931286316885, + "eval_wikibio_bleu_score_sem": 0.21407350443020087, + "eval_wikibio_emb_cos_sim": 0.6965718865394592, + "eval_wikibio_emb_cos_sim_sem": 0.007230857201284975, + "eval_wikibio_emb_top1_equal": 0.1459999978542328, + "eval_wikibio_emb_top1_equal_sem": 0.015807205702664997, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.3172950744628906, + "eval_wikibio_n_ngrams_match_1": 8.042, + "eval_wikibio_n_ngrams_match_2": 2.538, + "eval_wikibio_n_ngrams_match_3": 0.978, + "eval_wikibio_num_pred_words": 30.932, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 27.58563246891694, + "eval_wikibio_pred_num_tokens": 62.5, + "eval_wikibio_rouge_score": 0.2864471028441363, + "eval_wikibio_runtime": 7.7415, + "eval_wikibio_samples_per_second": 64.587, + "eval_wikibio_steps_per_second": 0.129, + "eval_wikibio_token_set_f1": 0.26047722756912484, + "eval_wikibio_token_set_f1_sem": 0.007163829543694026, + "eval_wikibio_token_set_precision": 0.256809804635162, + "eval_wikibio_token_set_recall": 0.2936675715034865, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 465 + }, + { + "epoch": 2.05, + "eval_bias-bios_accuracy": 0.49628125, + "eval_bias-bios_bleu_score": 16.98683275890492, + "eval_bias-bios_bleu_score_sem": 0.7217988752336373, + "eval_bias-bios_emb_cos_sim": 0.8703109622001648, + "eval_bias-bios_emb_cos_sim_sem": 0.0029760236183739, + "eval_bias-bios_emb_top1_equal": 0.30399999022483826, + "eval_bias-bios_emb_top1_equal_sem": 0.020591649838958805, + "eval_bias-bios_exact_match": 0.0, + "eval_bias-bios_exact_match_sem": 0.0, + "eval_bias-bios_loss": 1.8828622102737427, + "eval_bias-bios_n_ngrams_match_1": 21.976, + "eval_bias-bios_n_ngrams_match_2": 9.688, + "eval_bias-bios_n_ngrams_match_3": 5.232, + "eval_bias-bios_num_pred_words": 45.322, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 6.572289240733505, + "eval_bias-bios_pred_num_tokens": 60.9921875, + "eval_bias-bios_rouge_score": 0.5184656659022286, + "eval_bias-bios_runtime": 8.3916, + "eval_bias-bios_samples_per_second": 59.584, + "eval_bias-bios_steps_per_second": 0.119, + "eval_bias-bios_token_set_f1": 0.5493044015119622, + "eval_bias-bios_token_set_f1_sem": 0.006290346034109139, + "eval_bias-bios_token_set_precision": 0.525720347228826, + "eval_bias-bios_token_set_recall": 0.587055844834679, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 465 + }, + { + "epoch": 2.06, + "learning_rate": 0.001, + "loss": 2.2039, + "step": 468 + }, + { + "epoch": 2.11, + "learning_rate": 0.001, + "loss": 2.1345, + "step": 480 + }, + { + "epoch": 2.17, + "learning_rate": 0.001, + "loss": 2.0227, + "step": 492 + }, + { + "epoch": 2.19, + "eval_ag_news_accuracy": 0.30540625, + "eval_ag_news_bleu_score": 4.668250881532122, + "eval_ag_news_bleu_score_sem": 0.15988691631826146, + "eval_ag_news_emb_cos_sim": 0.8072042465209961, + "eval_ag_news_emb_cos_sim_sem": 0.0046731316477809145, + "eval_ag_news_emb_top1_equal": 0.24199999868869781, + "eval_ag_news_emb_top1_equal_sem": 0.019173085092707744, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.51320219039917, + "eval_ag_news_n_ngrams_match_1": 12.482, + "eval_ag_news_n_ngrams_match_2": 2.586, + "eval_ag_news_n_ngrams_match_3": 0.718, + "eval_ag_news_num_pred_words": 36.002, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 33.55554717876515, + "eval_ag_news_pred_num_tokens": 56.2734375, + "eval_ag_news_rouge_score": 0.36259427294137575, + "eval_ag_news_runtime": 178.0758, + "eval_ag_news_samples_per_second": 2.808, + "eval_ag_news_steps_per_second": 0.006, + "eval_ag_news_token_set_f1": 0.3456775660047359, + "eval_ag_news_token_set_f1_sem": 0.005010765573039154, + "eval_ag_news_token_set_precision": 0.3088513221500971, + "eval_ag_news_token_set_recall": 0.41505437710191306, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 496 + }, + { + "epoch": 2.19, + "eval_anthropic_toxic_prompts_accuracy": 0.10984375, + "eval_anthropic_toxic_prompts_bleu_score": 4.4912971391928895, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.16681860043496632, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6957270503044128, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00475449144706793, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.15199999511241913, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01607198249074835, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 2.8501698970794678, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.858, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.928, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.77, + "eval_anthropic_toxic_prompts_num_pred_words": 33.982, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 17.29071923373302, + "eval_anthropic_toxic_prompts_pred_num_tokens": 49.703125, + "eval_anthropic_toxic_prompts_rouge_score": 0.2722764609394385, + "eval_anthropic_toxic_prompts_runtime": 7.3923, + "eval_anthropic_toxic_prompts_samples_per_second": 67.638, + "eval_anthropic_toxic_prompts_steps_per_second": 0.135, + "eval_anthropic_toxic_prompts_token_set_f1": 0.3534656539571027, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006123770732202779, + "eval_anthropic_toxic_prompts_token_set_precision": 0.43206533638597494, + "eval_anthropic_toxic_prompts_token_set_recall": 0.3274092318834864, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 496 + }, + { + "epoch": 2.19, + "eval_arxiv_accuracy": 0.41796875, + "eval_arxiv_bleu_score": 4.164145065124605, + "eval_arxiv_bleu_score_sem": 0.11827294796233954, + "eval_arxiv_emb_cos_sim": 0.7487242817878723, + "eval_arxiv_emb_cos_sim_sem": 0.005828643916433681, + "eval_arxiv_emb_top1_equal": 0.21400000154972076, + "eval_arxiv_emb_top1_equal_sem": 0.01835979564312438, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.0731747150421143, + "eval_arxiv_n_ngrams_match_1": 14.366, + "eval_arxiv_n_ngrams_match_2": 2.74, + "eval_arxiv_n_ngrams_match_3": 0.59, + "eval_arxiv_num_pred_words": 34.388, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 21.610400750864795, + "eval_arxiv_pred_num_tokens": 58.109375, + "eval_arxiv_rouge_score": 0.36396878972148394, + "eval_arxiv_runtime": 7.7906, + "eval_arxiv_samples_per_second": 64.18, + "eval_arxiv_steps_per_second": 0.128, + "eval_arxiv_token_set_f1": 0.36518113630628457, + "eval_arxiv_token_set_f1_sem": 0.0047476112934269005, + "eval_arxiv_token_set_precision": 0.3069979542932561, + "eval_arxiv_token_set_recall": 0.4743010616126002, + "eval_arxiv_true_num_tokens": 64.0, + "step": 496 + }, + { + "epoch": 2.19, + "eval_python_code_alpaca_accuracy": 0.15715625, + "eval_python_code_alpaca_bleu_score": 6.223549285381672, + "eval_python_code_alpaca_bleu_score_sem": 0.206436710584848, + "eval_python_code_alpaca_emb_cos_sim": 0.7807042598724365, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.0045085411306805805, + "eval_python_code_alpaca_emb_top1_equal": 0.2160000056028366, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.018421909471797383, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.5024209022521973, + "eval_python_code_alpaca_n_ngrams_match_1": 9.552, + "eval_python_code_alpaca_n_ngrams_match_2": 2.754, + "eval_python_code_alpaca_n_ngrams_match_3": 0.916, + "eval_python_code_alpaca_num_pred_words": 31.346, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 12.212022315979649, + "eval_python_code_alpaca_pred_num_tokens": 50.46875, + "eval_python_code_alpaca_rouge_score": 0.4221245259695163, + "eval_python_code_alpaca_runtime": 7.3188, + "eval_python_code_alpaca_samples_per_second": 68.317, + "eval_python_code_alpaca_steps_per_second": 0.137, + "eval_python_code_alpaca_token_set_f1": 0.48784522361653215, + "eval_python_code_alpaca_token_set_f1_sem": 0.0058729159175178874, + "eval_python_code_alpaca_token_set_precision": 0.5370299536629132, + "eval_python_code_alpaca_token_set_recall": 0.4647996593704775, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 496 + }, + { + "epoch": 2.19, + "eval_wikibio_accuracy": 0.35896875, + "eval_wikibio_bleu_score": 5.454117014872539, + "eval_wikibio_bleu_score_sem": 0.2204721001818411, + "eval_wikibio_emb_cos_sim": 0.719548761844635, + "eval_wikibio_emb_cos_sim_sem": 0.006735396658708852, + "eval_wikibio_emb_top1_equal": 0.1720000058412552, + "eval_wikibio_emb_top1_equal_sem": 0.01689386850274998, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.470444917678833, + "eval_wikibio_n_ngrams_match_1": 8.478, + "eval_wikibio_n_ngrams_match_2": 2.72, + "eval_wikibio_n_ngrams_match_3": 1.01, + "eval_wikibio_num_pred_words": 30.79, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 32.15104383084076, + "eval_wikibio_pred_num_tokens": 59.2734375, + "eval_wikibio_rouge_score": 0.31754796272682606, + "eval_wikibio_runtime": 7.6641, + "eval_wikibio_samples_per_second": 65.239, + "eval_wikibio_steps_per_second": 0.13, + "eval_wikibio_token_set_f1": 0.2818988883768649, + "eval_wikibio_token_set_f1_sem": 0.006598024742830464, + "eval_wikibio_token_set_precision": 0.27590995276757235, + "eval_wikibio_token_set_recall": 0.31430922001444034, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 496 + }, + { + "epoch": 2.19, + "eval_bias-bios_accuracy": 0.50121875, + "eval_bias-bios_bleu_score": 17.215132232614394, + "eval_bias-bios_bleu_score_sem": 0.7343335349514112, + "eval_bias-bios_emb_cos_sim": 0.8732749223709106, + "eval_bias-bios_emb_cos_sim_sem": 0.002797716253014933, + "eval_bias-bios_emb_top1_equal": 0.2840000092983246, + "eval_bias-bios_emb_top1_equal_sem": 0.020186705101045338, + "eval_bias-bios_exact_match": 0.0, + "eval_bias-bios_exact_match_sem": 0.0, + "eval_bias-bios_loss": 1.8789442777633667, + "eval_bias-bios_n_ngrams_match_1": 21.28, + "eval_bias-bios_n_ngrams_match_2": 9.284, + "eval_bias-bios_n_ngrams_match_3": 5.012, + "eval_bias-bios_num_pred_words": 41.134, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 6.546589832197976, + "eval_bias-bios_pred_num_tokens": 56.4453125, + "eval_bias-bios_rouge_score": 0.5248652780897624, + "eval_bias-bios_runtime": 8.232, + "eval_bias-bios_samples_per_second": 60.739, + "eval_bias-bios_steps_per_second": 0.121, + "eval_bias-bios_token_set_f1": 0.5451987629708742, + "eval_bias-bios_token_set_f1_sem": 0.006575960199532963, + "eval_bias-bios_token_set_precision": 0.5150912165153675, + "eval_bias-bios_token_set_recall": 0.5888287947698326, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 496 + }, + { + "epoch": 2.22, + "learning_rate": 0.001, + "loss": 1.8183, + "step": 504 + }, + { + "epoch": 2.27, + "learning_rate": 0.001, + "loss": 2.2791, + "step": 516 + }, + { + "epoch": 2.32, + "eval_ag_news_accuracy": 0.306125, + "eval_ag_news_bleu_score": 4.7201156512212785, + "eval_ag_news_bleu_score_sem": 0.15726246147004735, + "eval_ag_news_emb_cos_sim": 0.8120728731155396, + "eval_ag_news_emb_cos_sim_sem": 0.004673524823891517, + "eval_ag_news_emb_top1_equal": 0.27799999713897705, + "eval_ag_news_emb_top1_equal_sem": 0.0200558347666307, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.4953548908233643, + "eval_ag_news_n_ngrams_match_1": 13.506, + "eval_ag_news_n_ngrams_match_2": 2.876, + "eval_ag_news_n_ngrams_match_3": 0.796, + "eval_ag_news_num_pred_words": 42.818, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 32.96198378307496, + "eval_ag_news_pred_num_tokens": 62.71875, + "eval_ag_news_rouge_score": 0.35785736889738995, + "eval_ag_news_runtime": 202.08, + "eval_ag_news_samples_per_second": 2.474, + "eval_ag_news_steps_per_second": 0.005, + "eval_ag_news_token_set_f1": 0.351394254769294, + "eval_ag_news_token_set_f1_sem": 0.004847415999858728, + "eval_ag_news_token_set_precision": 0.3297289801897619, + "eval_ag_news_token_set_recall": 0.3999756145796819, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 527 + }, + { + "epoch": 2.32, + "eval_anthropic_toxic_prompts_accuracy": 0.109, + "eval_anthropic_toxic_prompts_bleu_score": 3.659994327362634, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1316802797000685, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.7012580633163452, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004484919909635785, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.15000000596046448, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01598471338779901, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 3.0036044120788574, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.412, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.082, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.846, + "eval_anthropic_toxic_prompts_num_pred_words": 43.826, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 20.158064105488453, + "eval_anthropic_toxic_prompts_pred_num_tokens": 61.8515625, + "eval_anthropic_toxic_prompts_rouge_score": 0.23854465360649846, + "eval_anthropic_toxic_prompts_runtime": 67.8278, + "eval_anthropic_toxic_prompts_samples_per_second": 7.372, + "eval_anthropic_toxic_prompts_steps_per_second": 0.015, + "eval_anthropic_toxic_prompts_token_set_f1": 0.3379282851444896, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005772174167376433, + "eval_anthropic_toxic_prompts_token_set_precision": 0.46172926677129095, + "eval_anthropic_toxic_prompts_token_set_recall": 0.2918538154689661, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 527 + }, + { + "epoch": 2.32, + "eval_arxiv_accuracy": 0.4254375, + "eval_arxiv_bleu_score": 4.145314235326048, + "eval_arxiv_bleu_score_sem": 0.11821382340409703, + "eval_arxiv_emb_cos_sim": 0.7381144165992737, + "eval_arxiv_emb_cos_sim_sem": 0.006309414999677569, + "eval_arxiv_emb_top1_equal": 0.25600001215934753, + "eval_arxiv_emb_top1_equal_sem": 0.019536923601457774, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 2.979003429412842, + "eval_arxiv_n_ngrams_match_1": 14.622, + "eval_arxiv_n_ngrams_match_2": 2.772, + "eval_arxiv_n_ngrams_match_3": 0.57, + "eval_arxiv_num_pred_words": 38.324, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 19.668206119028458, + "eval_arxiv_pred_num_tokens": 62.9140625, + "eval_arxiv_rouge_score": 0.35247261343150804, + "eval_arxiv_runtime": 130.6734, + "eval_arxiv_samples_per_second": 3.826, + "eval_arxiv_steps_per_second": 0.008, + "eval_arxiv_token_set_f1": 0.35541673856122147, + "eval_arxiv_token_set_f1_sem": 0.005079548130897894, + "eval_arxiv_token_set_precision": 0.30314107444116767, + "eval_arxiv_token_set_recall": 0.46493872398715763, + "eval_arxiv_true_num_tokens": 64.0, + "step": 527 + }, + { + "epoch": 2.32, + "eval_python_code_alpaca_accuracy": 0.156, + "eval_python_code_alpaca_bleu_score": 5.21731257718578, + "eval_python_code_alpaca_bleu_score_sem": 0.1584529336329874, + "eval_python_code_alpaca_emb_cos_sim": 0.7805857062339783, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.004051591521737822, + "eval_python_code_alpaca_emb_top1_equal": 0.1940000057220459, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.017701826750833646, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.6478047370910645, + "eval_python_code_alpaca_n_ngrams_match_1": 10.218, + "eval_python_code_alpaca_n_ngrams_match_2": 3.078, + "eval_python_code_alpaca_n_ngrams_match_3": 1.028, + "eval_python_code_alpaca_num_pred_words": 40.092, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 14.123000889807464, + "eval_python_code_alpaca_pred_num_tokens": 61.7265625, + "eval_python_code_alpaca_rouge_score": 0.38214918518907504, + "eval_python_code_alpaca_runtime": 92.9861, + "eval_python_code_alpaca_samples_per_second": 5.377, + "eval_python_code_alpaca_steps_per_second": 0.011, + "eval_python_code_alpaca_token_set_f1": 0.4712166923748369, + "eval_python_code_alpaca_token_set_f1_sem": 0.0053732417427534845, + "eval_python_code_alpaca_token_set_precision": 0.5681916403675348, + "eval_python_code_alpaca_token_set_recall": 0.41949507105905237, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 527 + }, + { + "epoch": 2.32, + "eval_wikibio_accuracy": 0.36740625, + "eval_wikibio_bleu_score": 4.968187379806609, + "eval_wikibio_bleu_score_sem": 0.2135883339125878, + "eval_wikibio_emb_cos_sim": 0.7012571096420288, + "eval_wikibio_emb_cos_sim_sem": 0.006859008562281646, + "eval_wikibio_emb_top1_equal": 0.15800000727176666, + "eval_wikibio_emb_top1_equal_sem": 0.01632805076118194, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.3648173809051514, + "eval_wikibio_n_ngrams_match_1": 8.462, + "eval_wikibio_n_ngrams_match_2": 2.71, + "eval_wikibio_n_ngrams_match_3": 1.024, + "eval_wikibio_num_pred_words": 32.64, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 28.92821397239545, + "eval_wikibio_pred_num_tokens": 62.8984375, + "eval_wikibio_rouge_score": 0.2981068112492569, + "eval_wikibio_runtime": 7.308, + "eval_wikibio_samples_per_second": 68.418, + "eval_wikibio_steps_per_second": 0.137, + "eval_wikibio_token_set_f1": 0.2712183003628458, + "eval_wikibio_token_set_f1_sem": 0.006982557410045616, + "eval_wikibio_token_set_precision": 0.270139310786642, + "eval_wikibio_token_set_recall": 0.2990774367348193, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 527 + }, + { + "epoch": 2.32, + "eval_bias-bios_accuracy": 0.501375, + "eval_bias-bios_bleu_score": 16.21711718360367, + "eval_bias-bios_bleu_score_sem": 0.6479797993591269, + "eval_bias-bios_emb_cos_sim": 0.8746238350868225, + "eval_bias-bios_emb_cos_sim_sem": 0.002782096499020999, + "eval_bias-bios_emb_top1_equal": 0.335999995470047, + "eval_bias-bios_emb_top1_equal_sem": 0.02114479131616093, + "eval_bias-bios_exact_match": 0.0, + "eval_bias-bios_exact_match_sem": 0.0, + "eval_bias-bios_loss": 1.8893781900405884, + "eval_bias-bios_n_ngrams_match_1": 22.216, + "eval_bias-bios_n_ngrams_match_2": 9.848, + "eval_bias-bios_n_ngrams_match_3": 5.332, + "eval_bias-bios_num_pred_words": 47.686, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 6.615253971088581, + "eval_bias-bios_pred_num_tokens": 62.8125, + "eval_bias-bios_rouge_score": 0.5050361311054431, + "eval_bias-bios_runtime": 7.4646, + "eval_bias-bios_samples_per_second": 66.983, + "eval_bias-bios_steps_per_second": 0.134, + "eval_bias-bios_token_set_f1": 0.5426654504434694, + "eval_bias-bios_token_set_f1_sem": 0.0063148590036132875, + "eval_bias-bios_token_set_precision": 0.5333376764248211, + "eval_bias-bios_token_set_recall": 0.5644582496982357, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 527 + }, + { + "epoch": 2.33, + "learning_rate": 0.001, + "loss": 2.1562, + "step": 528 + }, + { + "epoch": 2.38, + "learning_rate": 0.001, + "loss": 2.0533, + "step": 540 + }, + { + "epoch": 2.43, + "learning_rate": 0.001, + "loss": 1.8237, + "step": 552 + }, + { + "epoch": 2.46, + "eval_ag_news_accuracy": 0.305, + "eval_ag_news_bleu_score": 4.17830284982609, + "eval_ag_news_bleu_score_sem": 0.1646197309754618, + "eval_ag_news_emb_cos_sim": 0.794195830821991, + "eval_ag_news_emb_cos_sim_sem": 0.004617011089193853, + "eval_ag_news_emb_top1_equal": 0.2540000081062317, + "eval_ag_news_emb_top1_equal_sem": 0.019486597059300604, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.561718225479126, + "eval_ag_news_n_ngrams_match_1": 10.746, + "eval_ag_news_n_ngrams_match_2": 2.262, + "eval_ag_news_n_ngrams_match_3": 0.63, + "eval_ag_news_num_pred_words": 26.666, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 35.223667382242766, + "eval_ag_news_pred_num_tokens": 40.484375, + "eval_ag_news_rouge_score": 0.35462382870198267, + "eval_ag_news_runtime": 142.7305, + "eval_ag_news_samples_per_second": 3.503, + "eval_ag_news_steps_per_second": 0.007, + "eval_ag_news_token_set_f1": 0.33559673288595876, + "eval_ag_news_token_set_f1_sem": 0.005097527789773717, + "eval_ag_news_token_set_precision": 0.279699209130729, + "eval_ag_news_token_set_recall": 0.4440647256504291, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 558 + }, + { + "epoch": 2.46, + "eval_anthropic_toxic_prompts_accuracy": 0.11325, + "eval_anthropic_toxic_prompts_bleu_score": 6.4154336131325485, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.2339881642763996, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.7061901688575745, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.005124980296488741, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.15800000727176666, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.016328049428381567, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 2.805786609649658, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.594, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.868, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.7, + "eval_anthropic_toxic_prompts_num_pred_words": 23.004, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 16.54008137789522, + "eval_anthropic_toxic_prompts_pred_num_tokens": 33.375, + "eval_anthropic_toxic_prompts_rouge_score": 0.3356250127185185, + "eval_anthropic_toxic_prompts_runtime": 6.8433, + "eval_anthropic_toxic_prompts_samples_per_second": 73.064, + "eval_anthropic_toxic_prompts_steps_per_second": 0.146, + "eval_anthropic_toxic_prompts_token_set_f1": 0.3638587971000849, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0063676985920877545, + "eval_anthropic_toxic_prompts_token_set_precision": 0.4205459789432383, + "eval_anthropic_toxic_prompts_token_set_recall": 0.34815521435094127, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 558 + }, + { + "epoch": 2.46, + "eval_arxiv_accuracy": 0.407375, + "eval_arxiv_bleu_score": 3.1676163846149956, + "eval_arxiv_bleu_score_sem": 0.09382946641044632, + "eval_arxiv_emb_cos_sim": 0.7366307973861694, + "eval_arxiv_emb_cos_sim_sem": 0.00562818807444421, + "eval_arxiv_emb_top1_equal": 0.14000000059604645, + "eval_arxiv_emb_top1_equal_sem": 0.015533272576005909, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.1155734062194824, + "eval_arxiv_n_ngrams_match_1": 12.256, + "eval_arxiv_n_ngrams_match_2": 2.306, + "eval_arxiv_n_ngrams_match_3": 0.478, + "eval_arxiv_num_pred_words": 24.856, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 22.546354867749912, + "eval_arxiv_pred_num_tokens": 40.1171875, + "eval_arxiv_rouge_score": 0.3596539092889215, + "eval_arxiv_runtime": 7.2229, + "eval_arxiv_samples_per_second": 69.225, + "eval_arxiv_steps_per_second": 0.138, + "eval_arxiv_token_set_f1": 0.3574995386820636, + "eval_arxiv_token_set_f1_sem": 0.004623552007997876, + "eval_arxiv_token_set_precision": 0.28173053784426294, + "eval_arxiv_token_set_recall": 0.5071577095029983, + "eval_arxiv_true_num_tokens": 64.0, + "step": 558 + }, + { + "epoch": 2.46, + "eval_python_code_alpaca_accuracy": 0.16434375, + "eval_python_code_alpaca_bleu_score": 8.688607617329108, + "eval_python_code_alpaca_bleu_score_sem": 0.302776161989141, + "eval_python_code_alpaca_emb_cos_sim": 0.8036087155342102, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.004132519493703362, + "eval_python_code_alpaca_emb_top1_equal": 0.23600000143051147, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.019008700160065242, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.3998489379882812, + "eval_python_code_alpaca_n_ngrams_match_1": 8.842, + "eval_python_code_alpaca_n_ngrams_match_2": 2.514, + "eval_python_code_alpaca_n_ngrams_match_3": 0.838, + "eval_python_code_alpaca_num_pred_words": 20.93, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 11.02151132320864, + "eval_python_code_alpaca_pred_num_tokens": 31.46875, + "eval_python_code_alpaca_rouge_score": 0.4927890115023661, + "eval_python_code_alpaca_runtime": 6.8949, + "eval_python_code_alpaca_samples_per_second": 72.517, + "eval_python_code_alpaca_steps_per_second": 0.145, + "eval_python_code_alpaca_token_set_f1": 0.5100735743849811, + "eval_python_code_alpaca_token_set_f1_sem": 0.005861874662831224, + "eval_python_code_alpaca_token_set_precision": 0.5207688191851509, + "eval_python_code_alpaca_token_set_recall": 0.5181048782640013, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 558 + }, + { + "epoch": 2.46, + "eval_wikibio_accuracy": 0.35678125, + "eval_wikibio_bleu_score": 5.5551563909930115, + "eval_wikibio_bleu_score_sem": 0.22468231620514026, + "eval_wikibio_emb_cos_sim": 0.7146796584129333, + "eval_wikibio_emb_cos_sim_sem": 0.006399664244265241, + "eval_wikibio_emb_top1_equal": 0.15000000596046448, + "eval_wikibio_emb_top1_equal_sem": 0.01598471338779901, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.4578168392181396, + "eval_wikibio_n_ngrams_match_1": 7.942, + "eval_wikibio_n_ngrams_match_2": 2.468, + "eval_wikibio_n_ngrams_match_3": 0.9, + "eval_wikibio_num_pred_words": 26.488, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 31.74759070708929, + "eval_wikibio_pred_num_tokens": 49.1640625, + "eval_wikibio_rouge_score": 0.32450583191323545, + "eval_wikibio_runtime": 9.7808, + "eval_wikibio_samples_per_second": 51.12, + "eval_wikibio_steps_per_second": 0.102, + "eval_wikibio_token_set_f1": 0.28186914042405625, + "eval_wikibio_token_set_f1_sem": 0.006523463043082381, + "eval_wikibio_token_set_precision": 0.2670717795693041, + "eval_wikibio_token_set_recall": 0.3212183854151821, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 558 + }, + { + "epoch": 2.46, + "eval_bias-bios_accuracy": 0.49503125, + "eval_bias-bios_bleu_score": 15.804996894943608, + "eval_bias-bios_bleu_score_sem": 0.7817717179636867, + "eval_bias-bios_emb_cos_sim": 0.857208788394928, + "eval_bias-bios_emb_cos_sim_sem": 0.0032387648870846487, + "eval_bias-bios_emb_top1_equal": 0.257999986410141, + "eval_bias-bios_emb_top1_equal_sem": 0.019586711692263472, + "eval_bias-bios_exact_match": 0.0, + "eval_bias-bios_exact_match_sem": 0.0, + "eval_bias-bios_loss": 1.9215714931488037, + "eval_bias-bios_n_ngrams_match_1": 17.874, + "eval_bias-bios_n_ngrams_match_2": 7.994, + "eval_bias-bios_n_ngrams_match_3": 4.424, + "eval_bias-bios_num_pred_words": 28.774, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 6.831685985695067, + "eval_bias-bios_pred_num_tokens": 38.359375, + "eval_bias-bios_rouge_score": 0.5230174325337358, + "eval_bias-bios_runtime": 7.248, + "eval_bias-bios_samples_per_second": 68.985, + "eval_bias-bios_steps_per_second": 0.138, + "eval_bias-bios_token_set_f1": 0.5333727037684048, + "eval_bias-bios_token_set_f1_sem": 0.00680824842461018, + "eval_bias-bios_token_set_precision": 0.46402045143806697, + "eval_bias-bios_token_set_recall": 0.6437579715420798, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 558 + }, + { + "epoch": 2.48, + "learning_rate": 0.001, + "loss": 2.1767, + "step": 564 + }, + { + "epoch": 2.54, + "learning_rate": 0.001, + "loss": 2.125, + "step": 576 + }, + { + "epoch": 2.59, + "learning_rate": 0.001, + "loss": 2.0658, + "step": 588 + }, + { + "epoch": 2.59, + "eval_ag_news_accuracy": 0.308375, + "eval_ag_news_bleu_score": 4.825938925986893, + "eval_ag_news_bleu_score_sem": 0.17537066666011905, + "eval_ag_news_emb_cos_sim": 0.8115467429161072, + "eval_ag_news_emb_cos_sim_sem": 0.004464737979958483, + "eval_ag_news_emb_top1_equal": 0.2720000147819519, + "eval_ag_news_emb_top1_equal_sem": 0.019920483557355567, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.498967170715332, + "eval_ag_news_n_ngrams_match_1": 12.898, + "eval_ag_news_n_ngrams_match_2": 2.776, + "eval_ag_news_n_ngrams_match_3": 0.74, + "eval_ag_news_num_pred_words": 38.22, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 33.08126700678294, + "eval_ag_news_pred_num_tokens": 58.78125, + "eval_ag_news_rouge_score": 0.3649716418907004, + "eval_ag_news_runtime": 104.8253, + "eval_ag_news_samples_per_second": 4.77, + "eval_ag_news_steps_per_second": 0.01, + "eval_ag_news_token_set_f1": 0.35050368829625284, + "eval_ag_news_token_set_f1_sem": 0.004982748591545938, + "eval_ag_news_token_set_precision": 0.3167835362371466, + "eval_ag_news_token_set_recall": 0.4121145961802362, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 589 + }, + { + "epoch": 2.59, + "eval_anthropic_toxic_prompts_accuracy": 0.10878125, + "eval_anthropic_toxic_prompts_bleu_score": 4.171680099560288, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1593958533538672, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6833683252334595, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.005055784299823478, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.12999999523162842, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015055009156667442, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 2.92020845413208, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.804, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.862, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.726, + "eval_anthropic_toxic_prompts_num_pred_words": 35.886, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 18.545152870598372, + "eval_anthropic_toxic_prompts_pred_num_tokens": 52.578125, + "eval_anthropic_toxic_prompts_rouge_score": 0.2567360788471482, + "eval_anthropic_toxic_prompts_runtime": 53.4305, + "eval_anthropic_toxic_prompts_samples_per_second": 9.358, + "eval_anthropic_toxic_prompts_steps_per_second": 0.019, + "eval_anthropic_toxic_prompts_token_set_f1": 0.3451024730372745, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006029522771575968, + "eval_anthropic_toxic_prompts_token_set_precision": 0.4288044873442379, + "eval_anthropic_toxic_prompts_token_set_recall": 0.31550608856249085, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 589 + }, + { + "epoch": 2.59, + "eval_arxiv_accuracy": 0.4219375, + "eval_arxiv_bleu_score": 4.140443900056267, + "eval_arxiv_bleu_score_sem": 0.12015419626352614, + "eval_arxiv_emb_cos_sim": 0.7514610886573792, + "eval_arxiv_emb_cos_sim_sem": 0.0054524023673946, + "eval_arxiv_emb_top1_equal": 0.21199999749660492, + "eval_arxiv_emb_top1_equal_sem": 0.01829703673906991, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.029334783554077, + "eval_arxiv_n_ngrams_match_1": 14.722, + "eval_arxiv_n_ngrams_match_2": 2.734, + "eval_arxiv_n_ngrams_match_3": 0.554, + "eval_arxiv_num_pred_words": 35.644, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 20.683469028267105, + "eval_arxiv_pred_num_tokens": 59.8203125, + "eval_arxiv_rouge_score": 0.3687405463610957, + "eval_arxiv_runtime": 24.1607, + "eval_arxiv_samples_per_second": 20.695, + "eval_arxiv_steps_per_second": 0.041, + "eval_arxiv_token_set_f1": 0.3668459906499189, + "eval_arxiv_token_set_f1_sem": 0.004671911518585731, + "eval_arxiv_token_set_precision": 0.3116453335220418, + "eval_arxiv_token_set_recall": 0.46383344443392316, + "eval_arxiv_true_num_tokens": 64.0, + "step": 589 + }, + { + "epoch": 2.59, + "eval_python_code_alpaca_accuracy": 0.15428125, + "eval_python_code_alpaca_bleu_score": 5.864369043870192, + "eval_python_code_alpaca_bleu_score_sem": 0.19204430713131432, + "eval_python_code_alpaca_emb_cos_sim": 0.774185836315155, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.004486241714407617, + "eval_python_code_alpaca_emb_top1_equal": 0.1979999989271164, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.017838958581409683, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.568850040435791, + "eval_python_code_alpaca_n_ngrams_match_1": 9.564, + "eval_python_code_alpaca_n_ngrams_match_2": 2.782, + "eval_python_code_alpaca_n_ngrams_match_3": 0.926, + "eval_python_code_alpaca_num_pred_words": 33.608, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 13.050807907012663, + "eval_python_code_alpaca_pred_num_tokens": 53.8359375, + "eval_python_code_alpaca_rouge_score": 0.40507381092872385, + "eval_python_code_alpaca_runtime": 77.0073, + "eval_python_code_alpaca_samples_per_second": 6.493, + "eval_python_code_alpaca_steps_per_second": 0.013, + "eval_python_code_alpaca_token_set_f1": 0.4784863540932467, + "eval_python_code_alpaca_token_set_f1_sem": 0.0057110262975351034, + "eval_python_code_alpaca_token_set_precision": 0.5350334116878842, + "eval_python_code_alpaca_token_set_recall": 0.4515350360067306, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 589 + }, + { + "epoch": 2.59, + "eval_wikibio_accuracy": 0.3678125, + "eval_wikibio_bleu_score": 5.08333193380027, + "eval_wikibio_bleu_score_sem": 0.21579463987083333, + "eval_wikibio_emb_cos_sim": 0.7033864855766296, + "eval_wikibio_emb_cos_sim_sem": 0.0071376558038668434, + "eval_wikibio_emb_top1_equal": 0.1860000044107437, + "eval_wikibio_emb_top1_equal_sem": 0.017418806591218323, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.3738596439361572, + "eval_wikibio_n_ngrams_match_1": 8.356, + "eval_wikibio_n_ngrams_match_2": 2.586, + "eval_wikibio_n_ngrams_match_3": 0.97, + "eval_wikibio_num_pred_words": 30.618, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 29.190976686596628, + "eval_wikibio_pred_num_tokens": 61.5078125, + "eval_wikibio_rouge_score": 0.3009390844051775, + "eval_wikibio_runtime": 104.4552, + "eval_wikibio_samples_per_second": 4.787, + "eval_wikibio_steps_per_second": 0.01, + "eval_wikibio_token_set_f1": 0.27607929651752483, + "eval_wikibio_token_set_f1_sem": 0.006884092160953384, + "eval_wikibio_token_set_precision": 0.2687024695785008, + "eval_wikibio_token_set_recall": 0.31430470518840553, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 589 + }, + { + "epoch": 2.59, + "eval_bias-bios_accuracy": 0.5073125, + "eval_bias-bios_bleu_score": 17.78100403924648, + "eval_bias-bios_bleu_score_sem": 0.7668617383166656, + "eval_bias-bios_emb_cos_sim": 0.8714081048965454, + "eval_bias-bios_emb_cos_sim_sem": 0.003140707764700572, + "eval_bias-bios_emb_top1_equal": 0.3319999873638153, + "eval_bias-bios_emb_top1_equal_sem": 0.02108176585203148, + "eval_bias-bios_exact_match": 0.0, + "eval_bias-bios_exact_match_sem": 0.0, + "eval_bias-bios_loss": 1.832431674003601, + "eval_bias-bios_n_ngrams_match_1": 21.574, + "eval_bias-bios_n_ngrams_match_2": 9.494, + "eval_bias-bios_n_ngrams_match_3": 5.212, + "eval_bias-bios_num_pred_words": 42.016, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 6.249063884207591, + "eval_bias-bios_pred_num_tokens": 57.7578125, + "eval_bias-bios_rouge_score": 0.5275146750904657, + "eval_bias-bios_runtime": 46.2079, + "eval_bias-bios_samples_per_second": 10.821, + "eval_bias-bios_steps_per_second": 0.022, + "eval_bias-bios_token_set_f1": 0.5490236205565298, + "eval_bias-bios_token_set_f1_sem": 0.006594456906976342, + "eval_bias-bios_token_set_precision": 0.52137228578852, + "eval_bias-bios_token_set_recall": 0.5902546299142385, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 589 + }, + { + "epoch": 2.64, + "learning_rate": 0.001, + "loss": 1.8307, + "step": 600 + }, + { + "epoch": 2.7, + "learning_rate": 0.001, + "loss": 2.1034, + "step": 612 + }, + { + "epoch": 2.73, + "eval_ag_news_accuracy": 0.30353125, + "eval_ag_news_bleu_score": 4.53689408224436, + "eval_ag_news_bleu_score_sem": 0.1407891868623165, + "eval_ag_news_emb_cos_sim": 0.808462381362915, + "eval_ag_news_emb_cos_sim_sem": 0.004580344751279384, + "eval_ag_news_emb_top1_equal": 0.2639999985694885, + "eval_ag_news_emb_top1_equal_sem": 0.019732885240582997, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.506286144256592, + "eval_ag_news_n_ngrams_match_1": 13.222, + "eval_ag_news_n_ngrams_match_2": 2.836, + "eval_ag_news_n_ngrams_match_3": 0.734, + "eval_ag_news_num_pred_words": 42.192, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 33.32427612860431, + "eval_ag_news_pred_num_tokens": 62.1015625, + "eval_ag_news_rouge_score": 0.35460702911253306, + "eval_ag_news_runtime": 22.9681, + "eval_ag_news_samples_per_second": 21.769, + "eval_ag_news_steps_per_second": 0.044, + "eval_ag_news_token_set_f1": 0.34999672868570797, + "eval_ag_news_token_set_f1_sem": 0.0049020199900705235, + "eval_ag_news_token_set_precision": 0.3237693505139728, + "eval_ag_news_token_set_recall": 0.4076253755054518, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 620 + }, + { + "epoch": 2.73, + "eval_anthropic_toxic_prompts_accuracy": 0.107875, + "eval_anthropic_toxic_prompts_bleu_score": 3.63208410886937, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.13144121713299511, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6909038424491882, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00449909224242255, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.15600000321865082, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.016243635183835314, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 3.0125765800476074, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.2, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.98, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.782, + "eval_anthropic_toxic_prompts_num_pred_words": 41.784, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 20.339739434618117, + "eval_anthropic_toxic_prompts_pred_num_tokens": 60.3984375, + "eval_anthropic_toxic_prompts_rouge_score": 0.24196796556361308, + "eval_anthropic_toxic_prompts_runtime": 6.9661, + "eval_anthropic_toxic_prompts_samples_per_second": 71.776, + "eval_anthropic_toxic_prompts_steps_per_second": 0.144, + "eval_anthropic_toxic_prompts_token_set_f1": 0.3394931645156119, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005807740971075464, + "eval_anthropic_toxic_prompts_token_set_precision": 0.44422747807761476, + "eval_anthropic_toxic_prompts_token_set_recall": 0.2990046287227596, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 620 + }, + { + "epoch": 2.73, + "eval_arxiv_accuracy": 0.4233125, + "eval_arxiv_bleu_score": 4.284955203954001, + "eval_arxiv_bleu_score_sem": 0.12627207210690689, + "eval_arxiv_emb_cos_sim": 0.741362452507019, + "eval_arxiv_emb_cos_sim_sem": 0.005599541529585759, + "eval_arxiv_emb_top1_equal": 0.27799999713897705, + "eval_arxiv_emb_top1_equal_sem": 0.0200558347666307, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.0010297298431396, + "eval_arxiv_n_ngrams_match_1": 14.89, + "eval_arxiv_n_ngrams_match_2": 2.868, + "eval_arxiv_n_ngrams_match_3": 0.614, + "eval_arxiv_num_pred_words": 38.106, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 20.106230252413784, + "eval_arxiv_pred_num_tokens": 61.8046875, + "eval_arxiv_rouge_score": 0.3562602292924252, + "eval_arxiv_runtime": 7.3893, + "eval_arxiv_samples_per_second": 67.665, + "eval_arxiv_steps_per_second": 0.135, + "eval_arxiv_token_set_f1": 0.3608819560059443, + "eval_arxiv_token_set_f1_sem": 0.004948931806275159, + "eval_arxiv_token_set_precision": 0.30757811736542806, + "eval_arxiv_token_set_recall": 0.4654241563081048, + "eval_arxiv_true_num_tokens": 64.0, + "step": 620 + }, + { + "epoch": 2.73, + "eval_python_code_alpaca_accuracy": 0.152125, + "eval_python_code_alpaca_bleu_score": 5.215313521009815, + "eval_python_code_alpaca_bleu_score_sem": 0.16132353607710642, + "eval_python_code_alpaca_emb_cos_sim": 0.7622130513191223, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.004359099555841739, + "eval_python_code_alpaca_emb_top1_equal": 0.15600000321865082, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.01624363651663569, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.671508312225342, + "eval_python_code_alpaca_n_ngrams_match_1": 9.816, + "eval_python_code_alpaca_n_ngrams_match_2": 2.878, + "eval_python_code_alpaca_n_ngrams_match_3": 0.96, + "eval_python_code_alpaca_num_pred_words": 37.962, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 14.461765608637934, + "eval_python_code_alpaca_pred_num_tokens": 59.875, + "eval_python_code_alpaca_rouge_score": 0.3860518554997959, + "eval_python_code_alpaca_runtime": 98.8415, + "eval_python_code_alpaca_samples_per_second": 5.059, + "eval_python_code_alpaca_steps_per_second": 0.01, + "eval_python_code_alpaca_token_set_f1": 0.4681935618748671, + "eval_python_code_alpaca_token_set_f1_sem": 0.005613756137103073, + "eval_python_code_alpaca_token_set_precision": 0.5427428112223796, + "eval_python_code_alpaca_token_set_recall": 0.4299327669158068, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 620 + }, + { + "epoch": 2.73, + "eval_wikibio_accuracy": 0.3721875, + "eval_wikibio_bleu_score": 4.7620090481459, + "eval_wikibio_bleu_score_sem": 0.20192915764833883, + "eval_wikibio_emb_cos_sim": 0.7033131122589111, + "eval_wikibio_emb_cos_sim_sem": 0.007050469334540749, + "eval_wikibio_emb_top1_equal": 0.16200000047683716, + "eval_wikibio_emb_top1_equal_sem": 0.016494123019099097, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.3212852478027344, + "eval_wikibio_n_ngrams_match_1": 8.292, + "eval_wikibio_n_ngrams_match_2": 2.552, + "eval_wikibio_n_ngrams_match_3": 0.932, + "eval_wikibio_num_pred_words": 31.282, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 27.6959238186253, + "eval_wikibio_pred_num_tokens": 62.828125, + "eval_wikibio_rouge_score": 0.2892639774887934, + "eval_wikibio_runtime": 7.1225, + "eval_wikibio_samples_per_second": 70.2, + "eval_wikibio_steps_per_second": 0.14, + "eval_wikibio_token_set_f1": 0.26863722552618025, + "eval_wikibio_token_set_f1_sem": 0.00692945422994504, + "eval_wikibio_token_set_precision": 0.26243191660598264, + "eval_wikibio_token_set_recall": 0.3061311954238753, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 620 + }, + { + "epoch": 2.73, + "eval_bias-bios_accuracy": 0.5038125, + "eval_bias-bios_bleu_score": 16.77870924904314, + "eval_bias-bios_bleu_score_sem": 0.7032499787869223, + "eval_bias-bios_emb_cos_sim": 0.8721063137054443, + "eval_bias-bios_emb_cos_sim_sem": 0.0027367717900708775, + "eval_bias-bios_emb_top1_equal": 0.32199999690055847, + "eval_bias-bios_emb_top1_equal_sem": 0.020916667871188392, + "eval_bias-bios_exact_match": 0.0, + "eval_bias-bios_exact_match_sem": 0.0, + "eval_bias-bios_loss": 1.8617655038833618, + "eval_bias-bios_n_ngrams_match_1": 22.31, + "eval_bias-bios_n_ngrams_match_2": 9.792, + "eval_bias-bios_n_ngrams_match_3": 5.316, + "eval_bias-bios_num_pred_words": 46.466, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 6.435087920945597, + "eval_bias-bios_pred_num_tokens": 61.8046875, + "eval_bias-bios_rouge_score": 0.5140055790947482, + "eval_bias-bios_runtime": 8.3194, + "eval_bias-bios_samples_per_second": 60.1, + "eval_bias-bios_steps_per_second": 0.12, + "eval_bias-bios_token_set_f1": 0.5454037892631416, + "eval_bias-bios_token_set_f1_sem": 0.006311858395773849, + "eval_bias-bios_token_set_precision": 0.5311017560395707, + "eval_bias-bios_token_set_recall": 0.5703081426345601, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 620 + }, + { + "epoch": 2.75, + "learning_rate": 0.001, + "loss": 2.1311, + "step": 624 + }, + { + "epoch": 2.8, + "learning_rate": 0.001, + "loss": 2.0834, + "step": 636 + }, + { + "epoch": 2.85, + "learning_rate": 0.001, + "loss": 1.8634, + "step": 648 + }, + { + "epoch": 2.87, + "eval_ag_news_accuracy": 0.30515625, + "eval_ag_news_bleu_score": 4.473343439625787, + "eval_ag_news_bleu_score_sem": 0.15810206804676052, + "eval_ag_news_emb_cos_sim": 0.8005340695381165, + "eval_ag_news_emb_cos_sim_sem": 0.004695550349687434, + "eval_ag_news_emb_top1_equal": 0.2879999876022339, + "eval_ag_news_emb_top1_equal_sem": 0.020271503192099565, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.5658531188964844, + "eval_ag_news_n_ngrams_match_1": 11.65, + "eval_ag_news_n_ngrams_match_2": 2.418, + "eval_ag_news_n_ngrams_match_3": 0.658, + "eval_ag_news_num_pred_words": 30.516, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 35.3696150236647, + "eval_ag_news_pred_num_tokens": 46.015625, + "eval_ag_news_rouge_score": 0.3610716279802496, + "eval_ag_news_runtime": 7.1672, + "eval_ag_news_samples_per_second": 69.762, + "eval_ag_news_steps_per_second": 0.14, + "eval_ag_news_token_set_f1": 0.34436389102369475, + "eval_ag_news_token_set_f1_sem": 0.004961501646001305, + "eval_ag_news_token_set_precision": 0.29844150324042096, + "eval_ag_news_token_set_recall": 0.42716703328221317, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 651 + }, + { + "epoch": 2.87, + "eval_anthropic_toxic_prompts_accuracy": 0.11078125, + "eval_anthropic_toxic_prompts_bleu_score": 5.265969515801792, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.19656878745061954, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.7007984519004822, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004628107985115478, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.15600000321865082, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.016243635183835314, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 2.847456455230713, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.706, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.832, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.684, + "eval_anthropic_toxic_prompts_num_pred_words": 27.388, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 17.243865468800184, + "eval_anthropic_toxic_prompts_pred_num_tokens": 40.8984375, + "eval_anthropic_toxic_prompts_rouge_score": 0.30469045903105896, + "eval_anthropic_toxic_prompts_runtime": 6.928, + "eval_anthropic_toxic_prompts_samples_per_second": 72.171, + "eval_anthropic_toxic_prompts_steps_per_second": 0.144, + "eval_anthropic_toxic_prompts_token_set_f1": 0.34764036986523544, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006320732915520322, + "eval_anthropic_toxic_prompts_token_set_precision": 0.4238956512970005, + "eval_anthropic_toxic_prompts_token_set_recall": 0.3209230248717735, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 651 + }, + { + "epoch": 2.87, + "eval_arxiv_accuracy": 0.4124375, + "eval_arxiv_bleu_score": 3.725053262080295, + "eval_arxiv_bleu_score_sem": 0.11067579921127993, + "eval_arxiv_emb_cos_sim": 0.7445892095565796, + "eval_arxiv_emb_cos_sim_sem": 0.005560418840580304, + "eval_arxiv_emb_top1_equal": 0.17399999499320984, + "eval_arxiv_emb_top1_equal_sem": 0.016971270884523753, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.109774112701416, + "eval_arxiv_n_ngrams_match_1": 13.534, + "eval_arxiv_n_ngrams_match_2": 2.47, + "eval_arxiv_n_ngrams_match_3": 0.534, + "eval_arxiv_num_pred_words": 29.738, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 22.41598034356986, + "eval_arxiv_pred_num_tokens": 50.703125, + "eval_arxiv_rouge_score": 0.36506026933321756, + "eval_arxiv_runtime": 7.2303, + "eval_arxiv_samples_per_second": 69.154, + "eval_arxiv_steps_per_second": 0.138, + "eval_arxiv_token_set_f1": 0.36532853948773913, + "eval_arxiv_token_set_f1_sem": 0.004653775964601775, + "eval_arxiv_token_set_precision": 0.30101683877486113, + "eval_arxiv_token_set_recall": 0.4827124457846711, + "eval_arxiv_true_num_tokens": 64.0, + "step": 651 + }, + { + "epoch": 2.87, + "eval_python_code_alpaca_accuracy": 0.1569375, + "eval_python_code_alpaca_bleu_score": 7.137868958322952, + "eval_python_code_alpaca_bleu_score_sem": 0.2364063665756055, + "eval_python_code_alpaca_emb_cos_sim": 0.7880061268806458, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.004177613128388481, + "eval_python_code_alpaca_emb_top1_equal": 0.1899999976158142, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.017561800077843276, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.4980950355529785, + "eval_python_code_alpaca_n_ngrams_match_1": 9.17, + "eval_python_code_alpaca_n_ngrams_match_2": 2.492, + "eval_python_code_alpaca_n_ngrams_match_3": 0.788, + "eval_python_code_alpaca_num_pred_words": 25.402, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 12.159308833265369, + "eval_python_code_alpaca_pred_num_tokens": 41.65625, + "eval_python_code_alpaca_rouge_score": 0.4624655642995602, + "eval_python_code_alpaca_runtime": 26.2419, + "eval_python_code_alpaca_samples_per_second": 19.053, + "eval_python_code_alpaca_steps_per_second": 0.038, + "eval_python_code_alpaca_token_set_f1": 0.4860980481971889, + "eval_python_code_alpaca_token_set_f1_sem": 0.00580305500608555, + "eval_python_code_alpaca_token_set_precision": 0.5229148183140903, + "eval_python_code_alpaca_token_set_recall": 0.4709995327571117, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 651 + }, + { + "epoch": 2.87, + "eval_wikibio_accuracy": 0.35765625, + "eval_wikibio_bleu_score": 5.85492257184771, + "eval_wikibio_bleu_score_sem": 0.23014008018986573, + "eval_wikibio_emb_cos_sim": 0.7288503646850586, + "eval_wikibio_emb_cos_sim_sem": 0.006177860272270408, + "eval_wikibio_emb_top1_equal": 0.16200000047683716, + "eval_wikibio_emb_top1_equal_sem": 0.016494123019099097, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.4955646991729736, + "eval_wikibio_n_ngrams_match_1": 8.694, + "eval_wikibio_n_ngrams_match_2": 2.738, + "eval_wikibio_n_ngrams_match_3": 1.004, + "eval_wikibio_num_pred_words": 29.054, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 32.96890020802882, + "eval_wikibio_pred_num_tokens": 52.8046875, + "eval_wikibio_rouge_score": 0.3403089616964229, + "eval_wikibio_runtime": 32.6626, + "eval_wikibio_samples_per_second": 15.308, + "eval_wikibio_steps_per_second": 0.031, + "eval_wikibio_token_set_f1": 0.29763297721712917, + "eval_wikibio_token_set_f1_sem": 0.006160074286052675, + "eval_wikibio_token_set_precision": 0.2869254675733824, + "eval_wikibio_token_set_recall": 0.33152872531158917, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 651 + }, + { + "epoch": 2.87, + "eval_bias-bios_accuracy": 0.50496875, + "eval_bias-bios_bleu_score": 17.404360770996192, + "eval_bias-bios_bleu_score_sem": 0.8065126842992738, + "eval_bias-bios_emb_cos_sim": 0.865310549736023, + "eval_bias-bios_emb_cos_sim_sem": 0.003374083442750598, + "eval_bias-bios_emb_top1_equal": 0.33000001311302185, + "eval_bias-bios_emb_top1_equal_sem": 0.021049612042986412, + "eval_bias-bios_exact_match": 0.002, + "eval_bias-bios_exact_match_sem": 0.002, + "eval_bias-bios_loss": 1.858695387840271, + "eval_bias-bios_n_ngrams_match_1": 20.136, + "eval_bias-bios_n_ngrams_match_2": 8.792, + "eval_bias-bios_n_ngrams_match_3": 4.816, + "eval_bias-bios_num_pred_words": 35.484, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 6.415361750591107, + "eval_bias-bios_pred_num_tokens": 48.4296875, + "eval_bias-bios_rouge_score": 0.5350815981968027, + "eval_bias-bios_runtime": 33.4966, + "eval_bias-bios_samples_per_second": 14.927, + "eval_bias-bios_steps_per_second": 0.03, + "eval_bias-bios_token_set_f1": 0.5462957317603716, + "eval_bias-bios_token_set_f1_sem": 0.006750628440175147, + "eval_bias-bios_token_set_precision": 0.5028559854585791, + "eval_bias-bios_token_set_recall": 0.6104946925640782, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 651 + }, + { + "epoch": 2.91, + "learning_rate": 0.001, + "loss": 2.0138, + "step": 660 + }, + { + "epoch": 2.96, + "learning_rate": 0.001, + "loss": 2.11, + "step": 672 + }, + { + "epoch": 3.0, + "eval_ag_news_accuracy": 0.3066875, + "eval_ag_news_bleu_score": 3.9999289719672073, + "eval_ag_news_bleu_score_sem": 0.14619700926918378, + "eval_ag_news_emb_cos_sim": 0.7976146936416626, + "eval_ag_news_emb_cos_sim_sem": 0.0042587303580072615, + "eval_ag_news_emb_top1_equal": 0.2540000081062317, + "eval_ag_news_emb_top1_equal_sem": 0.019486597059300604, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.557628870010376, + "eval_ag_news_n_ngrams_match_1": 10.736, + "eval_ag_news_n_ngrams_match_2": 2.24, + "eval_ag_news_n_ngrams_match_3": 0.614, + "eval_ag_news_num_pred_words": 25.642, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 35.07991940401688, + "eval_ag_news_pred_num_tokens": 37.46875, + "eval_ag_news_rouge_score": 0.35758803023918306, + "eval_ag_news_runtime": 8.2019, + "eval_ag_news_samples_per_second": 60.962, + "eval_ag_news_steps_per_second": 0.122, + "eval_ag_news_token_set_f1": 0.3417297542358024, + "eval_ag_news_token_set_f1_sem": 0.0049447789658150195, + "eval_ag_news_token_set_precision": 0.2815595439954063, + "eval_ag_news_token_set_recall": 0.45808750249581603, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 682 + }, + { + "epoch": 3.0, + "eval_anthropic_toxic_prompts_accuracy": 0.11465625, + "eval_anthropic_toxic_prompts_bleu_score": 6.620783700595906, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.2504343615589252, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.708827793598175, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004979361193125184, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1720000058412552, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01689386850274998, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 2.778644323348999, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.462, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.776, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.694, + "eval_anthropic_toxic_prompts_num_pred_words": 21.826, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 16.097183573245367, + "eval_anthropic_toxic_prompts_pred_num_tokens": 30.984375, + "eval_anthropic_toxic_prompts_rouge_score": 0.3427616398928224, + "eval_anthropic_toxic_prompts_runtime": 7.17, + "eval_anthropic_toxic_prompts_samples_per_second": 69.735, + "eval_anthropic_toxic_prompts_steps_per_second": 0.139, + "eval_anthropic_toxic_prompts_token_set_f1": 0.36721266069559144, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006592633901857433, + "eval_anthropic_toxic_prompts_token_set_precision": 0.4157216533469948, + "eval_anthropic_toxic_prompts_token_set_recall": 0.3591411989218731, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 682 + }, + { + "epoch": 3.0, + "eval_arxiv_accuracy": 0.41234375, + "eval_arxiv_bleu_score": 3.2402474085140467, + "eval_arxiv_bleu_score_sem": 0.11054316602251692, + "eval_arxiv_emb_cos_sim": 0.7450404763221741, + "eval_arxiv_emb_cos_sim_sem": 0.005016281096310982, + "eval_arxiv_emb_top1_equal": 0.1459999978542328, + "eval_arxiv_emb_top1_equal_sem": 0.01580720436986462, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.100191593170166, + "eval_arxiv_n_ngrams_match_1": 12.564, + "eval_arxiv_n_ngrams_match_2": 2.338, + "eval_arxiv_n_ngrams_match_3": 0.484, + "eval_arxiv_num_pred_words": 25.684, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 22.20220466474542, + "eval_arxiv_pred_num_tokens": 39.765625, + "eval_arxiv_rouge_score": 0.3643006369430338, + "eval_arxiv_runtime": 7.1688, + "eval_arxiv_samples_per_second": 69.747, + "eval_arxiv_steps_per_second": 0.139, + "eval_arxiv_token_set_f1": 0.36394150696450756, + "eval_arxiv_token_set_f1_sem": 0.004365745503615316, + "eval_arxiv_token_set_precision": 0.288894392717936, + "eval_arxiv_token_set_recall": 0.5062836949338945, + "eval_arxiv_true_num_tokens": 64.0, + "step": 682 + }, + { + "epoch": 3.0, + "eval_python_code_alpaca_accuracy": 0.17009375, + "eval_python_code_alpaca_bleu_score": 8.932379578566175, + "eval_python_code_alpaca_bleu_score_sem": 0.33461997730428233, + "eval_python_code_alpaca_emb_cos_sim": 0.8038181662559509, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.004188257205382726, + "eval_python_code_alpaca_emb_top1_equal": 0.24400000274181366, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.0192267343061996, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.3401527404785156, + "eval_python_code_alpaca_n_ngrams_match_1": 8.898, + "eval_python_code_alpaca_n_ngrams_match_2": 2.476, + "eval_python_code_alpaca_n_ngrams_match_3": 0.838, + "eval_python_code_alpaca_num_pred_words": 20.412, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 10.382822318873513, + "eval_python_code_alpaca_pred_num_tokens": 31.5078125, + "eval_python_code_alpaca_rouge_score": 0.4983959997409262, + "eval_python_code_alpaca_runtime": 55.2072, + "eval_python_code_alpaca_samples_per_second": 9.057, + "eval_python_code_alpaca_steps_per_second": 0.018, + "eval_python_code_alpaca_token_set_f1": 0.5136739256107152, + "eval_python_code_alpaca_token_set_f1_sem": 0.005907835885019311, + "eval_python_code_alpaca_token_set_precision": 0.5236014854603724, + "eval_python_code_alpaca_token_set_recall": 0.5256264438798506, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 682 + }, + { + "epoch": 3.0, + "eval_wikibio_accuracy": 0.357625, + "eval_wikibio_bleu_score": 5.884401182468587, + "eval_wikibio_bleu_score_sem": 0.2169968312792518, + "eval_wikibio_emb_cos_sim": 0.7289230823516846, + "eval_wikibio_emb_cos_sim_sem": 0.005783947123060196, + "eval_wikibio_emb_top1_equal": 0.12200000137090683, + "eval_wikibio_emb_top1_equal_sem": 0.014651325247908655, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.4971401691436768, + "eval_wikibio_n_ngrams_match_1": 8.648, + "eval_wikibio_n_ngrams_match_2": 2.708, + "eval_wikibio_n_ngrams_match_3": 0.984, + "eval_wikibio_num_pred_words": 27.808, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 33.02088265791592, + "eval_wikibio_pred_num_tokens": 48.890625, + "eval_wikibio_rouge_score": 0.3478947595641799, + "eval_wikibio_runtime": 7.1125, + "eval_wikibio_samples_per_second": 70.298, + "eval_wikibio_steps_per_second": 0.141, + "eval_wikibio_token_set_f1": 0.30490824395081195, + "eval_wikibio_token_set_f1_sem": 0.0059019070100866785, + "eval_wikibio_token_set_precision": 0.2908642059105115, + "eval_wikibio_token_set_recall": 0.33954481636458195, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 682 + }, + { + "epoch": 3.0, + "eval_bias-bios_accuracy": 0.50125, + "eval_bias-bios_bleu_score": 15.98849001010628, + "eval_bias-bios_bleu_score_sem": 0.7974510881576483, + "eval_bias-bios_emb_cos_sim": 0.8551180958747864, + "eval_bias-bios_emb_cos_sim_sem": 0.00377614433385282, + "eval_bias-bios_emb_top1_equal": 0.2720000147819519, + "eval_bias-bios_emb_top1_equal_sem": 0.019920483557355567, + "eval_bias-bios_exact_match": 0.002, + "eval_bias-bios_exact_match_sem": 0.002, + "eval_bias-bios_loss": 1.8715966939926147, + "eval_bias-bios_n_ngrams_match_1": 18.124, + "eval_bias-bios_n_ngrams_match_2": 8.164, + "eval_bias-bios_n_ngrams_match_3": 4.542, + "eval_bias-bios_num_pred_words": 28.884, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 6.498664498307737, + "eval_bias-bios_pred_num_tokens": 38.703125, + "eval_bias-bios_rouge_score": 0.5264331775686338, + "eval_bias-bios_runtime": 7.7438, + "eval_bias-bios_samples_per_second": 64.568, + "eval_bias-bios_steps_per_second": 0.129, + "eval_bias-bios_token_set_f1": 0.5371140795022766, + "eval_bias-bios_token_set_f1_sem": 0.006801180559363128, + "eval_bias-bios_token_set_precision": 0.46791660140110625, + "eval_bias-bios_token_set_recall": 0.6506526258419852, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 682 + }, + { + "epoch": 3.01, + "learning_rate": 0.001, + "loss": 1.9015, + "step": 684 + }, + { + "epoch": 3.07, + "learning_rate": 0.001, + "loss": 2.1372, + "step": 696 + }, + { + "epoch": 3.12, + "learning_rate": 0.001, + "loss": 2.0761, + "step": 708 + }, + { + "epoch": 3.14, + "eval_ag_news_accuracy": 0.30365625, + "eval_ag_news_bleu_score": 4.807231978089563, + "eval_ag_news_bleu_score_sem": 0.16163813088315626, + "eval_ag_news_emb_cos_sim": 0.8174247145652771, + "eval_ag_news_emb_cos_sim_sem": 0.004546760847432024, + "eval_ag_news_emb_top1_equal": 0.25999999046325684, + "eval_ag_news_emb_top1_equal_sem": 0.0196359666629192, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.5345346927642822, + "eval_ag_news_n_ngrams_match_1": 13.404, + "eval_ag_news_n_ngrams_match_2": 2.846, + "eval_ag_news_n_ngrams_match_3": 0.77, + "eval_ag_news_num_pred_words": 41.124, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 34.27906070777844, + "eval_ag_news_pred_num_tokens": 61.5234375, + "eval_ag_news_rouge_score": 0.36701003791655307, + "eval_ag_news_runtime": 7.3142, + "eval_ag_news_samples_per_second": 68.36, + "eval_ag_news_steps_per_second": 0.137, + "eval_ag_news_token_set_f1": 0.35534880626682386, + "eval_ag_news_token_set_f1_sem": 0.004803456660972898, + "eval_ag_news_token_set_precision": 0.32871916471570295, + "eval_ag_news_token_set_recall": 0.4079597792220003, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 713 + }, + { + "epoch": 3.14, + "eval_anthropic_toxic_prompts_accuracy": 0.10734375, + "eval_anthropic_toxic_prompts_bleu_score": 3.7004674765467946, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1351453047196849, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.693345308303833, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004463090638695075, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.12200000137090683, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.014651325247908655, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 3.01737380027771, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.18, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.95, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.726, + "eval_anthropic_toxic_prompts_num_pred_words": 40.516, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 20.43754806129512, + "eval_anthropic_toxic_prompts_pred_num_tokens": 59.2265625, + "eval_anthropic_toxic_prompts_rouge_score": 0.24905914379520377, + "eval_anthropic_toxic_prompts_runtime": 7.0211, + "eval_anthropic_toxic_prompts_samples_per_second": 71.214, + "eval_anthropic_toxic_prompts_steps_per_second": 0.142, + "eval_anthropic_toxic_prompts_token_set_f1": 0.34231952265276866, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00582109173922676, + "eval_anthropic_toxic_prompts_token_set_precision": 0.4466406316128299, + "eval_anthropic_toxic_prompts_token_set_recall": 0.3017256420993679, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 713 + }, + { + "epoch": 3.14, + "eval_arxiv_accuracy": 0.42584375, + "eval_arxiv_bleu_score": 4.510228645858232, + "eval_arxiv_bleu_score_sem": 0.12525631185548242, + "eval_arxiv_emb_cos_sim": 0.7616528272628784, + "eval_arxiv_emb_cos_sim_sem": 0.004894787345734036, + "eval_arxiv_emb_top1_equal": 0.2540000081062317, + "eval_arxiv_emb_top1_equal_sem": 0.019486597059300604, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 2.996361017227173, + "eval_arxiv_n_ngrams_match_1": 15.418, + "eval_arxiv_n_ngrams_match_2": 2.996, + "eval_arxiv_n_ngrams_match_3": 0.676, + "eval_arxiv_num_pred_words": 37.702, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 20.012578827478347, + "eval_arxiv_pred_num_tokens": 62.390625, + "eval_arxiv_rouge_score": 0.37442979613112276, + "eval_arxiv_runtime": 99.6345, + "eval_arxiv_samples_per_second": 5.018, + "eval_arxiv_steps_per_second": 0.01, + "eval_arxiv_token_set_f1": 0.3760448173627775, + "eval_arxiv_token_set_f1_sem": 0.004571537213954523, + "eval_arxiv_token_set_precision": 0.32504903646266936, + "eval_arxiv_token_set_recall": 0.4649487924076311, + "eval_arxiv_true_num_tokens": 64.0, + "step": 713 + }, + { + "epoch": 3.14, + "eval_python_code_alpaca_accuracy": 0.15446875, + "eval_python_code_alpaca_bleu_score": 5.5939644522079615, + "eval_python_code_alpaca_bleu_score_sem": 0.17768506193955427, + "eval_python_code_alpaca_emb_cos_sim": 0.7780863642692566, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.0038236100200052118, + "eval_python_code_alpaca_emb_top1_equal": 0.1940000057220459, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.017701828083634023, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.6528801918029785, + "eval_python_code_alpaca_n_ngrams_match_1": 10.01, + "eval_python_code_alpaca_n_ngrams_match_2": 2.964, + "eval_python_code_alpaca_n_ngrams_match_3": 0.988, + "eval_python_code_alpaca_num_pred_words": 36.804, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 14.194863755312682, + "eval_python_code_alpaca_pred_num_tokens": 59.1875, + "eval_python_code_alpaca_rouge_score": 0.39856666647945677, + "eval_python_code_alpaca_runtime": 7.0358, + "eval_python_code_alpaca_samples_per_second": 71.065, + "eval_python_code_alpaca_steps_per_second": 0.142, + "eval_python_code_alpaca_token_set_f1": 0.47840825139573334, + "eval_python_code_alpaca_token_set_f1_sem": 0.0054816918459534405, + "eval_python_code_alpaca_token_set_precision": 0.5579828664085862, + "eval_python_code_alpaca_token_set_recall": 0.4363118980299957, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 713 + }, + { + "epoch": 3.14, + "eval_wikibio_accuracy": 0.367875, + "eval_wikibio_bleu_score": 4.735244091280166, + "eval_wikibio_bleu_score_sem": 0.20963187820347412, + "eval_wikibio_emb_cos_sim": 0.6974137425422668, + "eval_wikibio_emb_cos_sim_sem": 0.007348966638509892, + "eval_wikibio_emb_top1_equal": 0.17399999499320984, + "eval_wikibio_emb_top1_equal_sem": 0.016971270884523753, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.351142644882202, + "eval_wikibio_n_ngrams_match_1": 7.906, + "eval_wikibio_n_ngrams_match_2": 2.508, + "eval_wikibio_n_ngrams_match_3": 0.934, + "eval_wikibio_num_pred_words": 30.27, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 28.535320760699715, + "eval_wikibio_pred_num_tokens": 62.890625, + "eval_wikibio_rouge_score": 0.2857146526105555, + "eval_wikibio_runtime": 7.1168, + "eval_wikibio_samples_per_second": 70.257, + "eval_wikibio_steps_per_second": 0.141, + "eval_wikibio_token_set_f1": 0.25992420696120655, + "eval_wikibio_token_set_f1_sem": 0.007061032205840481, + "eval_wikibio_token_set_precision": 0.25434709052883214, + "eval_wikibio_token_set_recall": 0.2923511219456792, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 713 + }, + { + "epoch": 3.14, + "eval_bias-bios_accuracy": 0.510375, + "eval_bias-bios_bleu_score": 17.46569017961834, + "eval_bias-bios_bleu_score_sem": 0.7332074185725256, + "eval_bias-bios_emb_cos_sim": 0.8748842477798462, + "eval_bias-bios_emb_cos_sim_sem": 0.0030136818929674064, + "eval_bias-bios_emb_top1_equal": 0.3619999885559082, + "eval_bias-bios_emb_top1_equal_sem": 0.02151366247912668, + "eval_bias-bios_exact_match": 0.0, + "eval_bias-bios_exact_match_sem": 0.0, + "eval_bias-bios_loss": 1.8190138339996338, + "eval_bias-bios_n_ngrams_match_1": 22.302, + "eval_bias-bios_n_ngrams_match_2": 9.934, + "eval_bias-bios_n_ngrams_match_3": 5.444, + "eval_bias-bios_num_pred_words": 45.506, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 6.165774973073229, + "eval_bias-bios_pred_num_tokens": 61.265625, + "eval_bias-bios_rouge_score": 0.5202713511356998, + "eval_bias-bios_runtime": 7.4369, + "eval_bias-bios_samples_per_second": 67.232, + "eval_bias-bios_steps_per_second": 0.134, + "eval_bias-bios_token_set_f1": 0.549705304733179, + "eval_bias-bios_token_set_f1_sem": 0.006451127709977456, + "eval_bias-bios_token_set_precision": 0.5326632628189287, + "eval_bias-bios_token_set_recall": 0.5795277275593579, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 713 + }, + { + "epoch": 3.17, + "learning_rate": 0.001, + "loss": 1.944, + "step": 720 + }, + { + "epoch": 3.22, + "learning_rate": 0.001, + "loss": 1.7949, + "step": 732 + }, + { + "epoch": 3.28, + "learning_rate": 0.001, + "loss": 2.1684, + "step": 744 + }, + { + "epoch": 3.28, + "eval_ag_news_accuracy": 0.3044375, + "eval_ag_news_bleu_score": 4.672991899082724, + "eval_ag_news_bleu_score_sem": 0.15476185602666204, + "eval_ag_news_emb_cos_sim": 0.811370849609375, + "eval_ag_news_emb_cos_sim_sem": 0.004829103278056701, + "eval_ag_news_emb_top1_equal": 0.25600001215934753, + "eval_ag_news_emb_top1_equal_sem": 0.019536923601457774, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.504905939102173, + "eval_ag_news_n_ngrams_match_1": 13.25, + "eval_ag_news_n_ngrams_match_2": 2.822, + "eval_ag_news_n_ngrams_match_3": 0.754, + "eval_ag_news_num_pred_words": 41.12, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 33.27831351713739, + "eval_ag_news_pred_num_tokens": 60.546875, + "eval_ag_news_rouge_score": 0.3606610416794005, + "eval_ag_news_runtime": 7.4579, + "eval_ag_news_samples_per_second": 67.043, + "eval_ag_news_steps_per_second": 0.134, + "eval_ag_news_token_set_f1": 0.3520361867777557, + "eval_ag_news_token_set_f1_sem": 0.004946594020772925, + "eval_ag_news_token_set_precision": 0.3243223940123956, + "eval_ag_news_token_set_recall": 0.4096605761331192, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 744 + }, + { + "epoch": 3.28, + "eval_anthropic_toxic_prompts_accuracy": 0.10796875, + "eval_anthropic_toxic_prompts_bleu_score": 3.9505558874336377, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.14533239487420707, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.691646933555603, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00474973001772851, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.14800000190734863, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015896458012572223, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 3.0150554180145264, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.154, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.02, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.818, + "eval_anthropic_toxic_prompts_num_pred_words": 40.01, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 20.390220894796755, + "eval_anthropic_toxic_prompts_pred_num_tokens": 55.5703125, + "eval_anthropic_toxic_prompts_rouge_score": 0.2475437197594741, + "eval_anthropic_toxic_prompts_runtime": 7.0734, + "eval_anthropic_toxic_prompts_samples_per_second": 70.687, + "eval_anthropic_toxic_prompts_steps_per_second": 0.141, + "eval_anthropic_toxic_prompts_token_set_f1": 0.3445415672624848, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005892741461353698, + "eval_anthropic_toxic_prompts_token_set_precision": 0.44540730751231317, + "eval_anthropic_toxic_prompts_token_set_recall": 0.3069377172835802, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 744 + }, + { + "epoch": 3.28, + "eval_arxiv_accuracy": 0.42346875, + "eval_arxiv_bleu_score": 4.406566856808188, + "eval_arxiv_bleu_score_sem": 0.1332551318523246, + "eval_arxiv_emb_cos_sim": 0.7289456129074097, + "eval_arxiv_emb_cos_sim_sem": 0.00676433508684941, + "eval_arxiv_emb_top1_equal": 0.25200000405311584, + "eval_arxiv_emb_top1_equal_sem": 0.019435728067390842, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 2.984241008758545, + "eval_arxiv_n_ngrams_match_1": 14.726, + "eval_arxiv_n_ngrams_match_2": 2.96, + "eval_arxiv_n_ngrams_match_3": 0.69, + "eval_arxiv_num_pred_words": 37.278, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 19.77149015221476, + "eval_arxiv_pred_num_tokens": 61.8984375, + "eval_arxiv_rouge_score": 0.35180234388079, + "eval_arxiv_runtime": 8.2469, + "eval_arxiv_samples_per_second": 60.629, + "eval_arxiv_steps_per_second": 0.121, + "eval_arxiv_token_set_f1": 0.35925759294812704, + "eval_arxiv_token_set_f1_sem": 0.005295346068408739, + "eval_arxiv_token_set_precision": 0.30474853688198655, + "eval_arxiv_token_set_recall": 0.4687242650845372, + "eval_arxiv_true_num_tokens": 64.0, + "step": 744 + }, + { + "epoch": 3.28, + "eval_python_code_alpaca_accuracy": 0.15509375, + "eval_python_code_alpaca_bleu_score": 6.1177832646981205, + "eval_python_code_alpaca_bleu_score_sem": 0.2048795479105771, + "eval_python_code_alpaca_emb_cos_sim": 0.7686617374420166, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.004338874310152252, + "eval_python_code_alpaca_emb_top1_equal": 0.15800000727176666, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.01632805076118194, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.619371175765991, + "eval_python_code_alpaca_n_ngrams_match_1": 9.736, + "eval_python_code_alpaca_n_ngrams_match_2": 2.998, + "eval_python_code_alpaca_n_ngrams_match_3": 1.076, + "eval_python_code_alpaca_num_pred_words": 35.354, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 13.727088944336183, + "eval_python_code_alpaca_pred_num_tokens": 55.4296875, + "eval_python_code_alpaca_rouge_score": 0.4021383557670485, + "eval_python_code_alpaca_runtime": 7.0106, + "eval_python_code_alpaca_samples_per_second": 71.32, + "eval_python_code_alpaca_steps_per_second": 0.143, + "eval_python_code_alpaca_token_set_f1": 0.4800895200151073, + "eval_python_code_alpaca_token_set_f1_sem": 0.005615133377042818, + "eval_python_code_alpaca_token_set_precision": 0.5409584793979987, + "eval_python_code_alpaca_token_set_recall": 0.4499633268060651, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 744 + }, + { + "epoch": 3.28, + "eval_wikibio_accuracy": 0.37146875, + "eval_wikibio_bleu_score": 4.702581855515065, + "eval_wikibio_bleu_score_sem": 0.20529887964322238, + "eval_wikibio_emb_cos_sim": 0.682021975517273, + "eval_wikibio_emb_cos_sim_sem": 0.007582080088086151, + "eval_wikibio_emb_top1_equal": 0.164000004529953, + "eval_wikibio_emb_top1_equal_sem": 0.016575811686878626, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.2913637161254883, + "eval_wikibio_n_ngrams_match_1": 7.786, + "eval_wikibio_n_ngrams_match_2": 2.476, + "eval_wikibio_n_ngrams_match_3": 0.928, + "eval_wikibio_num_pred_words": 29.428, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 26.879494673398067, + "eval_wikibio_pred_num_tokens": 62.5703125, + "eval_wikibio_rouge_score": 0.2758099282727248, + "eval_wikibio_runtime": 7.0912, + "eval_wikibio_samples_per_second": 70.51, + "eval_wikibio_steps_per_second": 0.141, + "eval_wikibio_token_set_f1": 0.2562598491451561, + "eval_wikibio_token_set_f1_sem": 0.007338948588135056, + "eval_wikibio_token_set_precision": 0.24709290983369253, + "eval_wikibio_token_set_recall": 0.3000468657914441, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 744 + }, + { + "epoch": 3.28, + "eval_bias-bios_accuracy": 0.5049375, + "eval_bias-bios_bleu_score": 17.635346680899122, + "eval_bias-bios_bleu_score_sem": 0.7664231815693775, + "eval_bias-bios_emb_cos_sim": 0.8713113069534302, + "eval_bias-bios_emb_cos_sim_sem": 0.003266782016966041, + "eval_bias-bios_emb_top1_equal": 0.33000001311302185, + "eval_bias-bios_emb_top1_equal_sem": 0.021049612042986412, + "eval_bias-bios_exact_match": 0.0, + "eval_bias-bios_exact_match_sem": 0.0, + "eval_bias-bios_loss": 1.8327958583831787, + "eval_bias-bios_n_ngrams_match_1": 22.168, + "eval_bias-bios_n_ngrams_match_2": 9.896, + "eval_bias-bios_n_ngrams_match_3": 5.414, + "eval_bias-bios_num_pred_words": 45.57, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 6.251340110119005, + "eval_bias-bios_pred_num_tokens": 60.21875, + "eval_bias-bios_rouge_score": 0.5185499712129331, + "eval_bias-bios_runtime": 22.9658, + "eval_bias-bios_samples_per_second": 21.771, + "eval_bias-bios_steps_per_second": 0.044, + "eval_bias-bios_token_set_f1": 0.5517670294522569, + "eval_bias-bios_token_set_f1_sem": 0.00664155954121532, + "eval_bias-bios_token_set_precision": 0.5298568658456634, + "eval_bias-bios_token_set_recall": 0.58644952362815, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 744 + }, + { + "epoch": 3.33, + "learning_rate": 0.001, + "loss": 2.0783, + "step": 756 + }, + { + "epoch": 3.38, + "learning_rate": 0.001, + "loss": 1.9614, + "step": 768 + }, + { + "epoch": 3.41, + "eval_ag_news_accuracy": 0.3060625, + "eval_ag_news_bleu_score": 4.830644842559454, + "eval_ag_news_bleu_score_sem": 0.17131361214852292, + "eval_ag_news_emb_cos_sim": 0.8080878257751465, + "eval_ag_news_emb_cos_sim_sem": 0.004762135390418219, + "eval_ag_news_emb_top1_equal": 0.2639999985694885, + "eval_ag_news_emb_top1_equal_sem": 0.019732885240582997, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.5362191200256348, + "eval_ag_news_n_ngrams_match_1": 12.318, + "eval_ag_news_n_ngrams_match_2": 2.566, + "eval_ag_news_n_ngrams_match_3": 0.756, + "eval_ag_news_num_pred_words": 33.394, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 34.33684994935138, + "eval_ag_news_pred_num_tokens": 50.8125, + "eval_ag_news_rouge_score": 0.3686651207110883, + "eval_ag_news_runtime": 54.0329, + "eval_ag_news_samples_per_second": 9.254, + "eval_ag_news_steps_per_second": 0.019, + "eval_ag_news_token_set_f1": 0.35017067187233036, + "eval_ag_news_token_set_f1_sem": 0.004965635312749522, + "eval_ag_news_token_set_precision": 0.30866475239635843, + "eval_ag_news_token_set_recall": 0.4222894602236413, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 775 + }, + { + "epoch": 3.41, + "eval_anthropic_toxic_prompts_accuracy": 0.11034375, + "eval_anthropic_toxic_prompts_bleu_score": 4.791208765914185, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.17235693520941445, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6980757117271423, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004599339155822628, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.15800000727176666, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.016328049428381567, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 2.8975107669830322, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.794, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.804, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.67, + "eval_anthropic_toxic_prompts_num_pred_words": 29.974, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 18.128961946099636, + "eval_anthropic_toxic_prompts_pred_num_tokens": 43.9765625, + "eval_anthropic_toxic_prompts_rouge_score": 0.28998494944399944, + "eval_anthropic_toxic_prompts_runtime": 6.7811, + "eval_anthropic_toxic_prompts_samples_per_second": 73.734, + "eval_anthropic_toxic_prompts_steps_per_second": 0.147, + "eval_anthropic_toxic_prompts_token_set_f1": 0.3506330942661759, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006079127723106288, + "eval_anthropic_toxic_prompts_token_set_precision": 0.43022796801569213, + "eval_anthropic_toxic_prompts_token_set_recall": 0.3216893300248196, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 775 + }, + { + "epoch": 3.41, + "eval_arxiv_accuracy": 0.41875, + "eval_arxiv_bleu_score": 3.985202336909602, + "eval_arxiv_bleu_score_sem": 0.11427823441579887, + "eval_arxiv_emb_cos_sim": 0.7555274367332458, + "eval_arxiv_emb_cos_sim_sem": 0.0048518951640682, + "eval_arxiv_emb_top1_equal": 0.20399999618530273, + "eval_arxiv_emb_top1_equal_sem": 0.018039369108186407, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.0569183826446533, + "eval_arxiv_n_ngrams_match_1": 14.21, + "eval_arxiv_n_ngrams_match_2": 2.628, + "eval_arxiv_n_ngrams_match_3": 0.562, + "eval_arxiv_num_pred_words": 32.458, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 21.26193495492643, + "eval_arxiv_pred_num_tokens": 54.359375, + "eval_arxiv_rouge_score": 0.37090033558555063, + "eval_arxiv_runtime": 10.0922, + "eval_arxiv_samples_per_second": 49.543, + "eval_arxiv_steps_per_second": 0.099, + "eval_arxiv_token_set_f1": 0.3694772950313383, + "eval_arxiv_token_set_f1_sem": 0.004405857439219603, + "eval_arxiv_token_set_precision": 0.3106585602925783, + "eval_arxiv_token_set_recall": 0.4722938585818135, + "eval_arxiv_true_num_tokens": 64.0, + "step": 775 + }, + { + "epoch": 3.41, + "eval_python_code_alpaca_accuracy": 0.15715625, + "eval_python_code_alpaca_bleu_score": 6.703733250427161, + "eval_python_code_alpaca_bleu_score_sem": 0.21109497746566985, + "eval_python_code_alpaca_emb_cos_sim": 0.7801447510719299, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.004055824828928678, + "eval_python_code_alpaca_emb_top1_equal": 0.20000000298023224, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.017906459589198134, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.5180015563964844, + "eval_python_code_alpaca_n_ngrams_match_1": 9.252, + "eval_python_code_alpaca_n_ngrams_match_2": 2.58, + "eval_python_code_alpaca_n_ngrams_match_3": 0.83, + "eval_python_code_alpaca_num_pred_words": 28.04, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 12.403783616070726, + "eval_python_code_alpaca_pred_num_tokens": 45.625, + "eval_python_code_alpaca_rouge_score": 0.4425684197842046, + "eval_python_code_alpaca_runtime": 7.0427, + "eval_python_code_alpaca_samples_per_second": 70.996, + "eval_python_code_alpaca_steps_per_second": 0.142, + "eval_python_code_alpaca_token_set_f1": 0.48194826632417753, + "eval_python_code_alpaca_token_set_f1_sem": 0.005645163528078008, + "eval_python_code_alpaca_token_set_precision": 0.5236863218849204, + "eval_python_code_alpaca_token_set_recall": 0.465454270189137, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 775 + }, + { + "epoch": 3.41, + "eval_wikibio_accuracy": 0.3623125, + "eval_wikibio_bleu_score": 5.491105598896529, + "eval_wikibio_bleu_score_sem": 0.22105851156678893, + "eval_wikibio_emb_cos_sim": 0.7249027490615845, + "eval_wikibio_emb_cos_sim_sem": 0.006267459110275117, + "eval_wikibio_emb_top1_equal": 0.16599999368190765, + "eval_wikibio_emb_top1_equal_sem": 0.016656615375209204, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.428741693496704, + "eval_wikibio_n_ngrams_match_1": 8.588, + "eval_wikibio_n_ngrams_match_2": 2.652, + "eval_wikibio_n_ngrams_match_3": 0.99, + "eval_wikibio_num_pred_words": 30.438, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 30.837814903094237, + "eval_wikibio_pred_num_tokens": 57.0234375, + "eval_wikibio_rouge_score": 0.32513790206198945, + "eval_wikibio_runtime": 7.7874, + "eval_wikibio_samples_per_second": 64.206, + "eval_wikibio_steps_per_second": 0.128, + "eval_wikibio_token_set_f1": 0.28688274851414197, + "eval_wikibio_token_set_f1_sem": 0.006164553502671263, + "eval_wikibio_token_set_precision": 0.2807405610348686, + "eval_wikibio_token_set_recall": 0.31940677840867493, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 775 + }, + { + "epoch": 3.41, + "eval_bias-bios_accuracy": 0.51146875, + "eval_bias-bios_bleu_score": 18.121918930666734, + "eval_bias-bios_bleu_score_sem": 0.8110035460979876, + "eval_bias-bios_emb_cos_sim": 0.8704509139060974, + "eval_bias-bios_emb_cos_sim_sem": 0.003329327339760428, + "eval_bias-bios_emb_top1_equal": 0.35199999809265137, + "eval_bias-bios_emb_top1_equal_sem": 0.021380041244738194, + "eval_bias-bios_exact_match": 0.002, + "eval_bias-bios_exact_match_sem": 0.002, + "eval_bias-bios_loss": 1.806106686592102, + "eval_bias-bios_n_ngrams_match_1": 21.02, + "eval_bias-bios_n_ngrams_match_2": 9.28, + "eval_bias-bios_n_ngrams_match_3": 5.16, + "eval_bias-bios_num_pred_words": 37.962, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 6.086703796015009, + "eval_bias-bios_pred_num_tokens": 51.6171875, + "eval_bias-bios_rouge_score": 0.5405338845092296, + "eval_bias-bios_runtime": 7.2575, + "eval_bias-bios_samples_per_second": 68.894, + "eval_bias-bios_steps_per_second": 0.138, + "eval_bias-bios_token_set_f1": 0.5530509264634874, + "eval_bias-bios_token_set_f1_sem": 0.006721981375685361, + "eval_bias-bios_token_set_precision": 0.5179105484815628, + "eval_bias-bios_token_set_recall": 0.6050661590454904, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 775 + }, + { + "epoch": 3.44, + "learning_rate": 0.001, + "loss": 1.7502, + "step": 780 + }, + { + "epoch": 3.49, + "learning_rate": 0.001, + "loss": 2.136, + "step": 792 + }, + { + "epoch": 3.54, + "learning_rate": 0.001, + "loss": 2.0723, + "step": 804 + }, + { + "epoch": 3.55, + "eval_ag_news_accuracy": 0.304625, + "eval_ag_news_bleu_score": 4.60205913529834, + "eval_ag_news_bleu_score_sem": 0.15505346124207306, + "eval_ag_news_emb_cos_sim": 0.8124366998672485, + "eval_ag_news_emb_cos_sim_sem": 0.004842483927420761, + "eval_ag_news_emb_top1_equal": 0.30000001192092896, + "eval_ag_news_emb_top1_equal_sem": 0.020514426052435274, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.5086352825164795, + "eval_ag_news_n_ngrams_match_1": 13.326, + "eval_ag_news_n_ngrams_match_2": 2.786, + "eval_ag_news_n_ngrams_match_3": 0.78, + "eval_ag_news_num_pred_words": 42.364, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 33.40265148186903, + "eval_ag_news_pred_num_tokens": 62.515625, + "eval_ag_news_rouge_score": 0.3569843557545479, + "eval_ag_news_runtime": 7.0297, + "eval_ag_news_samples_per_second": 71.126, + "eval_ag_news_steps_per_second": 0.142, + "eval_ag_news_token_set_f1": 0.3495832375018209, + "eval_ag_news_token_set_f1_sem": 0.004923836215222782, + "eval_ag_news_token_set_precision": 0.3228266549890825, + "eval_ag_news_token_set_recall": 0.4072312643510978, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 806 + }, + { + "epoch": 3.55, + "eval_anthropic_toxic_prompts_accuracy": 0.10778125, + "eval_anthropic_toxic_prompts_bleu_score": 3.478540847435448, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11949226015249044, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6928165555000305, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004549284504941945, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.14399999380111694, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01571693380047095, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 3.05265212059021, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.078, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.922, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.728, + "eval_anthropic_toxic_prompts_num_pred_words": 42.112, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 21.17141918790496, + "eval_anthropic_toxic_prompts_pred_num_tokens": 60.5078125, + "eval_anthropic_toxic_prompts_rouge_score": 0.23727029553517592, + "eval_anthropic_toxic_prompts_runtime": 6.8213, + "eval_anthropic_toxic_prompts_samples_per_second": 73.3, + "eval_anthropic_toxic_prompts_steps_per_second": 0.147, + "eval_anthropic_toxic_prompts_token_set_f1": 0.3390444125571501, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005674077469671755, + "eval_anthropic_toxic_prompts_token_set_precision": 0.4443096392091491, + "eval_anthropic_toxic_prompts_token_set_recall": 0.29979884045938265, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 806 + }, + { + "epoch": 3.55, + "eval_arxiv_accuracy": 0.42653125, + "eval_arxiv_bleu_score": 4.436794308460483, + "eval_arxiv_bleu_score_sem": 0.1341657588866233, + "eval_arxiv_emb_cos_sim": 0.7514486312866211, + "eval_arxiv_emb_cos_sim_sem": 0.005709252325248086, + "eval_arxiv_emb_top1_equal": 0.27399998903274536, + "eval_arxiv_emb_top1_equal_sem": 0.01996610531418925, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 2.9720091819763184, + "eval_arxiv_n_ngrams_match_1": 14.996, + "eval_arxiv_n_ngrams_match_2": 2.906, + "eval_arxiv_n_ngrams_match_3": 0.702, + "eval_arxiv_num_pred_words": 38.318, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 19.53112177853892, + "eval_arxiv_pred_num_tokens": 62.4609375, + "eval_arxiv_rouge_score": 0.3604203239944716, + "eval_arxiv_runtime": 7.3771, + "eval_arxiv_samples_per_second": 67.777, + "eval_arxiv_steps_per_second": 0.136, + "eval_arxiv_token_set_f1": 0.36417605653305596, + "eval_arxiv_token_set_f1_sem": 0.004806454033055331, + "eval_arxiv_token_set_precision": 0.31203181590774637, + "eval_arxiv_token_set_recall": 0.4612931928641072, + "eval_arxiv_true_num_tokens": 64.0, + "step": 806 + }, + { + "epoch": 3.55, + "eval_python_code_alpaca_accuracy": 0.15625, + "eval_python_code_alpaca_bleu_score": 5.248670371367343, + "eval_python_code_alpaca_bleu_score_sem": 0.1658923075864523, + "eval_python_code_alpaca_emb_cos_sim": 0.772881031036377, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.004222808389102189, + "eval_python_code_alpaca_emb_top1_equal": 0.1940000057220459, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.017701828083634023, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.6592369079589844, + "eval_python_code_alpaca_n_ngrams_match_1": 9.83, + "eval_python_code_alpaca_n_ngrams_match_2": 2.906, + "eval_python_code_alpaca_n_ngrams_match_3": 0.95, + "eval_python_code_alpaca_num_pred_words": 37.892, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 14.285383875623985, + "eval_python_code_alpaca_pred_num_tokens": 60.4453125, + "eval_python_code_alpaca_rouge_score": 0.38629347615812193, + "eval_python_code_alpaca_runtime": 6.9202, + "eval_python_code_alpaca_samples_per_second": 72.253, + "eval_python_code_alpaca_steps_per_second": 0.145, + "eval_python_code_alpaca_token_set_f1": 0.4710159508399305, + "eval_python_code_alpaca_token_set_f1_sem": 0.005425578193334968, + "eval_python_code_alpaca_token_set_precision": 0.5464500732328734, + "eval_python_code_alpaca_token_set_recall": 0.43287514402648203, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 806 + }, + { + "epoch": 3.55, + "eval_wikibio_accuracy": 0.3736875, + "eval_wikibio_bleu_score": 4.874848732011922, + "eval_wikibio_bleu_score_sem": 0.2031192765037749, + "eval_wikibio_emb_cos_sim": 0.7125859260559082, + "eval_wikibio_emb_cos_sim_sem": 0.00642388589187894, + "eval_wikibio_emb_top1_equal": 0.17599999904632568, + "eval_wikibio_emb_top1_equal_sem": 0.017047853594066943, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.30753231048584, + "eval_wikibio_n_ngrams_match_1": 8.302, + "eval_wikibio_n_ngrams_match_2": 2.6, + "eval_wikibio_n_ngrams_match_3": 0.98, + "eval_wikibio_num_pred_words": 31.694, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 27.31763079713352, + "eval_wikibio_pred_num_tokens": 62.9765625, + "eval_wikibio_rouge_score": 0.2901026002628322, + "eval_wikibio_runtime": 7.0902, + "eval_wikibio_samples_per_second": 70.52, + "eval_wikibio_steps_per_second": 0.141, + "eval_wikibio_token_set_f1": 0.2677244947313496, + "eval_wikibio_token_set_f1_sem": 0.006927357451301914, + "eval_wikibio_token_set_precision": 0.26264790463960996, + "eval_wikibio_token_set_recall": 0.30136590727706775, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 806 + }, + { + "epoch": 3.55, + "eval_bias-bios_accuracy": 0.50834375, + "eval_bias-bios_bleu_score": 17.11151424904348, + "eval_bias-bios_bleu_score_sem": 0.7161483293889718, + "eval_bias-bios_emb_cos_sim": 0.8768417239189148, + "eval_bias-bios_emb_cos_sim_sem": 0.003176824322460384, + "eval_bias-bios_emb_top1_equal": 0.335999995470047, + "eval_bias-bios_emb_top1_equal_sem": 0.02114479131616093, + "eval_bias-bios_exact_match": 0.0, + "eval_bias-bios_exact_match_sem": 0.0, + "eval_bias-bios_loss": 1.8117848634719849, + "eval_bias-bios_n_ngrams_match_1": 22.56, + "eval_bias-bios_n_ngrams_match_2": 10.072, + "eval_bias-bios_n_ngrams_match_3": 5.464, + "eval_bias-bios_num_pred_words": 46.834, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 6.121363485584354, + "eval_bias-bios_pred_num_tokens": 62.03125, + "eval_bias-bios_rouge_score": 0.5183632183459099, + "eval_bias-bios_runtime": 7.4291, + "eval_bias-bios_samples_per_second": 67.303, + "eval_bias-bios_steps_per_second": 0.135, + "eval_bias-bios_token_set_f1": 0.5541207654751751, + "eval_bias-bios_token_set_f1_sem": 0.006455986166626027, + "eval_bias-bios_token_set_precision": 0.5388610772243735, + "eval_bias-bios_token_set_recall": 0.5830024266618952, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 806 + }, + { + "epoch": 3.59, + "learning_rate": 0.001, + "loss": 1.9926, + "step": 816 + }, + { + "epoch": 3.65, + "learning_rate": 0.001, + "loss": 1.7553, + "step": 828 + }, + { + "epoch": 3.69, + "eval_ag_news_accuracy": 0.3098125, + "eval_ag_news_bleu_score": 4.778759162593388, + "eval_ag_news_bleu_score_sem": 0.16529183512727746, + "eval_ag_news_emb_cos_sim": 0.8075339794158936, + "eval_ag_news_emb_cos_sim_sem": 0.004170562947605222, + "eval_ag_news_emb_top1_equal": 0.2720000147819519, + "eval_ag_news_emb_top1_equal_sem": 0.019920483557355567, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.5137295722961426, + "eval_ag_news_n_ngrams_match_1": 11.836, + "eval_ag_news_n_ngrams_match_2": 2.59, + "eval_ag_news_n_ngrams_match_3": 0.718, + "eval_ag_news_num_pred_words": 30.43, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 33.57324843414079, + "eval_ag_news_pred_num_tokens": 44.484375, + "eval_ag_news_rouge_score": 0.3698284591121457, + "eval_ag_news_runtime": 25.8981, + "eval_ag_news_samples_per_second": 19.306, + "eval_ag_news_steps_per_second": 0.039, + "eval_ag_news_token_set_f1": 0.3519000375758839, + "eval_ag_news_token_set_f1_sem": 0.004968601342353017, + "eval_ag_news_token_set_precision": 0.2999630348624532, + "eval_ag_news_token_set_recall": 0.4435569510076943, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 837 + }, + { + "epoch": 3.69, + "eval_anthropic_toxic_prompts_accuracy": 0.1125625, + "eval_anthropic_toxic_prompts_bleu_score": 6.020973366102066, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.23054669884648044, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.7090463042259216, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.0047895847473398935, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.164000004529953, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.016575810354078253, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 2.860866069793701, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.784, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.862, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.73, + "eval_anthropic_toxic_prompts_num_pred_words": 25.654, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 17.476656388246514, + "eval_anthropic_toxic_prompts_pred_num_tokens": 37.1171875, + "eval_anthropic_toxic_prompts_rouge_score": 0.32144459354531985, + "eval_anthropic_toxic_prompts_runtime": 32.6489, + "eval_anthropic_toxic_prompts_samples_per_second": 15.314, + "eval_anthropic_toxic_prompts_steps_per_second": 0.031, + "eval_anthropic_toxic_prompts_token_set_f1": 0.36752908789633254, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006169528972758457, + "eval_anthropic_toxic_prompts_token_set_precision": 0.4319967039477071, + "eval_anthropic_toxic_prompts_token_set_recall": 0.3459659996987827, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 837 + }, + { + "epoch": 3.69, + "eval_arxiv_accuracy": 0.4183125, + "eval_arxiv_bleu_score": 3.5945334417121058, + "eval_arxiv_bleu_score_sem": 0.10744192704090952, + "eval_arxiv_emb_cos_sim": 0.7484161853790283, + "eval_arxiv_emb_cos_sim_sem": 0.0052398406995996846, + "eval_arxiv_emb_top1_equal": 0.18000000715255737, + "eval_arxiv_emb_top1_equal_sem": 0.017198593316470962, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.0348353385925293, + "eval_arxiv_n_ngrams_match_1": 13.398, + "eval_arxiv_n_ngrams_match_2": 2.454, + "eval_arxiv_n_ngrams_match_3": 0.532, + "eval_arxiv_num_pred_words": 29.392, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 20.797553063154922, + "eval_arxiv_pred_num_tokens": 48.25, + "eval_arxiv_rouge_score": 0.3667639714517909, + "eval_arxiv_runtime": 7.4071, + "eval_arxiv_samples_per_second": 67.503, + "eval_arxiv_steps_per_second": 0.135, + "eval_arxiv_token_set_f1": 0.3633731359269907, + "eval_arxiv_token_set_f1_sem": 0.004638444139085674, + "eval_arxiv_token_set_precision": 0.29672680849330163, + "eval_arxiv_token_set_recall": 0.48394511783078614, + "eval_arxiv_true_num_tokens": 64.0, + "step": 837 + }, + { + "epoch": 3.69, + "eval_python_code_alpaca_accuracy": 0.16496875, + "eval_python_code_alpaca_bleu_score": 8.204287617402592, + "eval_python_code_alpaca_bleu_score_sem": 0.2525555146312472, + "eval_python_code_alpaca_emb_cos_sim": 0.8038115501403809, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.0037339978539967537, + "eval_python_code_alpaca_emb_top1_equal": 0.20399999618530273, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.01803936777538603, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.432777166366577, + "eval_python_code_alpaca_n_ngrams_match_1": 9.406, + "eval_python_code_alpaca_n_ngrams_match_2": 2.684, + "eval_python_code_alpaca_n_ngrams_match_3": 0.878, + "eval_python_code_alpaca_num_pred_words": 23.662, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 11.390471429412177, + "eval_python_code_alpaca_pred_num_tokens": 36.5546875, + "eval_python_code_alpaca_rouge_score": 0.4847810986828926, + "eval_python_code_alpaca_runtime": 7.0241, + "eval_python_code_alpaca_samples_per_second": 71.184, + "eval_python_code_alpaca_steps_per_second": 0.142, + "eval_python_code_alpaca_token_set_f1": 0.5182590446946109, + "eval_python_code_alpaca_token_set_f1_sem": 0.00562349793252812, + "eval_python_code_alpaca_token_set_precision": 0.5384421834239288, + "eval_python_code_alpaca_token_set_recall": 0.5156545489627503, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 837 + }, + { + "epoch": 3.69, + "eval_wikibio_accuracy": 0.3656875, + "eval_wikibio_bleu_score": 5.745106458262442, + "eval_wikibio_bleu_score_sem": 0.22901795042124162, + "eval_wikibio_emb_cos_sim": 0.7403951287269592, + "eval_wikibio_emb_cos_sim_sem": 0.005469197984519312, + "eval_wikibio_emb_top1_equal": 0.17599999904632568, + "eval_wikibio_emb_top1_equal_sem": 0.017047853594066943, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.3659920692443848, + "eval_wikibio_n_ngrams_match_1": 8.816, + "eval_wikibio_n_ngrams_match_2": 2.794, + "eval_wikibio_n_ngrams_match_3": 1.048, + "eval_wikibio_num_pred_words": 30.122, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 28.962215574756698, + "eval_wikibio_pred_num_tokens": 55.6640625, + "eval_wikibio_rouge_score": 0.34001650158245367, + "eval_wikibio_runtime": 7.2558, + "eval_wikibio_samples_per_second": 68.91, + "eval_wikibio_steps_per_second": 0.138, + "eval_wikibio_token_set_f1": 0.29675582828204017, + "eval_wikibio_token_set_f1_sem": 0.0062596510158436265, + "eval_wikibio_token_set_precision": 0.2891007207276478, + "eval_wikibio_token_set_recall": 0.3230393369655439, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 837 + }, + { + "epoch": 3.69, + "eval_bias-bios_accuracy": 0.50478125, + "eval_bias-bios_bleu_score": 17.50427548811937, + "eval_bias-bios_bleu_score_sem": 0.8169224344931361, + "eval_bias-bios_emb_cos_sim": 0.8618428111076355, + "eval_bias-bios_emb_cos_sim_sem": 0.0035489455191403843, + "eval_bias-bios_emb_top1_equal": 0.28200000524520874, + "eval_bias-bios_emb_top1_equal_sem": 0.02014357168251164, + "eval_bias-bios_exact_match": 0.0, + "eval_bias-bios_exact_match_sem": 0.0, + "eval_bias-bios_loss": 1.8244915008544922, + "eval_bias-bios_n_ngrams_match_1": 19.316, + "eval_bias-bios_n_ngrams_match_2": 8.598, + "eval_bias-bios_n_ngrams_match_3": 4.876, + "eval_bias-bios_num_pred_words": 32.384, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 6.199641704934617, + "eval_bias-bios_pred_num_tokens": 43.8203125, + "eval_bias-bios_rouge_score": 0.5388605533551043, + "eval_bias-bios_runtime": 7.3982, + "eval_bias-bios_samples_per_second": 67.584, + "eval_bias-bios_steps_per_second": 0.135, + "eval_bias-bios_token_set_f1": 0.5473429726394254, + "eval_bias-bios_token_set_f1_sem": 0.006850989831666622, + "eval_bias-bios_token_set_precision": 0.48906374168199385, + "eval_bias-bios_token_set_recall": 0.6374659787111201, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 837 + }, + { + "epoch": 3.7, + "learning_rate": 0.001, + "loss": 2.0441, + "step": 840 + }, + { + "epoch": 3.75, + "learning_rate": 0.001, + "loss": 2.0718, + "step": 852 + }, + { + "epoch": 3.81, + "learning_rate": 0.001, + "loss": 2.0151, + "step": 864 + }, + { + "epoch": 3.82, + "eval_ag_news_accuracy": 0.3065, + "eval_ag_news_bleu_score": 4.649906244640959, + "eval_ag_news_bleu_score_sem": 0.15743135612355671, + "eval_ag_news_emb_cos_sim": 0.8059832453727722, + "eval_ag_news_emb_cos_sim_sem": 0.004423913304874249, + "eval_ag_news_emb_top1_equal": 0.24799999594688416, + "eval_ag_news_emb_top1_equal_sem": 0.01933234140950753, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.5127530097961426, + "eval_ag_news_n_ngrams_match_1": 12.54, + "eval_ag_news_n_ngrams_match_2": 2.6, + "eval_ag_news_n_ngrams_match_3": 0.696, + "eval_ag_news_num_pred_words": 35.938, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 33.54047806247922, + "eval_ag_news_pred_num_tokens": 53.4375, + "eval_ag_news_rouge_score": 0.36159762379126176, + "eval_ag_news_runtime": 7.3824, + "eval_ag_news_samples_per_second": 67.729, + "eval_ag_news_steps_per_second": 0.135, + "eval_ag_news_token_set_f1": 0.3494382517083244, + "eval_ag_news_token_set_f1_sem": 0.004868961435080832, + "eval_ag_news_token_set_precision": 0.3109381587573618, + "eval_ag_news_token_set_recall": 0.41859546357524646, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 868 + }, + { + "epoch": 3.82, + "eval_anthropic_toxic_prompts_accuracy": 0.10953125, + "eval_anthropic_toxic_prompts_bleu_score": 4.555906116852318, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.16559701222456658, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6958400011062622, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004614752992158636, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.164000004529953, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.016575810354078253, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 2.9055869579315186, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.9, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.804, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.654, + "eval_anthropic_toxic_prompts_num_pred_words": 31.814, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 18.27596772882499, + "eval_anthropic_toxic_prompts_pred_num_tokens": 46.8984375, + "eval_anthropic_toxic_prompts_rouge_score": 0.2803638376890342, + "eval_anthropic_toxic_prompts_runtime": 8.594, + "eval_anthropic_toxic_prompts_samples_per_second": 58.18, + "eval_anthropic_toxic_prompts_steps_per_second": 0.116, + "eval_anthropic_toxic_prompts_token_set_f1": 0.35203804284307416, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0059340870185835256, + "eval_anthropic_toxic_prompts_token_set_precision": 0.43291347429398824, + "eval_anthropic_toxic_prompts_token_set_recall": 0.3207922059739797, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 868 + }, + { + "epoch": 3.82, + "eval_arxiv_accuracy": 0.421, + "eval_arxiv_bleu_score": 3.9870535349349185, + "eval_arxiv_bleu_score_sem": 0.11244166499753491, + "eval_arxiv_emb_cos_sim": 0.7511023879051208, + "eval_arxiv_emb_cos_sim_sem": 0.005155412127449404, + "eval_arxiv_emb_top1_equal": 0.20600000023841858, + "eval_arxiv_emb_top1_equal_sem": 0.018104793612990725, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.038604736328125, + "eval_arxiv_n_ngrams_match_1": 14.278, + "eval_arxiv_n_ngrams_match_2": 2.6, + "eval_arxiv_n_ngrams_match_3": 0.544, + "eval_arxiv_num_pred_words": 33.842, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 20.876095247947585, + "eval_arxiv_pred_num_tokens": 55.46875, + "eval_arxiv_rouge_score": 0.3653415985199304, + "eval_arxiv_runtime": 7.2009, + "eval_arxiv_samples_per_second": 69.436, + "eval_arxiv_steps_per_second": 0.139, + "eval_arxiv_token_set_f1": 0.3664645739316681, + "eval_arxiv_token_set_f1_sem": 0.0044187143685006655, + "eval_arxiv_token_set_precision": 0.3076602694776184, + "eval_arxiv_token_set_recall": 0.4728561521285042, + "eval_arxiv_true_num_tokens": 64.0, + "step": 868 + }, + { + "epoch": 3.82, + "eval_python_code_alpaca_accuracy": 0.15759375, + "eval_python_code_alpaca_bleu_score": 6.532033427224206, + "eval_python_code_alpaca_bleu_score_sem": 0.21411555875924942, + "eval_python_code_alpaca_emb_cos_sim": 0.7827064394950867, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.0038833028167971654, + "eval_python_code_alpaca_emb_top1_equal": 0.20000000298023224, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.017906459589198134, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.5585899353027344, + "eval_python_code_alpaca_n_ngrams_match_1": 9.566, + "eval_python_code_alpaca_n_ngrams_match_2": 2.766, + "eval_python_code_alpaca_n_ngrams_match_3": 0.938, + "eval_python_code_alpaca_num_pred_words": 30.694, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 12.917589830203067, + "eval_python_code_alpaca_pred_num_tokens": 46.953125, + "eval_python_code_alpaca_rouge_score": 0.4305157812205427, + "eval_python_code_alpaca_runtime": 6.8722, + "eval_python_code_alpaca_samples_per_second": 72.757, + "eval_python_code_alpaca_steps_per_second": 0.146, + "eval_python_code_alpaca_token_set_f1": 0.49402210084007936, + "eval_python_code_alpaca_token_set_f1_sem": 0.005532471827537771, + "eval_python_code_alpaca_token_set_precision": 0.539240847548361, + "eval_python_code_alpaca_token_set_recall": 0.47278297846068446, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 868 + }, + { + "epoch": 3.82, + "eval_wikibio_accuracy": 0.36740625, + "eval_wikibio_bleu_score": 5.1254217873397945, + "eval_wikibio_bleu_score_sem": 0.21410165833187295, + "eval_wikibio_emb_cos_sim": 0.7113275527954102, + "eval_wikibio_emb_cos_sim_sem": 0.0068368860752584535, + "eval_wikibio_emb_top1_equal": 0.15600000321865082, + "eval_wikibio_emb_top1_equal_sem": 0.01624363651663569, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.370696783065796, + "eval_wikibio_n_ngrams_match_1": 8.212, + "eval_wikibio_n_ngrams_match_2": 2.554, + "eval_wikibio_n_ngrams_match_3": 0.912, + "eval_wikibio_num_pred_words": 29.378, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 29.098795543578024, + "eval_wikibio_pred_num_tokens": 58.8515625, + "eval_wikibio_rouge_score": 0.30634676609525147, + "eval_wikibio_runtime": 7.0087, + "eval_wikibio_samples_per_second": 71.34, + "eval_wikibio_steps_per_second": 0.143, + "eval_wikibio_token_set_f1": 0.2760021469603922, + "eval_wikibio_token_set_f1_sem": 0.006655398358510342, + "eval_wikibio_token_set_precision": 0.2669645781576738, + "eval_wikibio_token_set_recall": 0.3113305789003813, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 868 + }, + { + "epoch": 3.82, + "eval_bias-bios_accuracy": 0.51403125, + "eval_bias-bios_bleu_score": 18.309380581053627, + "eval_bias-bios_bleu_score_sem": 0.7998042611430283, + "eval_bias-bios_emb_cos_sim": 0.8731557130813599, + "eval_bias-bios_emb_cos_sim_sem": 0.0032644259591032673, + "eval_bias-bios_emb_top1_equal": 0.30399999022483826, + "eval_bias-bios_emb_top1_equal_sem": 0.020591649838958805, + "eval_bias-bios_exact_match": 0.002, + "eval_bias-bios_exact_match_sem": 0.002, + "eval_bias-bios_loss": 1.78922438621521, + "eval_bias-bios_n_ngrams_match_1": 21.338, + "eval_bias-bios_n_ngrams_match_2": 9.45, + "eval_bias-bios_n_ngrams_match_3": 5.204, + "eval_bias-bios_num_pred_words": 39.284, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 5.984808765578816, + "eval_bias-bios_pred_num_tokens": 53.078125, + "eval_bias-bios_rouge_score": 0.5363434462187412, + "eval_bias-bios_runtime": 7.3659, + "eval_bias-bios_samples_per_second": 67.88, + "eval_bias-bios_steps_per_second": 0.136, + "eval_bias-bios_token_set_f1": 0.5546380520588752, + "eval_bias-bios_token_set_f1_sem": 0.006561511146551417, + "eval_bias-bios_token_set_precision": 0.519204443494721, + "eval_bias-bios_token_set_recall": 0.608071169462653, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 868 + }, + { + "epoch": 3.86, + "learning_rate": 0.001, + "loss": 1.8108, + "step": 876 + }, + { + "epoch": 3.91, + "learning_rate": 0.001, + "loss": 1.9904, + "step": 888 + }, + { + "epoch": 3.96, + "eval_ag_news_accuracy": 0.30365625, + "eval_ag_news_bleu_score": 4.70485632940363, + "eval_ag_news_bleu_score_sem": 0.1545656516010875, + "eval_ag_news_emb_cos_sim": 0.8099994659423828, + "eval_ag_news_emb_cos_sim_sem": 0.004615151499470739, + "eval_ag_news_emb_top1_equal": 0.26600000262260437, + "eval_ag_news_emb_top1_equal_sem": 0.01978055817719369, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.51027250289917, + "eval_ag_news_n_ngrams_match_1": 13.276, + "eval_ag_news_n_ngrams_match_2": 2.766, + "eval_ag_news_n_ngrams_match_3": 0.722, + "eval_ag_news_num_pred_words": 41.458, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 33.457383775898954, + "eval_ag_news_pred_num_tokens": 61.1484375, + "eval_ag_news_rouge_score": 0.3576016763421582, + "eval_ag_news_runtime": 7.2607, + "eval_ag_news_samples_per_second": 68.864, + "eval_ag_news_steps_per_second": 0.138, + "eval_ag_news_token_set_f1": 0.3483609027032103, + "eval_ag_news_token_set_f1_sem": 0.004683975440046473, + "eval_ag_news_token_set_precision": 0.32529803711985894, + "eval_ag_news_token_set_recall": 0.3941194852067757, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 899 + }, + { + "epoch": 3.96, + "eval_anthropic_toxic_prompts_accuracy": 0.10740625, + "eval_anthropic_toxic_prompts_bleu_score": 3.6769418625327526, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.133806955295666, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6916981339454651, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00461967735634389, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.12999999523162842, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015055010489467818, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 3.042083978652954, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.084, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.976, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.788, + "eval_anthropic_toxic_prompts_num_pred_words": 40.926, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 20.9488547426713, + "eval_anthropic_toxic_prompts_pred_num_tokens": 57.859375, + "eval_anthropic_toxic_prompts_rouge_score": 0.2433747464426228, + "eval_anthropic_toxic_prompts_runtime": 47.0283, + "eval_anthropic_toxic_prompts_samples_per_second": 10.632, + "eval_anthropic_toxic_prompts_steps_per_second": 0.021, + "eval_anthropic_toxic_prompts_token_set_f1": 0.3355777043880606, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00582801703663308, + "eval_anthropic_toxic_prompts_token_set_precision": 0.4417152321391377, + "eval_anthropic_toxic_prompts_token_set_recall": 0.2954359544567566, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 899 + }, + { + "epoch": 3.96, + "eval_arxiv_accuracy": 0.42559375, + "eval_arxiv_bleu_score": 4.2724707434332805, + "eval_arxiv_bleu_score_sem": 0.12141510014680776, + "eval_arxiv_emb_cos_sim": 0.7448193430900574, + "eval_arxiv_emb_cos_sim_sem": 0.005906011645397703, + "eval_arxiv_emb_top1_equal": 0.257999986410141, + "eval_arxiv_emb_top1_equal_sem": 0.019586711692263472, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 2.988633871078491, + "eval_arxiv_n_ngrams_match_1": 14.812, + "eval_arxiv_n_ngrams_match_2": 2.814, + "eval_arxiv_n_ngrams_match_3": 0.622, + "eval_arxiv_num_pred_words": 37.656, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 19.858534633548246, + "eval_arxiv_pred_num_tokens": 61.75, + "eval_arxiv_rouge_score": 0.3571695577267662, + "eval_arxiv_runtime": 11.6894, + "eval_arxiv_samples_per_second": 42.774, + "eval_arxiv_steps_per_second": 0.086, + "eval_arxiv_token_set_f1": 0.3610709746435564, + "eval_arxiv_token_set_f1_sem": 0.00493953188589629, + "eval_arxiv_token_set_precision": 0.31268383336107497, + "eval_arxiv_token_set_recall": 0.4499879942866487, + "eval_arxiv_true_num_tokens": 64.0, + "step": 899 + }, + { + "epoch": 3.96, + "eval_python_code_alpaca_accuracy": 0.153, + "eval_python_code_alpaca_bleu_score": 5.200061694757475, + "eval_python_code_alpaca_bleu_score_sem": 0.17449763706947194, + "eval_python_code_alpaca_emb_cos_sim": 0.7678459882736206, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.0037719813318817415, + "eval_python_code_alpaca_emb_top1_equal": 0.1979999989271164, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.017838958581409683, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.7135307788848877, + "eval_python_code_alpaca_n_ngrams_match_1": 9.646, + "eval_python_code_alpaca_n_ngrams_match_2": 2.78, + "eval_python_code_alpaca_n_ngrams_match_3": 0.904, + "eval_python_code_alpaca_num_pred_words": 37.516, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 15.082434354347528, + "eval_python_code_alpaca_pred_num_tokens": 58.8828125, + "eval_python_code_alpaca_rouge_score": 0.3818168407275595, + "eval_python_code_alpaca_runtime": 7.6206, + "eval_python_code_alpaca_samples_per_second": 65.611, + "eval_python_code_alpaca_steps_per_second": 0.131, + "eval_python_code_alpaca_token_set_f1": 0.46320618527919866, + "eval_python_code_alpaca_token_set_f1_sem": 0.0054899797669887875, + "eval_python_code_alpaca_token_set_precision": 0.536733945796312, + "eval_python_code_alpaca_token_set_recall": 0.42535964460210746, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 899 + }, + { + "epoch": 3.96, + "eval_wikibio_accuracy": 0.371625, + "eval_wikibio_bleu_score": 4.961991532761174, + "eval_wikibio_bleu_score_sem": 0.19391037642899922, + "eval_wikibio_emb_cos_sim": 0.710414469242096, + "eval_wikibio_emb_cos_sim_sem": 0.006885463983323721, + "eval_wikibio_emb_top1_equal": 0.1679999977350235, + "eval_wikibio_emb_top1_equal_sem": 0.016736554076096456, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.3331379890441895, + "eval_wikibio_n_ngrams_match_1": 8.558, + "eval_wikibio_n_ngrams_match_2": 2.624, + "eval_wikibio_n_ngrams_match_3": 0.958, + "eval_wikibio_num_pred_words": 32.418, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 28.02614961148761, + "eval_wikibio_pred_num_tokens": 62.796875, + "eval_wikibio_rouge_score": 0.301097672734814, + "eval_wikibio_runtime": 7.0138, + "eval_wikibio_samples_per_second": 71.288, + "eval_wikibio_steps_per_second": 0.143, + "eval_wikibio_token_set_f1": 0.27633025678840173, + "eval_wikibio_token_set_f1_sem": 0.0066326804022172215, + "eval_wikibio_token_set_precision": 0.27362565948405515, + "eval_wikibio_token_set_recall": 0.30748851633911084, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 899 + }, + { + "epoch": 3.96, + "eval_bias-bios_accuracy": 0.5096875, + "eval_bias-bios_bleu_score": 17.551685060770186, + "eval_bias-bios_bleu_score_sem": 0.7535367337734199, + "eval_bias-bios_emb_cos_sim": 0.8744969964027405, + "eval_bias-bios_emb_cos_sim_sem": 0.0031596961716421285, + "eval_bias-bios_emb_top1_equal": 0.33799999952316284, + "eval_bias-bios_emb_top1_equal_sem": 0.02117566563684607, + "eval_bias-bios_exact_match": 0.0, + "eval_bias-bios_exact_match_sem": 0.0, + "eval_bias-bios_loss": 1.8095989227294922, + "eval_bias-bios_n_ngrams_match_1": 22.328, + "eval_bias-bios_n_ngrams_match_2": 9.944, + "eval_bias-bios_n_ngrams_match_3": 5.458, + "eval_bias-bios_num_pred_words": 45.576, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 6.107997162059592, + "eval_bias-bios_pred_num_tokens": 60.453125, + "eval_bias-bios_rouge_score": 0.5208868368337067, + "eval_bias-bios_runtime": 7.3719, + "eval_bias-bios_samples_per_second": 67.825, + "eval_bias-bios_steps_per_second": 0.136, + "eval_bias-bios_token_set_f1": 0.5481558626317612, + "eval_bias-bios_token_set_f1_sem": 0.0065506123204596995, + "eval_bias-bios_token_set_precision": 0.5357022644757441, + "eval_bias-bios_token_set_recall": 0.5720472461620488, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 899 + }, + { + "epoch": 3.96, + "learning_rate": 0.001, + "loss": 2.0314, + "step": 900 + }, + { + "epoch": 4.02, + "learning_rate": 0.001, + "loss": 1.879, + "step": 912 + }, + { + "epoch": 4.07, + "learning_rate": 0.001, + "loss": 2.1048, + "step": 924 + }, + { + "epoch": 4.1, + "eval_ag_news_accuracy": 0.30309375, + "eval_ag_news_bleu_score": 4.649942117770666, + "eval_ag_news_bleu_score_sem": 0.1556338609252372, + "eval_ag_news_emb_cos_sim": 0.8118283152580261, + "eval_ag_news_emb_cos_sim_sem": 0.004582791772767747, + "eval_ag_news_emb_top1_equal": 0.2639999985694885, + "eval_ag_news_emb_top1_equal_sem": 0.019732885240582997, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.52583646774292, + "eval_ag_news_n_ngrams_match_1": 13.382, + "eval_ag_news_n_ngrams_match_2": 2.81, + "eval_ag_news_n_ngrams_match_3": 0.714, + "eval_ag_news_num_pred_words": 41.96, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 33.98218673429963, + "eval_ag_news_pred_num_tokens": 62.3828125, + "eval_ag_news_rouge_score": 0.3575831135786729, + "eval_ag_news_runtime": 7.3077, + "eval_ag_news_samples_per_second": 68.421, + "eval_ag_news_steps_per_second": 0.137, + "eval_ag_news_token_set_f1": 0.3497080600987024, + "eval_ag_news_token_set_f1_sem": 0.004748773824398774, + "eval_ag_news_token_set_precision": 0.32643981583812415, + "eval_ag_news_token_set_recall": 0.40006084076159165, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 930 + }, + { + "epoch": 4.1, + "eval_anthropic_toxic_prompts_accuracy": 0.10728125, + "eval_anthropic_toxic_prompts_bleu_score": 3.624649536553562, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12608760768171234, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.698235034942627, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004364685988613237, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1459999978542328, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015807205702664997, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 3.06230092048645, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.266, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.98, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.768, + "eval_anthropic_toxic_prompts_num_pred_words": 42.214, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 21.376686675103247, + "eval_anthropic_toxic_prompts_pred_num_tokens": 60.625, + "eval_anthropic_toxic_prompts_rouge_score": 0.2454785616967755, + "eval_anthropic_toxic_prompts_runtime": 7.1632, + "eval_anthropic_toxic_prompts_samples_per_second": 69.801, + "eval_anthropic_toxic_prompts_steps_per_second": 0.14, + "eval_anthropic_toxic_prompts_token_set_f1": 0.3365336026554279, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005508481783304389, + "eval_anthropic_toxic_prompts_token_set_precision": 0.4560143105044075, + "eval_anthropic_toxic_prompts_token_set_recall": 0.28870884584713874, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 930 + }, + { + "epoch": 4.1, + "eval_arxiv_accuracy": 0.42790625, + "eval_arxiv_bleu_score": 4.381704552162308, + "eval_arxiv_bleu_score_sem": 0.12756008834149637, + "eval_arxiv_emb_cos_sim": 0.7607366442680359, + "eval_arxiv_emb_cos_sim_sem": 0.005076740919718705, + "eval_arxiv_emb_top1_equal": 0.28600001335144043, + "eval_arxiv_emb_top1_equal_sem": 0.020229345383440313, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 2.9774320125579834, + "eval_arxiv_n_ngrams_match_1": 15.328, + "eval_arxiv_n_ngrams_match_2": 2.892, + "eval_arxiv_n_ngrams_match_3": 0.634, + "eval_arxiv_num_pred_words": 38.774, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 19.63732343956472, + "eval_arxiv_pred_num_tokens": 62.8515625, + "eval_arxiv_rouge_score": 0.36766133482315777, + "eval_arxiv_runtime": 7.3221, + "eval_arxiv_samples_per_second": 68.287, + "eval_arxiv_steps_per_second": 0.137, + "eval_arxiv_token_set_f1": 0.37072349741716376, + "eval_arxiv_token_set_f1_sem": 0.004570708041320284, + "eval_arxiv_token_set_precision": 0.31969291145012907, + "eval_arxiv_token_set_recall": 0.4626230168900101, + "eval_arxiv_true_num_tokens": 64.0, + "step": 930 + }, + { + "epoch": 4.1, + "eval_python_code_alpaca_accuracy": 0.15403125, + "eval_python_code_alpaca_bleu_score": 5.122233724835081, + "eval_python_code_alpaca_bleu_score_sem": 0.15628240003499672, + "eval_python_code_alpaca_emb_cos_sim": 0.777225136756897, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.003568612987872965, + "eval_python_code_alpaca_emb_top1_equal": 0.17599999904632568, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.017047853594066943, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.705463409423828, + "eval_python_code_alpaca_n_ngrams_match_1": 9.852, + "eval_python_code_alpaca_n_ngrams_match_2": 2.842, + "eval_python_code_alpaca_n_ngrams_match_3": 0.954, + "eval_python_code_alpaca_num_pred_words": 38.956, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 14.961248267762413, + "eval_python_code_alpaca_pred_num_tokens": 61.1015625, + "eval_python_code_alpaca_rouge_score": 0.38312828213368866, + "eval_python_code_alpaca_runtime": 9.9902, + "eval_python_code_alpaca_samples_per_second": 50.049, + "eval_python_code_alpaca_steps_per_second": 0.1, + "eval_python_code_alpaca_token_set_f1": 0.46499631424575827, + "eval_python_code_alpaca_token_set_f1_sem": 0.00515758919249843, + "eval_python_code_alpaca_token_set_precision": 0.5498324800101624, + "eval_python_code_alpaca_token_set_recall": 0.420151556893869, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 930 + }, + { + "epoch": 4.1, + "eval_wikibio_accuracy": 0.3703125, + "eval_wikibio_bleu_score": 5.0295081586948, + "eval_wikibio_bleu_score_sem": 0.2129816091894679, + "eval_wikibio_emb_cos_sim": 0.7173275351524353, + "eval_wikibio_emb_cos_sim_sem": 0.006135841074850147, + "eval_wikibio_emb_top1_equal": 0.1459999978542328, + "eval_wikibio_emb_top1_equal_sem": 0.015807205702664997, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.323803186416626, + "eval_wikibio_n_ngrams_match_1": 8.628, + "eval_wikibio_n_ngrams_match_2": 2.744, + "eval_wikibio_n_ngrams_match_3": 0.998, + "eval_wikibio_num_pred_words": 31.958, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 27.76574832467491, + "eval_wikibio_pred_num_tokens": 62.890625, + "eval_wikibio_rouge_score": 0.3016818815426825, + "eval_wikibio_runtime": 7.2876, + "eval_wikibio_samples_per_second": 68.61, + "eval_wikibio_steps_per_second": 0.137, + "eval_wikibio_token_set_f1": 0.2809928597513778, + "eval_wikibio_token_set_f1_sem": 0.006686818517156172, + "eval_wikibio_token_set_precision": 0.2752523175790538, + "eval_wikibio_token_set_recall": 0.31501951298363845, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 930 + }, + { + "epoch": 4.1, + "eval_bias-bios_accuracy": 0.51159375, + "eval_bias-bios_bleu_score": 17.560732357402014, + "eval_bias-bios_bleu_score_sem": 0.7532225324274616, + "eval_bias-bios_emb_cos_sim": 0.8785954713821411, + "eval_bias-bios_emb_cos_sim_sem": 0.0027169508826951332, + "eval_bias-bios_emb_top1_equal": 0.36000001430511475, + "eval_bias-bios_emb_top1_equal_sem": 0.021487751507037762, + "eval_bias-bios_exact_match": 0.0, + "eval_bias-bios_exact_match_sem": 0.0, + "eval_bias-bios_loss": 1.7983323335647583, + "eval_bias-bios_n_ngrams_match_1": 22.614, + "eval_bias-bios_n_ngrams_match_2": 10.156, + "eval_bias-bios_n_ngrams_match_3": 5.602, + "eval_bias-bios_num_pred_words": 46.644, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 6.0395670780886865, + "eval_bias-bios_pred_num_tokens": 62.125, + "eval_bias-bios_rouge_score": 0.523223725376486, + "eval_bias-bios_runtime": 7.4795, + "eval_bias-bios_samples_per_second": 66.85, + "eval_bias-bios_steps_per_second": 0.134, + "eval_bias-bios_token_set_f1": 0.5539285540120094, + "eval_bias-bios_token_set_f1_sem": 0.0064311476985358445, + "eval_bias-bios_token_set_precision": 0.5429808276328268, + "eval_bias-bios_token_set_recall": 0.5750748467740818, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 930 + }, + { + "epoch": 4.12, + "learning_rate": 0.001, + "loss": 2.0386, + "step": 936 + }, + { + "epoch": 4.18, + "learning_rate": 0.001, + "loss": 1.8774, + "step": 948 + }, + { + "epoch": 4.23, + "learning_rate": 0.001, + "loss": 1.769, + "step": 960 + }, + { + "epoch": 4.23, + "eval_ag_news_accuracy": 0.30234375, + "eval_ag_news_bleu_score": 4.147391091922028, + "eval_ag_news_bleu_score_sem": 0.15238698424262714, + "eval_ag_news_emb_cos_sim": 0.7980059385299683, + "eval_ag_news_emb_cos_sim_sem": 0.004560917853014478, + "eval_ag_news_emb_top1_equal": 0.27000001072883606, + "eval_ag_news_emb_top1_equal_sem": 0.019874356669179787, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.63006329536438, + "eval_ag_news_n_ngrams_match_1": 10.884, + "eval_ag_news_n_ngrams_match_2": 2.304, + "eval_ag_news_n_ngrams_match_3": 0.594, + "eval_ag_news_num_pred_words": 26.08, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 37.715203739197406, + "eval_ag_news_pred_num_tokens": 38.3046875, + "eval_ag_news_rouge_score": 0.36068826779136964, + "eval_ag_news_runtime": 7.2741, + "eval_ag_news_samples_per_second": 68.737, + "eval_ag_news_steps_per_second": 0.137, + "eval_ag_news_token_set_f1": 0.34376003652816817, + "eval_ag_news_token_set_f1_sem": 0.004950570396033107, + "eval_ag_news_token_set_precision": 0.285037592368919, + "eval_ag_news_token_set_recall": 0.45211257290244156, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 961 + }, + { + "epoch": 4.23, + "eval_anthropic_toxic_prompts_accuracy": 0.11275, + "eval_anthropic_toxic_prompts_bleu_score": 6.354488424332666, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.23522810593773286, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.7129065990447998, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004523031336356766, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.14800000190734863, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015896458012572223, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 2.8465847969055176, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.62, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.794, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.694, + "eval_anthropic_toxic_prompts_num_pred_words": 22.834, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 17.22884125884587, + "eval_anthropic_toxic_prompts_pred_num_tokens": 32.046875, + "eval_anthropic_toxic_prompts_rouge_score": 0.33907338462194764, + "eval_anthropic_toxic_prompts_runtime": 6.8732, + "eval_anthropic_toxic_prompts_samples_per_second": 72.747, + "eval_anthropic_toxic_prompts_steps_per_second": 0.145, + "eval_anthropic_toxic_prompts_token_set_f1": 0.3653409896107974, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006149537629688382, + "eval_anthropic_toxic_prompts_token_set_precision": 0.4229459544561599, + "eval_anthropic_toxic_prompts_token_set_recall": 0.34831732469949417, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 961 + }, + { + "epoch": 4.23, + "eval_arxiv_accuracy": 0.4078125, + "eval_arxiv_bleu_score": 3.3433365195319773, + "eval_arxiv_bleu_score_sem": 0.0999958730659908, + "eval_arxiv_emb_cos_sim": 0.748033881187439, + "eval_arxiv_emb_cos_sim_sem": 0.004871760220456363, + "eval_arxiv_emb_top1_equal": 0.17000000178813934, + "eval_arxiv_emb_top1_equal_sem": 0.016815633120741882, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.133113384246826, + "eval_arxiv_n_ngrams_match_1": 12.732, + "eval_arxiv_n_ngrams_match_2": 2.336, + "eval_arxiv_n_ngrams_match_3": 0.508, + "eval_arxiv_num_pred_words": 26.09, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 22.945306005739177, + "eval_arxiv_pred_num_tokens": 41.4140625, + "eval_arxiv_rouge_score": 0.3638914135343809, + "eval_arxiv_runtime": 52.9647, + "eval_arxiv_samples_per_second": 9.44, + "eval_arxiv_steps_per_second": 0.019, + "eval_arxiv_token_set_f1": 0.3630713709509955, + "eval_arxiv_token_set_f1_sem": 0.0044595886226552126, + "eval_arxiv_token_set_precision": 0.2937237459695398, + "eval_arxiv_token_set_recall": 0.4890969392190314, + "eval_arxiv_true_num_tokens": 64.0, + "step": 961 + }, + { + "epoch": 4.23, + "eval_python_code_alpaca_accuracy": 0.16284375, + "eval_python_code_alpaca_bleu_score": 8.244648427163419, + "eval_python_code_alpaca_bleu_score_sem": 0.27193364945369436, + "eval_python_code_alpaca_emb_cos_sim": 0.7976054549217224, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.00378482253029405, + "eval_python_code_alpaca_emb_top1_equal": 0.2160000056028366, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.018421909471797383, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.4601211547851562, + "eval_python_code_alpaca_n_ngrams_match_1": 8.85, + "eval_python_code_alpaca_n_ngrams_match_2": 2.302, + "eval_python_code_alpaca_n_ngrams_match_3": 0.768, + "eval_python_code_alpaca_num_pred_words": 21.24, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 11.706229719816177, + "eval_python_code_alpaca_pred_num_tokens": 32.0703125, + "eval_python_code_alpaca_rouge_score": 0.4873818187316993, + "eval_python_code_alpaca_runtime": 6.9102, + "eval_python_code_alpaca_samples_per_second": 72.357, + "eval_python_code_alpaca_steps_per_second": 0.145, + "eval_python_code_alpaca_token_set_f1": 0.5018531295912411, + "eval_python_code_alpaca_token_set_f1_sem": 0.005550867674810159, + "eval_python_code_alpaca_token_set_precision": 0.5139247919672837, + "eval_python_code_alpaca_token_set_recall": 0.5075546170413242, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 961 + }, + { + "epoch": 4.23, + "eval_wikibio_accuracy": 0.35096875, + "eval_wikibio_bleu_score": 6.137347588754137, + "eval_wikibio_bleu_score_sem": 0.24090041823655922, + "eval_wikibio_emb_cos_sim": 0.7398593425750732, + "eval_wikibio_emb_cos_sim_sem": 0.0057534999648952215, + "eval_wikibio_emb_top1_equal": 0.1679999977350235, + "eval_wikibio_emb_top1_equal_sem": 0.016736554076096456, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.5328280925750732, + "eval_wikibio_n_ngrams_match_1": 8.75, + "eval_wikibio_n_ngrams_match_2": 2.724, + "eval_wikibio_n_ngrams_match_3": 1.0, + "eval_wikibio_num_pred_words": 27.262, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 34.22060994651517, + "eval_wikibio_pred_num_tokens": 48.1953125, + "eval_wikibio_rouge_score": 0.3567907867484968, + "eval_wikibio_runtime": 8.1224, + "eval_wikibio_samples_per_second": 61.558, + "eval_wikibio_steps_per_second": 0.123, + "eval_wikibio_token_set_f1": 0.3103392604417974, + "eval_wikibio_token_set_f1_sem": 0.005936015862774939, + "eval_wikibio_token_set_precision": 0.29465665802898156, + "eval_wikibio_token_set_recall": 0.34358783712012403, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 961 + }, + { + "epoch": 4.23, + "eval_bias-bios_accuracy": 0.506, + "eval_bias-bios_bleu_score": 16.979589075313555, + "eval_bias-bios_bleu_score_sem": 0.8283281982214029, + "eval_bias-bios_emb_cos_sim": 0.8586215376853943, + "eval_bias-bios_emb_cos_sim_sem": 0.003312062910103837, + "eval_bias-bios_emb_top1_equal": 0.28200000524520874, + "eval_bias-bios_emb_top1_equal_sem": 0.02014357434811239, + "eval_bias-bios_exact_match": 0.002, + "eval_bias-bios_exact_match_sem": 0.002, + "eval_bias-bios_loss": 1.8640848398208618, + "eval_bias-bios_n_ngrams_match_1": 18.688, + "eval_bias-bios_n_ngrams_match_2": 8.448, + "eval_bias-bios_n_ngrams_match_3": 4.798, + "eval_bias-bios_num_pred_words": 29.634, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 6.45003037320653, + "eval_bias-bios_pred_num_tokens": 39.71875, + "eval_bias-bios_rouge_score": 0.5397636174739893, + "eval_bias-bios_runtime": 7.1597, + "eval_bias-bios_samples_per_second": 69.836, + "eval_bias-bios_steps_per_second": 0.14, + "eval_bias-bios_token_set_f1": 0.5489658834860061, + "eval_bias-bios_token_set_f1_sem": 0.006750833075212532, + "eval_bias-bios_token_set_precision": 0.48311541662210883, + "eval_bias-bios_token_set_recall": 0.6515866266764235, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 961 + }, + { + "epoch": 4.28, + "learning_rate": 0.001, + "loss": 2.1033, + "step": 972 + }, + { + "epoch": 4.33, + "learning_rate": 0.001, + "loss": 2.0322, + "step": 984 + }, + { + "epoch": 4.37, + "eval_ag_news_accuracy": 0.302625, + "eval_ag_news_bleu_score": 4.721562311279806, + "eval_ag_news_bleu_score_sem": 0.15220658679469298, + "eval_ag_news_emb_cos_sim": 0.8130433559417725, + "eval_ag_news_emb_cos_sim_sem": 0.004464113896182924, + "eval_ag_news_emb_top1_equal": 0.27399998903274536, + "eval_ag_news_emb_top1_equal_sem": 0.019966103981388875, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.5447354316711426, + "eval_ag_news_n_ngrams_match_1": 13.134, + "eval_ag_news_n_ngrams_match_2": 2.762, + "eval_ag_news_n_ngrams_match_3": 0.746, + "eval_ag_news_num_pred_words": 39.284, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 34.63052199081744, + "eval_ag_news_pred_num_tokens": 59.6953125, + "eval_ag_news_rouge_score": 0.3646941890955627, + "eval_ag_news_runtime": 7.3132, + "eval_ag_news_samples_per_second": 68.369, + "eval_ag_news_steps_per_second": 0.137, + "eval_ag_news_token_set_f1": 0.35109915978400064, + "eval_ag_news_token_set_f1_sem": 0.0047277102046728026, + "eval_ag_news_token_set_precision": 0.3224770229379847, + "eval_ag_news_token_set_recall": 0.4050023141334169, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 992 + }, + { + "epoch": 4.37, + "eval_anthropic_toxic_prompts_accuracy": 0.10625, + "eval_anthropic_toxic_prompts_bleu_score": 3.857535866643261, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1364815254506636, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6963840126991272, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004629406799080843, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.15399999916553497, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.016158283980625493, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 3.0389821529388428, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.056, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.866, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.698, + "eval_anthropic_toxic_prompts_num_pred_words": 37.574, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 20.883975720078084, + "eval_anthropic_toxic_prompts_pred_num_tokens": 55.6796875, + "eval_anthropic_toxic_prompts_rouge_score": 0.25629776628720247, + "eval_anthropic_toxic_prompts_runtime": 6.8668, + "eval_anthropic_toxic_prompts_samples_per_second": 72.815, + "eval_anthropic_toxic_prompts_steps_per_second": 0.146, + "eval_anthropic_toxic_prompts_token_set_f1": 0.34453358271344703, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005626971181626931, + "eval_anthropic_toxic_prompts_token_set_precision": 0.443931261076189, + "eval_anthropic_toxic_prompts_token_set_recall": 0.3034545321130603, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 992 + }, + { + "epoch": 4.37, + "eval_arxiv_accuracy": 0.42146875, + "eval_arxiv_bleu_score": 4.467109503010037, + "eval_arxiv_bleu_score_sem": 0.12513951968969245, + "eval_arxiv_emb_cos_sim": 0.7589080929756165, + "eval_arxiv_emb_cos_sim_sem": 0.0052892959235117465, + "eval_arxiv_emb_top1_equal": 0.2759999930858612, + "eval_arxiv_emb_top1_equal_sem": 0.02001121794127971, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.0264053344726562, + "eval_arxiv_n_ngrams_match_1": 15.18, + "eval_arxiv_n_ngrams_match_2": 2.974, + "eval_arxiv_n_ngrams_match_3": 0.668, + "eval_arxiv_num_pred_words": 36.86, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 20.622966521695513, + "eval_arxiv_pred_num_tokens": 59.8984375, + "eval_arxiv_rouge_score": 0.3693511442153703, + "eval_arxiv_runtime": 7.306, + "eval_arxiv_samples_per_second": 68.436, + "eval_arxiv_steps_per_second": 0.137, + "eval_arxiv_token_set_f1": 0.36958194757201546, + "eval_arxiv_token_set_f1_sem": 0.0045242802112849066, + "eval_arxiv_token_set_precision": 0.32082253661436355, + "eval_arxiv_token_set_recall": 0.4518457470546488, + "eval_arxiv_true_num_tokens": 64.0, + "step": 992 + }, + { + "epoch": 4.37, + "eval_python_code_alpaca_accuracy": 0.1514375, + "eval_python_code_alpaca_bleu_score": 5.4034348745502045, + "eval_python_code_alpaca_bleu_score_sem": 0.16839146425841325, + "eval_python_code_alpaca_emb_cos_sim": 0.7750210762023926, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.003707060625619144, + "eval_python_code_alpaca_emb_top1_equal": 0.20999999344348907, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.018233622097230975, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.709430694580078, + "eval_python_code_alpaca_n_ngrams_match_1": 9.768, + "eval_python_code_alpaca_n_ngrams_match_2": 2.758, + "eval_python_code_alpaca_n_ngrams_match_3": 0.88, + "eval_python_code_alpaca_num_pred_words": 35.508, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 15.02072170196434, + "eval_python_code_alpaca_pred_num_tokens": 56.53125, + "eval_python_code_alpaca_rouge_score": 0.39803901244698126, + "eval_python_code_alpaca_runtime": 6.8684, + "eval_python_code_alpaca_samples_per_second": 72.797, + "eval_python_code_alpaca_steps_per_second": 0.146, + "eval_python_code_alpaca_token_set_f1": 0.47170758684628605, + "eval_python_code_alpaca_token_set_f1_sem": 0.005351354049134305, + "eval_python_code_alpaca_token_set_precision": 0.5434103449240332, + "eval_python_code_alpaca_token_set_recall": 0.43454752980053024, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 992 + }, + { + "epoch": 4.37, + "eval_wikibio_accuracy": 0.36184375, + "eval_wikibio_bleu_score": 5.358936275570008, + "eval_wikibio_bleu_score_sem": 0.21306544598498858, + "eval_wikibio_emb_cos_sim": 0.7233623266220093, + "eval_wikibio_emb_cos_sim_sem": 0.006553808605227006, + "eval_wikibio_emb_top1_equal": 0.18799999356269836, + "eval_wikibio_emb_top1_equal_sem": 0.017490679184236527, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.4013521671295166, + "eval_wikibio_n_ngrams_match_1": 8.566, + "eval_wikibio_n_ngrams_match_2": 2.716, + "eval_wikibio_n_ngrams_match_3": 1.036, + "eval_wikibio_num_pred_words": 31.162, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 30.004643923417493, + "eval_wikibio_pred_num_tokens": 61.953125, + "eval_wikibio_rouge_score": 0.31111536849106736, + "eval_wikibio_runtime": 7.0887, + "eval_wikibio_samples_per_second": 70.535, + "eval_wikibio_steps_per_second": 0.141, + "eval_wikibio_token_set_f1": 0.28584804623663285, + "eval_wikibio_token_set_f1_sem": 0.006453280164360098, + "eval_wikibio_token_set_precision": 0.27848457039702645, + "eval_wikibio_token_set_recall": 0.3193362754800391, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 992 + }, + { + "epoch": 4.37, + "eval_bias-bios_accuracy": 0.51265625, + "eval_bias-bios_bleu_score": 17.964736092698793, + "eval_bias-bios_bleu_score_sem": 0.7466608259028716, + "eval_bias-bios_emb_cos_sim": 0.8767092823982239, + "eval_bias-bios_emb_cos_sim_sem": 0.0030343106439704023, + "eval_bias-bios_emb_top1_equal": 0.3580000102519989, + "eval_bias-bios_emb_top1_equal_sem": 0.021461435363634866, + "eval_bias-bios_exact_match": 0.0, + "eval_bias-bios_exact_match_sem": 0.0, + "eval_bias-bios_loss": 1.7962360382080078, + "eval_bias-bios_n_ngrams_match_1": 22.106, + "eval_bias-bios_n_ngrams_match_2": 9.886, + "eval_bias-bios_n_ngrams_match_3": 5.454, + "eval_bias-bios_num_pred_words": 43.802, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 6.026919622698665, + "eval_bias-bios_pred_num_tokens": 59.5234375, + "eval_bias-bios_rouge_score": 0.526119687478555, + "eval_bias-bios_runtime": 7.8302, + "eval_bias-bios_samples_per_second": 63.855, + "eval_bias-bios_steps_per_second": 0.128, + "eval_bias-bios_token_set_f1": 0.5506974926133947, + "eval_bias-bios_token_set_f1_sem": 0.006418396953704471, + "eval_bias-bios_token_set_precision": 0.5331520331431275, + "eval_bias-bios_token_set_recall": 0.5807658588551965, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 992 + }, + { + "epoch": 4.39, + "learning_rate": 0.001, + "loss": 1.9006, + "step": 996 + }, + { + "epoch": 4.44, + "learning_rate": 0.001, + "loss": 1.6887, + "step": 1008 + }, + { + "epoch": 4.49, + "learning_rate": 0.001, + "loss": 2.1588, + "step": 1020 + }, + { + "epoch": 4.51, + "eval_ag_news_accuracy": 0.3020625, + "eval_ag_news_bleu_score": 4.7932275818087815, + "eval_ag_news_bleu_score_sem": 0.15605911700718159, + "eval_ag_news_emb_cos_sim": 0.8050810694694519, + "eval_ag_news_emb_cos_sim_sem": 0.004921948817774727, + "eval_ag_news_emb_top1_equal": 0.25200000405311584, + "eval_ag_news_emb_top1_equal_sem": 0.019435728067390842, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.521515130996704, + "eval_ag_news_n_ngrams_match_1": 13.042, + "eval_ag_news_n_ngrams_match_2": 2.88, + "eval_ag_news_n_ngrams_match_3": 0.808, + "eval_ag_news_num_pred_words": 40.654, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 33.8356550963507, + "eval_ag_news_pred_num_tokens": 60.2109375, + "eval_ag_news_rouge_score": 0.3544122061798759, + "eval_ag_news_runtime": 7.6014, + "eval_ag_news_samples_per_second": 65.777, + "eval_ag_news_steps_per_second": 0.132, + "eval_ag_news_token_set_f1": 0.34653211780222726, + "eval_ag_news_token_set_f1_sem": 0.004903302915032384, + "eval_ag_news_token_set_precision": 0.3193018972552045, + "eval_ag_news_token_set_recall": 0.40219067047503776, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 1023 + }, + { + "epoch": 4.51, + "eval_anthropic_toxic_prompts_accuracy": 0.1068125, + "eval_anthropic_toxic_prompts_bleu_score": 3.862666125219136, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.14053569940919364, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6820381283760071, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.005041296759748043, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.13199999928474426, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015152928667412809, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 3.027169704437256, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.89, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.836, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.736, + "eval_anthropic_toxic_prompts_num_pred_words": 38.55, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 20.63873612402303, + "eval_anthropic_toxic_prompts_pred_num_tokens": 55.7109375, + "eval_anthropic_toxic_prompts_rouge_score": 0.24442206374960782, + "eval_anthropic_toxic_prompts_runtime": 7.0663, + "eval_anthropic_toxic_prompts_samples_per_second": 70.758, + "eval_anthropic_toxic_prompts_steps_per_second": 0.142, + "eval_anthropic_toxic_prompts_token_set_f1": 0.3398768275342579, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005639698117516144, + "eval_anthropic_toxic_prompts_token_set_precision": 0.4271537549157317, + "eval_anthropic_toxic_prompts_token_set_recall": 0.30831531114690275, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 1023 + }, + { + "epoch": 4.51, + "eval_arxiv_accuracy": 0.42240625, + "eval_arxiv_bleu_score": 4.171253458571327, + "eval_arxiv_bleu_score_sem": 0.11767789407650925, + "eval_arxiv_emb_cos_sim": 0.7384297251701355, + "eval_arxiv_emb_cos_sim_sem": 0.005850672441030442, + "eval_arxiv_emb_top1_equal": 0.2540000081062317, + "eval_arxiv_emb_top1_equal_sem": 0.01948659572650023, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.008838415145874, + "eval_arxiv_n_ngrams_match_1": 14.468, + "eval_arxiv_n_ngrams_match_2": 2.732, + "eval_arxiv_n_ngrams_match_3": 0.598, + "eval_arxiv_num_pred_words": 36.848, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 20.263848070145425, + "eval_arxiv_pred_num_tokens": 61.4609375, + "eval_arxiv_rouge_score": 0.35353412039225657, + "eval_arxiv_runtime": 24.2177, + "eval_arxiv_samples_per_second": 20.646, + "eval_arxiv_steps_per_second": 0.041, + "eval_arxiv_token_set_f1": 0.35591688837586377, + "eval_arxiv_token_set_f1_sem": 0.004753441988684993, + "eval_arxiv_token_set_precision": 0.30392920024519354, + "eval_arxiv_token_set_recall": 0.45268519173881183, + "eval_arxiv_true_num_tokens": 64.0, + "step": 1023 + }, + { + "epoch": 4.51, + "eval_python_code_alpaca_accuracy": 0.1519375, + "eval_python_code_alpaca_bleu_score": 5.7001715909469395, + "eval_python_code_alpaca_bleu_score_sem": 0.18245331772390755, + "eval_python_code_alpaca_emb_cos_sim": 0.7690838575363159, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.0036216657731970425, + "eval_python_code_alpaca_emb_top1_equal": 0.17599999904632568, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.017047853594066943, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.6924941539764404, + "eval_python_code_alpaca_n_ngrams_match_1": 9.596, + "eval_python_code_alpaca_n_ngrams_match_2": 2.94, + "eval_python_code_alpaca_n_ngrams_match_3": 1.024, + "eval_python_code_alpaca_num_pred_words": 35.698, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 14.768464848072853, + "eval_python_code_alpaca_pred_num_tokens": 57.9609375, + "eval_python_code_alpaca_rouge_score": 0.39724312590614486, + "eval_python_code_alpaca_runtime": 36.425, + "eval_python_code_alpaca_samples_per_second": 13.727, + "eval_python_code_alpaca_steps_per_second": 0.027, + "eval_python_code_alpaca_token_set_f1": 0.472579092229167, + "eval_python_code_alpaca_token_set_f1_sem": 0.005334868398535928, + "eval_python_code_alpaca_token_set_precision": 0.5364848862944417, + "eval_python_code_alpaca_token_set_recall": 0.44106302354087196, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 1023 + }, + { + "epoch": 4.51, + "eval_wikibio_accuracy": 0.3705, + "eval_wikibio_bleu_score": 4.88755922984987, + "eval_wikibio_bleu_score_sem": 0.20708995809366076, + "eval_wikibio_emb_cos_sim": 0.7016727924346924, + "eval_wikibio_emb_cos_sim_sem": 0.007042247955428024, + "eval_wikibio_emb_top1_equal": 0.14800000190734863, + "eval_wikibio_emb_top1_equal_sem": 0.015896458012572223, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.313309907913208, + "eval_wikibio_n_ngrams_match_1": 8.268, + "eval_wikibio_n_ngrams_match_2": 2.63, + "eval_wikibio_n_ngrams_match_3": 0.976, + "eval_wikibio_num_pred_words": 31.332, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 27.475917889788466, + "eval_wikibio_pred_num_tokens": 62.890625, + "eval_wikibio_rouge_score": 0.2918099611132722, + "eval_wikibio_runtime": 7.0935, + "eval_wikibio_samples_per_second": 70.487, + "eval_wikibio_steps_per_second": 0.141, + "eval_wikibio_token_set_f1": 0.2694020672814847, + "eval_wikibio_token_set_f1_sem": 0.006978672843545908, + "eval_wikibio_token_set_precision": 0.26418978779936264, + "eval_wikibio_token_set_recall": 0.30268523625472155, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 1023 + }, + { + "epoch": 4.51, + "eval_bias-bios_accuracy": 0.50959375, + "eval_bias-bios_bleu_score": 17.670913883343722, + "eval_bias-bios_bleu_score_sem": 0.7618895808099503, + "eval_bias-bios_emb_cos_sim": 0.8704634308815002, + "eval_bias-bios_emb_cos_sim_sem": 0.002964730134396973, + "eval_bias-bios_emb_top1_equal": 0.32199999690055847, + "eval_bias-bios_emb_top1_equal_sem": 0.02091666653838802, + "eval_bias-bios_exact_match": 0.0, + "eval_bias-bios_exact_match_sem": 0.0, + "eval_bias-bios_loss": 1.806833267211914, + "eval_bias-bios_n_ngrams_match_1": 22.242, + "eval_bias-bios_n_ngrams_match_2": 9.992, + "eval_bias-bios_n_ngrams_match_3": 5.52, + "eval_bias-bios_num_pred_words": 45.478, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 6.091127884065418, + "eval_bias-bios_pred_num_tokens": 60.0625, + "eval_bias-bios_rouge_score": 0.5196810999362653, + "eval_bias-bios_runtime": 7.4704, + "eval_bias-bios_samples_per_second": 66.931, + "eval_bias-bios_steps_per_second": 0.134, + "eval_bias-bios_token_set_f1": 0.5505760399134664, + "eval_bias-bios_token_set_f1_sem": 0.006596670216275572, + "eval_bias-bios_token_set_precision": 0.5310260817494754, + "eval_bias-bios_token_set_recall": 0.5836186251248524, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 1023 + }, + { + "epoch": 4.55, + "learning_rate": 0.001, + "loss": 2.0462, + "step": 1032 + }, + { + "epoch": 4.6, + "learning_rate": 0.001, + "loss": 1.9454, + "step": 1044 + }, + { + "epoch": 4.64, + "eval_ag_news_accuracy": 0.303625, + "eval_ag_news_bleu_score": 4.607424035746342, + "eval_ag_news_bleu_score_sem": 0.15785109714710252, + "eval_ag_news_emb_cos_sim": 0.8068847060203552, + "eval_ag_news_emb_cos_sim_sem": 0.004727621191909161, + "eval_ag_news_emb_top1_equal": 0.2720000147819519, + "eval_ag_news_emb_top1_equal_sem": 0.019920483557355567, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.562575101852417, + "eval_ag_news_n_ngrams_match_1": 12.09, + "eval_ag_news_n_ngrams_match_2": 2.424, + "eval_ag_news_n_ngrams_match_3": 0.66, + "eval_ag_news_num_pred_words": 32.644, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 35.25386264555959, + "eval_ag_news_pred_num_tokens": 49.21875, + "eval_ag_news_rouge_score": 0.36812557927211753, + "eval_ag_news_runtime": 8.3463, + "eval_ag_news_samples_per_second": 59.907, + "eval_ag_news_steps_per_second": 0.12, + "eval_ag_news_token_set_f1": 0.3474054458929426, + "eval_ag_news_token_set_f1_sem": 0.004683477639321787, + "eval_ag_news_token_set_precision": 0.30639136557472807, + "eval_ag_news_token_set_recall": 0.4190597447752051, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 1054 + }, + { + "epoch": 4.64, + "eval_anthropic_toxic_prompts_accuracy": 0.110125, + "eval_anthropic_toxic_prompts_bleu_score": 5.163171339929796, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.19282672443503882, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.7005923986434937, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.0047202354786315135, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.15000000596046448, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01598471338779901, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 2.9066479206085205, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.904, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.854, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.704, + "eval_anthropic_toxic_prompts_num_pred_words": 28.996, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 18.295368138206676, + "eval_anthropic_toxic_prompts_pred_num_tokens": 41.984375, + "eval_anthropic_toxic_prompts_rouge_score": 0.30157651451983636, + "eval_anthropic_toxic_prompts_runtime": 6.9928, + "eval_anthropic_toxic_prompts_samples_per_second": 71.502, + "eval_anthropic_toxic_prompts_steps_per_second": 0.143, + "eval_anthropic_toxic_prompts_token_set_f1": 0.35570073189073564, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0061060657186972804, + "eval_anthropic_toxic_prompts_token_set_precision": 0.43802447258263794, + "eval_anthropic_toxic_prompts_token_set_recall": 0.3248866462032167, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 1054 + }, + { + "epoch": 4.64, + "eval_arxiv_accuracy": 0.41659375, + "eval_arxiv_bleu_score": 3.955898042661497, + "eval_arxiv_bleu_score_sem": 0.11722614020701647, + "eval_arxiv_emb_cos_sim": 0.7505651712417603, + "eval_arxiv_emb_cos_sim_sem": 0.0055917039969810125, + "eval_arxiv_emb_top1_equal": 0.18199999630451202, + "eval_arxiv_emb_top1_equal_sem": 0.017272772986938162, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.0766408443450928, + "eval_arxiv_n_ngrams_match_1": 14.196, + "eval_arxiv_n_ngrams_match_2": 2.698, + "eval_arxiv_n_ngrams_match_3": 0.568, + "eval_arxiv_num_pred_words": 31.414, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 21.685435158513755, + "eval_arxiv_pred_num_tokens": 50.890625, + "eval_arxiv_rouge_score": 0.37409423610863923, + "eval_arxiv_runtime": 7.3004, + "eval_arxiv_samples_per_second": 68.49, + "eval_arxiv_steps_per_second": 0.137, + "eval_arxiv_token_set_f1": 0.3682223121599511, + "eval_arxiv_token_set_f1_sem": 0.004627859384051069, + "eval_arxiv_token_set_precision": 0.3100685398445484, + "eval_arxiv_token_set_recall": 0.4692943092560599, + "eval_arxiv_true_num_tokens": 64.0, + "step": 1054 + }, + { + "epoch": 4.64, + "eval_python_code_alpaca_accuracy": 0.15634375, + "eval_python_code_alpaca_bleu_score": 6.79960326411184, + "eval_python_code_alpaca_bleu_score_sem": 0.22452214571025933, + "eval_python_code_alpaca_emb_cos_sim": 0.7928995490074158, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.0037320899502600487, + "eval_python_code_alpaca_emb_top1_equal": 0.2160000056028366, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.018421909471797383, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.5504839420318604, + "eval_python_code_alpaca_n_ngrams_match_1": 9.574, + "eval_python_code_alpaca_n_ngrams_match_2": 2.626, + "eval_python_code_alpaca_n_ngrams_match_3": 0.856, + "eval_python_code_alpaca_num_pred_words": 28.114, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 12.813303178442965, + "eval_python_code_alpaca_pred_num_tokens": 43.5546875, + "eval_python_code_alpaca_rouge_score": 0.4508352048107117, + "eval_python_code_alpaca_runtime": 8.0363, + "eval_python_code_alpaca_samples_per_second": 62.217, + "eval_python_code_alpaca_steps_per_second": 0.124, + "eval_python_code_alpaca_token_set_f1": 0.49210118813659204, + "eval_python_code_alpaca_token_set_f1_sem": 0.005441665430889188, + "eval_python_code_alpaca_token_set_precision": 0.5387057133146517, + "eval_python_code_alpaca_token_set_recall": 0.46976460400315756, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 1054 + }, + { + "epoch": 4.64, + "eval_wikibio_accuracy": 0.358375, + "eval_wikibio_bleu_score": 5.526680676705421, + "eval_wikibio_bleu_score_sem": 0.22318342201779084, + "eval_wikibio_emb_cos_sim": 0.7270724773406982, + "eval_wikibio_emb_cos_sim_sem": 0.006319029155182091, + "eval_wikibio_emb_top1_equal": 0.1720000058412552, + "eval_wikibio_emb_top1_equal_sem": 0.01689386850274998, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.438575506210327, + "eval_wikibio_n_ngrams_match_1": 8.586, + "eval_wikibio_n_ngrams_match_2": 2.596, + "eval_wikibio_n_ngrams_match_3": 0.942, + "eval_wikibio_num_pred_words": 29.138, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 31.142564167076408, + "eval_wikibio_pred_num_tokens": 56.09375, + "eval_wikibio_rouge_score": 0.330226387836722, + "eval_wikibio_runtime": 7.1468, + "eval_wikibio_samples_per_second": 69.961, + "eval_wikibio_steps_per_second": 0.14, + "eval_wikibio_token_set_f1": 0.29123985480521514, + "eval_wikibio_token_set_f1_sem": 0.006346977449761717, + "eval_wikibio_token_set_precision": 0.28265532766102136, + "eval_wikibio_token_set_recall": 0.3221428125241798, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 1054 + }, + { + "epoch": 4.64, + "eval_bias-bios_accuracy": 0.5160625, + "eval_bias-bios_bleu_score": 18.49066373707306, + "eval_bias-bios_bleu_score_sem": 0.824870220846744, + "eval_bias-bios_emb_cos_sim": 0.8730402588844299, + "eval_bias-bios_emb_cos_sim_sem": 0.0032544679411020154, + "eval_bias-bios_emb_top1_equal": 0.3240000009536743, + "eval_bias-bios_emb_top1_equal_sem": 0.020950555653521236, + "eval_bias-bios_exact_match": 0.002, + "eval_bias-bios_exact_match_sem": 0.002, + "eval_bias-bios_loss": 1.7815382480621338, + "eval_bias-bios_n_ngrams_match_1": 20.964, + "eval_bias-bios_n_ngrams_match_2": 9.392, + "eval_bias-bios_n_ngrams_match_3": 5.23, + "eval_bias-bios_num_pred_words": 37.032, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 5.938985027968603, + "eval_bias-bios_pred_num_tokens": 49.4609375, + "eval_bias-bios_rouge_score": 0.5454588049088351, + "eval_bias-bios_runtime": 8.2478, + "eval_bias-bios_samples_per_second": 60.622, + "eval_bias-bios_steps_per_second": 0.121, + "eval_bias-bios_token_set_f1": 0.5572734682256394, + "eval_bias-bios_token_set_f1_sem": 0.006750857588431413, + "eval_bias-bios_token_set_precision": 0.5193511851690316, + "eval_bias-bios_token_set_recall": 0.6139545043369491, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 1054 + }, + { + "epoch": 4.65, + "learning_rate": 0.001, + "loss": 1.7226, + "step": 1056 + }, + { + "epoch": 4.7, + "learning_rate": 0.001, + "loss": 2.0507, + "step": 1068 + }, + { + "epoch": 4.76, + "learning_rate": 0.001, + "loss": 2.0374, + "step": 1080 + }, + { + "epoch": 4.78, + "eval_ag_news_accuracy": 0.3025, + "eval_ag_news_bleu_score": 4.782426774883124, + "eval_ag_news_bleu_score_sem": 0.1495675503894871, + "eval_ag_news_emb_cos_sim": 0.8184526562690735, + "eval_ag_news_emb_cos_sim_sem": 0.004131730809081501, + "eval_ag_news_emb_top1_equal": 0.25600001215934753, + "eval_ag_news_emb_top1_equal_sem": 0.019536923601457774, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.540161609649658, + "eval_ag_news_n_ngrams_match_1": 13.778, + "eval_ag_news_n_ngrams_match_2": 2.968, + "eval_ag_news_n_ngrams_match_3": 0.822, + "eval_ag_news_num_pred_words": 42.946, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 34.47248982771483, + "eval_ag_news_pred_num_tokens": 62.890625, + "eval_ag_news_rouge_score": 0.36404645168806504, + "eval_ag_news_runtime": 7.3968, + "eval_ag_news_samples_per_second": 67.597, + "eval_ag_news_steps_per_second": 0.135, + "eval_ag_news_token_set_f1": 0.3549916818578896, + "eval_ag_news_token_set_f1_sem": 0.004641491153567951, + "eval_ag_news_token_set_precision": 0.33502667272869446, + "eval_ag_news_token_set_recall": 0.39560025222116335, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 1085 + }, + { + "epoch": 4.78, + "eval_anthropic_toxic_prompts_accuracy": 0.10765625, + "eval_anthropic_toxic_prompts_bleu_score": 3.546828835753263, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12631718784025125, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.698980987071991, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004317670122587383, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.12999999523162842, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015055010489467818, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 3.107421636581421, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.374, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.032, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.784, + "eval_anthropic_toxic_prompts_num_pred_words": 43.392, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 22.363309263125586, + "eval_anthropic_toxic_prompts_pred_num_tokens": 62.65625, + "eval_anthropic_toxic_prompts_rouge_score": 0.23832087035857508, + "eval_anthropic_toxic_prompts_runtime": 7.0669, + "eval_anthropic_toxic_prompts_samples_per_second": 70.752, + "eval_anthropic_toxic_prompts_steps_per_second": 0.142, + "eval_anthropic_toxic_prompts_token_set_f1": 0.33135563913515376, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005503391397071844, + "eval_anthropic_toxic_prompts_token_set_precision": 0.4564273236473413, + "eval_anthropic_toxic_prompts_token_set_recall": 0.28195039472658895, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 1085 + }, + { + "epoch": 4.78, + "eval_arxiv_accuracy": 0.4284375, + "eval_arxiv_bleu_score": 4.4366820296721725, + "eval_arxiv_bleu_score_sem": 0.12741093796420108, + "eval_arxiv_emb_cos_sim": 0.7520226836204529, + "eval_arxiv_emb_cos_sim_sem": 0.005331376429749253, + "eval_arxiv_emb_top1_equal": 0.28600001335144043, + "eval_arxiv_emb_top1_equal_sem": 0.020229345383440313, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 2.9971020221710205, + "eval_arxiv_n_ngrams_match_1": 15.13, + "eval_arxiv_n_ngrams_match_2": 2.982, + "eval_arxiv_n_ngrams_match_3": 0.668, + "eval_arxiv_num_pred_words": 38.58, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 20.027413743022723, + "eval_arxiv_pred_num_tokens": 62.8984375, + "eval_arxiv_rouge_score": 0.3612686858556743, + "eval_arxiv_runtime": 7.4014, + "eval_arxiv_samples_per_second": 67.554, + "eval_arxiv_steps_per_second": 0.135, + "eval_arxiv_token_set_f1": 0.3633472670510122, + "eval_arxiv_token_set_f1_sem": 0.004831455911736207, + "eval_arxiv_token_set_precision": 0.3146368295803298, + "eval_arxiv_token_set_recall": 0.4532731992238954, + "eval_arxiv_true_num_tokens": 64.0, + "step": 1085 + }, + { + "epoch": 4.78, + "eval_python_code_alpaca_accuracy": 0.1553125, + "eval_python_code_alpaca_bleu_score": 5.122593845366089, + "eval_python_code_alpaca_bleu_score_sem": 0.15172203544953022, + "eval_python_code_alpaca_emb_cos_sim": 0.7787840366363525, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.003238402698582763, + "eval_python_code_alpaca_emb_top1_equal": 0.18000000715255737, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.017198591983670585, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.743194341659546, + "eval_python_code_alpaca_n_ngrams_match_1": 10.09, + "eval_python_code_alpaca_n_ngrams_match_2": 3.016, + "eval_python_code_alpaca_n_ngrams_match_3": 1.034, + "eval_python_code_alpaca_num_pred_words": 40.474, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 15.536534915406467, + "eval_python_code_alpaca_pred_num_tokens": 62.7109375, + "eval_python_code_alpaca_rouge_score": 0.3824998945624835, + "eval_python_code_alpaca_runtime": 53.5162, + "eval_python_code_alpaca_samples_per_second": 9.343, + "eval_python_code_alpaca_steps_per_second": 0.019, + "eval_python_code_alpaca_token_set_f1": 0.46524449680098723, + "eval_python_code_alpaca_token_set_f1_sem": 0.005022565353754791, + "eval_python_code_alpaca_token_set_precision": 0.5631874903720834, + "eval_python_code_alpaca_token_set_recall": 0.41347913436767303, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 1085 + }, + { + "epoch": 4.78, + "eval_wikibio_accuracy": 0.37053125, + "eval_wikibio_bleu_score": 5.041326690755854, + "eval_wikibio_bleu_score_sem": 0.20965006906787886, + "eval_wikibio_emb_cos_sim": 0.7176839113235474, + "eval_wikibio_emb_cos_sim_sem": 0.006469605609540181, + "eval_wikibio_emb_top1_equal": 0.16200000047683716, + "eval_wikibio_emb_top1_equal_sem": 0.016494123019099097, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.337543487548828, + "eval_wikibio_n_ngrams_match_1": 8.682, + "eval_wikibio_n_ngrams_match_2": 2.672, + "eval_wikibio_n_ngrams_match_3": 0.986, + "eval_wikibio_num_pred_words": 31.822, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 28.149891143122552, + "eval_wikibio_pred_num_tokens": 62.9765625, + "eval_wikibio_rouge_score": 0.3031944117428183, + "eval_wikibio_runtime": 7.3254, + "eval_wikibio_samples_per_second": 68.255, + "eval_wikibio_steps_per_second": 0.137, + "eval_wikibio_token_set_f1": 0.28195704162537605, + "eval_wikibio_token_set_f1_sem": 0.0066943326071722984, + "eval_wikibio_token_set_precision": 0.27719178191689897, + "eval_wikibio_token_set_recall": 0.3106932649060452, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 1085 + }, + { + "epoch": 4.78, + "eval_bias-bios_accuracy": 0.51134375, + "eval_bias-bios_bleu_score": 17.141887959631816, + "eval_bias-bios_bleu_score_sem": 0.6962557324948682, + "eval_bias-bios_emb_cos_sim": 0.8785771727561951, + "eval_bias-bios_emb_cos_sim_sem": 0.002824276632086532, + "eval_bias-bios_emb_top1_equal": 0.3400000035762787, + "eval_bias-bios_emb_top1_equal_sem": 0.021206118792612732, + "eval_bias-bios_exact_match": 0.0, + "eval_bias-bios_exact_match_sem": 0.0, + "eval_bias-bios_loss": 1.8023052215576172, + "eval_bias-bios_n_ngrams_match_1": 22.798, + "eval_bias-bios_n_ngrams_match_2": 10.262, + "eval_bias-bios_n_ngrams_match_3": 5.67, + "eval_bias-bios_num_pred_words": 47.208, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 6.06360932857619, + "eval_bias-bios_pred_num_tokens": 62.6875, + "eval_bias-bios_rouge_score": 0.5199242058822136, + "eval_bias-bios_runtime": 7.4164, + "eval_bias-bios_samples_per_second": 67.418, + "eval_bias-bios_steps_per_second": 0.135, + "eval_bias-bios_token_set_f1": 0.5520066710280114, + "eval_bias-bios_token_set_f1_sem": 0.006506081911934038, + "eval_bias-bios_token_set_precision": 0.5448837177365514, + "eval_bias-bios_token_set_recall": 0.5702486809543807, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 1085 + }, + { + "epoch": 4.81, + "learning_rate": 0.001, + "loss": 1.9815, + "step": 1092 + }, + { + "epoch": 4.86, + "learning_rate": 0.001, + "loss": 1.7388, + "step": 1104 + }, + { + "epoch": 4.92, + "learning_rate": 0.001, + "loss": 1.9799, + "step": 1116 + }, + { + "epoch": 4.92, + "eval_ag_news_accuracy": 0.30546875, + "eval_ag_news_bleu_score": 4.628609823295798, + "eval_ag_news_bleu_score_sem": 0.16814417003404045, + "eval_ag_news_emb_cos_sim": 0.8036626577377319, + "eval_ag_news_emb_cos_sim_sem": 0.004962024792248347, + "eval_ag_news_emb_top1_equal": 0.25200000405311584, + "eval_ag_news_emb_top1_equal_sem": 0.019435728067390842, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.502821683883667, + "eval_ag_news_n_ngrams_match_1": 11.762, + "eval_ag_news_n_ngrams_match_2": 2.468, + "eval_ag_news_n_ngrams_match_3": 0.67, + "eval_ag_news_num_pred_words": 32.45, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 33.20902525082459, + "eval_ag_news_pred_num_tokens": 49.375, + "eval_ag_news_rouge_score": 0.35847457034358254, + "eval_ag_news_runtime": 7.1368, + "eval_ag_news_samples_per_second": 70.06, + "eval_ag_news_steps_per_second": 0.14, + "eval_ag_news_token_set_f1": 0.34363322119153306, + "eval_ag_news_token_set_f1_sem": 0.005106203858593481, + "eval_ag_news_token_set_precision": 0.29934172472786247, + "eval_ag_news_token_set_recall": 0.4300573230492533, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 1116 + }, + { + "epoch": 4.92, + "eval_anthropic_toxic_prompts_accuracy": 0.10875, + "eval_anthropic_toxic_prompts_bleu_score": 5.205884991798989, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1865173621633143, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6996538639068604, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004887105417573062, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.13600000739097595, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015345322399934358, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 2.9634671211242676, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.804, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.89, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.74, + "eval_anthropic_toxic_prompts_num_pred_words": 28.776, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 19.36499628497135, + "eval_anthropic_toxic_prompts_pred_num_tokens": 42.9765625, + "eval_anthropic_toxic_prompts_rouge_score": 0.3026698968444351, + "eval_anthropic_toxic_prompts_runtime": 6.9544, + "eval_anthropic_toxic_prompts_samples_per_second": 71.896, + "eval_anthropic_toxic_prompts_steps_per_second": 0.144, + "eval_anthropic_toxic_prompts_token_set_f1": 0.35883487316207335, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006119140100683589, + "eval_anthropic_toxic_prompts_token_set_precision": 0.43265422966135547, + "eval_anthropic_toxic_prompts_token_set_recall": 0.33372935076770943, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 1116 + }, + { + "epoch": 4.92, + "eval_arxiv_accuracy": 0.41740625, + "eval_arxiv_bleu_score": 3.872287891682529, + "eval_arxiv_bleu_score_sem": 0.11487334041813434, + "eval_arxiv_emb_cos_sim": 0.744478702545166, + "eval_arxiv_emb_cos_sim_sem": 0.006124686202112212, + "eval_arxiv_emb_top1_equal": 0.1720000058412552, + "eval_arxiv_emb_top1_equal_sem": 0.016893869835550357, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.015841007232666, + "eval_arxiv_n_ngrams_match_1": 13.73, + "eval_arxiv_n_ngrams_match_2": 2.58, + "eval_arxiv_n_ngrams_match_3": 0.586, + "eval_arxiv_num_pred_words": 31.274, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 20.40624552605183, + "eval_arxiv_pred_num_tokens": 54.0078125, + "eval_arxiv_rouge_score": 0.3609061809223396, + "eval_arxiv_runtime": 7.3764, + "eval_arxiv_samples_per_second": 67.784, + "eval_arxiv_steps_per_second": 0.136, + "eval_arxiv_token_set_f1": 0.36442828917901954, + "eval_arxiv_token_set_f1_sem": 0.0049055757087992, + "eval_arxiv_token_set_precision": 0.30121546718967745, + "eval_arxiv_token_set_recall": 0.47774161387520014, + "eval_arxiv_true_num_tokens": 64.0, + "step": 1116 + }, + { + "epoch": 4.92, + "eval_python_code_alpaca_accuracy": 0.1583125, + "eval_python_code_alpaca_bleu_score": 7.442459981245183, + "eval_python_code_alpaca_bleu_score_sem": 0.2587730959133015, + "eval_python_code_alpaca_emb_cos_sim": 0.7887560129165649, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.0036784270755643492, + "eval_python_code_alpaca_emb_top1_equal": 0.1979999989271164, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.017838958581409683, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.526684045791626, + "eval_python_code_alpaca_n_ngrams_match_1": 9.42, + "eval_python_code_alpaca_n_ngrams_match_2": 2.816, + "eval_python_code_alpaca_n_ngrams_match_3": 0.932, + "eval_python_code_alpaca_num_pred_words": 27.836, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 12.51194822530937, + "eval_python_code_alpaca_pred_num_tokens": 46.1484375, + "eval_python_code_alpaca_rouge_score": 0.45174719006638997, + "eval_python_code_alpaca_runtime": 6.8582, + "eval_python_code_alpaca_samples_per_second": 72.905, + "eval_python_code_alpaca_steps_per_second": 0.146, + "eval_python_code_alpaca_token_set_f1": 0.4940857636888703, + "eval_python_code_alpaca_token_set_f1_sem": 0.00574673042749639, + "eval_python_code_alpaca_token_set_precision": 0.5306708638242008, + "eval_python_code_alpaca_token_set_recall": 0.4792872902979338, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 1116 + }, + { + "epoch": 4.92, + "eval_wikibio_accuracy": 0.37003125, + "eval_wikibio_bleu_score": 5.271042883413745, + "eval_wikibio_bleu_score_sem": 0.21668692554035335, + "eval_wikibio_emb_cos_sim": 0.7317224144935608, + "eval_wikibio_emb_cos_sim_sem": 0.006157877596249158, + "eval_wikibio_emb_top1_equal": 0.20399999618530273, + "eval_wikibio_emb_top1_equal_sem": 0.018039369108186407, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.2863619327545166, + "eval_wikibio_n_ngrams_match_1": 8.604, + "eval_wikibio_n_ngrams_match_2": 2.594, + "eval_wikibio_n_ngrams_match_3": 0.97, + "eval_wikibio_num_pred_words": 31.038, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 26.745384937438654, + "eval_wikibio_pred_num_tokens": 59.7421875, + "eval_wikibio_rouge_score": 0.3227614958549562, + "eval_wikibio_runtime": 7.1142, + "eval_wikibio_samples_per_second": 70.282, + "eval_wikibio_steps_per_second": 0.141, + "eval_wikibio_token_set_f1": 0.28746519585548624, + "eval_wikibio_token_set_f1_sem": 0.006471419437000773, + "eval_wikibio_token_set_precision": 0.2785137949281113, + "eval_wikibio_token_set_recall": 0.3176058242144122, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 1116 + }, + { + "epoch": 4.92, + "eval_bias-bios_accuracy": 0.51221875, + "eval_bias-bios_bleu_score": 18.650420864840633, + "eval_bias-bios_bleu_score_sem": 0.8302788590751484, + "eval_bias-bios_emb_cos_sim": 0.8733921647071838, + "eval_bias-bios_emb_cos_sim_sem": 0.003121800325381792, + "eval_bias-bios_emb_top1_equal": 0.28600001335144043, + "eval_bias-bios_emb_top1_equal_sem": 0.020229345383440313, + "eval_bias-bios_exact_match": 0.002, + "eval_bias-bios_exact_match_sem": 0.002, + "eval_bias-bios_loss": 1.7778593301773071, + "eval_bias-bios_n_ngrams_match_1": 20.736, + "eval_bias-bios_n_ngrams_match_2": 9.35, + "eval_bias-bios_n_ngrams_match_3": 5.248, + "eval_bias-bios_num_pred_words": 36.104, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 5.9171761308998185, + "eval_bias-bios_pred_num_tokens": 50.6796875, + "eval_bias-bios_rouge_score": 0.5457379709662528, + "eval_bias-bios_runtime": 7.3864, + "eval_bias-bios_samples_per_second": 67.692, + "eval_bias-bios_steps_per_second": 0.135, + "eval_bias-bios_token_set_f1": 0.5581272984981764, + "eval_bias-bios_token_set_f1_sem": 0.00681536792835148, + "eval_bias-bios_token_set_precision": 0.5114805191768111, + "eval_bias-bios_token_set_recall": 0.6244049600041734, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 1116 + }, + { + "epoch": 4.97, + "learning_rate": 0.001, + "loss": 1.9734, + "step": 1128 + }, + { + "epoch": 5.02, + "learning_rate": 0.001, + "loss": 1.8862, + "step": 1140 + }, + { + "epoch": 5.05, + "eval_ag_news_accuracy": 0.3008125, + "eval_ag_news_bleu_score": 4.751560589852374, + "eval_ag_news_bleu_score_sem": 0.1603748846731102, + "eval_ag_news_emb_cos_sim": 0.7978442311286926, + "eval_ag_news_emb_cos_sim_sem": 0.005238710484881748, + "eval_ag_news_emb_top1_equal": 0.24199999868869781, + "eval_ag_news_emb_top1_equal_sem": 0.019173085092707744, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.528089761734009, + "eval_ag_news_n_ngrams_match_1": 12.1, + "eval_ag_news_n_ngrams_match_2": 2.658, + "eval_ag_news_n_ngrams_match_3": 0.766, + "eval_ag_news_num_pred_words": 35.188, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 34.058844925758244, + "eval_ag_news_pred_num_tokens": 53.625, + "eval_ag_news_rouge_score": 0.3522136604799726, + "eval_ag_news_runtime": 7.297, + "eval_ag_news_samples_per_second": 68.521, + "eval_ag_news_steps_per_second": 0.137, + "eval_ag_news_token_set_f1": 0.3421194657906841, + "eval_ag_news_token_set_f1_sem": 0.00512623722261911, + "eval_ag_news_token_set_precision": 0.3016949150083078, + "eval_ag_news_token_set_recall": 0.42125821865705554, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 1147 + }, + { + "epoch": 5.05, + "eval_anthropic_toxic_prompts_accuracy": 0.10775, + "eval_anthropic_toxic_prompts_bleu_score": 4.824045774109767, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.17779367422682138, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6928799748420715, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004798333249000901, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1420000046491623, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015625630310786714, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 2.983499050140381, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.702, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.78, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.684, + "eval_anthropic_toxic_prompts_num_pred_words": 30.648, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 19.75682596548496, + "eval_anthropic_toxic_prompts_pred_num_tokens": 43.3984375, + "eval_anthropic_toxic_prompts_rouge_score": 0.28186001156785756, + "eval_anthropic_toxic_prompts_runtime": 6.8855, + "eval_anthropic_toxic_prompts_samples_per_second": 72.617, + "eval_anthropic_toxic_prompts_steps_per_second": 0.145, + "eval_anthropic_toxic_prompts_token_set_f1": 0.3553819370248394, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005984688098916189, + "eval_anthropic_toxic_prompts_token_set_precision": 0.42502285473371, + "eval_anthropic_toxic_prompts_token_set_recall": 0.3329052847488175, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 1147 + }, + { + "epoch": 5.05, + "eval_arxiv_accuracy": 0.423875, + "eval_arxiv_bleu_score": 4.2970257142972335, + "eval_arxiv_bleu_score_sem": 0.1242882050526764, + "eval_arxiv_emb_cos_sim": 0.744454026222229, + "eval_arxiv_emb_cos_sim_sem": 0.00547184292686335, + "eval_arxiv_emb_top1_equal": 0.21799999475479126, + "eval_arxiv_emb_top1_equal_sem": 0.018483376892288548, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.0156562328338623, + "eval_arxiv_n_ngrams_match_1": 14.664, + "eval_arxiv_n_ngrams_match_2": 2.916, + "eval_arxiv_n_ngrams_match_3": 0.634, + "eval_arxiv_num_pred_words": 34.544, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 20.402475322632174, + "eval_arxiv_pred_num_tokens": 58.109375, + "eval_arxiv_rouge_score": 0.3705193267222546, + "eval_arxiv_runtime": 7.2639, + "eval_arxiv_samples_per_second": 68.833, + "eval_arxiv_steps_per_second": 0.138, + "eval_arxiv_token_set_f1": 0.36950581432877866, + "eval_arxiv_token_set_f1_sem": 0.004561741515291868, + "eval_arxiv_token_set_precision": 0.31003179888853766, + "eval_arxiv_token_set_recall": 0.47645288506817857, + "eval_arxiv_true_num_tokens": 64.0, + "step": 1147 + }, + { + "epoch": 5.05, + "eval_python_code_alpaca_accuracy": 0.16003125, + "eval_python_code_alpaca_bleu_score": 7.109532152823405, + "eval_python_code_alpaca_bleu_score_sem": 0.23092374536841825, + "eval_python_code_alpaca_emb_cos_sim": 0.7907090187072754, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.0038112096453169096, + "eval_python_code_alpaca_emb_top1_equal": 0.21799999475479126, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.018483376892288548, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.54667329788208, + "eval_python_code_alpaca_n_ngrams_match_1": 9.642, + "eval_python_code_alpaca_n_ngrams_match_2": 2.98, + "eval_python_code_alpaca_n_ngrams_match_3": 1.058, + "eval_python_code_alpaca_num_pred_words": 30.814, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 12.764569152634095, + "eval_python_code_alpaca_pred_num_tokens": 48.4921875, + "eval_python_code_alpaca_rouge_score": 0.43772150015430555, + "eval_python_code_alpaca_runtime": 23.6322, + "eval_python_code_alpaca_samples_per_second": 21.158, + "eval_python_code_alpaca_steps_per_second": 0.042, + "eval_python_code_alpaca_token_set_f1": 0.5007978263098356, + "eval_python_code_alpaca_token_set_f1_sem": 0.005543943666993787, + "eval_python_code_alpaca_token_set_precision": 0.5448615964600481, + "eval_python_code_alpaca_token_set_recall": 0.4806543376566172, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 1147 + }, + { + "epoch": 5.05, + "eval_wikibio_accuracy": 0.3758125, + "eval_wikibio_bleu_score": 4.798178200869212, + "eval_wikibio_bleu_score_sem": 0.22454293054991328, + "eval_wikibio_emb_cos_sim": 0.6980165243148804, + "eval_wikibio_emb_cos_sim_sem": 0.007304763647675317, + "eval_wikibio_emb_top1_equal": 0.1599999964237213, + "eval_wikibio_emb_top1_equal_sem": 0.016411540042267993, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.2932658195495605, + "eval_wikibio_n_ngrams_match_1": 7.85, + "eval_wikibio_n_ngrams_match_2": 2.37, + "eval_wikibio_n_ngrams_match_3": 0.872, + "eval_wikibio_num_pred_words": 29.784, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 26.930670908069647, + "eval_wikibio_pred_num_tokens": 61.1796875, + "eval_wikibio_rouge_score": 0.28846571983854963, + "eval_wikibio_runtime": 36.7626, + "eval_wikibio_samples_per_second": 13.601, + "eval_wikibio_steps_per_second": 0.027, + "eval_wikibio_token_set_f1": 0.26126189669956695, + "eval_wikibio_token_set_f1_sem": 0.006870438775008788, + "eval_wikibio_token_set_precision": 0.25099417299572807, + "eval_wikibio_token_set_recall": 0.30064562359800917, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 1147 + }, + { + "epoch": 5.05, + "eval_bias-bios_accuracy": 0.511, + "eval_bias-bios_bleu_score": 18.25300675333223, + "eval_bias-bios_bleu_score_sem": 0.8207254399584505, + "eval_bias-bios_emb_cos_sim": 0.8679388761520386, + "eval_bias-bios_emb_cos_sim_sem": 0.0030592396753830933, + "eval_bias-bios_emb_top1_equal": 0.328000009059906, + "eval_bias-bios_emb_top1_equal_sem": 0.02101702640661987, + "eval_bias-bios_exact_match": 0.002, + "eval_bias-bios_exact_match_sem": 0.002, + "eval_bias-bios_loss": 1.7814617156982422, + "eval_bias-bios_n_ngrams_match_1": 21.26, + "eval_bias-bios_n_ngrams_match_2": 9.388, + "eval_bias-bios_n_ngrams_match_3": 5.212, + "eval_bias-bios_num_pred_words": 39.654, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 5.938530520797771, + "eval_bias-bios_pred_num_tokens": 53.5859375, + "eval_bias-bios_rouge_score": 0.5355492854480801, + "eval_bias-bios_runtime": 8.217, + "eval_bias-bios_samples_per_second": 60.849, + "eval_bias-bios_steps_per_second": 0.122, + "eval_bias-bios_token_set_f1": 0.5539771399755472, + "eval_bias-bios_token_set_f1_sem": 0.0066418923081446425, + "eval_bias-bios_token_set_precision": 0.516171193789998, + "eval_bias-bios_token_set_recall": 0.6108871601437479, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 1147 + }, + { + "epoch": 5.07, + "learning_rate": 0.001, + "loss": 2.0541, + "step": 1152 + }, + { + "epoch": 5.13, + "learning_rate": 0.001, + "loss": 1.9959, + "step": 1164 + }, + { + "epoch": 5.18, + "learning_rate": 0.001, + "loss": 1.8152, + "step": 1176 + }, + { + "epoch": 5.19, + "eval_ag_news_accuracy": 0.30721875, + "eval_ag_news_bleu_score": 4.8485594330572, + "eval_ag_news_bleu_score_sem": 0.16286452590046857, + "eval_ag_news_emb_cos_sim": 0.8151611685752869, + "eval_ag_news_emb_cos_sim_sem": 0.004361782482996463, + "eval_ag_news_emb_top1_equal": 0.25600001215934753, + "eval_ag_news_emb_top1_equal_sem": 0.019536923601457774, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.531165838241577, + "eval_ag_news_n_ngrams_match_1": 12.808, + "eval_ag_news_n_ngrams_match_2": 2.622, + "eval_ag_news_n_ngrams_match_3": 0.728, + "eval_ag_news_num_pred_words": 35.324, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 34.16377384045543, + "eval_ag_news_pred_num_tokens": 54.0546875, + "eval_ag_news_rouge_score": 0.37183171897717315, + "eval_ag_news_runtime": 7.4621, + "eval_ag_news_samples_per_second": 67.005, + "eval_ag_news_steps_per_second": 0.134, + "eval_ag_news_token_set_f1": 0.35285156123195754, + "eval_ag_news_token_set_f1_sem": 0.004871174499504987, + "eval_ag_news_token_set_precision": 0.3177566795698645, + "eval_ag_news_token_set_recall": 0.41301102402278256, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 1178 + }, + { + "epoch": 5.19, + "eval_anthropic_toxic_prompts_accuracy": 0.10784375, + "eval_anthropic_toxic_prompts_bleu_score": 4.754495104532313, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.17230160467881073, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.7017025351524353, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004532275639757245, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.14800000190734863, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015896458012572223, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 2.959667682647705, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.864, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.796, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.678, + "eval_anthropic_toxic_prompts_num_pred_words": 30.582, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 19.291559770090323, + "eval_anthropic_toxic_prompts_pred_num_tokens": 45.6796875, + "eval_anthropic_toxic_prompts_rouge_score": 0.2863486347156138, + "eval_anthropic_toxic_prompts_runtime": 6.9905, + "eval_anthropic_toxic_prompts_samples_per_second": 71.526, + "eval_anthropic_toxic_prompts_steps_per_second": 0.143, + "eval_anthropic_toxic_prompts_token_set_f1": 0.34945743337792634, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005829221450704748, + "eval_anthropic_toxic_prompts_token_set_precision": 0.43410117442174334, + "eval_anthropic_toxic_prompts_token_set_recall": 0.3167146498514724, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 1178 + }, + { + "epoch": 5.19, + "eval_arxiv_accuracy": 0.41959375, + "eval_arxiv_bleu_score": 4.171391212411956, + "eval_arxiv_bleu_score_sem": 0.12731606291014222, + "eval_arxiv_emb_cos_sim": 0.7597247958183289, + "eval_arxiv_emb_cos_sim_sem": 0.005162535612253285, + "eval_arxiv_emb_top1_equal": 0.23800000548362732, + "eval_arxiv_emb_top1_equal_sem": 0.019064072684441876, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.033210515975952, + "eval_arxiv_n_ngrams_match_1": 14.73, + "eval_arxiv_n_ngrams_match_2": 2.744, + "eval_arxiv_n_ngrams_match_3": 0.578, + "eval_arxiv_num_pred_words": 34.308, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 20.763788166980333, + "eval_arxiv_pred_num_tokens": 56.265625, + "eval_arxiv_rouge_score": 0.37153285277011533, + "eval_arxiv_runtime": 7.1673, + "eval_arxiv_samples_per_second": 69.762, + "eval_arxiv_steps_per_second": 0.14, + "eval_arxiv_token_set_f1": 0.3688551921177521, + "eval_arxiv_token_set_f1_sem": 0.004459365592802378, + "eval_arxiv_token_set_precision": 0.3162146658557265, + "eval_arxiv_token_set_recall": 0.45769358422893, + "eval_arxiv_true_num_tokens": 64.0, + "step": 1178 + }, + { + "epoch": 5.19, + "eval_python_code_alpaca_accuracy": 0.157125, + "eval_python_code_alpaca_bleu_score": 6.484596128014531, + "eval_python_code_alpaca_bleu_score_sem": 0.19621191927439421, + "eval_python_code_alpaca_emb_cos_sim": 0.7900090217590332, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.003316913303868283, + "eval_python_code_alpaca_emb_top1_equal": 0.21199999749660492, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.01829703673906991, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.5738043785095215, + "eval_python_code_alpaca_n_ngrams_match_1": 9.782, + "eval_python_code_alpaca_n_ngrams_match_2": 2.848, + "eval_python_code_alpaca_n_ngrams_match_3": 0.938, + "eval_python_code_alpaca_num_pred_words": 31.012, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 13.11562645543709, + "eval_python_code_alpaca_pred_num_tokens": 48.453125, + "eval_python_code_alpaca_rouge_score": 0.43364782590062695, + "eval_python_code_alpaca_runtime": 6.9669, + "eval_python_code_alpaca_samples_per_second": 71.768, + "eval_python_code_alpaca_steps_per_second": 0.144, + "eval_python_code_alpaca_token_set_f1": 0.4890011179316131, + "eval_python_code_alpaca_token_set_f1_sem": 0.005414977771996831, + "eval_python_code_alpaca_token_set_precision": 0.5485789963357921, + "eval_python_code_alpaca_token_set_recall": 0.4570615049588744, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 1178 + }, + { + "epoch": 5.19, + "eval_wikibio_accuracy": 0.3601875, + "eval_wikibio_bleu_score": 5.865862348150063, + "eval_wikibio_bleu_score_sem": 0.21719391064798121, + "eval_wikibio_emb_cos_sim": 0.7565488815307617, + "eval_wikibio_emb_cos_sim_sem": 0.005104238257453877, + "eval_wikibio_emb_top1_equal": 0.16200000047683716, + "eval_wikibio_emb_top1_equal_sem": 0.016494124351899474, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.4136314392089844, + "eval_wikibio_n_ngrams_match_1": 9.378, + "eval_wikibio_n_ngrams_match_2": 2.944, + "eval_wikibio_n_ngrams_match_3": 1.1, + "eval_wikibio_num_pred_words": 32.134, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 30.375350455050448, + "eval_wikibio_pred_num_tokens": 58.1015625, + "eval_wikibio_rouge_score": 0.3496561867392557, + "eval_wikibio_runtime": 7.5836, + "eval_wikibio_samples_per_second": 65.932, + "eval_wikibio_steps_per_second": 0.132, + "eval_wikibio_token_set_f1": 0.31190722363983975, + "eval_wikibio_token_set_f1_sem": 0.0057202644914374, + "eval_wikibio_token_set_precision": 0.3071798029170863, + "eval_wikibio_token_set_recall": 0.33384434619047204, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 1178 + }, + { + "epoch": 5.19, + "eval_bias-bios_accuracy": 0.51753125, + "eval_bias-bios_bleu_score": 18.51520860116379, + "eval_bias-bios_bleu_score_sem": 0.8058685076135255, + "eval_bias-bios_emb_cos_sim": 0.8777711391448975, + "eval_bias-bios_emb_cos_sim_sem": 0.0031768019980541037, + "eval_bias-bios_emb_top1_equal": 0.38199999928474426, + "eval_bias-bios_emb_top1_equal_sem": 0.02175082231064121, + "eval_bias-bios_exact_match": 0.002, + "eval_bias-bios_exact_match_sem": 0.002, + "eval_bias-bios_loss": 1.7584089040756226, + "eval_bias-bios_n_ngrams_match_1": 21.4, + "eval_bias-bios_n_ngrams_match_2": 9.614, + "eval_bias-bios_n_ngrams_match_3": 5.364, + "eval_bias-bios_num_pred_words": 38.926, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 5.803196602396394, + "eval_bias-bios_pred_num_tokens": 53.125, + "eval_bias-bios_rouge_score": 0.5422110673865645, + "eval_bias-bios_runtime": 7.3443, + "eval_bias-bios_samples_per_second": 68.08, + "eval_bias-bios_steps_per_second": 0.136, + "eval_bias-bios_token_set_f1": 0.5559782567207496, + "eval_bias-bios_token_set_f1_sem": 0.006687994638385981, + "eval_bias-bios_token_set_precision": 0.524176482491051, + "eval_bias-bios_token_set_recall": 0.6043461224917017, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 1178 + }, + { + "epoch": 5.23, + "learning_rate": 0.001, + "loss": 1.753, + "step": 1188 + }, + { + "epoch": 5.29, + "learning_rate": 0.001, + "loss": 2.0388, + "step": 1200 + }, + { + "epoch": 5.33, + "eval_ag_news_accuracy": 0.30171875, + "eval_ag_news_bleu_score": 4.66786630563764, + "eval_ag_news_bleu_score_sem": 0.15154646564550858, + "eval_ag_news_emb_cos_sim": 0.8145875334739685, + "eval_ag_news_emb_cos_sim_sem": 0.004933308608570307, + "eval_ag_news_emb_top1_equal": 0.3179999887943268, + "eval_ag_news_emb_top1_equal_sem": 0.02084757283415153, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.5350406169891357, + "eval_ag_news_n_ngrams_match_1": 13.4, + "eval_ag_news_n_ngrams_match_2": 2.782, + "eval_ag_news_n_ngrams_match_3": 0.77, + "eval_ag_news_num_pred_words": 41.146, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 34.296407702758216, + "eval_ag_news_pred_num_tokens": 62.65625, + "eval_ag_news_rouge_score": 0.35984020185901033, + "eval_ag_news_runtime": 16.4581, + "eval_ag_news_samples_per_second": 30.38, + "eval_ag_news_steps_per_second": 0.061, + "eval_ag_news_token_set_f1": 0.35244076946010666, + "eval_ag_news_token_set_f1_sem": 0.004875945923880267, + "eval_ag_news_token_set_precision": 0.3272305377699167, + "eval_ag_news_token_set_recall": 0.40297350161661416, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 1209 + }, + { + "epoch": 5.33, + "eval_anthropic_toxic_prompts_accuracy": 0.106125, + "eval_anthropic_toxic_prompts_bleu_score": 3.557568302579417, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12541775282119264, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6978194713592529, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00425731925561031, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.13600000739097595, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015345323732734733, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 3.1190552711486816, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.284, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.006, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.776, + "eval_anthropic_toxic_prompts_num_pred_words": 42.482, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 22.62499505785644, + "eval_anthropic_toxic_prompts_pred_num_tokens": 61.359375, + "eval_anthropic_toxic_prompts_rouge_score": 0.24315983815738715, + "eval_anthropic_toxic_prompts_runtime": 7.1217, + "eval_anthropic_toxic_prompts_samples_per_second": 70.207, + "eval_anthropic_toxic_prompts_steps_per_second": 0.14, + "eval_anthropic_toxic_prompts_token_set_f1": 0.33252564143152274, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005464439635387934, + "eval_anthropic_toxic_prompts_token_set_precision": 0.45493149859126963, + "eval_anthropic_toxic_prompts_token_set_recall": 0.2833442137428397, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 1209 + }, + { + "epoch": 5.33, + "eval_arxiv_accuracy": 0.42709375, + "eval_arxiv_bleu_score": 4.502508568694519, + "eval_arxiv_bleu_score_sem": 0.12789589710631666, + "eval_arxiv_emb_cos_sim": 0.7558550834655762, + "eval_arxiv_emb_cos_sim_sem": 0.005511884581727316, + "eval_arxiv_emb_top1_equal": 0.2919999957084656, + "eval_arxiv_emb_top1_equal_sem": 0.020354376719412405, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 2.991607427597046, + "eval_arxiv_n_ngrams_match_1": 15.408, + "eval_arxiv_n_ngrams_match_2": 2.98, + "eval_arxiv_n_ngrams_match_3": 0.688, + "eval_arxiv_num_pred_words": 38.328, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 19.917672990705192, + "eval_arxiv_pred_num_tokens": 62.875, + "eval_arxiv_rouge_score": 0.36719712273634053, + "eval_arxiv_runtime": 7.4164, + "eval_arxiv_samples_per_second": 67.418, + "eval_arxiv_steps_per_second": 0.135, + "eval_arxiv_token_set_f1": 0.37198021860120206, + "eval_arxiv_token_set_f1_sem": 0.004738087124722292, + "eval_arxiv_token_set_precision": 0.3226906643637732, + "eval_arxiv_token_set_recall": 0.4562920586465375, + "eval_arxiv_true_num_tokens": 64.0, + "step": 1209 + }, + { + "epoch": 5.33, + "eval_python_code_alpaca_accuracy": 0.155625, + "eval_python_code_alpaca_bleu_score": 5.315662477971717, + "eval_python_code_alpaca_bleu_score_sem": 0.16177645145891859, + "eval_python_code_alpaca_emb_cos_sim": 0.7812846899032593, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.003370661144587884, + "eval_python_code_alpaca_emb_top1_equal": 0.1599999964237213, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.016411540042267993, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.7277579307556152, + "eval_python_code_alpaca_n_ngrams_match_1": 10.196, + "eval_python_code_alpaca_n_ngrams_match_2": 3.118, + "eval_python_code_alpaca_n_ngrams_match_3": 1.054, + "eval_python_code_alpaca_num_pred_words": 39.74, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 15.298548134955924, + "eval_python_code_alpaca_pred_num_tokens": 61.9453125, + "eval_python_code_alpaca_rouge_score": 0.38760636668818693, + "eval_python_code_alpaca_runtime": 7.2205, + "eval_python_code_alpaca_samples_per_second": 69.247, + "eval_python_code_alpaca_steps_per_second": 0.138, + "eval_python_code_alpaca_token_set_f1": 0.4748953776740159, + "eval_python_code_alpaca_token_set_f1_sem": 0.005199266059350083, + "eval_python_code_alpaca_token_set_precision": 0.5677066819762692, + "eval_python_code_alpaca_token_set_recall": 0.42523396627114723, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 1209 + }, + { + "epoch": 5.33, + "eval_wikibio_accuracy": 0.36765625, + "eval_wikibio_bleu_score": 5.031924327295865, + "eval_wikibio_bleu_score_sem": 0.20167395499192434, + "eval_wikibio_emb_cos_sim": 0.7238757610321045, + "eval_wikibio_emb_cos_sim_sem": 0.0063559130727577655, + "eval_wikibio_emb_top1_equal": 0.15800000727176666, + "eval_wikibio_emb_top1_equal_sem": 0.016328049428381567, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.33139705657959, + "eval_wikibio_n_ngrams_match_1": 8.876, + "eval_wikibio_n_ngrams_match_2": 2.702, + "eval_wikibio_n_ngrams_match_3": 1.0, + "eval_wikibio_num_pred_words": 32.756, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 27.977400424604888, + "eval_wikibio_pred_num_tokens": 62.984375, + "eval_wikibio_rouge_score": 0.31450113973078364, + "eval_wikibio_runtime": 8.0852, + "eval_wikibio_samples_per_second": 61.841, + "eval_wikibio_steps_per_second": 0.124, + "eval_wikibio_token_set_f1": 0.2843410825236571, + "eval_wikibio_token_set_f1_sem": 0.006492023793848845, + "eval_wikibio_token_set_precision": 0.28389924039721054, + "eval_wikibio_token_set_recall": 0.3075195178220353, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 1209 + }, + { + "epoch": 5.33, + "eval_bias-bios_accuracy": 0.51425, + "eval_bias-bios_bleu_score": 17.79308714162288, + "eval_bias-bios_bleu_score_sem": 0.7377277793877473, + "eval_bias-bios_emb_cos_sim": 0.8805733919143677, + "eval_bias-bios_emb_cos_sim_sem": 0.0029906071200763297, + "eval_bias-bios_emb_top1_equal": 0.3720000088214874, + "eval_bias-bios_emb_top1_equal_sem": 0.021637198413078103, + "eval_bias-bios_exact_match": 0.0, + "eval_bias-bios_exact_match_sem": 0.0, + "eval_bias-bios_loss": 1.7840691804885864, + "eval_bias-bios_n_ngrams_match_1": 22.868, + "eval_bias-bios_n_ngrams_match_2": 10.366, + "eval_bias-bios_n_ngrams_match_3": 5.742, + "eval_bias-bios_num_pred_words": 46.95, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 5.9540352352511, + "eval_bias-bios_pred_num_tokens": 62.234375, + "eval_bias-bios_rouge_score": 0.5226844675142479, + "eval_bias-bios_runtime": 8.2095, + "eval_bias-bios_samples_per_second": 60.905, + "eval_bias-bios_steps_per_second": 0.122, + "eval_bias-bios_token_set_f1": 0.5565963382730305, + "eval_bias-bios_token_set_f1_sem": 0.006584560499707022, + "eval_bias-bios_token_set_precision": 0.548469607354972, + "eval_bias-bios_token_set_recall": 0.5765310501058738, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 1209 + }, + { + "epoch": 5.34, + "learning_rate": 0.001, + "loss": 2.0017, + "step": 1212 + }, + { + "epoch": 5.39, + "learning_rate": 0.001, + "loss": 1.8471, + "step": 1224 + }, + { + "epoch": 5.44, + "learning_rate": 0.001, + "loss": 1.6843, + "step": 1236 + }, + { + "epoch": 5.46, + "eval_ag_news_accuracy": 0.3058125, + "eval_ag_news_bleu_score": 4.228804372282374, + "eval_ag_news_bleu_score_sem": 0.16390794415177778, + "eval_ag_news_emb_cos_sim": 0.8003663420677185, + "eval_ag_news_emb_cos_sim_sem": 0.004822506915801103, + "eval_ag_news_emb_top1_equal": 0.2619999945163727, + "eval_ag_news_emb_top1_equal_sem": 0.019684689846225335, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.58817720413208, + "eval_ag_news_n_ngrams_match_1": 10.972, + "eval_ag_news_n_ngrams_match_2": 2.226, + "eval_ag_news_n_ngrams_match_3": 0.576, + "eval_ag_news_num_pred_words": 27.282, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 36.16808876143634, + "eval_ag_news_pred_num_tokens": 42.375, + "eval_ag_news_rouge_score": 0.35711205410770497, + "eval_ag_news_runtime": 7.3843, + "eval_ag_news_samples_per_second": 67.711, + "eval_ag_news_steps_per_second": 0.135, + "eval_ag_news_token_set_f1": 0.3399839974085392, + "eval_ag_news_token_set_f1_sem": 0.004867167151513755, + "eval_ag_news_token_set_precision": 0.28651641622207985, + "eval_ag_news_token_set_recall": 0.43905283960004043, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 1240 + }, + { + "epoch": 5.46, + "eval_anthropic_toxic_prompts_accuracy": 0.110375, + "eval_anthropic_toxic_prompts_bleu_score": 5.934900892843169, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.22076600473926677, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.7030026912689209, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004729080608319704, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.15000000596046448, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01598471338779901, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 2.920982599258423, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.55, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.668, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.616, + "eval_anthropic_toxic_prompts_num_pred_words": 23.468, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 18.55951506880603, + "eval_anthropic_toxic_prompts_pred_num_tokens": 33.9296875, + "eval_anthropic_toxic_prompts_rouge_score": 0.32892340617179394, + "eval_anthropic_toxic_prompts_runtime": 7.206, + "eval_anthropic_toxic_prompts_samples_per_second": 69.386, + "eval_anthropic_toxic_prompts_steps_per_second": 0.139, + "eval_anthropic_toxic_prompts_token_set_f1": 0.35867153576994054, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00592727226660465, + "eval_anthropic_toxic_prompts_token_set_precision": 0.4169835141258158, + "eval_anthropic_toxic_prompts_token_set_recall": 0.3380541481103269, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 1240 + }, + { + "epoch": 5.46, + "eval_arxiv_accuracy": 0.415, + "eval_arxiv_bleu_score": 3.549095877403326, + "eval_arxiv_bleu_score_sem": 0.10135517036798959, + "eval_arxiv_emb_cos_sim": 0.7511804103851318, + "eval_arxiv_emb_cos_sim_sem": 0.00503978736332344, + "eval_arxiv_emb_top1_equal": 0.17399999499320984, + "eval_arxiv_emb_top1_equal_sem": 0.016971269551723376, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.0753817558288574, + "eval_arxiv_n_ngrams_match_1": 13.332, + "eval_arxiv_n_ngrams_match_2": 2.414, + "eval_arxiv_n_ngrams_match_3": 0.494, + "eval_arxiv_num_pred_words": 28.782, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 21.658148457926593, + "eval_arxiv_pred_num_tokens": 47.6796875, + "eval_arxiv_rouge_score": 0.36617538747351475, + "eval_arxiv_runtime": 7.5013, + "eval_arxiv_samples_per_second": 66.655, + "eval_arxiv_steps_per_second": 0.133, + "eval_arxiv_token_set_f1": 0.3651676595945922, + "eval_arxiv_token_set_f1_sem": 0.004322873802341711, + "eval_arxiv_token_set_precision": 0.3020699734995879, + "eval_arxiv_token_set_recall": 0.475372664607063, + "eval_arxiv_true_num_tokens": 64.0, + "step": 1240 + }, + { + "epoch": 5.46, + "eval_python_code_alpaca_accuracy": 0.16234375, + "eval_python_code_alpaca_bleu_score": 8.203572226303173, + "eval_python_code_alpaca_bleu_score_sem": 0.2808840321062194, + "eval_python_code_alpaca_emb_cos_sim": 0.7998011112213135, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.003771246625675064, + "eval_python_code_alpaca_emb_top1_equal": 0.21199999749660492, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.01829703673906991, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.4834351539611816, + "eval_python_code_alpaca_n_ngrams_match_1": 9.048, + "eval_python_code_alpaca_n_ngrams_match_2": 2.492, + "eval_python_code_alpaca_n_ngrams_match_3": 0.828, + "eval_python_code_alpaca_num_pred_words": 22.6, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 11.98235503550787, + "eval_python_code_alpaca_pred_num_tokens": 35.3125, + "eval_python_code_alpaca_rouge_score": 0.48279035321359737, + "eval_python_code_alpaca_runtime": 7.066, + "eval_python_code_alpaca_samples_per_second": 70.762, + "eval_python_code_alpaca_steps_per_second": 0.142, + "eval_python_code_alpaca_token_set_f1": 0.5002778659188205, + "eval_python_code_alpaca_token_set_f1_sem": 0.005630725813805154, + "eval_python_code_alpaca_token_set_precision": 0.5206289500177254, + "eval_python_code_alpaca_token_set_recall": 0.49733067164037936, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 1240 + }, + { + "epoch": 5.46, + "eval_wikibio_accuracy": 0.35940625, + "eval_wikibio_bleu_score": 6.0215781318391235, + "eval_wikibio_bleu_score_sem": 0.22700354104088374, + "eval_wikibio_emb_cos_sim": 0.7536024451255798, + "eval_wikibio_emb_cos_sim_sem": 0.005223740804270685, + "eval_wikibio_emb_top1_equal": 0.20600000023841858, + "eval_wikibio_emb_top1_equal_sem": 0.018104793612990725, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.4180655479431152, + "eval_wikibio_n_ngrams_match_1": 9.248, + "eval_wikibio_n_ngrams_match_2": 2.878, + "eval_wikibio_n_ngrams_match_3": 1.05, + "eval_wikibio_num_pred_words": 30.128, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 30.510337113397938, + "eval_wikibio_pred_num_tokens": 53.328125, + "eval_wikibio_rouge_score": 0.35471940693449555, + "eval_wikibio_runtime": 7.3318, + "eval_wikibio_samples_per_second": 68.196, + "eval_wikibio_steps_per_second": 0.136, + "eval_wikibio_token_set_f1": 0.3156909691627191, + "eval_wikibio_token_set_f1_sem": 0.005623957716607399, + "eval_wikibio_token_set_precision": 0.30678649564693816, + "eval_wikibio_token_set_recall": 0.3410327489783421, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 1240 + }, + { + "epoch": 5.46, + "eval_bias-bios_accuracy": 0.51178125, + "eval_bias-bios_bleu_score": 17.90672965013963, + "eval_bias-bios_bleu_score_sem": 0.8619788872673458, + "eval_bias-bios_emb_cos_sim": 0.8670877814292908, + "eval_bias-bios_emb_cos_sim_sem": 0.0034544596353609193, + "eval_bias-bios_emb_top1_equal": 0.3100000023841858, + "eval_bias-bios_emb_top1_equal_sem": 0.020704040896175106, + "eval_bias-bios_exact_match": 0.004, + "eval_bias-bios_exact_match_sem": 0.002825591608118863, + "eval_bias-bios_loss": 1.797906756401062, + "eval_bias-bios_n_ngrams_match_1": 19.478, + "eval_bias-bios_n_ngrams_match_2": 8.804, + "eval_bias-bios_n_ngrams_match_3": 5.016, + "eval_bias-bios_num_pred_words": 32.024, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 6.036997323114941, + "eval_bias-bios_pred_num_tokens": 43.1640625, + "eval_bias-bios_rouge_score": 0.5462596720572078, + "eval_bias-bios_runtime": 7.3889, + "eval_bias-bios_samples_per_second": 67.669, + "eval_bias-bios_steps_per_second": 0.135, + "eval_bias-bios_token_set_f1": 0.554597593898957, + "eval_bias-bios_token_set_f1_sem": 0.006986531889248037, + "eval_bias-bios_token_set_precision": 0.4988897214671942, + "eval_bias-bios_token_set_recall": 0.6389780375672935, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 1240 + }, + { + "epoch": 5.5, + "learning_rate": 0.001, + "loss": 2.0709, + "step": 1248 + }, + { + "epoch": 5.55, + "learning_rate": 0.001, + "loss": 2.0069, + "step": 1260 + }, + { + "epoch": 5.6, + "eval_ag_news_accuracy": 0.3023125, + "eval_ag_news_bleu_score": 4.57337402880596, + "eval_ag_news_bleu_score_sem": 0.14544406424535272, + "eval_ag_news_emb_cos_sim": 0.8080353140830994, + "eval_ag_news_emb_cos_sim_sem": 0.004815184510541266, + "eval_ag_news_emb_top1_equal": 0.27799999713897705, + "eval_ag_news_emb_top1_equal_sem": 0.0200558347666307, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.5642201900482178, + "eval_ag_news_n_ngrams_match_1": 12.77, + "eval_ag_news_n_ngrams_match_2": 2.6, + "eval_ag_news_n_ngrams_match_3": 0.656, + "eval_ag_news_num_pred_words": 37.444, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 35.31190608905573, + "eval_ag_news_pred_num_tokens": 55.9453125, + "eval_ag_news_rouge_score": 0.36077600319081815, + "eval_ag_news_runtime": 7.4135, + "eval_ag_news_samples_per_second": 67.445, + "eval_ag_news_steps_per_second": 0.135, + "eval_ag_news_token_set_f1": 0.34675509763818224, + "eval_ag_news_token_set_f1_sem": 0.004742689533799806, + "eval_ag_news_token_set_precision": 0.3147625166746689, + "eval_ag_news_token_set_recall": 0.4019475532268016, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 1271 + }, + { + "epoch": 5.6, + "eval_anthropic_toxic_prompts_accuracy": 0.106625, + "eval_anthropic_toxic_prompts_bleu_score": 4.09428074697677, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.14453180124028525, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6872526407241821, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004509919579468347, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.16599999368190765, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.016656615375209204, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 3.0059316158294678, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.926, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.77, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.64, + "eval_anthropic_toxic_prompts_num_pred_words": 34.678, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 20.205030657050173, + "eval_anthropic_toxic_prompts_pred_num_tokens": 50.5, + "eval_anthropic_toxic_prompts_rouge_score": 0.2663162692915284, + "eval_anthropic_toxic_prompts_runtime": 7.2083, + "eval_anthropic_toxic_prompts_samples_per_second": 69.364, + "eval_anthropic_toxic_prompts_steps_per_second": 0.139, + "eval_anthropic_toxic_prompts_token_set_f1": 0.3440817895035503, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0057425914365728204, + "eval_anthropic_toxic_prompts_token_set_precision": 0.4288524547204851, + "eval_anthropic_toxic_prompts_token_set_recall": 0.31069896013840936, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 1271 + }, + { + "epoch": 5.6, + "eval_arxiv_accuracy": 0.42084375, + "eval_arxiv_bleu_score": 4.39097555111001, + "eval_arxiv_bleu_score_sem": 0.11792424158293455, + "eval_arxiv_emb_cos_sim": 0.7578997611999512, + "eval_arxiv_emb_cos_sim_sem": 0.004245361370646482, + "eval_arxiv_emb_top1_equal": 0.23000000417232513, + "eval_arxiv_emb_top1_equal_sem": 0.018839050665941787, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.0509696006774902, + "eval_arxiv_n_ngrams_match_1": 15.062, + "eval_arxiv_n_ngrams_match_2": 2.924, + "eval_arxiv_n_ngrams_match_3": 0.646, + "eval_arxiv_num_pred_words": 36.756, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 21.13582780354345, + "eval_arxiv_pred_num_tokens": 58.8359375, + "eval_arxiv_rouge_score": 0.3717338045573107, + "eval_arxiv_runtime": 7.611, + "eval_arxiv_samples_per_second": 65.695, + "eval_arxiv_steps_per_second": 0.131, + "eval_arxiv_token_set_f1": 0.37075290043526415, + "eval_arxiv_token_set_f1_sem": 0.004148483552312404, + "eval_arxiv_token_set_precision": 0.3164356851337472, + "eval_arxiv_token_set_recall": 0.4646605842101968, + "eval_arxiv_true_num_tokens": 64.0, + "step": 1271 + }, + { + "epoch": 5.6, + "eval_python_code_alpaca_accuracy": 0.15196875, + "eval_python_code_alpaca_bleu_score": 5.519793427598636, + "eval_python_code_alpaca_bleu_score_sem": 0.17277299116506056, + "eval_python_code_alpaca_emb_cos_sim": 0.7638775706291199, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.003983409786157877, + "eval_python_code_alpaca_emb_top1_equal": 0.18400000035762787, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.017346172969186033, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.689002513885498, + "eval_python_code_alpaca_n_ngrams_match_1": 9.65, + "eval_python_code_alpaca_n_ngrams_match_2": 2.62, + "eval_python_code_alpaca_n_ngrams_match_3": 0.794, + "eval_python_code_alpaca_num_pred_words": 33.444, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 14.716988604683149, + "eval_python_code_alpaca_pred_num_tokens": 52.4375, + "eval_python_code_alpaca_rouge_score": 0.4092617957255468, + "eval_python_code_alpaca_runtime": 7.0203, + "eval_python_code_alpaca_samples_per_second": 71.222, + "eval_python_code_alpaca_steps_per_second": 0.142, + "eval_python_code_alpaca_token_set_f1": 0.478976922517982, + "eval_python_code_alpaca_token_set_f1_sem": 0.005203506678295278, + "eval_python_code_alpaca_token_set_precision": 0.5368041712791153, + "eval_python_code_alpaca_token_set_recall": 0.45078687179454685, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 1271 + }, + { + "epoch": 5.6, + "eval_wikibio_accuracy": 0.3555, + "eval_wikibio_bleu_score": 5.2488485705282795, + "eval_wikibio_bleu_score_sem": 0.21298491513749515, + "eval_wikibio_emb_cos_sim": 0.71724534034729, + "eval_wikibio_emb_cos_sim_sem": 0.0063786226583461125, + "eval_wikibio_emb_top1_equal": 0.15000000596046448, + "eval_wikibio_emb_top1_equal_sem": 0.01598471338779901, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.4222168922424316, + "eval_wikibio_n_ngrams_match_1": 8.358, + "eval_wikibio_n_ngrams_match_2": 2.702, + "eval_wikibio_n_ngrams_match_3": 1.016, + "eval_wikibio_num_pred_words": 30.586, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 30.637259294001566, + "eval_wikibio_pred_num_tokens": 61.0390625, + "eval_wikibio_rouge_score": 0.30777700457295964, + "eval_wikibio_runtime": 7.2697, + "eval_wikibio_samples_per_second": 68.779, + "eval_wikibio_steps_per_second": 0.138, + "eval_wikibio_token_set_f1": 0.2778107394683041, + "eval_wikibio_token_set_f1_sem": 0.006709734336985542, + "eval_wikibio_token_set_precision": 0.27087812641060205, + "eval_wikibio_token_set_recall": 0.31154529694026967, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 1271 + }, + { + "epoch": 5.6, + "eval_bias-bios_accuracy": 0.5176875, + "eval_bias-bios_bleu_score": 18.540615663972712, + "eval_bias-bios_bleu_score_sem": 0.797121209731287, + "eval_bias-bios_emb_cos_sim": 0.872423529624939, + "eval_bias-bios_emb_cos_sim_sem": 0.0031692606803326844, + "eval_bias-bios_emb_top1_equal": 0.3400000035762787, + "eval_bias-bios_emb_top1_equal_sem": 0.021206117459812355, + "eval_bias-bios_exact_match": 0.0, + "eval_bias-bios_exact_match_sem": 0.0, + "eval_bias-bios_loss": 1.7669774293899536, + "eval_bias-bios_n_ngrams_match_1": 21.822, + "eval_bias-bios_n_ngrams_match_2": 9.798, + "eval_bias-bios_n_ngrams_match_3": 5.486, + "eval_bias-bios_num_pred_words": 41.998, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 5.853135083419125, + "eval_bias-bios_pred_num_tokens": 56.09375, + "eval_bias-bios_rouge_score": 0.5323654174564787, + "eval_bias-bios_runtime": 7.5551, + "eval_bias-bios_samples_per_second": 66.181, + "eval_bias-bios_steps_per_second": 0.132, + "eval_bias-bios_token_set_f1": 0.5523502621363561, + "eval_bias-bios_token_set_f1_sem": 0.006720164885935815, + "eval_bias-bios_token_set_precision": 0.5282085906273399, + "eval_bias-bios_token_set_recall": 0.5897559634360018, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 1271 + }, + { + "epoch": 5.6, + "learning_rate": 0.001, + "loss": 1.897, + "step": 1272 + }, + { + "epoch": 5.66, + "learning_rate": 0.001, + "loss": 1.6563, + "step": 1284 + }, + { + "epoch": 5.71, + "learning_rate": 0.001, + "loss": 2.0459, + "step": 1296 + }, + { + "epoch": 5.74, + "eval_ag_news_accuracy": 0.3018125, + "eval_ag_news_bleu_score": 4.580675351931296, + "eval_ag_news_bleu_score_sem": 0.1390842938196433, + "eval_ag_news_emb_cos_sim": 0.8149040937423706, + "eval_ag_news_emb_cos_sim_sem": 0.0040050554638469404, + "eval_ag_news_emb_top1_equal": 0.2840000092983246, + "eval_ag_news_emb_top1_equal_sem": 0.020186705101045338, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.540907621383667, + "eval_ag_news_n_ngrams_match_1": 13.236, + "eval_ag_news_n_ngrams_match_2": 2.768, + "eval_ag_news_n_ngrams_match_3": 0.722, + "eval_ag_news_num_pred_words": 42.296, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 34.498216304560465, + "eval_ag_news_pred_num_tokens": 61.8515625, + "eval_ag_news_rouge_score": 0.3541570127113056, + "eval_ag_news_runtime": 7.5701, + "eval_ag_news_samples_per_second": 66.049, + "eval_ag_news_steps_per_second": 0.132, + "eval_ag_news_token_set_f1": 0.3461913907982923, + "eval_ag_news_token_set_f1_sem": 0.004594991147194266, + "eval_ag_news_token_set_precision": 0.32251028718278496, + "eval_ag_news_token_set_recall": 0.39702258975572, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 1302 + }, + { + "epoch": 5.74, + "eval_anthropic_toxic_prompts_accuracy": 0.10628125, + "eval_anthropic_toxic_prompts_bleu_score": 3.4424543830075285, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12195139246443613, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.684529185295105, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004560404391670038, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.11599999666213989, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01433523564539069, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 3.1139750480651855, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.188, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.886, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.73, + "eval_anthropic_toxic_prompts_num_pred_words": 42.012, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 22.510346502395596, + "eval_anthropic_toxic_prompts_pred_num_tokens": 60.1953125, + "eval_anthropic_toxic_prompts_rouge_score": 0.23978661530396744, + "eval_anthropic_toxic_prompts_runtime": 7.1684, + "eval_anthropic_toxic_prompts_samples_per_second": 69.75, + "eval_anthropic_toxic_prompts_steps_per_second": 0.14, + "eval_anthropic_toxic_prompts_token_set_f1": 0.3326568724343772, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00553262305039052, + "eval_anthropic_toxic_prompts_token_set_precision": 0.44904456927897024, + "eval_anthropic_toxic_prompts_token_set_recall": 0.2878560045999485, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 1302 + }, + { + "epoch": 5.74, + "eval_arxiv_accuracy": 0.42740625, + "eval_arxiv_bleu_score": 4.420526084563949, + "eval_arxiv_bleu_score_sem": 0.12220117044334794, + "eval_arxiv_emb_cos_sim": 0.7494902610778809, + "eval_arxiv_emb_cos_sim_sem": 0.004779517106507803, + "eval_arxiv_emb_top1_equal": 0.25600001215934753, + "eval_arxiv_emb_top1_equal_sem": 0.019536923601457774, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.009330987930298, + "eval_arxiv_n_ngrams_match_1": 15.288, + "eval_arxiv_n_ngrams_match_2": 2.898, + "eval_arxiv_n_ngrams_match_3": 0.646, + "eval_arxiv_num_pred_words": 39.268, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 20.273831948904096, + "eval_arxiv_pred_num_tokens": 62.5625, + "eval_arxiv_rouge_score": 0.362846579847411, + "eval_arxiv_runtime": 8.3104, + "eval_arxiv_samples_per_second": 60.165, + "eval_arxiv_steps_per_second": 0.12, + "eval_arxiv_token_set_f1": 0.36629274140253887, + "eval_arxiv_token_set_f1_sem": 0.0044614996118577475, + "eval_arxiv_token_set_precision": 0.317322733552626, + "eval_arxiv_token_set_recall": 0.452008425847499, + "eval_arxiv_true_num_tokens": 64.0, + "step": 1302 + }, + { + "epoch": 5.74, + "eval_python_code_alpaca_accuracy": 0.153, + "eval_python_code_alpaca_bleu_score": 5.177949737443402, + "eval_python_code_alpaca_bleu_score_sem": 0.1580344328908884, + "eval_python_code_alpaca_emb_cos_sim": 0.769737720489502, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.0035329592450433463, + "eval_python_code_alpaca_emb_top1_equal": 0.18400000035762787, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.017346174301986407, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.746436834335327, + "eval_python_code_alpaca_n_ngrams_match_1": 9.89, + "eval_python_code_alpaca_n_ngrams_match_2": 2.796, + "eval_python_code_alpaca_n_ngrams_match_3": 0.946, + "eval_python_code_alpaca_num_pred_words": 38.078, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 15.586993778113719, + "eval_python_code_alpaca_pred_num_tokens": 59.9921875, + "eval_python_code_alpaca_rouge_score": 0.391759995715714, + "eval_python_code_alpaca_runtime": 7.2752, + "eval_python_code_alpaca_samples_per_second": 68.726, + "eval_python_code_alpaca_steps_per_second": 0.137, + "eval_python_code_alpaca_token_set_f1": 0.46521206467444476, + "eval_python_code_alpaca_token_set_f1_sem": 0.005278026580205972, + "eval_python_code_alpaca_token_set_precision": 0.5487120124810396, + "eval_python_code_alpaca_token_set_recall": 0.4198012272368391, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 1302 + }, + { + "epoch": 5.74, + "eval_wikibio_accuracy": 0.36684375, + "eval_wikibio_bleu_score": 5.070507207255898, + "eval_wikibio_bleu_score_sem": 0.20825270698295933, + "eval_wikibio_emb_cos_sim": 0.7176789045333862, + "eval_wikibio_emb_cos_sim_sem": 0.0064573131916822474, + "eval_wikibio_emb_top1_equal": 0.18000000715255737, + "eval_wikibio_emb_top1_equal_sem": 0.017198591983670585, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.3135361671447754, + "eval_wikibio_n_ngrams_match_1": 8.63, + "eval_wikibio_n_ngrams_match_2": 2.738, + "eval_wikibio_n_ngrams_match_3": 0.996, + "eval_wikibio_num_pred_words": 32.56, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 27.48213527320049, + "eval_wikibio_pred_num_tokens": 62.9375, + "eval_wikibio_rouge_score": 0.30395403219075934, + "eval_wikibio_runtime": 7.4136, + "eval_wikibio_samples_per_second": 67.444, + "eval_wikibio_steps_per_second": 0.135, + "eval_wikibio_token_set_f1": 0.2788986587618517, + "eval_wikibio_token_set_f1_sem": 0.006604972211474385, + "eval_wikibio_token_set_precision": 0.27549524981074697, + "eval_wikibio_token_set_recall": 0.3093327647420698, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 1302 + }, + { + "epoch": 5.74, + "eval_bias-bios_accuracy": 0.512875, + "eval_bias-bios_bleu_score": 17.587711221111313, + "eval_bias-bios_bleu_score_sem": 0.7582759478456785, + "eval_bias-bios_emb_cos_sim": 0.8744062781333923, + "eval_bias-bios_emb_cos_sim_sem": 0.003047957520209353, + "eval_bias-bios_emb_top1_equal": 0.35199999809265137, + "eval_bias-bios_emb_top1_equal_sem": 0.02138004257753857, + "eval_bias-bios_exact_match": 0.0, + "eval_bias-bios_exact_match_sem": 0.0, + "eval_bias-bios_loss": 1.7857671976089478, + "eval_bias-bios_n_ngrams_match_1": 22.486, + "eval_bias-bios_n_ngrams_match_2": 10.062, + "eval_bias-bios_n_ngrams_match_3": 5.588, + "eval_bias-bios_num_pred_words": 46.814, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 5.964153877398367, + "eval_bias-bios_pred_num_tokens": 61.7265625, + "eval_bias-bios_rouge_score": 0.5167177720702403, + "eval_bias-bios_runtime": 7.5771, + "eval_bias-bios_samples_per_second": 65.989, + "eval_bias-bios_steps_per_second": 0.132, + "eval_bias-bios_token_set_f1": 0.5499504893910986, + "eval_bias-bios_token_set_f1_sem": 0.006585196715222386, + "eval_bias-bios_token_set_precision": 0.541049468479298, + "eval_bias-bios_token_set_recall": 0.5705891060146241, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 1302 + }, + { + "epoch": 5.76, + "learning_rate": 0.001, + "loss": 1.9889, + "step": 1308 + }, + { + "epoch": 5.81, + "learning_rate": 0.001, + "loss": 1.921, + "step": 1320 + }, + { + "epoch": 5.87, + "learning_rate": 0.001, + "loss": 1.6839, + "step": 1332 + }, + { + "epoch": 5.87, + "eval_ag_news_accuracy": 0.30190625, + "eval_ag_news_bleu_score": 4.510702202082297, + "eval_ag_news_bleu_score_sem": 0.1562655783341779, + "eval_ag_news_emb_cos_sim": 0.8097511529922485, + "eval_ag_news_emb_cos_sim_sem": 0.004043597051888928, + "eval_ag_news_emb_top1_equal": 0.2800000011920929, + "eval_ag_news_emb_top1_equal_sem": 0.02009995045904072, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.6359164714813232, + "eval_ag_news_n_ngrams_match_1": 11.704, + "eval_ag_news_n_ngrams_match_2": 2.412, + "eval_ag_news_n_ngrams_match_3": 0.638, + "eval_ag_news_num_pred_words": 30.68, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 37.93660478653705, + "eval_ag_news_pred_num_tokens": 46.9609375, + "eval_ag_news_rouge_score": 0.3586267610476457, + "eval_ag_news_runtime": 7.3684, + "eval_ag_news_samples_per_second": 67.858, + "eval_ag_news_steps_per_second": 0.136, + "eval_ag_news_token_set_f1": 0.3445078990006429, + "eval_ag_news_token_set_f1_sem": 0.004640395360757867, + "eval_ag_news_token_set_precision": 0.2995115498991181, + "eval_ag_news_token_set_recall": 0.4238478297632044, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 1333 + }, + { + "epoch": 5.87, + "eval_anthropic_toxic_prompts_accuracy": 0.10834375, + "eval_anthropic_toxic_prompts_bleu_score": 5.335873092259973, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.200454930491489, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6999314427375793, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004881605616825935, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.15000000596046448, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01598471338779901, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 2.979020833969116, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.722, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.698, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.624, + "eval_anthropic_toxic_prompts_num_pred_words": 26.368, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 19.668548438407626, + "eval_anthropic_toxic_prompts_pred_num_tokens": 37.5859375, + "eval_anthropic_toxic_prompts_rouge_score": 0.310660466066935, + "eval_anthropic_toxic_prompts_runtime": 6.9444, + "eval_anthropic_toxic_prompts_samples_per_second": 72.0, + "eval_anthropic_toxic_prompts_steps_per_second": 0.144, + "eval_anthropic_toxic_prompts_token_set_f1": 0.34998396653391634, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006147953719054929, + "eval_anthropic_toxic_prompts_token_set_precision": 0.42293888028152654, + "eval_anthropic_toxic_prompts_token_set_recall": 0.32233621200100004, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 1333 + }, + { + "epoch": 5.87, + "eval_arxiv_accuracy": 0.41125, + "eval_arxiv_bleu_score": 3.840186807066469, + "eval_arxiv_bleu_score_sem": 0.10961283946182737, + "eval_arxiv_emb_cos_sim": 0.7524436116218567, + "eval_arxiv_emb_cos_sim_sem": 0.0045383505438661535, + "eval_arxiv_emb_top1_equal": 0.15399999916553497, + "eval_arxiv_emb_top1_equal_sem": 0.016158283980625493, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.1682863235473633, + "eval_arxiv_n_ngrams_match_1": 13.868, + "eval_arxiv_n_ngrams_match_2": 2.616, + "eval_arxiv_n_ngrams_match_3": 0.574, + "eval_arxiv_num_pred_words": 30.132, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 23.76672096869516, + "eval_arxiv_pred_num_tokens": 49.046875, + "eval_arxiv_rouge_score": 0.3729047020927051, + "eval_arxiv_runtime": 7.256, + "eval_arxiv_samples_per_second": 68.909, + "eval_arxiv_steps_per_second": 0.138, + "eval_arxiv_token_set_f1": 0.37123502434387323, + "eval_arxiv_token_set_f1_sem": 0.004258247789180813, + "eval_arxiv_token_set_precision": 0.3095713620207935, + "eval_arxiv_token_set_recall": 0.4767404887052051, + "eval_arxiv_true_num_tokens": 64.0, + "step": 1333 + }, + { + "epoch": 5.87, + "eval_python_code_alpaca_accuracy": 0.158, + "eval_python_code_alpaca_bleu_score": 7.187326839919982, + "eval_python_code_alpaca_bleu_score_sem": 0.231031260944302, + "eval_python_code_alpaca_emb_cos_sim": 0.7905387282371521, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.0032995285889778547, + "eval_python_code_alpaca_emb_top1_equal": 0.20999999344348907, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.0182336207644306, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.565624713897705, + "eval_python_code_alpaca_n_ngrams_match_1": 9.234, + "eval_python_code_alpaca_n_ngrams_match_2": 2.432, + "eval_python_code_alpaca_n_ngrams_match_3": 0.782, + "eval_python_code_alpaca_num_pred_words": 25.644, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 13.008782599028763, + "eval_python_code_alpaca_pred_num_tokens": 38.703125, + "eval_python_code_alpaca_rouge_score": 0.46299269220710015, + "eval_python_code_alpaca_runtime": 6.9473, + "eval_python_code_alpaca_samples_per_second": 71.971, + "eval_python_code_alpaca_steps_per_second": 0.144, + "eval_python_code_alpaca_token_set_f1": 0.4906910225784128, + "eval_python_code_alpaca_token_set_f1_sem": 0.005492782455485606, + "eval_python_code_alpaca_token_set_precision": 0.5259909652905059, + "eval_python_code_alpaca_token_set_recall": 0.47641560708165553, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 1333 + }, + { + "epoch": 5.87, + "eval_wikibio_accuracy": 0.348375, + "eval_wikibio_bleu_score": 6.015703013618571, + "eval_wikibio_bleu_score_sem": 0.24475698963490608, + "eval_wikibio_emb_cos_sim": 0.7452252507209778, + "eval_wikibio_emb_cos_sim_sem": 0.005492733573140019, + "eval_wikibio_emb_top1_equal": 0.18000000715255737, + "eval_wikibio_emb_top1_equal_sem": 0.017198591983670585, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.548689126968384, + "eval_wikibio_n_ngrams_match_1": 8.922, + "eval_wikibio_n_ngrams_match_2": 2.732, + "eval_wikibio_n_ngrams_match_3": 0.974, + "eval_wikibio_num_pred_words": 28.69, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 34.76771154689603, + "eval_wikibio_pred_num_tokens": 51.3984375, + "eval_wikibio_rouge_score": 0.3529826805446006, + "eval_wikibio_runtime": 7.288, + "eval_wikibio_samples_per_second": 68.606, + "eval_wikibio_steps_per_second": 0.137, + "eval_wikibio_token_set_f1": 0.30786314677145327, + "eval_wikibio_token_set_f1_sem": 0.005744181551871143, + "eval_wikibio_token_set_precision": 0.2945685099531984, + "eval_wikibio_token_set_recall": 0.34009161709370883, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 1333 + }, + { + "epoch": 5.87, + "eval_bias-bios_accuracy": 0.51409375, + "eval_bias-bios_bleu_score": 18.601895243693658, + "eval_bias-bios_bleu_score_sem": 0.8395960259213998, + "eval_bias-bios_emb_cos_sim": 0.8698825836181641, + "eval_bias-bios_emb_cos_sim_sem": 0.003303766227769929, + "eval_bias-bios_emb_top1_equal": 0.3179999887943268, + "eval_bias-bios_emb_top1_equal_sem": 0.02084757283415153, + "eval_bias-bios_exact_match": 0.002, + "eval_bias-bios_exact_match_sem": 0.002, + "eval_bias-bios_loss": 1.8061383962631226, + "eval_bias-bios_n_ngrams_match_1": 20.428, + "eval_bias-bios_n_ngrams_match_2": 9.206, + "eval_bias-bios_n_ngrams_match_3": 5.208, + "eval_bias-bios_num_pred_words": 34.672, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 6.086896806450112, + "eval_bias-bios_pred_num_tokens": 47.1875, + "eval_bias-bios_rouge_score": 0.547141850002661, + "eval_bias-bios_runtime": 7.4016, + "eval_bias-bios_samples_per_second": 67.553, + "eval_bias-bios_steps_per_second": 0.135, + "eval_bias-bios_token_set_f1": 0.554887604686961, + "eval_bias-bios_token_set_f1_sem": 0.0068598685232604895, + "eval_bias-bios_token_set_precision": 0.5098140838716572, + "eval_bias-bios_token_set_recall": 0.6214830682766334, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 1333 + }, + { + "epoch": 5.92, + "learning_rate": 0.001, + "loss": 1.9777, + "step": 1344 + }, + { + "epoch": 5.97, + "learning_rate": 0.001, + "loss": 1.9223, + "step": 1356 + }, + { + "epoch": 6.01, + "eval_ag_news_accuracy": 0.30090625, + "eval_ag_news_bleu_score": 4.229521606856611, + "eval_ag_news_bleu_score_sem": 0.16599247987237792, + "eval_ag_news_emb_cos_sim": 0.793785810470581, + "eval_ag_news_emb_cos_sim_sem": 0.00425375801300851, + "eval_ag_news_emb_top1_equal": 0.23000000417232513, + "eval_ag_news_emb_top1_equal_sem": 0.018839050665941787, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.6538124084472656, + "eval_ag_news_n_ngrams_match_1": 10.808, + "eval_ag_news_n_ngrams_match_2": 2.226, + "eval_ag_news_n_ngrams_match_3": 0.64, + "eval_ag_news_num_pred_words": 27.13, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 38.62162715075604, + "eval_ag_news_pred_num_tokens": 39.921875, + "eval_ag_news_rouge_score": 0.34952438531194774, + "eval_ag_news_runtime": 7.4075, + "eval_ag_news_samples_per_second": 67.499, + "eval_ag_news_steps_per_second": 0.135, + "eval_ag_news_token_set_f1": 0.33620576435916294, + "eval_ag_news_token_set_f1_sem": 0.0046712207071122, + "eval_ag_news_token_set_precision": 0.2825569694260224, + "eval_ag_news_token_set_recall": 0.43444870854553147, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 1364 + }, + { + "epoch": 6.01, + "eval_anthropic_toxic_prompts_accuracy": 0.11184375, + "eval_anthropic_toxic_prompts_bleu_score": 6.414028894652578, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.23846858566105303, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.7042384147644043, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004670806244326763, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.15800000727176666, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.016328049428381567, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 2.9145073890686035, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.608, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.684, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.61, + "eval_anthropic_toxic_prompts_num_pred_words": 21.97, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 18.43972655416395, + "eval_anthropic_toxic_prompts_pred_num_tokens": 30.3046875, + "eval_anthropic_toxic_prompts_rouge_score": 0.34220573788923403, + "eval_anthropic_toxic_prompts_runtime": 7.0386, + "eval_anthropic_toxic_prompts_samples_per_second": 71.037, + "eval_anthropic_toxic_prompts_steps_per_second": 0.142, + "eval_anthropic_toxic_prompts_token_set_f1": 0.3608022356241281, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006174602651222466, + "eval_anthropic_toxic_prompts_token_set_precision": 0.41874715816277824, + "eval_anthropic_toxic_prompts_token_set_recall": 0.3403751793504618, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 1364 + }, + { + "epoch": 6.01, + "eval_arxiv_accuracy": 0.4129375, + "eval_arxiv_bleu_score": 3.5049575512352105, + "eval_arxiv_bleu_score_sem": 0.10327675713543369, + "eval_arxiv_emb_cos_sim": 0.746076226234436, + "eval_arxiv_emb_cos_sim_sem": 0.003951010408643731, + "eval_arxiv_emb_top1_equal": 0.16599999368190765, + "eval_arxiv_emb_top1_equal_sem": 0.016656615375209204, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.1462719440460205, + "eval_arxiv_n_ngrams_match_1": 13.094, + "eval_arxiv_n_ngrams_match_2": 2.47, + "eval_arxiv_n_ngrams_match_3": 0.534, + "eval_arxiv_num_pred_words": 27.968, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 23.24922839209836, + "eval_arxiv_pred_num_tokens": 44.546875, + "eval_arxiv_rouge_score": 0.36512357189825984, + "eval_arxiv_runtime": 7.3317, + "eval_arxiv_samples_per_second": 68.197, + "eval_arxiv_steps_per_second": 0.136, + "eval_arxiv_token_set_f1": 0.36415789710214524, + "eval_arxiv_token_set_f1_sem": 0.003994316825023978, + "eval_arxiv_token_set_precision": 0.2976549374135428, + "eval_arxiv_token_set_recall": 0.48209242048316353, + "eval_arxiv_true_num_tokens": 64.0, + "step": 1364 + }, + { + "epoch": 6.01, + "eval_python_code_alpaca_accuracy": 0.16503125, + "eval_python_code_alpaca_bleu_score": 8.367728619471684, + "eval_python_code_alpaca_bleu_score_sem": 0.28630724669936525, + "eval_python_code_alpaca_emb_cos_sim": 0.7989581823348999, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.0036343343739607984, + "eval_python_code_alpaca_emb_top1_equal": 0.23000000417232513, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.018839050665941787, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.4538772106170654, + "eval_python_code_alpaca_n_ngrams_match_1": 8.802, + "eval_python_code_alpaca_n_ngrams_match_2": 2.304, + "eval_python_code_alpaca_n_ngrams_match_3": 0.732, + "eval_python_code_alpaca_num_pred_words": 20.898, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 11.633364395268169, + "eval_python_code_alpaca_pred_num_tokens": 31.390625, + "eval_python_code_alpaca_rouge_score": 0.49621776374006266, + "eval_python_code_alpaca_runtime": 7.0134, + "eval_python_code_alpaca_samples_per_second": 71.292, + "eval_python_code_alpaca_steps_per_second": 0.143, + "eval_python_code_alpaca_token_set_f1": 0.5008596100567906, + "eval_python_code_alpaca_token_set_f1_sem": 0.005744874789612027, + "eval_python_code_alpaca_token_set_precision": 0.5180000103019801, + "eval_python_code_alpaca_token_set_recall": 0.5024754775785634, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 1364 + }, + { + "epoch": 6.01, + "eval_wikibio_accuracy": 0.3526875, + "eval_wikibio_bleu_score": 6.186516290284805, + "eval_wikibio_bleu_score_sem": 0.23684423168356836, + "eval_wikibio_emb_cos_sim": 0.7449456453323364, + "eval_wikibio_emb_cos_sim_sem": 0.004893123011265849, + "eval_wikibio_emb_top1_equal": 0.17399999499320984, + "eval_wikibio_emb_top1_equal_sem": 0.016971270884523753, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.5559232234954834, + "eval_wikibio_n_ngrams_match_1": 9.038, + "eval_wikibio_n_ngrams_match_2": 2.814, + "eval_wikibio_n_ngrams_match_3": 1.056, + "eval_wikibio_num_pred_words": 28.844, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 35.02013646052084, + "eval_wikibio_pred_num_tokens": 51.984375, + "eval_wikibio_rouge_score": 0.3573138234309702, + "eval_wikibio_runtime": 7.3573, + "eval_wikibio_samples_per_second": 67.96, + "eval_wikibio_steps_per_second": 0.136, + "eval_wikibio_token_set_f1": 0.3157430502946985, + "eval_wikibio_token_set_f1_sem": 0.0055280111079246685, + "eval_wikibio_token_set_precision": 0.3016019113436564, + "eval_wikibio_token_set_recall": 0.3466330422570567, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 1364 + }, + { + "epoch": 6.01, + "eval_bias-bios_accuracy": 0.50953125, + "eval_bias-bios_bleu_score": 17.60533299468726, + "eval_bias-bios_bleu_score_sem": 0.8486476305948097, + "eval_bias-bios_emb_cos_sim": 0.8615785241127014, + "eval_bias-bios_emb_cos_sim_sem": 0.0031654218820528065, + "eval_bias-bios_emb_top1_equal": 0.2720000147819519, + "eval_bias-bios_emb_top1_equal_sem": 0.019920483557355567, + "eval_bias-bios_exact_match": 0.0, + "eval_bias-bios_exact_match_sem": 0.0, + "eval_bias-bios_loss": 1.8134593963623047, + "eval_bias-bios_n_ngrams_match_1": 18.908, + "eval_bias-bios_n_ngrams_match_2": 8.648, + "eval_bias-bios_n_ngrams_match_3": 4.96, + "eval_bias-bios_num_pred_words": 31.066, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 6.131622497203516, + "eval_bias-bios_pred_num_tokens": 41.2578125, + "eval_bias-bios_rouge_score": 0.5365375885706414, + "eval_bias-bios_runtime": 7.4354, + "eval_bias-bios_samples_per_second": 67.246, + "eval_bias-bios_steps_per_second": 0.134, + "eval_bias-bios_token_set_f1": 0.5439127466893935, + "eval_bias-bios_token_set_f1_sem": 0.00694620487083124, + "eval_bias-bios_token_set_precision": 0.48595299762233435, + "eval_bias-bios_token_set_recall": 0.6326538885690199, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 1364 + }, + { + "epoch": 6.03, + "learning_rate": 0.001, + "loss": 1.8676, + "step": 1368 + }, + { + "epoch": 6.08, + "learning_rate": 0.001, + "loss": 1.9923, + "step": 1380 + }, + { + "epoch": 6.13, + "learning_rate": 0.001, + "loss": 1.9533, + "step": 1392 + }, + { + "epoch": 6.15, + "eval_ag_news_accuracy": 0.3015625, + "eval_ag_news_bleu_score": 4.958398698029417, + "eval_ag_news_bleu_score_sem": 0.163501632207498, + "eval_ag_news_emb_cos_sim": 0.8198105096817017, + "eval_ag_news_emb_cos_sim_sem": 0.003713689308283834, + "eval_ag_news_emb_top1_equal": 0.28600001335144043, + "eval_ag_news_emb_top1_equal_sem": 0.020229345383440313, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.560136079788208, + "eval_ag_news_n_ngrams_match_1": 13.476, + "eval_ag_news_n_ngrams_match_2": 2.88, + "eval_ag_news_n_ngrams_match_3": 0.832, + "eval_ag_news_num_pred_words": 41.864, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 35.16798247111252, + "eval_ag_news_pred_num_tokens": 61.4453125, + "eval_ag_news_rouge_score": 0.36747310775866715, + "eval_ag_news_runtime": 8.0532, + "eval_ag_news_samples_per_second": 62.087, + "eval_ag_news_steps_per_second": 0.124, + "eval_ag_news_token_set_f1": 0.35342223201297795, + "eval_ag_news_token_set_f1_sem": 0.004599465585950473, + "eval_ag_news_token_set_precision": 0.3303619830838599, + "eval_ag_news_token_set_recall": 0.4019049295022457, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 1395 + }, + { + "epoch": 6.15, + "eval_anthropic_toxic_prompts_accuracy": 0.1064375, + "eval_anthropic_toxic_prompts_bleu_score": 3.6235744911046486, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.127329776218279, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6942067742347717, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004392782753317032, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1379999965429306, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015439843269723665, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 3.1174263954162598, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.19, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.928, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.742, + "eval_anthropic_toxic_prompts_num_pred_words": 41.358, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 22.588171750897825, + "eval_anthropic_toxic_prompts_pred_num_tokens": 58.4921875, + "eval_anthropic_toxic_prompts_rouge_score": 0.24408343271962318, + "eval_anthropic_toxic_prompts_runtime": 7.6259, + "eval_anthropic_toxic_prompts_samples_per_second": 65.566, + "eval_anthropic_toxic_prompts_steps_per_second": 0.131, + "eval_anthropic_toxic_prompts_token_set_f1": 0.33171928516895843, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005368515808050288, + "eval_anthropic_toxic_prompts_token_set_precision": 0.451752766617253, + "eval_anthropic_toxic_prompts_token_set_recall": 0.283778100154781, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 1395 + }, + { + "epoch": 6.15, + "eval_arxiv_accuracy": 0.425, + "eval_arxiv_bleu_score": 4.534491599177458, + "eval_arxiv_bleu_score_sem": 0.12468185772288573, + "eval_arxiv_emb_cos_sim": 0.7561259865760803, + "eval_arxiv_emb_cos_sim_sem": 0.004883616146191509, + "eval_arxiv_emb_top1_equal": 0.2460000067949295, + "eval_arxiv_emb_top1_equal_sem": 0.019279819745132862, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.026859760284424, + "eval_arxiv_n_ngrams_match_1": 15.424, + "eval_arxiv_n_ngrams_match_2": 3.03, + "eval_arxiv_n_ngrams_match_3": 0.692, + "eval_arxiv_num_pred_words": 38.806, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 20.63234025967113, + "eval_arxiv_pred_num_tokens": 62.359375, + "eval_arxiv_rouge_score": 0.36722323683211977, + "eval_arxiv_runtime": 7.4273, + "eval_arxiv_samples_per_second": 67.319, + "eval_arxiv_steps_per_second": 0.135, + "eval_arxiv_token_set_f1": 0.3709845887261192, + "eval_arxiv_token_set_f1_sem": 0.004405283616695815, + "eval_arxiv_token_set_precision": 0.3211236842225695, + "eval_arxiv_token_set_recall": 0.45768708574331657, + "eval_arxiv_true_num_tokens": 64.0, + "step": 1395 + }, + { + "epoch": 6.15, + "eval_python_code_alpaca_accuracy": 0.1545625, + "eval_python_code_alpaca_bleu_score": 5.5736478407264585, + "eval_python_code_alpaca_bleu_score_sem": 0.17406260818897018, + "eval_python_code_alpaca_emb_cos_sim": 0.7750619053840637, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.003336053983252675, + "eval_python_code_alpaca_emb_top1_equal": 0.16200000047683716, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.016494124351899474, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.7529311180114746, + "eval_python_code_alpaca_n_ngrams_match_1": 10.01, + "eval_python_code_alpaca_n_ngrams_match_2": 2.986, + "eval_python_code_alpaca_n_ngrams_match_3": 1.014, + "eval_python_code_alpaca_num_pred_words": 37.058, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 15.68854954641924, + "eval_python_code_alpaca_pred_num_tokens": 58.078125, + "eval_python_code_alpaca_rouge_score": 0.4001777824628884, + "eval_python_code_alpaca_runtime": 7.5451, + "eval_python_code_alpaca_samples_per_second": 66.268, + "eval_python_code_alpaca_steps_per_second": 0.133, + "eval_python_code_alpaca_token_set_f1": 0.4776881355635347, + "eval_python_code_alpaca_token_set_f1_sem": 0.005200739185162961, + "eval_python_code_alpaca_token_set_precision": 0.5604280670828278, + "eval_python_code_alpaca_token_set_recall": 0.4339519068232182, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 1395 + }, + { + "epoch": 6.15, + "eval_wikibio_accuracy": 0.36215625, + "eval_wikibio_bleu_score": 4.99687688158487, + "eval_wikibio_bleu_score_sem": 0.21497068371007713, + "eval_wikibio_emb_cos_sim": 0.7186086773872375, + "eval_wikibio_emb_cos_sim_sem": 0.00626282829537244, + "eval_wikibio_emb_top1_equal": 0.17000000178813934, + "eval_wikibio_emb_top1_equal_sem": 0.016815633120741882, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.3449625968933105, + "eval_wikibio_n_ngrams_match_1": 8.356, + "eval_wikibio_n_ngrams_match_2": 2.622, + "eval_wikibio_n_ngrams_match_3": 0.98, + "eval_wikibio_num_pred_words": 31.684, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 28.35951491285399, + "eval_wikibio_pred_num_tokens": 62.859375, + "eval_wikibio_rouge_score": 0.30098109064809037, + "eval_wikibio_runtime": 7.3137, + "eval_wikibio_samples_per_second": 68.365, + "eval_wikibio_steps_per_second": 0.137, + "eval_wikibio_token_set_f1": 0.2726735406063593, + "eval_wikibio_token_set_f1_sem": 0.006712610493410692, + "eval_wikibio_token_set_precision": 0.26585764155408015, + "eval_wikibio_token_set_recall": 0.30445040567456516, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 1395 + }, + { + "epoch": 6.15, + "eval_bias-bios_accuracy": 0.515375, + "eval_bias-bios_bleu_score": 18.27200721878929, + "eval_bias-bios_bleu_score_sem": 0.7860741507629497, + "eval_bias-bios_emb_cos_sim": 0.8807288408279419, + "eval_bias-bios_emb_cos_sim_sem": 0.0026622839095169744, + "eval_bias-bios_emb_top1_equal": 0.3580000102519989, + "eval_bias-bios_emb_top1_equal_sem": 0.021461435363634866, + "eval_bias-bios_exact_match": 0.0, + "eval_bias-bios_exact_match_sem": 0.0, + "eval_bias-bios_loss": 1.7732970714569092, + "eval_bias-bios_n_ngrams_match_1": 22.77, + "eval_bias-bios_n_ngrams_match_2": 10.378, + "eval_bias-bios_n_ngrams_match_3": 5.788, + "eval_bias-bios_num_pred_words": 46.242, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 5.890241929611657, + "eval_bias-bios_pred_num_tokens": 60.8984375, + "eval_bias-bios_rouge_score": 0.5304071182171766, + "eval_bias-bios_runtime": 8.3616, + "eval_bias-bios_samples_per_second": 59.797, + "eval_bias-bios_steps_per_second": 0.12, + "eval_bias-bios_token_set_f1": 0.5578896663828681, + "eval_bias-bios_token_set_f1_sem": 0.006577120420087825, + "eval_bias-bios_token_set_precision": 0.5450745752720823, + "eval_bias-bios_token_set_recall": 0.5807688151511985, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 1395 + }, + { + "epoch": 6.19, + "learning_rate": 0.001, + "loss": 1.7614, + "step": 1404 + }, + { + "epoch": 6.24, + "learning_rate": 0.001, + "loss": 1.7616, + "step": 1416 + }, + { + "epoch": 6.28, + "eval_ag_news_accuracy": 0.30121875, + "eval_ag_news_bleu_score": 4.759509818524586, + "eval_ag_news_bleu_score_sem": 0.1544282222876058, + "eval_ag_news_emb_cos_sim": 0.8108397126197815, + "eval_ag_news_emb_cos_sim_sem": 0.004678499834491018, + "eval_ag_news_emb_top1_equal": 0.2540000081062317, + "eval_ag_news_emb_top1_equal_sem": 0.019486597059300604, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.532440185546875, + "eval_ag_news_n_ngrams_match_1": 13.188, + "eval_ag_news_n_ngrams_match_2": 2.724, + "eval_ag_news_n_ngrams_match_3": 0.712, + "eval_ag_news_num_pred_words": 39.954, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 34.20733810569427, + "eval_ag_news_pred_num_tokens": 60.5546875, + "eval_ag_news_rouge_score": 0.3631419565768096, + "eval_ag_news_runtime": 7.4938, + "eval_ag_news_samples_per_second": 66.722, + "eval_ag_news_steps_per_second": 0.133, + "eval_ag_news_token_set_f1": 0.3508523169951062, + "eval_ag_news_token_set_f1_sem": 0.004783773326326581, + "eval_ag_news_token_set_precision": 0.32298989144493473, + "eval_ag_news_token_set_recall": 0.4043442158097807, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 1426 + }, + { + "epoch": 6.28, + "eval_anthropic_toxic_prompts_accuracy": 0.106375, + "eval_anthropic_toxic_prompts_bleu_score": 3.8482111778361916, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.13929177545922003, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6884832978248596, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004413079303826577, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.12800000607967377, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.014955912783191019, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 3.0690646171569824, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.074, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.9, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.722, + "eval_anthropic_toxic_prompts_num_pred_words": 38.91, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 21.521762169844234, + "eval_anthropic_toxic_prompts_pred_num_tokens": 56.6875, + "eval_anthropic_toxic_prompts_rouge_score": 0.2507626807826677, + "eval_anthropic_toxic_prompts_runtime": 6.9858, + "eval_anthropic_toxic_prompts_samples_per_second": 71.574, + "eval_anthropic_toxic_prompts_steps_per_second": 0.143, + "eval_anthropic_toxic_prompts_token_set_f1": 0.34025409496271963, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00581995043985978, + "eval_anthropic_toxic_prompts_token_set_precision": 0.4401520663488164, + "eval_anthropic_toxic_prompts_token_set_recall": 0.29992367690094496, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 1426 + }, + { + "epoch": 6.28, + "eval_arxiv_accuracy": 0.425625, + "eval_arxiv_bleu_score": 4.292847318437543, + "eval_arxiv_bleu_score_sem": 0.12571980529092805, + "eval_arxiv_emb_cos_sim": 0.7370375394821167, + "eval_arxiv_emb_cos_sim_sem": 0.005921472129746834, + "eval_arxiv_emb_top1_equal": 0.23800000548362732, + "eval_arxiv_emb_top1_equal_sem": 0.019064072684441876, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.01349139213562, + "eval_arxiv_n_ngrams_match_1": 14.782, + "eval_arxiv_n_ngrams_match_2": 2.812, + "eval_arxiv_n_ngrams_match_3": 0.638, + "eval_arxiv_num_pred_words": 36.926, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 20.35835498768817, + "eval_arxiv_pred_num_tokens": 61.0078125, + "eval_arxiv_rouge_score": 0.3582139659097523, + "eval_arxiv_runtime": 7.4003, + "eval_arxiv_samples_per_second": 67.565, + "eval_arxiv_steps_per_second": 0.135, + "eval_arxiv_token_set_f1": 0.3634346970856803, + "eval_arxiv_token_set_f1_sem": 0.004765512360675474, + "eval_arxiv_token_set_precision": 0.31038220900070124, + "eval_arxiv_token_set_recall": 0.4631410541105638, + "eval_arxiv_true_num_tokens": 64.0, + "step": 1426 + }, + { + "epoch": 6.28, + "eval_python_code_alpaca_accuracy": 0.15415625, + "eval_python_code_alpaca_bleu_score": 5.5198433610264175, + "eval_python_code_alpaca_bleu_score_sem": 0.17136673825122412, + "eval_python_code_alpaca_emb_cos_sim": 0.765462338924408, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.004074766921057206, + "eval_python_code_alpaca_emb_top1_equal": 0.18199999630451202, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.017272772986938162, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.6881864070892334, + "eval_python_code_alpaca_n_ngrams_match_1": 9.646, + "eval_python_code_alpaca_n_ngrams_match_2": 2.738, + "eval_python_code_alpaca_n_ngrams_match_3": 0.916, + "eval_python_code_alpaca_num_pred_words": 35.164, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 14.704982869909538, + "eval_python_code_alpaca_pred_num_tokens": 55.21875, + "eval_python_code_alpaca_rouge_score": 0.4021857778620161, + "eval_python_code_alpaca_runtime": 7.1266, + "eval_python_code_alpaca_samples_per_second": 70.16, + "eval_python_code_alpaca_steps_per_second": 0.14, + "eval_python_code_alpaca_token_set_f1": 0.4768254189100492, + "eval_python_code_alpaca_token_set_f1_sem": 0.00559437213796573, + "eval_python_code_alpaca_token_set_precision": 0.5371631900196111, + "eval_python_code_alpaca_token_set_recall": 0.4464693605196623, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 1426 + }, + { + "epoch": 6.28, + "eval_wikibio_accuracy": 0.37003125, + "eval_wikibio_bleu_score": 5.059556992142152, + "eval_wikibio_bleu_score_sem": 0.21789251518816838, + "eval_wikibio_emb_cos_sim": 0.7171035408973694, + "eval_wikibio_emb_cos_sim_sem": 0.006326739405351032, + "eval_wikibio_emb_top1_equal": 0.15000000596046448, + "eval_wikibio_emb_top1_equal_sem": 0.015984712054998636, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.315477132797241, + "eval_wikibio_n_ngrams_match_1": 8.372, + "eval_wikibio_n_ngrams_match_2": 2.636, + "eval_wikibio_n_ngrams_match_3": 0.996, + "eval_wikibio_num_pred_words": 31.736, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 27.535528954710415, + "eval_wikibio_pred_num_tokens": 62.8125, + "eval_wikibio_rouge_score": 0.30072556489442737, + "eval_wikibio_runtime": 7.042, + "eval_wikibio_samples_per_second": 71.002, + "eval_wikibio_steps_per_second": 0.142, + "eval_wikibio_token_set_f1": 0.2749744801258639, + "eval_wikibio_token_set_f1_sem": 0.006671092272697617, + "eval_wikibio_token_set_precision": 0.2685623083750441, + "eval_wikibio_token_set_recall": 0.310867379805107, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 1426 + }, + { + "epoch": 6.28, + "eval_bias-bios_accuracy": 0.51678125, + "eval_bias-bios_bleu_score": 18.45761988385277, + "eval_bias-bios_bleu_score_sem": 0.7958526816092888, + "eval_bias-bios_emb_cos_sim": 0.8778645992279053, + "eval_bias-bios_emb_cos_sim_sem": 0.0028580947755997856, + "eval_bias-bios_emb_top1_equal": 0.31200000643730164, + "eval_bias-bios_emb_top1_equal_sem": 0.020740595612058172, + "eval_bias-bios_exact_match": 0.0, + "eval_bias-bios_exact_match_sem": 0.0, + "eval_bias-bios_loss": 1.7571202516555786, + "eval_bias-bios_n_ngrams_match_1": 22.41, + "eval_bias-bios_n_ngrams_match_2": 10.152, + "eval_bias-bios_n_ngrams_match_3": 5.648, + "eval_bias-bios_num_pred_words": 44.464, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 5.795723115448467, + "eval_bias-bios_pred_num_tokens": 59.8359375, + "eval_bias-bios_rouge_score": 0.5313359871031016, + "eval_bias-bios_runtime": 8.4445, + "eval_bias-bios_samples_per_second": 59.21, + "eval_bias-bios_steps_per_second": 0.118, + "eval_bias-bios_token_set_f1": 0.557497262539606, + "eval_bias-bios_token_set_f1_sem": 0.006626022613662489, + "eval_bias-bios_token_set_precision": 0.5390052520117803, + "eval_bias-bios_token_set_recall": 0.5882333649734502, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 1426 + }, + { + "epoch": 6.29, + "learning_rate": 0.001, + "loss": 2.0052, + "step": 1428 + }, + { + "epoch": 6.34, + "learning_rate": 0.001, + "loss": 1.9724, + "step": 1440 + }, + { + "epoch": 6.4, + "learning_rate": 0.001, + "loss": 1.8063, + "step": 1452 + }, + { + "epoch": 6.42, + "eval_ag_news_accuracy": 0.3015625, + "eval_ag_news_bleu_score": 4.4805017796612825, + "eval_ag_news_bleu_score_sem": 0.1517655061779348, + "eval_ag_news_emb_cos_sim": 0.8106436729431152, + "eval_ag_news_emb_cos_sim_sem": 0.00428181512690115, + "eval_ag_news_emb_top1_equal": 0.26600000262260437, + "eval_ag_news_emb_top1_equal_sem": 0.01978055817719369, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.5997157096862793, + "eval_ag_news_n_ngrams_match_1": 12.064, + "eval_ag_news_n_ngrams_match_2": 2.416, + "eval_ag_news_n_ngrams_match_3": 0.612, + "eval_ag_news_num_pred_words": 32.786, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 36.58783139893886, + "eval_ag_news_pred_num_tokens": 50.3671875, + "eval_ag_news_rouge_score": 0.3644739897183449, + "eval_ag_news_runtime": 7.339, + "eval_ag_news_samples_per_second": 68.129, + "eval_ag_news_steps_per_second": 0.136, + "eval_ag_news_token_set_f1": 0.34682066457855015, + "eval_ag_news_token_set_f1_sem": 0.004680649556318142, + "eval_ag_news_token_set_precision": 0.3066558121768779, + "eval_ag_news_token_set_recall": 0.4171577397900149, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 1457 + }, + { + "epoch": 6.42, + "eval_anthropic_toxic_prompts_accuracy": 0.10828125, + "eval_anthropic_toxic_prompts_bleu_score": 4.917360176354823, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.17999685495020398, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6960086822509766, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004688394211274368, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1459999978542328, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015807205702664997, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 3.000969171524048, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.748, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.78, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.674, + "eval_anthropic_toxic_prompts_num_pred_words": 29.014, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 20.10501268977357, + "eval_anthropic_toxic_prompts_pred_num_tokens": 42.0, + "eval_anthropic_toxic_prompts_rouge_score": 0.29196549015552187, + "eval_anthropic_toxic_prompts_runtime": 7.0438, + "eval_anthropic_toxic_prompts_samples_per_second": 70.984, + "eval_anthropic_toxic_prompts_steps_per_second": 0.142, + "eval_anthropic_toxic_prompts_token_set_f1": 0.34287485631247483, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005876938571321538, + "eval_anthropic_toxic_prompts_token_set_precision": 0.4244424668846003, + "eval_anthropic_toxic_prompts_token_set_recall": 0.31100293355640496, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 1457 + }, + { + "epoch": 6.42, + "eval_arxiv_accuracy": 0.41771875, + "eval_arxiv_bleu_score": 3.934917926586904, + "eval_arxiv_bleu_score_sem": 0.10808345887239815, + "eval_arxiv_emb_cos_sim": 0.7565560936927795, + "eval_arxiv_emb_cos_sim_sem": 0.0043263203302207395, + "eval_arxiv_emb_top1_equal": 0.20200000703334808, + "eval_arxiv_emb_top1_equal_sem": 0.017973259543989376, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.091585874557495, + "eval_arxiv_n_ngrams_match_1": 14.304, + "eval_arxiv_n_ngrams_match_2": 2.642, + "eval_arxiv_n_ngrams_match_3": 0.566, + "eval_arxiv_num_pred_words": 32.542, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 22.011958515343032, + "eval_arxiv_pred_num_tokens": 52.9609375, + "eval_arxiv_rouge_score": 0.3719034083947236, + "eval_arxiv_runtime": 7.3761, + "eval_arxiv_samples_per_second": 67.787, + "eval_arxiv_steps_per_second": 0.136, + "eval_arxiv_token_set_f1": 0.3687926222207479, + "eval_arxiv_token_set_f1_sem": 0.004272372260008827, + "eval_arxiv_token_set_precision": 0.3122666822215735, + "eval_arxiv_token_set_recall": 0.4644894310912833, + "eval_arxiv_true_num_tokens": 64.0, + "step": 1457 + }, + { + "epoch": 6.42, + "eval_python_code_alpaca_accuracy": 0.15671875, + "eval_python_code_alpaca_bleu_score": 6.599258142542607, + "eval_python_code_alpaca_bleu_score_sem": 0.20786536106594386, + "eval_python_code_alpaca_emb_cos_sim": 0.7821627855300903, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.0037482584848083575, + "eval_python_code_alpaca_emb_top1_equal": 0.19599999487400055, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.017770749856622144, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.6141581535339355, + "eval_python_code_alpaca_n_ngrams_match_1": 9.334, + "eval_python_code_alpaca_n_ngrams_match_2": 2.564, + "eval_python_code_alpaca_n_ngrams_match_3": 0.826, + "eval_python_code_alpaca_num_pred_words": 27.932, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 13.655715521741897, + "eval_python_code_alpaca_pred_num_tokens": 44.6328125, + "eval_python_code_alpaca_rouge_score": 0.4446707271822361, + "eval_python_code_alpaca_runtime": 7.005, + "eval_python_code_alpaca_samples_per_second": 71.378, + "eval_python_code_alpaca_steps_per_second": 0.143, + "eval_python_code_alpaca_token_set_f1": 0.48588868276907005, + "eval_python_code_alpaca_token_set_f1_sem": 0.005428207810769336, + "eval_python_code_alpaca_token_set_precision": 0.5299259184573405, + "eval_python_code_alpaca_token_set_recall": 0.4649175014085176, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 1457 + }, + { + "epoch": 6.42, + "eval_wikibio_accuracy": 0.35565625, + "eval_wikibio_bleu_score": 5.7748851156901, + "eval_wikibio_bleu_score_sem": 0.23454135758726272, + "eval_wikibio_emb_cos_sim": 0.7541660666465759, + "eval_wikibio_emb_cos_sim_sem": 0.004788653453077914, + "eval_wikibio_emb_top1_equal": 0.16200000047683716, + "eval_wikibio_emb_top1_equal_sem": 0.016494123019099097, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.462639093399048, + "eval_wikibio_n_ngrams_match_1": 9.024, + "eval_wikibio_n_ngrams_match_2": 2.756, + "eval_wikibio_n_ngrams_match_3": 1.002, + "eval_wikibio_num_pred_words": 30.664, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 31.901055384561012, + "eval_wikibio_pred_num_tokens": 56.9609375, + "eval_wikibio_rouge_score": 0.3481066238213619, + "eval_wikibio_runtime": 8.6821, + "eval_wikibio_samples_per_second": 57.589, + "eval_wikibio_steps_per_second": 0.115, + "eval_wikibio_token_set_f1": 0.3042383637835181, + "eval_wikibio_token_set_f1_sem": 0.005621777232315361, + "eval_wikibio_token_set_precision": 0.29629385842357076, + "eval_wikibio_token_set_recall": 0.32962238408366523, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 1457 + }, + { + "epoch": 6.42, + "eval_bias-bios_accuracy": 0.51865625, + "eval_bias-bios_bleu_score": 19.25680543440157, + "eval_bias-bios_bleu_score_sem": 0.8417259010557914, + "eval_bias-bios_emb_cos_sim": 0.8749097585678101, + "eval_bias-bios_emb_cos_sim_sem": 0.003254033114979696, + "eval_bias-bios_emb_top1_equal": 0.3319999873638153, + "eval_bias-bios_emb_top1_equal_sem": 0.02108176585203148, + "eval_bias-bios_exact_match": 0.004, + "eval_bias-bios_exact_match_sem": 0.002825591608118863, + "eval_bias-bios_loss": 1.7528284788131714, + "eval_bias-bios_n_ngrams_match_1": 21.16, + "eval_bias-bios_n_ngrams_match_2": 9.692, + "eval_bias-bios_n_ngrams_match_3": 5.494, + "eval_bias-bios_num_pred_words": 37.402, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 5.77090248872356, + "eval_bias-bios_pred_num_tokens": 52.0, + "eval_bias-bios_rouge_score": 0.5478462449556378, + "eval_bias-bios_runtime": 7.3773, + "eval_bias-bios_samples_per_second": 67.775, + "eval_bias-bios_steps_per_second": 0.136, + "eval_bias-bios_token_set_f1": 0.5589008692099815, + "eval_bias-bios_token_set_f1_sem": 0.006875126001028403, + "eval_bias-bios_token_set_precision": 0.5246909824127267, + "eval_bias-bios_token_set_recall": 0.609506893287974, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 1457 + }, + { + "epoch": 6.45, + "learning_rate": 0.001, + "loss": 1.695, + "step": 1464 + }, + { + "epoch": 6.5, + "learning_rate": 0.001, + "loss": 2.0224, + "step": 1476 + }, + { + "epoch": 6.56, + "learning_rate": 0.001, + "loss": 1.9642, + "step": 1488 + }, + { + "epoch": 6.56, + "eval_ag_news_accuracy": 0.2999375, + "eval_ag_news_bleu_score": 4.732659847556908, + "eval_ag_news_bleu_score_sem": 0.15382722884594552, + "eval_ag_news_emb_cos_sim": 0.8149835467338562, + "eval_ag_news_emb_cos_sim_sem": 0.004877806802557305, + "eval_ag_news_emb_top1_equal": 0.28600001335144043, + "eval_ag_news_emb_top1_equal_sem": 0.020229345383440313, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.57092547416687, + "eval_ag_news_n_ngrams_match_1": 13.68, + "eval_ag_news_n_ngrams_match_2": 2.836, + "eval_ag_news_n_ngrams_match_3": 0.766, + "eval_ag_news_num_pred_words": 42.318, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 35.54947805580302, + "eval_ag_news_pred_num_tokens": 62.8828125, + "eval_ag_news_rouge_score": 0.3603819369485558, + "eval_ag_news_runtime": 7.7275, + "eval_ag_news_samples_per_second": 64.704, + "eval_ag_news_steps_per_second": 0.129, + "eval_ag_news_token_set_f1": 0.3534624801060994, + "eval_ag_news_token_set_f1_sem": 0.004661502713234828, + "eval_ag_news_token_set_precision": 0.3299273403261311, + "eval_ag_news_token_set_recall": 0.4007806565988763, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 1488 + }, + { + "epoch": 6.56, + "eval_anthropic_toxic_prompts_accuracy": 0.10621875, + "eval_anthropic_toxic_prompts_bleu_score": 3.5245039471587516, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12692739102441497, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6925877928733826, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.0043533211998162515, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1379999965429306, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015439843269723665, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 3.17211651802063, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.368, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.014, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.754, + "eval_anthropic_toxic_prompts_num_pred_words": 43.272, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 23.857926688316812, + "eval_anthropic_toxic_prompts_pred_num_tokens": 62.5625, + "eval_anthropic_toxic_prompts_rouge_score": 0.23856933493684165, + "eval_anthropic_toxic_prompts_runtime": 7.0117, + "eval_anthropic_toxic_prompts_samples_per_second": 71.31, + "eval_anthropic_toxic_prompts_steps_per_second": 0.143, + "eval_anthropic_toxic_prompts_token_set_f1": 0.33716291320329317, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005488927012354413, + "eval_anthropic_toxic_prompts_token_set_precision": 0.46357587025721775, + "eval_anthropic_toxic_prompts_token_set_recall": 0.2879152966800727, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 1488 + }, + { + "epoch": 6.56, + "eval_arxiv_accuracy": 0.4288125, + "eval_arxiv_bleu_score": 4.661399367438937, + "eval_arxiv_bleu_score_sem": 0.12783894900256684, + "eval_arxiv_emb_cos_sim": 0.7645853161811829, + "eval_arxiv_emb_cos_sim_sem": 0.004385083831951277, + "eval_arxiv_emb_top1_equal": 0.2980000078678131, + "eval_arxiv_emb_top1_equal_sem": 0.020475119103777986, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.00892972946167, + "eval_arxiv_n_ngrams_match_1": 15.502, + "eval_arxiv_n_ngrams_match_2": 3.086, + "eval_arxiv_n_ngrams_match_3": 0.706, + "eval_arxiv_num_pred_words": 39.224, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 20.26569853405298, + "eval_arxiv_pred_num_tokens": 62.984375, + "eval_arxiv_rouge_score": 0.3712170892610218, + "eval_arxiv_runtime": 7.5208, + "eval_arxiv_samples_per_second": 66.482, + "eval_arxiv_steps_per_second": 0.133, + "eval_arxiv_token_set_f1": 0.3734715294639753, + "eval_arxiv_token_set_f1_sem": 0.004153338418403119, + "eval_arxiv_token_set_precision": 0.32291839735695105, + "eval_arxiv_token_set_recall": 0.4580518879073966, + "eval_arxiv_true_num_tokens": 64.0, + "step": 1488 + }, + { + "epoch": 6.56, + "eval_python_code_alpaca_accuracy": 0.15253125, + "eval_python_code_alpaca_bleu_score": 5.117748099309339, + "eval_python_code_alpaca_bleu_score_sem": 0.16347555365801147, + "eval_python_code_alpaca_emb_cos_sim": 0.769605278968811, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.0036531835032631754, + "eval_python_code_alpaca_emb_top1_equal": 0.17800000309944153, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.01712362329538143, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.8052680492401123, + "eval_python_code_alpaca_n_ngrams_match_1": 9.946, + "eval_python_code_alpaca_n_ngrams_match_2": 2.932, + "eval_python_code_alpaca_n_ngrams_match_3": 0.966, + "eval_python_code_alpaca_num_pred_words": 39.292, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 16.531506569992075, + "eval_python_code_alpaca_pred_num_tokens": 62.65625, + "eval_python_code_alpaca_rouge_score": 0.3836086668387766, + "eval_python_code_alpaca_runtime": 7.2389, + "eval_python_code_alpaca_samples_per_second": 69.071, + "eval_python_code_alpaca_steps_per_second": 0.138, + "eval_python_code_alpaca_token_set_f1": 0.46205300074061845, + "eval_python_code_alpaca_token_set_f1_sem": 0.005207518408869205, + "eval_python_code_alpaca_token_set_precision": 0.5537377665363031, + "eval_python_code_alpaca_token_set_recall": 0.41235007315082095, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 1488 + }, + { + "epoch": 6.56, + "eval_wikibio_accuracy": 0.372, + "eval_wikibio_bleu_score": 5.16626984542665, + "eval_wikibio_bleu_score_sem": 0.2033688183142454, + "eval_wikibio_emb_cos_sim": 0.7348429560661316, + "eval_wikibio_emb_cos_sim_sem": 0.005632250449986981, + "eval_wikibio_emb_top1_equal": 0.1720000058412552, + "eval_wikibio_emb_top1_equal_sem": 0.01689386850274998, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.3192598819732666, + "eval_wikibio_n_ngrams_match_1": 8.746, + "eval_wikibio_n_ngrams_match_2": 2.706, + "eval_wikibio_n_ngrams_match_3": 0.988, + "eval_wikibio_num_pred_words": 32.386, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 27.639886208393996, + "eval_wikibio_pred_num_tokens": 62.96875, + "eval_wikibio_rouge_score": 0.31316939003335964, + "eval_wikibio_runtime": 8.1102, + "eval_wikibio_samples_per_second": 61.651, + "eval_wikibio_steps_per_second": 0.123, + "eval_wikibio_token_set_f1": 0.2861699744726475, + "eval_wikibio_token_set_f1_sem": 0.00638652547434589, + "eval_wikibio_token_set_precision": 0.2807087931884633, + "eval_wikibio_token_set_recall": 0.31595880847210117, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 1488 + }, + { + "epoch": 6.56, + "eval_bias-bios_accuracy": 0.51503125, + "eval_bias-bios_bleu_score": 17.79482156618152, + "eval_bias-bios_bleu_score_sem": 0.7485445091910192, + "eval_bias-bios_emb_cos_sim": 0.8782715797424316, + "eval_bias-bios_emb_cos_sim_sem": 0.003119295993477308, + "eval_bias-bios_emb_top1_equal": 0.33399999141693115, + "eval_bias-bios_emb_top1_equal_sem": 0.021113493164956566, + "eval_bias-bios_exact_match": 0.0, + "eval_bias-bios_exact_match_sem": 0.0, + "eval_bias-bios_loss": 1.779577374458313, + "eval_bias-bios_n_ngrams_match_1": 22.808, + "eval_bias-bios_n_ngrams_match_2": 10.398, + "eval_bias-bios_n_ngrams_match_3": 5.816, + "eval_bias-bios_num_pred_words": 46.766, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 5.927350839308307, + "eval_bias-bios_pred_num_tokens": 62.484375, + "eval_bias-bios_rouge_score": 0.5222122418814229, + "eval_bias-bios_runtime": 7.5869, + "eval_bias-bios_samples_per_second": 65.903, + "eval_bias-bios_steps_per_second": 0.132, + "eval_bias-bios_token_set_f1": 0.5567971157863137, + "eval_bias-bios_token_set_f1_sem": 0.006579296670950696, + "eval_bias-bios_token_set_precision": 0.5482021158150124, + "eval_bias-bios_token_set_recall": 0.5790242704328651, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 1488 + }, + { + "epoch": 6.61, + "learning_rate": 0.001, + "loss": 1.8493, + "step": 1500 + }, + { + "epoch": 6.66, + "learning_rate": 0.001, + "loss": 1.6162, + "step": 1512 + }, + { + "epoch": 6.69, + "eval_ag_news_accuracy": 0.3058125, + "eval_ag_news_bleu_score": 4.7463020709686194, + "eval_ag_news_bleu_score_sem": 0.1605780732079416, + "eval_ag_news_emb_cos_sim": 0.8073976039886475, + "eval_ag_news_emb_cos_sim_sem": 0.005124277910691155, + "eval_ag_news_emb_top1_equal": 0.2800000011920929, + "eval_ag_news_emb_top1_equal_sem": 0.02009995045904072, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.532249689102173, + "eval_ag_news_n_ngrams_match_1": 12.448, + "eval_ag_news_n_ngrams_match_2": 2.55, + "eval_ag_news_n_ngrams_match_3": 0.66, + "eval_ag_news_num_pred_words": 34.98, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 34.200822350036255, + "eval_ag_news_pred_num_tokens": 53.34375, + "eval_ag_news_rouge_score": 0.3641022807574177, + "eval_ag_news_runtime": 11.3731, + "eval_ag_news_samples_per_second": 43.963, + "eval_ag_news_steps_per_second": 0.088, + "eval_ag_news_token_set_f1": 0.3476928951651583, + "eval_ag_news_token_set_f1_sem": 0.004917711975399316, + "eval_ag_news_token_set_precision": 0.3099224145207519, + "eval_ag_news_token_set_recall": 0.4156977928068492, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 1519 + }, + { + "epoch": 6.69, + "eval_anthropic_toxic_prompts_accuracy": 0.10853125, + "eval_anthropic_toxic_prompts_bleu_score": 5.128832221923525, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.19070154322972455, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.7086655497550964, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00440713668015488, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.17000000178813934, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.016815633120741882, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 3.0389885902404785, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.902, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.846, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.702, + "eval_anthropic_toxic_prompts_num_pred_words": 28.914, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 20.884110156961853, + "eval_anthropic_toxic_prompts_pred_num_tokens": 42.640625, + "eval_anthropic_toxic_prompts_rouge_score": 0.2993498581809596, + "eval_anthropic_toxic_prompts_runtime": 7.1117, + "eval_anthropic_toxic_prompts_samples_per_second": 70.306, + "eval_anthropic_toxic_prompts_steps_per_second": 0.141, + "eval_anthropic_toxic_prompts_token_set_f1": 0.35337104182115847, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005880778721392854, + "eval_anthropic_toxic_prompts_token_set_precision": 0.43707066740847084, + "eval_anthropic_toxic_prompts_token_set_recall": 0.3173444768330494, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 1519 + }, + { + "epoch": 6.69, + "eval_arxiv_accuracy": 0.42440625, + "eval_arxiv_bleu_score": 4.118168026943256, + "eval_arxiv_bleu_score_sem": 0.1250072563230236, + "eval_arxiv_emb_cos_sim": 0.742168664932251, + "eval_arxiv_emb_cos_sim_sem": 0.0065224204899973345, + "eval_arxiv_emb_top1_equal": 0.24799999594688416, + "eval_arxiv_emb_top1_equal_sem": 0.01933234274230791, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.022693395614624, + "eval_arxiv_n_ngrams_match_1": 14.448, + "eval_arxiv_n_ngrams_match_2": 2.698, + "eval_arxiv_n_ngrams_match_3": 0.616, + "eval_arxiv_num_pred_words": 33.828, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 20.546557231935324, + "eval_arxiv_pred_num_tokens": 57.03125, + "eval_arxiv_rouge_score": 0.362829673463994, + "eval_arxiv_runtime": 7.3621, + "eval_arxiv_samples_per_second": 67.915, + "eval_arxiv_steps_per_second": 0.136, + "eval_arxiv_token_set_f1": 0.3622128562665494, + "eval_arxiv_token_set_f1_sem": 0.00485702315818566, + "eval_arxiv_token_set_precision": 0.3067772119241535, + "eval_arxiv_token_set_recall": 0.45555796091217454, + "eval_arxiv_true_num_tokens": 64.0, + "step": 1519 + }, + { + "epoch": 6.69, + "eval_python_code_alpaca_accuracy": 0.156625, + "eval_python_code_alpaca_bleu_score": 7.257567396442399, + "eval_python_code_alpaca_bleu_score_sem": 0.23629107675727168, + "eval_python_code_alpaca_emb_cos_sim": 0.7840350866317749, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.003614340702336455, + "eval_python_code_alpaca_emb_top1_equal": 0.20000000298023224, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.017906459589198134, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.5811822414398193, + "eval_python_code_alpaca_n_ngrams_match_1": 9.49, + "eval_python_code_alpaca_n_ngrams_match_2": 2.69, + "eval_python_code_alpaca_n_ngrams_match_3": 0.898, + "eval_python_code_alpaca_num_pred_words": 27.904, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 13.212749589699158, + "eval_python_code_alpaca_pred_num_tokens": 44.4765625, + "eval_python_code_alpaca_rouge_score": 0.45019775968067777, + "eval_python_code_alpaca_runtime": 7.081, + "eval_python_code_alpaca_samples_per_second": 70.611, + "eval_python_code_alpaca_steps_per_second": 0.141, + "eval_python_code_alpaca_token_set_f1": 0.4959035467348254, + "eval_python_code_alpaca_token_set_f1_sem": 0.0055782694432414326, + "eval_python_code_alpaca_token_set_precision": 0.5380365728375572, + "eval_python_code_alpaca_token_set_recall": 0.47629206841719984, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 1519 + }, + { + "epoch": 6.69, + "eval_wikibio_accuracy": 0.370375, + "eval_wikibio_bleu_score": 5.341609284477066, + "eval_wikibio_bleu_score_sem": 0.2066184654207597, + "eval_wikibio_emb_cos_sim": 0.7368956804275513, + "eval_wikibio_emb_cos_sim_sem": 0.00575103694980236, + "eval_wikibio_emb_top1_equal": 0.15800000727176666, + "eval_wikibio_emb_top1_equal_sem": 0.01632805076118194, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.31675124168396, + "eval_wikibio_n_ngrams_match_1": 8.94, + "eval_wikibio_n_ngrams_match_2": 2.756, + "eval_wikibio_n_ngrams_match_3": 0.99, + "eval_wikibio_num_pred_words": 32.058, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 27.570634576295262, + "eval_wikibio_pred_num_tokens": 60.6875, + "eval_wikibio_rouge_score": 0.32612518061916174, + "eval_wikibio_runtime": 8.7268, + "eval_wikibio_samples_per_second": 57.294, + "eval_wikibio_steps_per_second": 0.115, + "eval_wikibio_token_set_f1": 0.2963580125635239, + "eval_wikibio_token_set_f1_sem": 0.00629818151845586, + "eval_wikibio_token_set_precision": 0.2896516821409088, + "eval_wikibio_token_set_recall": 0.32416889589872927, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 1519 + }, + { + "epoch": 6.69, + "eval_bias-bios_accuracy": 0.51909375, + "eval_bias-bios_bleu_score": 19.268902172980635, + "eval_bias-bios_bleu_score_sem": 0.857516824635127, + "eval_bias-bios_emb_cos_sim": 0.8769698143005371, + "eval_bias-bios_emb_cos_sim_sem": 0.00319190762430341, + "eval_bias-bios_emb_top1_equal": 0.36000001430511475, + "eval_bias-bios_emb_top1_equal_sem": 0.021487752839838135, + "eval_bias-bios_exact_match": 0.002, + "eval_bias-bios_exact_match_sem": 0.002, + "eval_bias-bios_loss": 1.7438139915466309, + "eval_bias-bios_n_ngrams_match_1": 21.502, + "eval_bias-bios_n_ngrams_match_2": 9.814, + "eval_bias-bios_n_ngrams_match_3": 5.53, + "eval_bias-bios_num_pred_words": 38.682, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 5.719114533349184, + "eval_bias-bios_pred_num_tokens": 52.0078125, + "eval_bias-bios_rouge_score": 0.5474474528418282, + "eval_bias-bios_runtime": 7.4191, + "eval_bias-bios_samples_per_second": 67.394, + "eval_bias-bios_steps_per_second": 0.135, + "eval_bias-bios_token_set_f1": 0.5632959401204308, + "eval_bias-bios_token_set_f1_sem": 0.0068805471549051025, + "eval_bias-bios_token_set_precision": 0.5276031872114068, + "eval_bias-bios_token_set_recall": 0.6158242049632531, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 1519 + }, + { + "epoch": 6.71, + "learning_rate": 0.001, + "loss": 2.0335, + "step": 1524 + }, + { + "epoch": 6.77, + "learning_rate": 0.001, + "loss": 1.9593, + "step": 1536 + }, + { + "epoch": 6.82, + "learning_rate": 0.001, + "loss": 1.8879, + "step": 1548 + }, + { + "epoch": 6.83, + "eval_ag_news_accuracy": 0.30028125, + "eval_ag_news_bleu_score": 4.795721070948663, + "eval_ag_news_bleu_score_sem": 0.15974204528730274, + "eval_ag_news_emb_cos_sim": 0.8041967749595642, + "eval_ag_news_emb_cos_sim_sem": 0.0050863287524158226, + "eval_ag_news_emb_top1_equal": 0.27000001072883606, + "eval_ag_news_emb_top1_equal_sem": 0.019874356669179787, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.5922787189483643, + "eval_ag_news_n_ngrams_match_1": 12.668, + "eval_ag_news_n_ngrams_match_2": 2.672, + "eval_ag_news_n_ngrams_match_3": 0.782, + "eval_ag_news_num_pred_words": 36.772, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 36.31673734717072, + "eval_ag_news_pred_num_tokens": 55.5078125, + "eval_ag_news_rouge_score": 0.3619051833548086, + "eval_ag_news_runtime": 7.442, + "eval_ag_news_samples_per_second": 67.187, + "eval_ag_news_steps_per_second": 0.134, + "eval_ag_news_token_set_f1": 0.3482497468589517, + "eval_ag_news_token_set_f1_sem": 0.005093154354263263, + "eval_ag_news_token_set_precision": 0.31438605603788017, + "eval_ag_news_token_set_recall": 0.4080925892825803, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 1550 + }, + { + "epoch": 6.83, + "eval_anthropic_toxic_prompts_accuracy": 0.10615625, + "eval_anthropic_toxic_prompts_bleu_score": 4.083847585562941, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.15555929967558885, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6819911599159241, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004785336112944726, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.10999999940395355, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.014006868285818916, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 3.0443711280822754, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.884, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.74, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.646, + "eval_anthropic_toxic_prompts_num_pred_words": 34.422, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 20.996822737917157, + "eval_anthropic_toxic_prompts_pred_num_tokens": 49.984375, + "eval_anthropic_toxic_prompts_rouge_score": 0.2612879461490797, + "eval_anthropic_toxic_prompts_runtime": 7.8422, + "eval_anthropic_toxic_prompts_samples_per_second": 63.757, + "eval_anthropic_toxic_prompts_steps_per_second": 0.128, + "eval_anthropic_toxic_prompts_token_set_f1": 0.3376565967945119, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005536374664466343, + "eval_anthropic_toxic_prompts_token_set_precision": 0.42873340074689775, + "eval_anthropic_toxic_prompts_token_set_recall": 0.30131964308814996, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 1550 + }, + { + "epoch": 6.83, + "eval_arxiv_accuracy": 0.4220625, + "eval_arxiv_bleu_score": 4.31068497115973, + "eval_arxiv_bleu_score_sem": 0.1182485148643836, + "eval_arxiv_emb_cos_sim": 0.751672625541687, + "eval_arxiv_emb_cos_sim_sem": 0.004438314879325501, + "eval_arxiv_emb_top1_equal": 0.23600000143051147, + "eval_arxiv_emb_top1_equal_sem": 0.019008700160065242, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.0553781986236572, + "eval_arxiv_n_ngrams_match_1": 14.768, + "eval_arxiv_n_ngrams_match_2": 2.834, + "eval_arxiv_n_ngrams_match_3": 0.618, + "eval_arxiv_num_pred_words": 35.84, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 21.22921286793967, + "eval_arxiv_pred_num_tokens": 57.1015625, + "eval_arxiv_rouge_score": 0.3674967211275739, + "eval_arxiv_runtime": 7.4582, + "eval_arxiv_samples_per_second": 67.04, + "eval_arxiv_steps_per_second": 0.134, + "eval_arxiv_token_set_f1": 0.36944767629366165, + "eval_arxiv_token_set_f1_sem": 0.004118295290691409, + "eval_arxiv_token_set_precision": 0.3121757890742581, + "eval_arxiv_token_set_recall": 0.4675134012606021, + "eval_arxiv_true_num_tokens": 64.0, + "step": 1550 + }, + { + "epoch": 6.83, + "eval_python_code_alpaca_accuracy": 0.15375, + "eval_python_code_alpaca_bleu_score": 6.087948865482973, + "eval_python_code_alpaca_bleu_score_sem": 0.20284576134331592, + "eval_python_code_alpaca_emb_cos_sim": 0.7750284671783447, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.003545411265746177, + "eval_python_code_alpaca_emb_top1_equal": 0.17399999499320984, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.016971270884523753, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.67142915725708, + "eval_python_code_alpaca_n_ngrams_match_1": 9.704, + "eval_python_code_alpaca_n_ngrams_match_2": 2.756, + "eval_python_code_alpaca_n_ngrams_match_3": 0.908, + "eval_python_code_alpaca_num_pred_words": 32.86, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 14.46062093334414, + "eval_python_code_alpaca_pred_num_tokens": 51.171875, + "eval_python_code_alpaca_rouge_score": 0.41505979945750626, + "eval_python_code_alpaca_runtime": 7.0339, + "eval_python_code_alpaca_samples_per_second": 71.085, + "eval_python_code_alpaca_steps_per_second": 0.142, + "eval_python_code_alpaca_token_set_f1": 0.4861361656943412, + "eval_python_code_alpaca_token_set_f1_sem": 0.00544849215466565, + "eval_python_code_alpaca_token_set_precision": 0.546222964812534, + "eval_python_code_alpaca_token_set_recall": 0.4546387188311011, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 1550 + }, + { + "epoch": 6.83, + "eval_wikibio_accuracy": 0.3648125, + "eval_wikibio_bleu_score": 5.078593240932916, + "eval_wikibio_bleu_score_sem": 0.22592161566311428, + "eval_wikibio_emb_cos_sim": 0.7141668796539307, + "eval_wikibio_emb_cos_sim_sem": 0.0065623571868317744, + "eval_wikibio_emb_top1_equal": 0.1860000044107437, + "eval_wikibio_emb_top1_equal_sem": 0.017418806591218323, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.3612303733825684, + "eval_wikibio_n_ngrams_match_1": 8.014, + "eval_wikibio_n_ngrams_match_2": 2.408, + "eval_wikibio_n_ngrams_match_3": 0.934, + "eval_wikibio_num_pred_words": 29.244, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 28.824634133153058, + "eval_wikibio_pred_num_tokens": 60.46875, + "eval_wikibio_rouge_score": 0.30091374134289106, + "eval_wikibio_runtime": 7.2028, + "eval_wikibio_samples_per_second": 69.418, + "eval_wikibio_steps_per_second": 0.139, + "eval_wikibio_token_set_f1": 0.27374934123816314, + "eval_wikibio_token_set_f1_sem": 0.006816941537408769, + "eval_wikibio_token_set_precision": 0.2599069063291573, + "eval_wikibio_token_set_recall": 0.3172531394934058, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 1550 + }, + { + "epoch": 6.83, + "eval_bias-bios_accuracy": 0.51903125, + "eval_bias-bios_bleu_score": 18.776008983057356, + "eval_bias-bios_bleu_score_sem": 0.8303969478486409, + "eval_bias-bios_emb_cos_sim": 0.8751217126846313, + "eval_bias-bios_emb_cos_sim_sem": 0.0029728119026704246, + "eval_bias-bios_emb_top1_equal": 0.32600000500679016, + "eval_bias-bios_emb_top1_equal_sem": 0.020984011608532603, + "eval_bias-bios_exact_match": 0.0, + "eval_bias-bios_exact_match_sem": 0.0, + "eval_bias-bios_loss": 1.747039794921875, + "eval_bias-bios_n_ngrams_match_1": 21.814, + "eval_bias-bios_n_ngrams_match_2": 9.83, + "eval_bias-bios_n_ngrams_match_3": 5.526, + "eval_bias-bios_num_pred_words": 40.97, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 5.7375930603379715, + "eval_bias-bios_pred_num_tokens": 55.421875, + "eval_bias-bios_rouge_score": 0.5384821581566905, + "eval_bias-bios_runtime": 7.4374, + "eval_bias-bios_samples_per_second": 67.228, + "eval_bias-bios_steps_per_second": 0.134, + "eval_bias-bios_token_set_f1": 0.557628993881636, + "eval_bias-bios_token_set_f1_sem": 0.006844604271877559, + "eval_bias-bios_token_set_precision": 0.530631198064721, + "eval_bias-bios_token_set_recall": 0.5978989905704735, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 1550 + }, + { + "epoch": 6.87, + "learning_rate": 0.001, + "loss": 1.6451, + "step": 1560 + }, + { + "epoch": 6.93, + "learning_rate": 0.001, + "loss": 1.9438, + "step": 1572 + }, + { + "epoch": 6.96, + "eval_ag_news_accuracy": 0.29990625, + "eval_ag_news_bleu_score": 4.615222668755698, + "eval_ag_news_bleu_score_sem": 0.15290728039133886, + "eval_ag_news_emb_cos_sim": 0.8042817115783691, + "eval_ag_news_emb_cos_sim_sem": 0.0051403454856110645, + "eval_ag_news_emb_top1_equal": 0.27799999713897705, + "eval_ag_news_emb_top1_equal_sem": 0.0200558347666307, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.5743329524993896, + "eval_ag_news_n_ngrams_match_1": 12.996, + "eval_ag_news_n_ngrams_match_2": 2.748, + "eval_ag_news_n_ngrams_match_3": 0.726, + "eval_ag_news_num_pred_words": 40.506, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 35.67081874749316, + "eval_ag_news_pred_num_tokens": 61.625, + "eval_ag_news_rouge_score": 0.3565626567209188, + "eval_ag_news_runtime": 7.9203, + "eval_ag_news_samples_per_second": 63.129, + "eval_ag_news_steps_per_second": 0.126, + "eval_ag_news_token_set_f1": 0.34549790329751373, + "eval_ag_news_token_set_f1_sem": 0.004972836903833072, + "eval_ag_news_token_set_precision": 0.3158240685548052, + "eval_ag_news_token_set_recall": 0.4027427808393305, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 1581 + }, + { + "epoch": 6.96, + "eval_anthropic_toxic_prompts_accuracy": 0.10490625, + "eval_anthropic_toxic_prompts_bleu_score": 3.541700637049288, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12635207715416444, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6750056147575378, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.005035798625001384, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.13199999928474426, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015152928667412809, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 3.1228177547454834, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.918, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.848, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.698, + "eval_anthropic_toxic_prompts_num_pred_words": 40.474, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 22.710281574587817, + "eval_anthropic_toxic_prompts_pred_num_tokens": 58.1171875, + "eval_anthropic_toxic_prompts_rouge_score": 0.23509872150435485, + "eval_anthropic_toxic_prompts_runtime": 7.1647, + "eval_anthropic_toxic_prompts_samples_per_second": 69.787, + "eval_anthropic_toxic_prompts_steps_per_second": 0.14, + "eval_anthropic_toxic_prompts_token_set_f1": 0.33558649257134376, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005768792814210663, + "eval_anthropic_toxic_prompts_token_set_precision": 0.43283570980153097, + "eval_anthropic_toxic_prompts_token_set_recall": 0.2996083086572435, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 1581 + }, + { + "epoch": 6.96, + "eval_arxiv_accuracy": 0.42675, + "eval_arxiv_bleu_score": 4.320251352031537, + "eval_arxiv_bleu_score_sem": 0.12188966763297118, + "eval_arxiv_emb_cos_sim": 0.7463886737823486, + "eval_arxiv_emb_cos_sim_sem": 0.005261778593770951, + "eval_arxiv_emb_top1_equal": 0.25600001215934753, + "eval_arxiv_emb_top1_equal_sem": 0.019536923601457774, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.028258800506592, + "eval_arxiv_n_ngrams_match_1": 14.756, + "eval_arxiv_n_ngrams_match_2": 2.816, + "eval_arxiv_n_ngrams_match_3": 0.634, + "eval_arxiv_num_pred_words": 38.016, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 20.66122593497113, + "eval_arxiv_pred_num_tokens": 61.765625, + "eval_arxiv_rouge_score": 0.354438503993915, + "eval_arxiv_runtime": 8.1309, + "eval_arxiv_samples_per_second": 61.494, + "eval_arxiv_steps_per_second": 0.123, + "eval_arxiv_token_set_f1": 0.36116556687680584, + "eval_arxiv_token_set_f1_sem": 0.004720914996476666, + "eval_arxiv_token_set_precision": 0.30851772412283185, + "eval_arxiv_token_set_recall": 0.45619999237268627, + "eval_arxiv_true_num_tokens": 64.0, + "step": 1581 + }, + { + "epoch": 6.96, + "eval_python_code_alpaca_accuracy": 0.15046875, + "eval_python_code_alpaca_bleu_score": 5.126805918317772, + "eval_python_code_alpaca_bleu_score_sem": 0.16463181667868096, + "eval_python_code_alpaca_emb_cos_sim": 0.7541212439537048, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.004230157117169431, + "eval_python_code_alpaca_emb_top1_equal": 0.15800000727176666, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.016328049428381567, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.77302885055542, + "eval_python_code_alpaca_n_ngrams_match_1": 9.444, + "eval_python_code_alpaca_n_ngrams_match_2": 2.65, + "eval_python_code_alpaca_n_ngrams_match_3": 0.896, + "eval_python_code_alpaca_num_pred_words": 37.202, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 16.007043602981074, + "eval_python_code_alpaca_pred_num_tokens": 58.7421875, + "eval_python_code_alpaca_rouge_score": 0.37821108219102967, + "eval_python_code_alpaca_runtime": 11.3494, + "eval_python_code_alpaca_samples_per_second": 44.055, + "eval_python_code_alpaca_steps_per_second": 0.088, + "eval_python_code_alpaca_token_set_f1": 0.46395396815143314, + "eval_python_code_alpaca_token_set_f1_sem": 0.005517060349556724, + "eval_python_code_alpaca_token_set_precision": 0.5237061818807659, + "eval_python_code_alpaca_token_set_recall": 0.4361381112417352, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 1581 + }, + { + "epoch": 6.96, + "eval_wikibio_accuracy": 0.37090625, + "eval_wikibio_bleu_score": 4.52711017427634, + "eval_wikibio_bleu_score_sem": 0.22225989051906928, + "eval_wikibio_emb_cos_sim": 0.6868708729743958, + "eval_wikibio_emb_cos_sim_sem": 0.007236773502149267, + "eval_wikibio_emb_top1_equal": 0.15399999916553497, + "eval_wikibio_emb_top1_equal_sem": 0.016158283980625493, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.3025898933410645, + "eval_wikibio_n_ngrams_match_1": 7.266, + "eval_wikibio_n_ngrams_match_2": 2.21, + "eval_wikibio_n_ngrams_match_3": 0.852, + "eval_wikibio_num_pred_words": 28.48, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 27.182948771864357, + "eval_wikibio_pred_num_tokens": 62.8125, + "eval_wikibio_rouge_score": 0.2665925355815556, + "eval_wikibio_runtime": 7.4128, + "eval_wikibio_samples_per_second": 67.451, + "eval_wikibio_steps_per_second": 0.135, + "eval_wikibio_token_set_f1": 0.24587304205342644, + "eval_wikibio_token_set_f1_sem": 0.007242710929915511, + "eval_wikibio_token_set_precision": 0.23366527740250978, + "eval_wikibio_token_set_recall": 0.29149566163466467, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 1581 + }, + { + "epoch": 6.96, + "eval_bias-bios_accuracy": 0.51584375, + "eval_bias-bios_bleu_score": 18.48892877899192, + "eval_bias-bios_bleu_score_sem": 0.7878024060804362, + "eval_bias-bios_emb_cos_sim": 0.876237154006958, + "eval_bias-bios_emb_cos_sim_sem": 0.0031529775249521316, + "eval_bias-bios_emb_top1_equal": 0.3540000021457672, + "eval_bias-bios_emb_top1_equal_sem": 0.021407582231685648, + "eval_bias-bios_exact_match": 0.0, + "eval_bias-bios_exact_match_sem": 0.0, + "eval_bias-bios_loss": 1.7648732662200928, + "eval_bias-bios_n_ngrams_match_1": 22.492, + "eval_bias-bios_n_ngrams_match_2": 10.32, + "eval_bias-bios_n_ngrams_match_3": 5.81, + "eval_bias-bios_num_pred_words": 45.448, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 5.840832080450535, + "eval_bias-bios_pred_num_tokens": 61.015625, + "eval_bias-bios_rouge_score": 0.5264991873889933, + "eval_bias-bios_runtime": 7.638, + "eval_bias-bios_samples_per_second": 65.462, + "eval_bias-bios_steps_per_second": 0.131, + "eval_bias-bios_token_set_f1": 0.5566655063326945, + "eval_bias-bios_token_set_f1_sem": 0.0068319374696395666, + "eval_bias-bios_token_set_precision": 0.5408795242977424, + "eval_bias-bios_token_set_recall": 0.5843492224289123, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 1581 + }, + { + "epoch": 6.98, + "learning_rate": 0.001, + "loss": 1.8587, + "step": 1584 + }, + { + "epoch": 7.03, + "learning_rate": 0.001, + "loss": 1.8517, + "step": 1596 + }, + { + "epoch": 7.08, + "learning_rate": 0.001, + "loss": 1.9463, + "step": 1608 + }, + { + "epoch": 7.1, + "eval_ag_news_accuracy": 0.30053125, + "eval_ag_news_bleu_score": 4.537715333796728, + "eval_ag_news_bleu_score_sem": 0.13889492068202675, + "eval_ag_news_emb_cos_sim": 0.8129717111587524, + "eval_ag_news_emb_cos_sim_sem": 0.004927985070672762, + "eval_ag_news_emb_top1_equal": 0.2980000078678131, + "eval_ag_news_emb_top1_equal_sem": 0.020475119103777986, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.5397188663482666, + "eval_ag_news_n_ngrams_match_1": 13.44, + "eval_ag_news_n_ngrams_match_2": 2.832, + "eval_ag_news_n_ngrams_match_3": 0.712, + "eval_ag_news_num_pred_words": 41.938, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 34.45723074193958, + "eval_ag_news_pred_num_tokens": 62.4296875, + "eval_ag_news_rouge_score": 0.3577707860317296, + "eval_ag_news_runtime": 7.5083, + "eval_ag_news_samples_per_second": 66.593, + "eval_ag_news_steps_per_second": 0.133, + "eval_ag_news_token_set_f1": 0.34854753140569744, + "eval_ag_news_token_set_f1_sem": 0.004711246773498002, + "eval_ag_news_token_set_precision": 0.32687807742299585, + "eval_ag_news_token_set_recall": 0.3920744588382431, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 1612 + }, + { + "epoch": 7.1, + "eval_anthropic_toxic_prompts_accuracy": 0.10565625, + "eval_anthropic_toxic_prompts_bleu_score": 3.6555507241528207, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.13471216566876523, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.694108784198761, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00449306898432817, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.13199999928474426, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015152928667412809, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 3.1062278747558594, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.302, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.986, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.792, + "eval_anthropic_toxic_prompts_num_pred_words": 42.614, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 22.336628726505555, + "eval_anthropic_toxic_prompts_pred_num_tokens": 61.28125, + "eval_anthropic_toxic_prompts_rouge_score": 0.2389581662106168, + "eval_anthropic_toxic_prompts_runtime": 7.2584, + "eval_anthropic_toxic_prompts_samples_per_second": 68.886, + "eval_anthropic_toxic_prompts_steps_per_second": 0.138, + "eval_anthropic_toxic_prompts_token_set_f1": 0.34118271658652294, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005605196304157451, + "eval_anthropic_toxic_prompts_token_set_precision": 0.4604330151107107, + "eval_anthropic_toxic_prompts_token_set_recall": 0.2952580469903798, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 1612 + }, + { + "epoch": 7.1, + "eval_arxiv_accuracy": 0.42734375, + "eval_arxiv_bleu_score": 4.4833449799753495, + "eval_arxiv_bleu_score_sem": 0.13133602727160468, + "eval_arxiv_emb_cos_sim": 0.7571825385093689, + "eval_arxiv_emb_cos_sim_sem": 0.004914910965394936, + "eval_arxiv_emb_top1_equal": 0.2639999985694885, + "eval_arxiv_emb_top1_equal_sem": 0.019732885240582997, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.0084228515625, + "eval_arxiv_n_ngrams_match_1": 15.244, + "eval_arxiv_n_ngrams_match_2": 2.932, + "eval_arxiv_n_ngrams_match_3": 0.676, + "eval_arxiv_num_pred_words": 39.102, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 20.25542890229939, + "eval_arxiv_pred_num_tokens": 62.1953125, + "eval_arxiv_rouge_score": 0.3632984432863542, + "eval_arxiv_runtime": 9.2134, + "eval_arxiv_samples_per_second": 54.269, + "eval_arxiv_steps_per_second": 0.109, + "eval_arxiv_token_set_f1": 0.3692277571283139, + "eval_arxiv_token_set_f1_sem": 0.004558893026175048, + "eval_arxiv_token_set_precision": 0.320379446657654, + "eval_arxiv_token_set_recall": 0.4525783748505137, + "eval_arxiv_true_num_tokens": 64.0, + "step": 1612 + }, + { + "epoch": 7.1, + "eval_python_code_alpaca_accuracy": 0.15453125, + "eval_python_code_alpaca_bleu_score": 5.3941285972882165, + "eval_python_code_alpaca_bleu_score_sem": 0.17369563512429562, + "eval_python_code_alpaca_emb_cos_sim": 0.7757654786109924, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.0036044649847583542, + "eval_python_code_alpaca_emb_top1_equal": 0.18000000715255737, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.017198593316470962, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.6887404918670654, + "eval_python_code_alpaca_n_ngrams_match_1": 9.99, + "eval_python_code_alpaca_n_ngrams_match_2": 2.98, + "eval_python_code_alpaca_n_ngrams_match_3": 1.006, + "eval_python_code_alpaca_num_pred_words": 38.514, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 14.713132934780964, + "eval_python_code_alpaca_pred_num_tokens": 60.03125, + "eval_python_code_alpaca_rouge_score": 0.3919716483017668, + "eval_python_code_alpaca_runtime": 7.151, + "eval_python_code_alpaca_samples_per_second": 69.92, + "eval_python_code_alpaca_steps_per_second": 0.14, + "eval_python_code_alpaca_token_set_f1": 0.469544790861787, + "eval_python_code_alpaca_token_set_f1_sem": 0.005210264001859052, + "eval_python_code_alpaca_token_set_precision": 0.5573700982167373, + "eval_python_code_alpaca_token_set_recall": 0.42329262951935054, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 1612 + }, + { + "epoch": 7.1, + "eval_wikibio_accuracy": 0.36940625, + "eval_wikibio_bleu_score": 4.849451170594448, + "eval_wikibio_bleu_score_sem": 0.21065823619517224, + "eval_wikibio_emb_cos_sim": 0.7106419205665588, + "eval_wikibio_emb_cos_sim_sem": 0.006688869930420594, + "eval_wikibio_emb_top1_equal": 0.18400000035762787, + "eval_wikibio_emb_top1_equal_sem": 0.017346174301986407, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.315532684326172, + "eval_wikibio_n_ngrams_match_1": 8.306, + "eval_wikibio_n_ngrams_match_2": 2.522, + "eval_wikibio_n_ngrams_match_3": 0.952, + "eval_wikibio_num_pred_words": 31.694, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 27.53705863793149, + "eval_wikibio_pred_num_tokens": 62.8671875, + "eval_wikibio_rouge_score": 0.29178624670056774, + "eval_wikibio_runtime": 7.3455, + "eval_wikibio_samples_per_second": 68.069, + "eval_wikibio_steps_per_second": 0.136, + "eval_wikibio_token_set_f1": 0.2695935871972429, + "eval_wikibio_token_set_f1_sem": 0.006982368442738906, + "eval_wikibio_token_set_precision": 0.26487933820827947, + "eval_wikibio_token_set_recall": 0.30313653889791964, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 1612 + }, + { + "epoch": 7.1, + "eval_bias-bios_accuracy": 0.517625, + "eval_bias-bios_bleu_score": 18.311460397478225, + "eval_bias-bios_bleu_score_sem": 0.7992379003992439, + "eval_bias-bios_emb_cos_sim": 0.8802025318145752, + "eval_bias-bios_emb_cos_sim_sem": 0.003091392151627784, + "eval_bias-bios_emb_top1_equal": 0.328000009059906, + "eval_bias-bios_emb_top1_equal_sem": 0.02101702640661987, + "eval_bias-bios_exact_match": 0.002, + "eval_bias-bios_exact_match_sem": 0.002, + "eval_bias-bios_loss": 1.7391340732574463, + "eval_bias-bios_n_ngrams_match_1": 23.012, + "eval_bias-bios_n_ngrams_match_2": 10.444, + "eval_bias-bios_n_ngrams_match_3": 5.88, + "eval_bias-bios_num_pred_words": 46.702, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 5.692412076041388, + "eval_bias-bios_pred_num_tokens": 62.2265625, + "eval_bias-bios_rouge_score": 0.5305381661552371, + "eval_bias-bios_runtime": 7.6578, + "eval_bias-bios_samples_per_second": 65.293, + "eval_bias-bios_steps_per_second": 0.131, + "eval_bias-bios_token_set_f1": 0.5614744301701937, + "eval_bias-bios_token_set_f1_sem": 0.0066862192501673575, + "eval_bias-bios_token_set_precision": 0.554485965307712, + "eval_bias-bios_token_set_recall": 0.58068459361329, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 1612 + }, + { + "epoch": 7.14, + "learning_rate": 0.001, + "loss": 1.9251, + "step": 1620 + }, + { + "epoch": 7.19, + "learning_rate": 0.001, + "loss": 1.7034, + "step": 1632 + }, + { + "epoch": 7.24, + "eval_ag_news_accuracy": 0.3025625, + "eval_ag_news_bleu_score": 4.286001981629996, + "eval_ag_news_bleu_score_sem": 0.15687854341520385, + "eval_ag_news_emb_cos_sim": 0.7988663911819458, + "eval_ag_news_emb_cos_sim_sem": 0.004817627200428409, + "eval_ag_news_emb_top1_equal": 0.23600000143051147, + "eval_ag_news_emb_top1_equal_sem": 0.019008700160065242, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.6356382369995117, + "eval_ag_news_n_ngrams_match_1": 11.506, + "eval_ag_news_n_ngrams_match_2": 2.332, + "eval_ag_news_n_ngrams_match_3": 0.646, + "eval_ag_news_num_pred_words": 29.562, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 37.92605098324666, + "eval_ag_news_pred_num_tokens": 44.359375, + "eval_ag_news_rouge_score": 0.3550396467086911, + "eval_ag_news_runtime": 7.2363, + "eval_ag_news_samples_per_second": 69.096, + "eval_ag_news_steps_per_second": 0.138, + "eval_ag_news_token_set_f1": 0.3455860395867242, + "eval_ag_news_token_set_f1_sem": 0.004866392561009078, + "eval_ag_news_token_set_precision": 0.29738271452813736, + "eval_ag_news_token_set_recall": 0.43171517811120513, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 1643 + }, + { + "epoch": 7.24, + "eval_anthropic_toxic_prompts_accuracy": 0.110375, + "eval_anthropic_toxic_prompts_bleu_score": 5.980303037695327, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.2250752063017299, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.7024601697921753, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004742135387992094, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1459999978542328, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015807205702664997, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 2.9766438007354736, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.558, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.69, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.606, + "eval_anthropic_toxic_prompts_num_pred_words": 23.47, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 19.621851167582104, + "eval_anthropic_toxic_prompts_pred_num_tokens": 33.8828125, + "eval_anthropic_toxic_prompts_rouge_score": 0.3285517353029267, + "eval_anthropic_toxic_prompts_runtime": 6.9949, + "eval_anthropic_toxic_prompts_samples_per_second": 71.48, + "eval_anthropic_toxic_prompts_steps_per_second": 0.143, + "eval_anthropic_toxic_prompts_token_set_f1": 0.3533495443027637, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006121045898866037, + "eval_anthropic_toxic_prompts_token_set_precision": 0.42009567292486544, + "eval_anthropic_toxic_prompts_token_set_recall": 0.32878068071900995, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 1643 + }, + { + "epoch": 7.24, + "eval_arxiv_accuracy": 0.4180625, + "eval_arxiv_bleu_score": 3.6374956868576898, + "eval_arxiv_bleu_score_sem": 0.09995697080577728, + "eval_arxiv_emb_cos_sim": 0.7477392554283142, + "eval_arxiv_emb_cos_sim_sem": 0.004550795567367016, + "eval_arxiv_emb_top1_equal": 0.15800000727176666, + "eval_arxiv_emb_top1_equal_sem": 0.01632805076118194, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.1020619869232178, + "eval_arxiv_n_ngrams_match_1": 13.456, + "eval_arxiv_n_ngrams_match_2": 2.482, + "eval_arxiv_n_ngrams_match_3": 0.53, + "eval_arxiv_num_pred_words": 29.192, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 22.24377038967276, + "eval_arxiv_pred_num_tokens": 47.0859375, + "eval_arxiv_rouge_score": 0.36408465684598523, + "eval_arxiv_runtime": 7.2909, + "eval_arxiv_samples_per_second": 68.579, + "eval_arxiv_steps_per_second": 0.137, + "eval_arxiv_token_set_f1": 0.3662130619620601, + "eval_arxiv_token_set_f1_sem": 0.0043163861426842275, + "eval_arxiv_token_set_precision": 0.30295154357919024, + "eval_arxiv_token_set_recall": 0.4763281220006056, + "eval_arxiv_true_num_tokens": 64.0, + "step": 1643 + }, + { + "epoch": 7.24, + "eval_python_code_alpaca_accuracy": 0.165625, + "eval_python_code_alpaca_bleu_score": 7.944977507185146, + "eval_python_code_alpaca_bleu_score_sem": 0.25233973473450744, + "eval_python_code_alpaca_emb_cos_sim": 0.7998301982879639, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.003677243882031509, + "eval_python_code_alpaca_emb_top1_equal": 0.21799999475479126, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.018483376892288548, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.479875326156616, + "eval_python_code_alpaca_n_ngrams_match_1": 9.134, + "eval_python_code_alpaca_n_ngrams_match_2": 2.376, + "eval_python_code_alpaca_n_ngrams_match_3": 0.756, + "eval_python_code_alpaca_num_pred_words": 22.662, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 11.939775747320335, + "eval_python_code_alpaca_pred_num_tokens": 34.484375, + "eval_python_code_alpaca_rouge_score": 0.48839579648916653, + "eval_python_code_alpaca_runtime": 6.9291, + "eval_python_code_alpaca_samples_per_second": 72.159, + "eval_python_code_alpaca_steps_per_second": 0.144, + "eval_python_code_alpaca_token_set_f1": 0.5011354467022529, + "eval_python_code_alpaca_token_set_f1_sem": 0.005571922672352036, + "eval_python_code_alpaca_token_set_precision": 0.5274275229077607, + "eval_python_code_alpaca_token_set_recall": 0.4925627715073534, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 1643 + }, + { + "epoch": 7.24, + "eval_wikibio_accuracy": 0.36325, + "eval_wikibio_bleu_score": 5.50352870668292, + "eval_wikibio_bleu_score_sem": 0.22781839034644985, + "eval_wikibio_emb_cos_sim": 0.7406373620033264, + "eval_wikibio_emb_cos_sim_sem": 0.005716939917810654, + "eval_wikibio_emb_top1_equal": 0.1979999989271164, + "eval_wikibio_emb_top1_equal_sem": 0.017838958581409683, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.441089391708374, + "eval_wikibio_n_ngrams_match_1": 8.706, + "eval_wikibio_n_ngrams_match_2": 2.6, + "eval_wikibio_n_ngrams_match_3": 0.922, + "eval_wikibio_num_pred_words": 29.67, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 31.220951494609466, + "eval_wikibio_pred_num_tokens": 55.625, + "eval_wikibio_rouge_score": 0.3350927900544991, + "eval_wikibio_runtime": 7.1601, + "eval_wikibio_samples_per_second": 69.831, + "eval_wikibio_steps_per_second": 0.14, + "eval_wikibio_token_set_f1": 0.29396633540713607, + "eval_wikibio_token_set_f1_sem": 0.006017030654109439, + "eval_wikibio_token_set_precision": 0.28518570292078954, + "eval_wikibio_token_set_recall": 0.3219513805606757, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 1643 + }, + { + "epoch": 7.24, + "eval_bias-bios_accuracy": 0.51740625, + "eval_bias-bios_bleu_score": 18.565207501173806, + "eval_bias-bios_bleu_score_sem": 0.8562128078552043, + "eval_bias-bios_emb_cos_sim": 0.8686398267745972, + "eval_bias-bios_emb_cos_sim_sem": 0.0033610366598804567, + "eval_bias-bios_emb_top1_equal": 0.2980000078678131, + "eval_bias-bios_emb_top1_equal_sem": 0.020475119103777986, + "eval_bias-bios_exact_match": 0.002, + "eval_bias-bios_exact_match_sem": 0.002, + "eval_bias-bios_loss": 1.760959506034851, + "eval_bias-bios_n_ngrams_match_1": 19.984, + "eval_bias-bios_n_ngrams_match_2": 9.108, + "eval_bias-bios_n_ngrams_match_3": 5.208, + "eval_bias-bios_num_pred_words": 33.212, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 5.818017139631238, + "eval_bias-bios_pred_num_tokens": 45.890625, + "eval_bias-bios_rouge_score": 0.5452637587141534, + "eval_bias-bios_runtime": 7.3863, + "eval_bias-bios_samples_per_second": 67.693, + "eval_bias-bios_steps_per_second": 0.135, + "eval_bias-bios_token_set_f1": 0.5557605735112274, + "eval_bias-bios_token_set_f1_sem": 0.00695490868771064, + "eval_bias-bios_token_set_precision": 0.5039103965895205, + "eval_bias-bios_token_set_recall": 0.6343332942788436, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 1643 + }, + { + "epoch": 7.24, + "learning_rate": 0.001, + "loss": 1.7573, + "step": 1644 + }, + { + "epoch": 7.3, + "learning_rate": 0.001, + "loss": 1.9469, + "step": 1656 + }, + { + "epoch": 7.35, + "learning_rate": 0.001, + "loss": 1.926, + "step": 1668 + }, + { + "epoch": 7.37, + "eval_ag_news_accuracy": 0.300375, + "eval_ag_news_bleu_score": 4.650876326075321, + "eval_ag_news_bleu_score_sem": 0.15522337420778148, + "eval_ag_news_emb_cos_sim": 0.801328182220459, + "eval_ag_news_emb_cos_sim_sem": 0.0050656023737853636, + "eval_ag_news_emb_top1_equal": 0.27799999713897705, + "eval_ag_news_emb_top1_equal_sem": 0.0200558347666307, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.5955419540405273, + "eval_ag_news_n_ngrams_match_1": 12.808, + "eval_ag_news_n_ngrams_match_2": 2.664, + "eval_ag_news_n_ngrams_match_3": 0.696, + "eval_ag_news_num_pred_words": 38.928, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 36.43544097249612, + "eval_ag_news_pred_num_tokens": 58.90625, + "eval_ag_news_rouge_score": 0.35424002923158404, + "eval_ag_news_runtime": 9.3311, + "eval_ag_news_samples_per_second": 53.584, + "eval_ag_news_steps_per_second": 0.107, + "eval_ag_news_token_set_f1": 0.34464839824272214, + "eval_ag_news_token_set_f1_sem": 0.004853024209760347, + "eval_ag_news_token_set_precision": 0.3142035266838974, + "eval_ag_news_token_set_recall": 0.4011208877740866, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 1674 + }, + { + "epoch": 7.37, + "eval_anthropic_toxic_prompts_accuracy": 0.10515625, + "eval_anthropic_toxic_prompts_bleu_score": 3.7182273069339726, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.13410780348787632, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6839096546173096, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004770681972822426, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.16200000047683716, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.016494123019099097, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 3.111764430999756, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.906, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.812, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.66, + "eval_anthropic_toxic_prompts_num_pred_words": 37.902, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 22.46063970785518, + "eval_anthropic_toxic_prompts_pred_num_tokens": 54.546875, + "eval_anthropic_toxic_prompts_rouge_score": 0.24805704357619135, + "eval_anthropic_toxic_prompts_runtime": 7.8945, + "eval_anthropic_toxic_prompts_samples_per_second": 63.336, + "eval_anthropic_toxic_prompts_steps_per_second": 0.127, + "eval_anthropic_toxic_prompts_token_set_f1": 0.3378408216519142, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005726325781171044, + "eval_anthropic_toxic_prompts_token_set_precision": 0.436426569901928, + "eval_anthropic_toxic_prompts_token_set_recall": 0.2997263766470801, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 1674 + }, + { + "epoch": 7.37, + "eval_arxiv_accuracy": 0.42275, + "eval_arxiv_bleu_score": 4.503449524355907, + "eval_arxiv_bleu_score_sem": 0.12322119158722054, + "eval_arxiv_emb_cos_sim": 0.7579543590545654, + "eval_arxiv_emb_cos_sim_sem": 0.004369948550893629, + "eval_arxiv_emb_top1_equal": 0.23999999463558197, + "eval_arxiv_emb_top1_equal_sem": 0.019118866773455794, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.0502963066101074, + "eval_arxiv_n_ngrams_match_1": 15.374, + "eval_arxiv_n_ngrams_match_2": 2.934, + "eval_arxiv_n_ngrams_match_3": 0.662, + "eval_arxiv_num_pred_words": 37.8, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 21.12160196569762, + "eval_arxiv_pred_num_tokens": 60.6640625, + "eval_arxiv_rouge_score": 0.3747585887734518, + "eval_arxiv_runtime": 7.6589, + "eval_arxiv_samples_per_second": 65.283, + "eval_arxiv_steps_per_second": 0.131, + "eval_arxiv_token_set_f1": 0.37588382467047504, + "eval_arxiv_token_set_f1_sem": 0.004298183217087768, + "eval_arxiv_token_set_precision": 0.3236729986617741, + "eval_arxiv_token_set_recall": 0.4642341614634383, + "eval_arxiv_true_num_tokens": 64.0, + "step": 1674 + }, + { + "epoch": 7.37, + "eval_python_code_alpaca_accuracy": 0.1518125, + "eval_python_code_alpaca_bleu_score": 5.5349821219571975, + "eval_python_code_alpaca_bleu_score_sem": 0.18336546287590194, + "eval_python_code_alpaca_emb_cos_sim": 0.7575463652610779, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.004372302276355746, + "eval_python_code_alpaca_emb_top1_equal": 0.19599999487400055, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.01777075118942252, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.7344582080841064, + "eval_python_code_alpaca_n_ngrams_match_1": 9.622, + "eval_python_code_alpaca_n_ngrams_match_2": 2.688, + "eval_python_code_alpaca_n_ngrams_match_3": 0.892, + "eval_python_code_alpaca_num_pred_words": 35.136, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 15.401396822777677, + "eval_python_code_alpaca_pred_num_tokens": 54.4921875, + "eval_python_code_alpaca_rouge_score": 0.3955248696967041, + "eval_python_code_alpaca_runtime": 7.2636, + "eval_python_code_alpaca_samples_per_second": 68.836, + "eval_python_code_alpaca_steps_per_second": 0.138, + "eval_python_code_alpaca_token_set_f1": 0.4712736813612866, + "eval_python_code_alpaca_token_set_f1_sem": 0.005484467071758197, + "eval_python_code_alpaca_token_set_precision": 0.5345270958066641, + "eval_python_code_alpaca_token_set_recall": 0.4378345832325755, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 1674 + }, + { + "epoch": 7.37, + "eval_wikibio_accuracy": 0.35821875, + "eval_wikibio_bleu_score": 4.254597470202276, + "eval_wikibio_bleu_score_sem": 0.19247988367918872, + "eval_wikibio_emb_cos_sim": 0.6875274777412415, + "eval_wikibio_emb_cos_sim_sem": 0.007395272788336099, + "eval_wikibio_emb_top1_equal": 0.14399999380111694, + "eval_wikibio_emb_top1_equal_sem": 0.01571693380047095, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.3458445072174072, + "eval_wikibio_n_ngrams_match_1": 7.11, + "eval_wikibio_n_ngrams_match_2": 2.1, + "eval_wikibio_n_ngrams_match_3": 0.726, + "eval_wikibio_num_pred_words": 27.93, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 28.384536493615464, + "eval_wikibio_pred_num_tokens": 62.28125, + "eval_wikibio_rouge_score": 0.26586508728125835, + "eval_wikibio_runtime": 7.381, + "eval_wikibio_samples_per_second": 67.741, + "eval_wikibio_steps_per_second": 0.135, + "eval_wikibio_token_set_f1": 0.24263158653288092, + "eval_wikibio_token_set_f1_sem": 0.007017116461473505, + "eval_wikibio_token_set_precision": 0.22963411681555207, + "eval_wikibio_token_set_recall": 0.2874055442065437, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 1674 + }, + { + "epoch": 7.37, + "eval_bias-bios_accuracy": 0.51721875, + "eval_bias-bios_bleu_score": 18.74315096320846, + "eval_bias-bios_bleu_score_sem": 0.8052132527421368, + "eval_bias-bios_emb_cos_sim": 0.8761293888092041, + "eval_bias-bios_emb_cos_sim_sem": 0.002969192683252316, + "eval_bias-bios_emb_top1_equal": 0.3199999928474426, + "eval_bias-bios_emb_top1_equal_sem": 0.0208823415975322, + "eval_bias-bios_exact_match": 0.002, + "eval_bias-bios_exact_match_sem": 0.002, + "eval_bias-bios_loss": 1.7489280700683594, + "eval_bias-bios_n_ngrams_match_1": 22.304, + "eval_bias-bios_n_ngrams_match_2": 10.202, + "eval_bias-bios_n_ngrams_match_3": 5.706, + "eval_bias-bios_num_pred_words": 43.782, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 5.748437450088057, + "eval_bias-bios_pred_num_tokens": 59.0859375, + "eval_bias-bios_rouge_score": 0.5344921446826842, + "eval_bias-bios_runtime": 7.6401, + "eval_bias-bios_samples_per_second": 65.444, + "eval_bias-bios_steps_per_second": 0.131, + "eval_bias-bios_token_set_f1": 0.559229653634193, + "eval_bias-bios_token_set_f1_sem": 0.006654143164760632, + "eval_bias-bios_token_set_precision": 0.5387286363785764, + "eval_bias-bios_token_set_recall": 0.5919811309672199, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 1674 + }, + { + "epoch": 7.4, + "learning_rate": 0.001, + "loss": 1.7584, + "step": 1680 + }, + { + "epoch": 7.45, + "learning_rate": 0.001, + "loss": 1.6854, + "step": 1692 + }, + { + "epoch": 7.51, + "learning_rate": 0.001, + "loss": 1.9765, + "step": 1704 + }, + { + "epoch": 7.51, + "eval_ag_news_accuracy": 0.30271875, + "eval_ag_news_bleu_score": 4.851582613767712, + "eval_ag_news_bleu_score_sem": 0.1634093529875337, + "eval_ag_news_emb_cos_sim": 0.810145914554596, + "eval_ag_news_emb_cos_sim_sem": 0.004267049697947543, + "eval_ag_news_emb_top1_equal": 0.2800000011920929, + "eval_ag_news_emb_top1_equal_sem": 0.02009995045904072, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.540452718734741, + "eval_ag_news_n_ngrams_match_1": 13.362, + "eval_ag_news_n_ngrams_match_2": 2.876, + "eval_ag_news_n_ngrams_match_3": 0.8, + "eval_ag_news_num_pred_words": 41.826, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 34.4825265435078, + "eval_ag_news_pred_num_tokens": 62.125, + "eval_ag_news_rouge_score": 0.35383916590173115, + "eval_ag_news_runtime": 7.4242, + "eval_ag_news_samples_per_second": 67.347, + "eval_ag_news_steps_per_second": 0.135, + "eval_ag_news_token_set_f1": 0.3506777530746214, + "eval_ag_news_token_set_f1_sem": 0.00456835211840307, + "eval_ag_news_token_set_precision": 0.3269254507482778, + "eval_ag_news_token_set_recall": 0.39850577337557036, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 1705 + }, + { + "epoch": 7.51, + "eval_anthropic_toxic_prompts_accuracy": 0.10628125, + "eval_anthropic_toxic_prompts_bleu_score": 3.7370627662482057, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1285166203043838, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6878765225410461, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004509487085746684, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1459999978542328, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015807205702664997, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 3.137979030609131, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.262, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.056, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.79, + "eval_anthropic_toxic_prompts_num_pred_words": 41.578, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 23.057221800983054, + "eval_anthropic_toxic_prompts_pred_num_tokens": 60.0859375, + "eval_anthropic_toxic_prompts_rouge_score": 0.24340618440616155, + "eval_anthropic_toxic_prompts_runtime": 7.1278, + "eval_anthropic_toxic_prompts_samples_per_second": 70.148, + "eval_anthropic_toxic_prompts_steps_per_second": 0.14, + "eval_anthropic_toxic_prompts_token_set_f1": 0.34041683902321007, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005818452190683891, + "eval_anthropic_toxic_prompts_token_set_precision": 0.45550169072220287, + "eval_anthropic_toxic_prompts_token_set_recall": 0.2965704042505173, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 1705 + }, + { + "epoch": 7.51, + "eval_arxiv_accuracy": 0.42359375, + "eval_arxiv_bleu_score": 4.506231542920421, + "eval_arxiv_bleu_score_sem": 0.12085288715182764, + "eval_arxiv_emb_cos_sim": 0.7582602500915527, + "eval_arxiv_emb_cos_sim_sem": 0.004840408090836815, + "eval_arxiv_emb_top1_equal": 0.2879999876022339, + "eval_arxiv_emb_top1_equal_sem": 0.020271503192099565, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.0391695499420166, + "eval_arxiv_n_ngrams_match_1": 15.622, + "eval_arxiv_n_ngrams_match_2": 2.95, + "eval_arxiv_n_ngrams_match_3": 0.67, + "eval_arxiv_num_pred_words": 39.402, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 20.88788968126323, + "eval_arxiv_pred_num_tokens": 61.8359375, + "eval_arxiv_rouge_score": 0.3665985989173553, + "eval_arxiv_runtime": 8.027, + "eval_arxiv_samples_per_second": 62.29, + "eval_arxiv_steps_per_second": 0.125, + "eval_arxiv_token_set_f1": 0.3723423502046456, + "eval_arxiv_token_set_f1_sem": 0.004292290646390943, + "eval_arxiv_token_set_precision": 0.3257064091259291, + "eval_arxiv_token_set_recall": 0.4477068202273733, + "eval_arxiv_true_num_tokens": 64.0, + "step": 1705 + }, + { + "epoch": 7.51, + "eval_python_code_alpaca_accuracy": 0.152625, + "eval_python_code_alpaca_bleu_score": 5.337657457190803, + "eval_python_code_alpaca_bleu_score_sem": 0.1613635908932767, + "eval_python_code_alpaca_emb_cos_sim": 0.7676838040351868, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.0037802830122170557, + "eval_python_code_alpaca_emb_top1_equal": 0.1720000058412552, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.01689386850274998, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.7153306007385254, + "eval_python_code_alpaca_n_ngrams_match_1": 9.8, + "eval_python_code_alpaca_n_ngrams_match_2": 2.85, + "eval_python_code_alpaca_n_ngrams_match_3": 0.902, + "eval_python_code_alpaca_num_pred_words": 37.092, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 15.109604492674418, + "eval_python_code_alpaca_pred_num_tokens": 59.109375, + "eval_python_code_alpaca_rouge_score": 0.38660030871861584, + "eval_python_code_alpaca_runtime": 7.2323, + "eval_python_code_alpaca_samples_per_second": 69.134, + "eval_python_code_alpaca_steps_per_second": 0.138, + "eval_python_code_alpaca_token_set_f1": 0.464222601369294, + "eval_python_code_alpaca_token_set_f1_sem": 0.005151611944424277, + "eval_python_code_alpaca_token_set_precision": 0.5458459467726668, + "eval_python_code_alpaca_token_set_recall": 0.4191006191223907, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 1705 + }, + { + "epoch": 7.51, + "eval_wikibio_accuracy": 0.3705625, + "eval_wikibio_bleu_score": 4.9118433121021585, + "eval_wikibio_bleu_score_sem": 0.19560003433430423, + "eval_wikibio_emb_cos_sim": 0.7209903001785278, + "eval_wikibio_emb_cos_sim_sem": 0.00636168676398194, + "eval_wikibio_emb_top1_equal": 0.1720000058412552, + "eval_wikibio_emb_top1_equal_sem": 0.01689386850274998, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.3367502689361572, + "eval_wikibio_n_ngrams_match_1": 8.672, + "eval_wikibio_n_ngrams_match_2": 2.642, + "eval_wikibio_n_ngrams_match_3": 0.964, + "eval_wikibio_num_pred_words": 32.832, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 28.127570979078257, + "eval_wikibio_pred_num_tokens": 62.8671875, + "eval_wikibio_rouge_score": 0.30578451335993057, + "eval_wikibio_runtime": 7.2607, + "eval_wikibio_samples_per_second": 68.864, + "eval_wikibio_steps_per_second": 0.138, + "eval_wikibio_token_set_f1": 0.28067397282174583, + "eval_wikibio_token_set_f1_sem": 0.006536335756085939, + "eval_wikibio_token_set_precision": 0.27792502257742313, + "eval_wikibio_token_set_recall": 0.30646776239879864, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 1705 + }, + { + "epoch": 7.51, + "eval_bias-bios_accuracy": 0.5183125, + "eval_bias-bios_bleu_score": 18.417151799164678, + "eval_bias-bios_bleu_score_sem": 0.8026136252860484, + "eval_bias-bios_emb_cos_sim": 0.878588080406189, + "eval_bias-bios_emb_cos_sim_sem": 0.0028087918241305595, + "eval_bias-bios_emb_top1_equal": 0.36000001430511475, + "eval_bias-bios_emb_top1_equal_sem": 0.021487751507037762, + "eval_bias-bios_exact_match": 0.002, + "eval_bias-bios_exact_match_sem": 0.002, + "eval_bias-bios_loss": 1.7430189847946167, + "eval_bias-bios_n_ngrams_match_1": 22.904, + "eval_bias-bios_n_ngrams_match_2": 10.322, + "eval_bias-bios_n_ngrams_match_3": 5.762, + "eval_bias-bios_num_pred_words": 46.048, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 5.714569605543155, + "eval_bias-bios_pred_num_tokens": 61.546875, + "eval_bias-bios_rouge_score": 0.5311399669704344, + "eval_bias-bios_runtime": 7.5917, + "eval_bias-bios_samples_per_second": 65.862, + "eval_bias-bios_steps_per_second": 0.132, + "eval_bias-bios_token_set_f1": 0.5585373917793387, + "eval_bias-bios_token_set_f1_sem": 0.006650315588312056, + "eval_bias-bios_token_set_precision": 0.5507608675345065, + "eval_bias-bios_token_set_recall": 0.5772023876296363, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 1705 + }, + { + "epoch": 7.56, + "learning_rate": 0.001, + "loss": 1.9347, + "step": 1716 + }, + { + "epoch": 7.61, + "learning_rate": 0.001, + "loss": 1.8096, + "step": 1728 + }, + { + "epoch": 7.65, + "eval_ag_news_accuracy": 0.30046875, + "eval_ag_news_bleu_score": 4.6886501818700195, + "eval_ag_news_bleu_score_sem": 0.17742679602856962, + "eval_ag_news_emb_cos_sim": 0.8065664172172546, + "eval_ag_news_emb_cos_sim_sem": 0.004748064350459948, + "eval_ag_news_emb_top1_equal": 0.28200000524520874, + "eval_ag_news_emb_top1_equal_sem": 0.02014357168251164, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.650249481201172, + "eval_ag_news_n_ngrams_match_1": 11.962, + "eval_ag_news_n_ngrams_match_2": 2.542, + "eval_ag_news_n_ngrams_match_3": 0.694, + "eval_ag_news_num_pred_words": 31.874, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 38.48426595238064, + "eval_ag_news_pred_num_tokens": 49.3828125, + "eval_ag_news_rouge_score": 0.3670479319056318, + "eval_ag_news_runtime": 7.829, + "eval_ag_news_samples_per_second": 63.865, + "eval_ag_news_steps_per_second": 0.128, + "eval_ag_news_token_set_f1": 0.3471564789221244, + "eval_ag_news_token_set_f1_sem": 0.004798216383031354, + "eval_ag_news_token_set_precision": 0.30620342693026115, + "eval_ag_news_token_set_recall": 0.4203662546331646, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 1736 + }, + { + "epoch": 7.65, + "eval_anthropic_toxic_prompts_accuracy": 0.1083125, + "eval_anthropic_toxic_prompts_bleu_score": 5.104559842212071, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.19071219001558767, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.699446976184845, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004793991984979677, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.12999999523162842, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015055010489467818, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 2.9995689392089844, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.656, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.686, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.626, + "eval_anthropic_toxic_prompts_num_pred_words": 27.27, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 20.07688070156649, + "eval_anthropic_toxic_prompts_pred_num_tokens": 39.6015625, + "eval_anthropic_toxic_prompts_rouge_score": 0.3026076267347825, + "eval_anthropic_toxic_prompts_runtime": 7.8439, + "eval_anthropic_toxic_prompts_samples_per_second": 63.743, + "eval_anthropic_toxic_prompts_steps_per_second": 0.127, + "eval_anthropic_toxic_prompts_token_set_f1": 0.34725671483100823, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0058297886251437794, + "eval_anthropic_toxic_prompts_token_set_precision": 0.4244084345903105, + "eval_anthropic_toxic_prompts_token_set_recall": 0.31743953495929456, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 1736 + }, + { + "epoch": 7.65, + "eval_arxiv_accuracy": 0.41196875, + "eval_arxiv_bleu_score": 3.842853544571142, + "eval_arxiv_bleu_score_sem": 0.10663368564080987, + "eval_arxiv_emb_cos_sim": 0.7558121681213379, + "eval_arxiv_emb_cos_sim_sem": 0.00432601311973432, + "eval_arxiv_emb_top1_equal": 0.20600000023841858, + "eval_arxiv_emb_top1_equal_sem": 0.018104793612990725, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.1280531883239746, + "eval_arxiv_n_ngrams_match_1": 14.044, + "eval_arxiv_n_ngrams_match_2": 2.544, + "eval_arxiv_n_ngrams_match_3": 0.524, + "eval_arxiv_num_pred_words": 31.56, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 22.829491530931623, + "eval_arxiv_pred_num_tokens": 50.46875, + "eval_arxiv_rouge_score": 0.3695066100625418, + "eval_arxiv_runtime": 7.7611, + "eval_arxiv_samples_per_second": 64.424, + "eval_arxiv_steps_per_second": 0.129, + "eval_arxiv_token_set_f1": 0.3668300551628085, + "eval_arxiv_token_set_f1_sem": 0.004175076327889789, + "eval_arxiv_token_set_precision": 0.30894036995626606, + "eval_arxiv_token_set_recall": 0.46467032010595094, + "eval_arxiv_true_num_tokens": 64.0, + "step": 1736 + }, + { + "epoch": 7.65, + "eval_python_code_alpaca_accuracy": 0.1555, + "eval_python_code_alpaca_bleu_score": 6.828216796264504, + "eval_python_code_alpaca_bleu_score_sem": 0.22378378594816778, + "eval_python_code_alpaca_emb_cos_sim": 0.7818934321403503, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.004398725043788634, + "eval_python_code_alpaca_emb_top1_equal": 0.19200000166893005, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.01763218126724194, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.5831587314605713, + "eval_python_code_alpaca_n_ngrams_match_1": 9.108, + "eval_python_code_alpaca_n_ngrams_match_2": 2.44, + "eval_python_code_alpaca_n_ngrams_match_3": 0.732, + "eval_python_code_alpaca_num_pred_words": 25.85, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 13.238890282309018, + "eval_python_code_alpaca_pred_num_tokens": 41.40625, + "eval_python_code_alpaca_rouge_score": 0.4523808554731888, + "eval_python_code_alpaca_runtime": 7.6902, + "eval_python_code_alpaca_samples_per_second": 65.018, + "eval_python_code_alpaca_steps_per_second": 0.13, + "eval_python_code_alpaca_token_set_f1": 0.481507138765683, + "eval_python_code_alpaca_token_set_f1_sem": 0.005669716196561488, + "eval_python_code_alpaca_token_set_precision": 0.5192525976348585, + "eval_python_code_alpaca_token_set_recall": 0.46509787489139526, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 1736 + }, + { + "epoch": 7.65, + "eval_wikibio_accuracy": 0.35484375, + "eval_wikibio_bleu_score": 5.543856789536708, + "eval_wikibio_bleu_score_sem": 0.21142369496818994, + "eval_wikibio_emb_cos_sim": 0.7370650768280029, + "eval_wikibio_emb_cos_sim_sem": 0.005716528082494802, + "eval_wikibio_emb_top1_equal": 0.18199999630451202, + "eval_wikibio_emb_top1_equal_sem": 0.017272772986938162, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.5122337341308594, + "eval_wikibio_n_ngrams_match_1": 8.5, + "eval_wikibio_n_ngrams_match_2": 2.59, + "eval_wikibio_n_ngrams_match_3": 0.9, + "eval_wikibio_num_pred_words": 28.85, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 33.52306582968505, + "eval_wikibio_pred_num_tokens": 54.8515625, + "eval_wikibio_rouge_score": 0.33258407318876426, + "eval_wikibio_runtime": 7.8711, + "eval_wikibio_samples_per_second": 63.523, + "eval_wikibio_steps_per_second": 0.127, + "eval_wikibio_token_set_f1": 0.29700381328772274, + "eval_wikibio_token_set_f1_sem": 0.00600601802755354, + "eval_wikibio_token_set_precision": 0.28324158592970555, + "eval_wikibio_token_set_recall": 0.3295388378188429, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 1736 + }, + { + "epoch": 7.65, + "eval_bias-bios_accuracy": 0.5184375, + "eval_bias-bios_bleu_score": 19.40074143169447, + "eval_bias-bios_bleu_score_sem": 0.8524070756221019, + "eval_bias-bios_emb_cos_sim": 0.8761026859283447, + "eval_bias-bios_emb_cos_sim_sem": 0.003328032190996095, + "eval_bias-bios_emb_top1_equal": 0.30000001192092896, + "eval_bias-bios_emb_top1_equal_sem": 0.020514426052435274, + "eval_bias-bios_exact_match": 0.002, + "eval_bias-bios_exact_match_sem": 0.002, + "eval_bias-bios_loss": 1.7550889253616333, + "eval_bias-bios_n_ngrams_match_1": 21.11, + "eval_bias-bios_n_ngrams_match_2": 9.712, + "eval_bias-bios_n_ngrams_match_3": 5.496, + "eval_bias-bios_num_pred_words": 35.97, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 5.783962060006336, + "eval_bias-bios_pred_num_tokens": 48.75, + "eval_bias-bios_rouge_score": 0.5571982311817295, + "eval_bias-bios_runtime": 7.6404, + "eval_bias-bios_samples_per_second": 65.442, + "eval_bias-bios_steps_per_second": 0.131, + "eval_bias-bios_token_set_f1": 0.5653724034379729, + "eval_bias-bios_token_set_f1_sem": 0.00690907806658598, + "eval_bias-bios_token_set_precision": 0.5237722994373154, + "eval_bias-bios_token_set_recall": 0.6273984808734474, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 1736 + }, + { + "epoch": 7.67, + "learning_rate": 0.001, + "loss": 1.6229, + "step": 1740 + }, + { + "epoch": 7.72, + "learning_rate": 0.001, + "loss": 2.0186, + "step": 1752 + }, + { + "epoch": 7.77, + "learning_rate": 0.001, + "loss": 1.9309, + "step": 1764 + }, + { + "epoch": 7.78, + "eval_ag_news_accuracy": 0.3005, + "eval_ag_news_bleu_score": 4.722855480011007, + "eval_ag_news_bleu_score_sem": 0.15753943975856985, + "eval_ag_news_emb_cos_sim": 0.811168372631073, + "eval_ag_news_emb_cos_sim_sem": 0.0049495894315502035, + "eval_ag_news_emb_top1_equal": 0.27000001072883606, + "eval_ag_news_emb_top1_equal_sem": 0.019874356669179787, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.563847541809082, + "eval_ag_news_n_ngrams_match_1": 13.504, + "eval_ag_news_n_ngrams_match_2": 2.902, + "eval_ag_news_n_ngrams_match_3": 0.776, + "eval_ag_news_num_pred_words": 41.78, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 35.29874962095071, + "eval_ag_news_pred_num_tokens": 62.8203125, + "eval_ag_news_rouge_score": 0.3594130357291111, + "eval_ag_news_runtime": 7.5908, + "eval_ag_news_samples_per_second": 65.869, + "eval_ag_news_steps_per_second": 0.132, + "eval_ag_news_token_set_f1": 0.3522306876614356, + "eval_ag_news_token_set_f1_sem": 0.00471422098838425, + "eval_ag_news_token_set_precision": 0.32937037502803546, + "eval_ag_news_token_set_recall": 0.3987290004470333, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 1767 + }, + { + "epoch": 7.78, + "eval_anthropic_toxic_prompts_accuracy": 0.105375, + "eval_anthropic_toxic_prompts_bleu_score": 3.4987759769154145, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12297900893527906, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6914503574371338, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004275769877600584, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.14800000190734863, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015896458012572223, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 3.161133289337158, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.134, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.974, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.786, + "eval_anthropic_toxic_prompts_num_pred_words": 43.256, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 23.597323376209825, + "eval_anthropic_toxic_prompts_pred_num_tokens": 62.515625, + "eval_anthropic_toxic_prompts_rouge_score": 0.23212696161321655, + "eval_anthropic_toxic_prompts_runtime": 10.2195, + "eval_anthropic_toxic_prompts_samples_per_second": 48.926, + "eval_anthropic_toxic_prompts_steps_per_second": 0.098, + "eval_anthropic_toxic_prompts_token_set_f1": 0.3285741928131714, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005470398784443636, + "eval_anthropic_toxic_prompts_token_set_precision": 0.4484121061331258, + "eval_anthropic_toxic_prompts_token_set_recall": 0.2823611547664964, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 1767 + }, + { + "epoch": 7.78, + "eval_arxiv_accuracy": 0.42471875, + "eval_arxiv_bleu_score": 4.434731531662221, + "eval_arxiv_bleu_score_sem": 0.12335882016089646, + "eval_arxiv_emb_cos_sim": 0.7598865032196045, + "eval_arxiv_emb_cos_sim_sem": 0.005009810683690815, + "eval_arxiv_emb_top1_equal": 0.32600000500679016, + "eval_arxiv_emb_top1_equal_sem": 0.020984011608532603, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.030510187149048, + "eval_arxiv_n_ngrams_match_1": 15.518, + "eval_arxiv_n_ngrams_match_2": 2.902, + "eval_arxiv_n_ngrams_match_3": 0.636, + "eval_arxiv_num_pred_words": 39.462, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 20.70779474558654, + "eval_arxiv_pred_num_tokens": 62.90625, + "eval_arxiv_rouge_score": 0.3665031141829219, + "eval_arxiv_runtime": 8.1516, + "eval_arxiv_samples_per_second": 61.338, + "eval_arxiv_steps_per_second": 0.123, + "eval_arxiv_token_set_f1": 0.3706754086012643, + "eval_arxiv_token_set_f1_sem": 0.004476124291976172, + "eval_arxiv_token_set_precision": 0.32358896466381126, + "eval_arxiv_token_set_recall": 0.45072060730764696, + "eval_arxiv_true_num_tokens": 64.0, + "step": 1767 + }, + { + "epoch": 7.78, + "eval_python_code_alpaca_accuracy": 0.15021875, + "eval_python_code_alpaca_bleu_score": 4.789435602398936, + "eval_python_code_alpaca_bleu_score_sem": 0.15382870656988126, + "eval_python_code_alpaca_emb_cos_sim": 0.7624420523643494, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.0036925334347325555, + "eval_python_code_alpaca_emb_top1_equal": 0.1599999964237213, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.016411540042267993, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.7904624938964844, + "eval_python_code_alpaca_n_ngrams_match_1": 9.702, + "eval_python_code_alpaca_n_ngrams_match_2": 2.786, + "eval_python_code_alpaca_n_ngrams_match_3": 0.878, + "eval_python_code_alpaca_num_pred_words": 39.734, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 16.288551415603756, + "eval_python_code_alpaca_pred_num_tokens": 62.8828125, + "eval_python_code_alpaca_rouge_score": 0.37263424824477864, + "eval_python_code_alpaca_runtime": 7.7315, + "eval_python_code_alpaca_samples_per_second": 64.67, + "eval_python_code_alpaca_steps_per_second": 0.129, + "eval_python_code_alpaca_token_set_f1": 0.4545399548124749, + "eval_python_code_alpaca_token_set_f1_sem": 0.005183398897931823, + "eval_python_code_alpaca_token_set_precision": 0.5375639518729065, + "eval_python_code_alpaca_token_set_recall": 0.41141154370575833, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 1767 + }, + { + "epoch": 7.78, + "eval_wikibio_accuracy": 0.36715625, + "eval_wikibio_bleu_score": 4.919477452581796, + "eval_wikibio_bleu_score_sem": 0.2047184635454884, + "eval_wikibio_emb_cos_sim": 0.7149428725242615, + "eval_wikibio_emb_cos_sim_sem": 0.006690116098771149, + "eval_wikibio_emb_top1_equal": 0.17800000309944153, + "eval_wikibio_emb_top1_equal_sem": 0.017123621962581055, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.3364834785461426, + "eval_wikibio_n_ngrams_match_1": 8.498, + "eval_wikibio_n_ngrams_match_2": 2.632, + "eval_wikibio_n_ngrams_match_3": 0.99, + "eval_wikibio_num_pred_words": 32.026, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 28.120067814377208, + "eval_wikibio_pred_num_tokens": 63.0, + "eval_wikibio_rouge_score": 0.30135222420067076, + "eval_wikibio_runtime": 7.4611, + "eval_wikibio_samples_per_second": 67.014, + "eval_wikibio_steps_per_second": 0.134, + "eval_wikibio_token_set_f1": 0.27669579538740924, + "eval_wikibio_token_set_f1_sem": 0.006812996857810089, + "eval_wikibio_token_set_precision": 0.27239784751259927, + "eval_wikibio_token_set_recall": 0.30446375248856655, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 1767 + }, + { + "epoch": 7.78, + "eval_bias-bios_accuracy": 0.5174375, + "eval_bias-bios_bleu_score": 18.54502948244028, + "eval_bias-bios_bleu_score_sem": 0.776228581133173, + "eval_bias-bios_emb_cos_sim": 0.8806796073913574, + "eval_bias-bios_emb_cos_sim_sem": 0.0027210159238386543, + "eval_bias-bios_emb_top1_equal": 0.34599998593330383, + "eval_bias-bios_emb_top1_equal_sem": 0.021294951270401857, + "eval_bias-bios_exact_match": 0.0, + "eval_bias-bios_exact_match_sem": 0.0, + "eval_bias-bios_loss": 1.756240725517273, + "eval_bias-bios_n_ngrams_match_1": 23.022, + "eval_bias-bios_n_ngrams_match_2": 10.602, + "eval_bias-bios_n_ngrams_match_3": 5.974, + "eval_bias-bios_num_pred_words": 46.896, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 5.790627866508819, + "eval_bias-bios_pred_num_tokens": 62.3984375, + "eval_bias-bios_rouge_score": 0.5295545960920665, + "eval_bias-bios_runtime": 7.7836, + "eval_bias-bios_samples_per_second": 64.238, + "eval_bias-bios_steps_per_second": 0.128, + "eval_bias-bios_token_set_f1": 0.5596962553749173, + "eval_bias-bios_token_set_f1_sem": 0.0065499820854619285, + "eval_bias-bios_token_set_precision": 0.552492983592854, + "eval_bias-bios_token_set_recall": 0.5769811603574992, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 1767 + }, + { + "epoch": 7.82, + "learning_rate": 0.001, + "loss": 1.8319, + "step": 1776 + }, + { + "epoch": 7.88, + "learning_rate": 0.001, + "loss": 1.6043, + "step": 1788 + }, + { + "epoch": 7.92, + "eval_ag_news_accuracy": 0.30328125, + "eval_ag_news_bleu_score": 4.792043355736725, + "eval_ag_news_bleu_score_sem": 0.16482900313217344, + "eval_ag_news_emb_cos_sim": 0.8007201552391052, + "eval_ag_news_emb_cos_sim_sem": 0.005518706853646463, + "eval_ag_news_emb_top1_equal": 0.28600001335144043, + "eval_ag_news_emb_top1_equal_sem": 0.020229345383440313, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.5438921451568604, + "eval_ag_news_n_ngrams_match_1": 12.572, + "eval_ag_news_n_ngrams_match_2": 2.682, + "eval_ag_news_n_ngrams_match_3": 0.744, + "eval_ag_news_num_pred_words": 36.492, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 34.601330848618204, + "eval_ag_news_pred_num_tokens": 56.8984375, + "eval_ag_news_rouge_score": 0.3590280838098541, + "eval_ag_news_runtime": 8.658, + "eval_ag_news_samples_per_second": 57.75, + "eval_ag_news_steps_per_second": 0.115, + "eval_ag_news_token_set_f1": 0.3456240580243733, + "eval_ag_news_token_set_f1_sem": 0.004942295341404688, + "eval_ag_news_token_set_precision": 0.30991615070020745, + "eval_ag_news_token_set_recall": 0.4120273375000977, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 1798 + }, + { + "epoch": 7.92, + "eval_anthropic_toxic_prompts_accuracy": 0.10675, + "eval_anthropic_toxic_prompts_bleu_score": 4.810195451329484, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1836726785355343, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6906068921089172, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.0052169098691491, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1459999978542328, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01580720436986462, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 3.0305025577545166, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.75, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.796, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.658, + "eval_anthropic_toxic_prompts_num_pred_words": 30.982, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 20.70763675825323, + "eval_anthropic_toxic_prompts_pred_num_tokens": 45.0234375, + "eval_anthropic_toxic_prompts_rouge_score": 0.28588224786399713, + "eval_anthropic_toxic_prompts_runtime": 6.9727, + "eval_anthropic_toxic_prompts_samples_per_second": 71.708, + "eval_anthropic_toxic_prompts_steps_per_second": 0.143, + "eval_anthropic_toxic_prompts_token_set_f1": 0.35198053648732547, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005893928358486648, + "eval_anthropic_toxic_prompts_token_set_precision": 0.4273047679068406, + "eval_anthropic_toxic_prompts_token_set_recall": 0.32545975191429377, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 1798 + }, + { + "epoch": 7.92, + "eval_arxiv_accuracy": 0.42528125, + "eval_arxiv_bleu_score": 4.221191763826797, + "eval_arxiv_bleu_score_sem": 0.12013763585677406, + "eval_arxiv_emb_cos_sim": 0.7373228669166565, + "eval_arxiv_emb_cos_sim_sem": 0.006062924899138375, + "eval_arxiv_emb_top1_equal": 0.25999999046325684, + "eval_arxiv_emb_top1_equal_sem": 0.0196359666629192, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.028066873550415, + "eval_arxiv_n_ngrams_match_1": 14.23, + "eval_arxiv_n_ngrams_match_2": 2.724, + "eval_arxiv_n_ngrams_match_3": 0.65, + "eval_arxiv_num_pred_words": 35.354, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 20.65726086928022, + "eval_arxiv_pred_num_tokens": 58.640625, + "eval_arxiv_rouge_score": 0.3524750488192354, + "eval_arxiv_runtime": 8.5405, + "eval_arxiv_samples_per_second": 58.545, + "eval_arxiv_steps_per_second": 0.117, + "eval_arxiv_token_set_f1": 0.354716902698285, + "eval_arxiv_token_set_f1_sem": 0.004857799242708296, + "eval_arxiv_token_set_precision": 0.29925603684239627, + "eval_arxiv_token_set_recall": 0.45414328256592784, + "eval_arxiv_true_num_tokens": 64.0, + "step": 1798 + }, + { + "epoch": 7.92, + "eval_python_code_alpaca_accuracy": 0.15634375, + "eval_python_code_alpaca_bleu_score": 6.6354189861223025, + "eval_python_code_alpaca_bleu_score_sem": 0.23023821468117778, + "eval_python_code_alpaca_emb_cos_sim": 0.7703977823257446, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.004220384358420294, + "eval_python_code_alpaca_emb_top1_equal": 0.20399999618530273, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.018039369108186407, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.540879011154175, + "eval_python_code_alpaca_n_ngrams_match_1": 9.11, + "eval_python_code_alpaca_n_ngrams_match_2": 2.612, + "eval_python_code_alpaca_n_ngrams_match_3": 0.858, + "eval_python_code_alpaca_num_pred_words": 29.414, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 12.69082144302526, + "eval_python_code_alpaca_pred_num_tokens": 48.5703125, + "eval_python_code_alpaca_rouge_score": 0.42557610046948885, + "eval_python_code_alpaca_runtime": 8.5016, + "eval_python_code_alpaca_samples_per_second": 58.812, + "eval_python_code_alpaca_steps_per_second": 0.118, + "eval_python_code_alpaca_token_set_f1": 0.47824295657687516, + "eval_python_code_alpaca_token_set_f1_sem": 0.0058007310055928065, + "eval_python_code_alpaca_token_set_precision": 0.5116566636117332, + "eval_python_code_alpaca_token_set_recall": 0.46656534883311057, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 1798 + }, + { + "epoch": 7.92, + "eval_wikibio_accuracy": 0.3696875, + "eval_wikibio_bleu_score": 4.370823640419664, + "eval_wikibio_bleu_score_sem": 0.19076798738180825, + "eval_wikibio_emb_cos_sim": 0.6826354265213013, + "eval_wikibio_emb_cos_sim_sem": 0.00763070997536604, + "eval_wikibio_emb_top1_equal": 0.15600000321865082, + "eval_wikibio_emb_top1_equal_sem": 0.016243635183835314, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.3066444396972656, + "eval_wikibio_n_ngrams_match_1": 7.458, + "eval_wikibio_n_ngrams_match_2": 2.3, + "eval_wikibio_n_ngrams_match_3": 0.826, + "eval_wikibio_num_pred_words": 29.314, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 27.29338703499244, + "eval_wikibio_pred_num_tokens": 61.484375, + "eval_wikibio_rouge_score": 0.2675907815883483, + "eval_wikibio_runtime": 9.3728, + "eval_wikibio_samples_per_second": 53.346, + "eval_wikibio_steps_per_second": 0.107, + "eval_wikibio_token_set_f1": 0.24419770753332024, + "eval_wikibio_token_set_f1_sem": 0.007244638990825622, + "eval_wikibio_token_set_precision": 0.23626165051127185, + "eval_wikibio_token_set_recall": 0.2800556622176949, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 1798 + }, + { + "epoch": 7.92, + "eval_bias-bios_accuracy": 0.520875, + "eval_bias-bios_bleu_score": 19.41641229356281, + "eval_bias-bios_bleu_score_sem": 0.857620968734896, + "eval_bias-bios_emb_cos_sim": 0.8749354481697083, + "eval_bias-bios_emb_cos_sim_sem": 0.0032761149515914533, + "eval_bias-bios_emb_top1_equal": 0.35199999809265137, + "eval_bias-bios_emb_top1_equal_sem": 0.02138004257753857, + "eval_bias-bios_exact_match": 0.002, + "eval_bias-bios_exact_match_sem": 0.002, + "eval_bias-bios_loss": 1.7302442789077759, + "eval_bias-bios_n_ngrams_match_1": 21.662, + "eval_bias-bios_n_ngrams_match_2": 9.86, + "eval_bias-bios_n_ngrams_match_3": 5.568, + "eval_bias-bios_num_pred_words": 39.64, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 5.642031969512989, + "eval_bias-bios_pred_num_tokens": 53.828125, + "eval_bias-bios_rouge_score": 0.5458043549089255, + "eval_bias-bios_runtime": 8.2889, + "eval_bias-bios_samples_per_second": 60.322, + "eval_bias-bios_steps_per_second": 0.121, + "eval_bias-bios_token_set_f1": 0.5622363262827373, + "eval_bias-bios_token_set_f1_sem": 0.006909230874498535, + "eval_bias-bios_token_set_precision": 0.5260818559910302, + "eval_bias-bios_token_set_recall": 0.6163827708499481, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 1798 + }, + { + "epoch": 7.93, + "learning_rate": 0.001, + "loss": 1.9572, + "step": 1800 + }, + { + "epoch": 7.98, + "learning_rate": 0.001, + "loss": 1.8099, + "step": 1812 + }, + { + "epoch": 8.04, + "learning_rate": 0.001, + "loss": 1.8423, + "step": 1824 + }, + { + "epoch": 8.06, + "eval_ag_news_accuracy": 0.30134375, + "eval_ag_news_bleu_score": 4.780837595873983, + "eval_ag_news_bleu_score_sem": 0.15800340186528505, + "eval_ag_news_emb_cos_sim": 0.8033591508865356, + "eval_ag_news_emb_cos_sim_sem": 0.0052791523130582855, + "eval_ag_news_emb_top1_equal": 0.28600001335144043, + "eval_ag_news_emb_top1_equal_sem": 0.020229345383440313, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.5662569999694824, + "eval_ag_news_n_ngrams_match_1": 12.928, + "eval_ag_news_n_ngrams_match_2": 2.704, + "eval_ag_news_n_ngrams_match_3": 0.73, + "eval_ag_news_num_pred_words": 38.418, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 35.3839030268648, + "eval_ag_news_pred_num_tokens": 59.2109375, + "eval_ag_news_rouge_score": 0.3585844720770892, + "eval_ag_news_runtime": 8.5983, + "eval_ag_news_samples_per_second": 58.151, + "eval_ag_news_steps_per_second": 0.116, + "eval_ag_news_token_set_f1": 0.3478374542683173, + "eval_ag_news_token_set_f1_sem": 0.0050479052066528095, + "eval_ag_news_token_set_precision": 0.31776503427402997, + "eval_ag_news_token_set_recall": 0.40573543588464517, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 1829 + }, + { + "epoch": 8.06, + "eval_anthropic_toxic_prompts_accuracy": 0.1070625, + "eval_anthropic_toxic_prompts_bleu_score": 4.9483724168872705, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.18687237219876157, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6995450854301453, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.0047627024969777495, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.14000000059604645, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015533271243205533, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 3.037666082382202, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.838, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.832, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.676, + "eval_anthropic_toxic_prompts_num_pred_words": 30.12, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 20.85650901255428, + "eval_anthropic_toxic_prompts_pred_num_tokens": 43.6484375, + "eval_anthropic_toxic_prompts_rouge_score": 0.29494632328980463, + "eval_anthropic_toxic_prompts_runtime": 8.6405, + "eval_anthropic_toxic_prompts_samples_per_second": 57.867, + "eval_anthropic_toxic_prompts_steps_per_second": 0.116, + "eval_anthropic_toxic_prompts_token_set_f1": 0.34456781435249784, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005781889435940958, + "eval_anthropic_toxic_prompts_token_set_precision": 0.42904865190258323, + "eval_anthropic_toxic_prompts_token_set_recall": 0.3116079242511146, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 1829 + }, + { + "epoch": 8.06, + "eval_arxiv_accuracy": 0.42596875, + "eval_arxiv_bleu_score": 4.432236788900172, + "eval_arxiv_bleu_score_sem": 0.1304993760904918, + "eval_arxiv_emb_cos_sim": 0.7465862035751343, + "eval_arxiv_emb_cos_sim_sem": 0.005250071608477703, + "eval_arxiv_emb_top1_equal": 0.2800000011920929, + "eval_arxiv_emb_top1_equal_sem": 0.020099949126240343, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.0305418968200684, + "eval_arxiv_n_ngrams_match_1": 14.788, + "eval_arxiv_n_ngrams_match_2": 2.836, + "eval_arxiv_n_ngrams_match_3": 0.684, + "eval_arxiv_num_pred_words": 36.79, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 20.70845139335647, + "eval_arxiv_pred_num_tokens": 60.890625, + "eval_arxiv_rouge_score": 0.36099883081969086, + "eval_arxiv_runtime": 9.4935, + "eval_arxiv_samples_per_second": 52.668, + "eval_arxiv_steps_per_second": 0.105, + "eval_arxiv_token_set_f1": 0.36457213548998924, + "eval_arxiv_token_set_f1_sem": 0.0045752990200083506, + "eval_arxiv_token_set_precision": 0.3117065680056879, + "eval_arxiv_token_set_recall": 0.45586883738956696, + "eval_arxiv_true_num_tokens": 64.0, + "step": 1829 + }, + { + "epoch": 8.06, + "eval_python_code_alpaca_accuracy": 0.15471875, + "eval_python_code_alpaca_bleu_score": 6.5590124981329225, + "eval_python_code_alpaca_bleu_score_sem": 0.2234983443166712, + "eval_python_code_alpaca_emb_cos_sim": 0.7730389833450317, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.0038631195543194887, + "eval_python_code_alpaca_emb_top1_equal": 0.20000000298023224, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.017906459589198134, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.5823779106140137, + "eval_python_code_alpaca_n_ngrams_match_1": 9.404, + "eval_python_code_alpaca_n_ngrams_match_2": 2.632, + "eval_python_code_alpaca_n_ngrams_match_3": 0.848, + "eval_python_code_alpaca_num_pred_words": 30.27, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 13.228557115492329, + "eval_python_code_alpaca_pred_num_tokens": 49.7421875, + "eval_python_code_alpaca_rouge_score": 0.4327032429294919, + "eval_python_code_alpaca_runtime": 8.2947, + "eval_python_code_alpaca_samples_per_second": 60.28, + "eval_python_code_alpaca_steps_per_second": 0.121, + "eval_python_code_alpaca_token_set_f1": 0.4822270748001864, + "eval_python_code_alpaca_token_set_f1_sem": 0.0055473955401953, + "eval_python_code_alpaca_token_set_precision": 0.5275970090814532, + "eval_python_code_alpaca_token_set_recall": 0.46006825227663467, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 1829 + }, + { + "epoch": 8.06, + "eval_wikibio_accuracy": 0.37275, + "eval_wikibio_bleu_score": 4.983038179149817, + "eval_wikibio_bleu_score_sem": 0.20608286310648566, + "eval_wikibio_emb_cos_sim": 0.7166478037834167, + "eval_wikibio_emb_cos_sim_sem": 0.006287153235015197, + "eval_wikibio_emb_top1_equal": 0.18400000035762787, + "eval_wikibio_emb_top1_equal_sem": 0.017346174301986407, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.313105583190918, + "eval_wikibio_n_ngrams_match_1": 8.406, + "eval_wikibio_n_ngrams_match_2": 2.586, + "eval_wikibio_n_ngrams_match_3": 0.962, + "eval_wikibio_num_pred_words": 31.31, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 27.470304453997354, + "eval_wikibio_pred_num_tokens": 62.1953125, + "eval_wikibio_rouge_score": 0.29782203869979695, + "eval_wikibio_runtime": 8.5123, + "eval_wikibio_samples_per_second": 58.739, + "eval_wikibio_steps_per_second": 0.117, + "eval_wikibio_token_set_f1": 0.27353970378616643, + "eval_wikibio_token_set_f1_sem": 0.00668621847617472, + "eval_wikibio_token_set_precision": 0.26680747758540296, + "eval_wikibio_token_set_recall": 0.3051204144974896, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 1829 + }, + { + "epoch": 8.06, + "eval_bias-bios_accuracy": 0.52309375, + "eval_bias-bios_bleu_score": 19.697531118947712, + "eval_bias-bios_bleu_score_sem": 0.8502093943718262, + "eval_bias-bios_emb_cos_sim": 0.8757075071334839, + "eval_bias-bios_emb_cos_sim_sem": 0.0033073384659748216, + "eval_bias-bios_emb_top1_equal": 0.3319999873638153, + "eval_bias-bios_emb_top1_equal_sem": 0.02108176585203148, + "eval_bias-bios_exact_match": 0.002, + "eval_bias-bios_exact_match_sem": 0.002, + "eval_bias-bios_loss": 1.7177313566207886, + "eval_bias-bios_n_ngrams_match_1": 22.114, + "eval_bias-bios_n_ngrams_match_2": 10.124, + "eval_bias-bios_n_ngrams_match_3": 5.774, + "eval_bias-bios_num_pred_words": 41.354, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 5.571873520953892, + "eval_bias-bios_pred_num_tokens": 56.984375, + "eval_bias-bios_rouge_score": 0.542823255202964, + "eval_bias-bios_runtime": 9.2763, + "eval_bias-bios_samples_per_second": 53.901, + "eval_bias-bios_steps_per_second": 0.108, + "eval_bias-bios_token_set_f1": 0.5630881889770337, + "eval_bias-bios_token_set_f1_sem": 0.0068162888586790924, + "eval_bias-bios_token_set_precision": 0.533611840371098, + "eval_bias-bios_token_set_recall": 0.6075041270934662, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 1829 + }, + { + "epoch": 8.09, + "learning_rate": 0.001, + "loss": 1.928, + "step": 1836 + }, + { + "epoch": 8.14, + "learning_rate": 0.001, + "loss": 1.8933, + "step": 1848 + }, + { + "epoch": 8.19, + "learning_rate": 0.001, + "loss": 1.6677, + "step": 1860 + }, + { + "epoch": 8.19, + "eval_ag_news_accuracy": 0.30309375, + "eval_ag_news_bleu_score": 4.741384672804814, + "eval_ag_news_bleu_score_sem": 0.15837203198791805, + "eval_ag_news_emb_cos_sim": 0.8121864199638367, + "eval_ag_news_emb_cos_sim_sem": 0.0046788633557932795, + "eval_ag_news_emb_top1_equal": 0.27399998903274536, + "eval_ag_news_emb_top1_equal_sem": 0.0199661026485885, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.63670015335083, + "eval_ag_news_n_ngrams_match_1": 12.424, + "eval_ag_news_n_ngrams_match_2": 2.622, + "eval_ag_news_n_ngrams_match_3": 0.692, + "eval_ag_news_num_pred_words": 34.722, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 37.966346668463544, + "eval_ag_news_pred_num_tokens": 52.328125, + "eval_ag_news_rouge_score": 0.3665288936984439, + "eval_ag_news_runtime": 7.2875, + "eval_ag_news_samples_per_second": 68.611, + "eval_ag_news_steps_per_second": 0.137, + "eval_ag_news_token_set_f1": 0.3479838698728074, + "eval_ag_news_token_set_f1_sem": 0.0048582863561366565, + "eval_ag_news_token_set_precision": 0.3123052399394728, + "eval_ag_news_token_set_recall": 0.4094756649000542, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 1860 + }, + { + "epoch": 8.19, + "eval_anthropic_toxic_prompts_accuracy": 0.10640625, + "eval_anthropic_toxic_prompts_bleu_score": 4.9816386277532265, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.18112198763560855, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.7021182179450989, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004715369424462663, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.15800000727176666, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01632805076118194, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 3.0633704662323, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.938, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.822, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.692, + "eval_anthropic_toxic_prompts_num_pred_words": 29.768, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 21.399562250453446, + "eval_anthropic_toxic_prompts_pred_num_tokens": 43.046875, + "eval_anthropic_toxic_prompts_rouge_score": 0.2986432883196827, + "eval_anthropic_toxic_prompts_runtime": 7.076, + "eval_anthropic_toxic_prompts_samples_per_second": 70.661, + "eval_anthropic_toxic_prompts_steps_per_second": 0.141, + "eval_anthropic_toxic_prompts_token_set_f1": 0.3476961677437007, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005938682229453897, + "eval_anthropic_toxic_prompts_token_set_precision": 0.4367535744857751, + "eval_anthropic_toxic_prompts_token_set_recall": 0.31075569132235875, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 1860 + }, + { + "epoch": 8.19, + "eval_arxiv_accuracy": 0.41928125, + "eval_arxiv_bleu_score": 4.424283023632561, + "eval_arxiv_bleu_score_sem": 0.11617275039162875, + "eval_arxiv_emb_cos_sim": 0.7651264071464539, + "eval_arxiv_emb_cos_sim_sem": 0.004366971074856047, + "eval_arxiv_emb_top1_equal": 0.23000000417232513, + "eval_arxiv_emb_top1_equal_sem": 0.018839050665941787, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.0793185234069824, + "eval_arxiv_n_ngrams_match_1": 15.038, + "eval_arxiv_n_ngrams_match_2": 2.916, + "eval_arxiv_n_ngrams_match_3": 0.668, + "eval_arxiv_num_pred_words": 34.756, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 21.74357960552871, + "eval_arxiv_pred_num_tokens": 55.7734375, + "eval_arxiv_rouge_score": 0.3787988872723498, + "eval_arxiv_runtime": 9.7779, + "eval_arxiv_samples_per_second": 51.136, + "eval_arxiv_steps_per_second": 0.102, + "eval_arxiv_token_set_f1": 0.3769822097015867, + "eval_arxiv_token_set_f1_sem": 0.004083903196924782, + "eval_arxiv_token_set_precision": 0.3253013010979465, + "eval_arxiv_token_set_recall": 0.4601533774146747, + "eval_arxiv_true_num_tokens": 64.0, + "step": 1860 + }, + { + "epoch": 8.19, + "eval_python_code_alpaca_accuracy": 0.15565625, + "eval_python_code_alpaca_bleu_score": 6.6369636954019295, + "eval_python_code_alpaca_bleu_score_sem": 0.20906494831471414, + "eval_python_code_alpaca_emb_cos_sim": 0.7860262989997864, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.0037672392281477584, + "eval_python_code_alpaca_emb_top1_equal": 0.18199999630451202, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.017272772986938162, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.638681173324585, + "eval_python_code_alpaca_n_ngrams_match_1": 9.666, + "eval_python_code_alpaca_n_ngrams_match_2": 2.728, + "eval_python_code_alpaca_n_ngrams_match_3": 0.888, + "eval_python_code_alpaca_num_pred_words": 29.712, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 13.994734802261277, + "eval_python_code_alpaca_pred_num_tokens": 47.40625, + "eval_python_code_alpaca_rouge_score": 0.4400602798695769, + "eval_python_code_alpaca_runtime": 7.9726, + "eval_python_code_alpaca_samples_per_second": 62.715, + "eval_python_code_alpaca_steps_per_second": 0.125, + "eval_python_code_alpaca_token_set_f1": 0.4884306011296119, + "eval_python_code_alpaca_token_set_f1_sem": 0.0055162166038522165, + "eval_python_code_alpaca_token_set_precision": 0.5446404584245398, + "eval_python_code_alpaca_token_set_recall": 0.4568444762878828, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 1860 + }, + { + "epoch": 8.19, + "eval_wikibio_accuracy": 0.35609375, + "eval_wikibio_bleu_score": 5.359000016975188, + "eval_wikibio_bleu_score_sem": 0.19773247550237727, + "eval_wikibio_emb_cos_sim": 0.7486798167228699, + "eval_wikibio_emb_cos_sim_sem": 0.005239392212273522, + "eval_wikibio_emb_top1_equal": 0.20000000298023224, + "eval_wikibio_emb_top1_equal_sem": 0.017906459589198134, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.4573657512664795, + "eval_wikibio_n_ngrams_match_1": 8.894, + "eval_wikibio_n_ngrams_match_2": 2.644, + "eval_wikibio_n_ngrams_match_3": 0.95, + "eval_wikibio_num_pred_words": 31.156, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 31.733272980946744, + "eval_wikibio_pred_num_tokens": 58.828125, + "eval_wikibio_rouge_score": 0.33704755730063046, + "eval_wikibio_runtime": 8.2679, + "eval_wikibio_samples_per_second": 60.475, + "eval_wikibio_steps_per_second": 0.121, + "eval_wikibio_token_set_f1": 0.29998801830909244, + "eval_wikibio_token_set_f1_sem": 0.005805959306225305, + "eval_wikibio_token_set_precision": 0.2915927252931224, + "eval_wikibio_token_set_recall": 0.3265489991928534, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 1860 + }, + { + "epoch": 8.19, + "eval_bias-bios_accuracy": 0.523875, + "eval_bias-bios_bleu_score": 19.84644091800536, + "eval_bias-bios_bleu_score_sem": 0.8744952223323816, + "eval_bias-bios_emb_cos_sim": 0.8809575438499451, + "eval_bias-bios_emb_cos_sim_sem": 0.0029165990476574, + "eval_bias-bios_emb_top1_equal": 0.3440000116825104, + "eval_bias-bios_emb_top1_equal_sem": 0.021265758943789875, + "eval_bias-bios_exact_match": 0.002, + "eval_bias-bios_exact_match_sem": 0.002, + "eval_bias-bios_loss": 1.7271181344985962, + "eval_bias-bios_n_ngrams_match_1": 21.742, + "eval_bias-bios_n_ngrams_match_2": 9.928, + "eval_bias-bios_n_ngrams_match_3": 5.668, + "eval_bias-bios_num_pred_words": 38.868, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 5.624421703274523, + "eval_bias-bios_pred_num_tokens": 52.421875, + "eval_bias-bios_rouge_score": 0.5528489807404857, + "eval_bias-bios_runtime": 13.0908, + "eval_bias-bios_samples_per_second": 38.195, + "eval_bias-bios_steps_per_second": 0.076, + "eval_bias-bios_token_set_f1": 0.5651409833769688, + "eval_bias-bios_token_set_f1_sem": 0.0069194692707168675, + "eval_bias-bios_token_set_precision": 0.5349813822732767, + "eval_bias-bios_token_set_recall": 0.6088791750502968, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 1860 + }, + { + "epoch": 8.25, + "learning_rate": 0.001, + "loss": 1.7665, + "step": 1872 + }, + { + "epoch": 8.3, + "learning_rate": 0.001, + "loss": 1.9331, + "step": 1884 + }, + { + "epoch": 8.33, + "eval_ag_news_accuracy": 0.3009375, + "eval_ag_news_bleu_score": 4.783602500552586, + "eval_ag_news_bleu_score_sem": 0.15494989020321834, + "eval_ag_news_emb_cos_sim": 0.8162067532539368, + "eval_ag_news_emb_cos_sim_sem": 0.004301302001182929, + "eval_ag_news_emb_top1_equal": 0.2639999985694885, + "eval_ag_news_emb_top1_equal_sem": 0.019732885240582997, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.5908281803131104, + "eval_ag_news_n_ngrams_match_1": 13.826, + "eval_ag_news_n_ngrams_match_2": 2.882, + "eval_ag_news_n_ngrams_match_3": 0.814, + "eval_ag_news_num_pred_words": 43.092, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 36.264096704415266, + "eval_ag_news_pred_num_tokens": 62.875, + "eval_ag_news_rouge_score": 0.3612734905729005, + "eval_ag_news_runtime": 8.5439, + "eval_ag_news_samples_per_second": 58.521, + "eval_ag_news_steps_per_second": 0.117, + "eval_ag_news_token_set_f1": 0.35453410878160624, + "eval_ag_news_token_set_f1_sem": 0.004645656214615403, + "eval_ag_news_token_set_precision": 0.33472180012828634, + "eval_ag_news_token_set_recall": 0.39579148585435375, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 1891 + }, + { + "epoch": 8.33, + "eval_anthropic_toxic_prompts_accuracy": 0.1051875, + "eval_anthropic_toxic_prompts_bleu_score": 3.494749302388673, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12794921935087805, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.694394052028656, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004276470264197607, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1459999978542328, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015807205702664997, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 3.2136523723602295, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.266, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.974, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.746, + "eval_anthropic_toxic_prompts_num_pred_words": 43.598, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 24.869754149947237, + "eval_anthropic_toxic_prompts_pred_num_tokens": 61.5546875, + "eval_anthropic_toxic_prompts_rouge_score": 0.23461670239543642, + "eval_anthropic_toxic_prompts_runtime": 8.6056, + "eval_anthropic_toxic_prompts_samples_per_second": 58.102, + "eval_anthropic_toxic_prompts_steps_per_second": 0.116, + "eval_anthropic_toxic_prompts_token_set_f1": 0.33095488094061143, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0053161893127592836, + "eval_anthropic_toxic_prompts_token_set_precision": 0.45590834800070323, + "eval_anthropic_toxic_prompts_token_set_recall": 0.28048851738633934, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 1891 + }, + { + "epoch": 8.33, + "eval_arxiv_accuracy": 0.42571875, + "eval_arxiv_bleu_score": 4.591204061401259, + "eval_arxiv_bleu_score_sem": 0.1265891653412925, + "eval_arxiv_emb_cos_sim": 0.7642773985862732, + "eval_arxiv_emb_cos_sim_sem": 0.004905883908455568, + "eval_arxiv_emb_top1_equal": 0.29600000381469727, + "eval_arxiv_emb_top1_equal_sem": 0.020435341676588347, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.0380170345306396, + "eval_arxiv_n_ngrams_match_1": 15.618, + "eval_arxiv_n_ngrams_match_2": 3.012, + "eval_arxiv_n_ngrams_match_3": 0.692, + "eval_arxiv_num_pred_words": 39.924, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 20.863829933772514, + "eval_arxiv_pred_num_tokens": 62.9453125, + "eval_arxiv_rouge_score": 0.3659808540317445, + "eval_arxiv_runtime": 9.2214, + "eval_arxiv_samples_per_second": 54.222, + "eval_arxiv_steps_per_second": 0.108, + "eval_arxiv_token_set_f1": 0.3706836572969632, + "eval_arxiv_token_set_f1_sem": 0.004433309146267008, + "eval_arxiv_token_set_precision": 0.32536084685593664, + "eval_arxiv_token_set_recall": 0.44598999972836184, + "eval_arxiv_true_num_tokens": 64.0, + "step": 1891 + }, + { + "epoch": 8.33, + "eval_python_code_alpaca_accuracy": 0.1533125, + "eval_python_code_alpaca_bleu_score": 5.021076645768737, + "eval_python_code_alpaca_bleu_score_sem": 0.15326960251605942, + "eval_python_code_alpaca_emb_cos_sim": 0.7768791317939758, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.0033942160588140272, + "eval_python_code_alpaca_emb_top1_equal": 0.17399999499320984, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.016971269551723376, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.7844736576080322, + "eval_python_code_alpaca_n_ngrams_match_1": 9.976, + "eval_python_code_alpaca_n_ngrams_match_2": 2.918, + "eval_python_code_alpaca_n_ngrams_match_3": 0.996, + "eval_python_code_alpaca_num_pred_words": 40.982, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 16.191293469447224, + "eval_python_code_alpaca_pred_num_tokens": 62.4140625, + "eval_python_code_alpaca_rouge_score": 0.37388306065756094, + "eval_python_code_alpaca_runtime": 8.516, + "eval_python_code_alpaca_samples_per_second": 58.713, + "eval_python_code_alpaca_steps_per_second": 0.117, + "eval_python_code_alpaca_token_set_f1": 0.45995671587715675, + "eval_python_code_alpaca_token_set_f1_sem": 0.004843210631367747, + "eval_python_code_alpaca_token_set_precision": 0.5544483613954407, + "eval_python_code_alpaca_token_set_recall": 0.4095854452444462, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 1891 + }, + { + "epoch": 8.33, + "eval_wikibio_accuracy": 0.36975, + "eval_wikibio_bleu_score": 4.851922607844777, + "eval_wikibio_bleu_score_sem": 0.1873093904634118, + "eval_wikibio_emb_cos_sim": 0.7219175100326538, + "eval_wikibio_emb_cos_sim_sem": 0.006202877601707938, + "eval_wikibio_emb_top1_equal": 0.16599999368190765, + "eval_wikibio_emb_top1_equal_sem": 0.016656615375209204, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.3165245056152344, + "eval_wikibio_n_ngrams_match_1": 8.46, + "eval_wikibio_n_ngrams_match_2": 2.57, + "eval_wikibio_n_ngrams_match_3": 0.95, + "eval_wikibio_num_pred_words": 32.034, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 27.564384027638354, + "eval_wikibio_pred_num_tokens": 62.9609375, + "eval_wikibio_rouge_score": 0.3016254823114396, + "eval_wikibio_runtime": 8.6373, + "eval_wikibio_samples_per_second": 57.888, + "eval_wikibio_steps_per_second": 0.116, + "eval_wikibio_token_set_f1": 0.27968945656360833, + "eval_wikibio_token_set_f1_sem": 0.006463263382407512, + "eval_wikibio_token_set_precision": 0.27151554578125564, + "eval_wikibio_token_set_recall": 0.3125748784752346, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 1891 + }, + { + "epoch": 8.33, + "eval_bias-bios_accuracy": 0.51821875, + "eval_bias-bios_bleu_score": 18.91106026065125, + "eval_bias-bios_bleu_score_sem": 0.8012562185192902, + "eval_bias-bios_emb_cos_sim": 0.8836072683334351, + "eval_bias-bios_emb_cos_sim_sem": 0.0027063216331050595, + "eval_bias-bios_emb_top1_equal": 0.34599998593330383, + "eval_bias-bios_emb_top1_equal_sem": 0.021294949937601483, + "eval_bias-bios_exact_match": 0.0, + "eval_bias-bios_exact_match_sem": 0.0, + "eval_bias-bios_loss": 1.7455099821090698, + "eval_bias-bios_n_ngrams_match_1": 23.232, + "eval_bias-bios_n_ngrams_match_2": 10.734, + "eval_bias-bios_n_ngrams_match_3": 6.094, + "eval_bias-bios_num_pred_words": 46.834, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 5.728822327459904, + "eval_bias-bios_pred_num_tokens": 61.984375, + "eval_bias-bios_rouge_score": 0.5344024868808166, + "eval_bias-bios_runtime": 8.4511, + "eval_bias-bios_samples_per_second": 59.164, + "eval_bias-bios_steps_per_second": 0.118, + "eval_bias-bios_token_set_f1": 0.5663459254895272, + "eval_bias-bios_token_set_f1_sem": 0.006602976981687319, + "eval_bias-bios_token_set_precision": 0.5589301696435766, + "eval_bias-bios_token_set_recall": 0.5836912180039152, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 1891 + }, + { + "epoch": 8.35, + "learning_rate": 0.001, + "loss": 1.9073, + "step": 1896 + }, + { + "epoch": 8.41, + "learning_rate": 0.001, + "loss": 1.688, + "step": 1908 + }, + { + "epoch": 8.46, + "learning_rate": 0.001, + "loss": 1.6889, + "step": 1920 + }, + { + "epoch": 8.47, + "eval_ag_news_accuracy": 0.3001875, + "eval_ag_news_bleu_score": 4.435449766158028, + "eval_ag_news_bleu_score_sem": 0.14973093498983295, + "eval_ag_news_emb_cos_sim": 0.8028610944747925, + "eval_ag_news_emb_cos_sim_sem": 0.004656717211563431, + "eval_ag_news_emb_top1_equal": 0.25600001215934753, + "eval_ag_news_emb_top1_equal_sem": 0.019536923601457774, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.6249423027038574, + "eval_ag_news_n_ngrams_match_1": 11.832, + "eval_ag_news_n_ngrams_match_2": 2.458, + "eval_ag_news_n_ngrams_match_3": 0.65, + "eval_ag_news_num_pred_words": 32.41, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 37.52255814699448, + "eval_ag_news_pred_num_tokens": 48.4765625, + "eval_ag_news_rouge_score": 0.35709356513331103, + "eval_ag_news_runtime": 8.655, + "eval_ag_news_samples_per_second": 57.77, + "eval_ag_news_steps_per_second": 0.116, + "eval_ag_news_token_set_f1": 0.3414618889785419, + "eval_ag_news_token_set_f1_sem": 0.004810199967445401, + "eval_ag_news_token_set_precision": 0.29624824561399704, + "eval_ag_news_token_set_recall": 0.4208496232263841, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 1922 + }, + { + "epoch": 8.47, + "eval_anthropic_toxic_prompts_accuracy": 0.10909375, + "eval_anthropic_toxic_prompts_bleu_score": 5.894206921427712, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.22053411846492368, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.7000393271446228, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004913296610940808, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1459999978542328, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01580720436986462, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 3.011507511138916, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.452, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.652, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.592, + "eval_anthropic_toxic_prompts_num_pred_words": 23.66, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 20.31800647066561, + "eval_anthropic_toxic_prompts_pred_num_tokens": 33.6484375, + "eval_anthropic_toxic_prompts_rouge_score": 0.32193702696635396, + "eval_anthropic_toxic_prompts_runtime": 6.9678, + "eval_anthropic_toxic_prompts_samples_per_second": 71.758, + "eval_anthropic_toxic_prompts_steps_per_second": 0.144, + "eval_anthropic_toxic_prompts_token_set_f1": 0.35768379223275326, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005947411729849731, + "eval_anthropic_toxic_prompts_token_set_precision": 0.409905716617632, + "eval_anthropic_toxic_prompts_token_set_recall": 0.34156957135932925, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 1922 + }, + { + "epoch": 8.47, + "eval_arxiv_accuracy": 0.4183125, + "eval_arxiv_bleu_score": 3.843480783594084, + "eval_arxiv_bleu_score_sem": 0.11168850679535348, + "eval_arxiv_emb_cos_sim": 0.7463698983192444, + "eval_arxiv_emb_cos_sim_sem": 0.005271770598181763, + "eval_arxiv_emb_top1_equal": 0.21199999749660492, + "eval_arxiv_emb_top1_equal_sem": 0.01829703673906991, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.0738461017608643, + "eval_arxiv_n_ngrams_match_1": 13.604, + "eval_arxiv_n_ngrams_match_2": 2.6, + "eval_arxiv_n_ngrams_match_3": 0.596, + "eval_arxiv_num_pred_words": 31.144, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 21.624914558559464, + "eval_arxiv_pred_num_tokens": 50.8828125, + "eval_arxiv_rouge_score": 0.3602952189935648, + "eval_arxiv_runtime": 7.506, + "eval_arxiv_samples_per_second": 66.614, + "eval_arxiv_steps_per_second": 0.133, + "eval_arxiv_token_set_f1": 0.3620667627381011, + "eval_arxiv_token_set_f1_sem": 0.004667772539252971, + "eval_arxiv_token_set_precision": 0.29690661507058724, + "eval_arxiv_token_set_recall": 0.47944050208988725, + "eval_arxiv_true_num_tokens": 64.0, + "step": 1922 + }, + { + "epoch": 8.47, + "eval_python_code_alpaca_accuracy": 0.164125, + "eval_python_code_alpaca_bleu_score": 8.149238938682629, + "eval_python_code_alpaca_bleu_score_sem": 0.27088847698794644, + "eval_python_code_alpaca_emb_cos_sim": 0.7918772101402283, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.003844238771008207, + "eval_python_code_alpaca_emb_top1_equal": 0.18799999356269836, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.017490679184236527, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.4667320251464844, + "eval_python_code_alpaca_n_ngrams_match_1": 8.97, + "eval_python_code_alpaca_n_ngrams_match_2": 2.46, + "eval_python_code_alpaca_n_ngrams_match_3": 0.804, + "eval_python_code_alpaca_num_pred_words": 22.936, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 11.78387445376964, + "eval_python_code_alpaca_pred_num_tokens": 35.703125, + "eval_python_code_alpaca_rouge_score": 0.4751503363768772, + "eval_python_code_alpaca_runtime": 7.1271, + "eval_python_code_alpaca_samples_per_second": 70.155, + "eval_python_code_alpaca_steps_per_second": 0.14, + "eval_python_code_alpaca_token_set_f1": 0.5075156538927706, + "eval_python_code_alpaca_token_set_f1_sem": 0.005494481157661636, + "eval_python_code_alpaca_token_set_precision": 0.5162122177661042, + "eval_python_code_alpaca_token_set_recall": 0.5168435949387356, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 1922 + }, + { + "epoch": 8.47, + "eval_wikibio_accuracy": 0.36375, + "eval_wikibio_bleu_score": 5.502713975837069, + "eval_wikibio_bleu_score_sem": 0.20994709566725045, + "eval_wikibio_emb_cos_sim": 0.733432412147522, + "eval_wikibio_emb_cos_sim_sem": 0.006024464945119351, + "eval_wikibio_emb_top1_equal": 0.20600000023841858, + "eval_wikibio_emb_top1_equal_sem": 0.0181047949457911, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.3645694255828857, + "eval_wikibio_n_ngrams_match_1": 8.712, + "eval_wikibio_n_ngrams_match_2": 2.632, + "eval_wikibio_n_ngrams_match_3": 0.954, + "eval_wikibio_num_pred_words": 30.35, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 28.9210419569838, + "eval_wikibio_pred_num_tokens": 55.71875, + "eval_wikibio_rouge_score": 0.3255887283809268, + "eval_wikibio_runtime": 7.2562, + "eval_wikibio_samples_per_second": 68.906, + "eval_wikibio_steps_per_second": 0.138, + "eval_wikibio_token_set_f1": 0.2944355952466261, + "eval_wikibio_token_set_f1_sem": 0.006328439981856485, + "eval_wikibio_token_set_precision": 0.28416654885615683, + "eval_wikibio_token_set_recall": 0.32345250615831, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 1922 + }, + { + "epoch": 8.47, + "eval_bias-bios_accuracy": 0.514625, + "eval_bias-bios_bleu_score": 18.49435751172742, + "eval_bias-bios_bleu_score_sem": 0.8705296846533569, + "eval_bias-bios_emb_cos_sim": 0.8680934309959412, + "eval_bias-bios_emb_cos_sim_sem": 0.0035307687876271567, + "eval_bias-bios_emb_top1_equal": 0.32199999690055847, + "eval_bias-bios_emb_top1_equal_sem": 0.02091666653838802, + "eval_bias-bios_exact_match": 0.006, + "eval_bias-bios_exact_match_sem": 0.003457152557758369, + "eval_bias-bios_loss": 1.7615852355957031, + "eval_bias-bios_n_ngrams_match_1": 19.848, + "eval_bias-bios_n_ngrams_match_2": 9.216, + "eval_bias-bios_n_ngrams_match_3": 5.312, + "eval_bias-bios_num_pred_words": 32.898, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 5.821658784164547, + "eval_bias-bios_pred_num_tokens": 44.765625, + "eval_bias-bios_rouge_score": 0.5446137362717307, + "eval_bias-bios_runtime": 7.3911, + "eval_bias-bios_samples_per_second": 67.649, + "eval_bias-bios_steps_per_second": 0.135, + "eval_bias-bios_token_set_f1": 0.5594082568477496, + "eval_bias-bios_token_set_f1_sem": 0.007063404257871856, + "eval_bias-bios_token_set_precision": 0.5013370129474001, + "eval_bias-bios_token_set_recall": 0.6512309406286945, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 1922 + }, + { + "epoch": 8.51, + "learning_rate": 0.001, + "loss": 1.9388, + "step": 1932 + }, + { + "epoch": 8.56, + "learning_rate": 0.001, + "loss": 1.9097, + "step": 1944 + }, + { + "epoch": 8.6, + "eval_ag_news_accuracy": 0.3004375, + "eval_ag_news_bleu_score": 4.6384322948771315, + "eval_ag_news_bleu_score_sem": 0.15318901483073602, + "eval_ag_news_emb_cos_sim": 0.8059445023536682, + "eval_ag_news_emb_cos_sim_sem": 0.005206606322650648, + "eval_ag_news_emb_top1_equal": 0.26600000262260437, + "eval_ag_news_emb_top1_equal_sem": 0.01978055817719369, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.5975077152252197, + "eval_ag_news_n_ngrams_match_1": 13.012, + "eval_ag_news_n_ngrams_match_2": 2.656, + "eval_ag_news_n_ngrams_match_3": 0.694, + "eval_ag_news_num_pred_words": 39.108, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 36.50713479148357, + "eval_ag_news_pred_num_tokens": 59.328125, + "eval_ag_news_rouge_score": 0.35917363408294, + "eval_ag_news_runtime": 9.0027, + "eval_ag_news_samples_per_second": 55.539, + "eval_ag_news_steps_per_second": 0.111, + "eval_ag_news_token_set_f1": 0.34629940683072663, + "eval_ag_news_token_set_f1_sem": 0.004761672140250987, + "eval_ag_news_token_set_precision": 0.31872649692720056, + "eval_ag_news_token_set_recall": 0.3968739193028862, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 1953 + }, + { + "epoch": 8.6, + "eval_anthropic_toxic_prompts_accuracy": 0.10528125, + "eval_anthropic_toxic_prompts_bleu_score": 4.006997604254403, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.14432842993601536, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6876260042190552, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004934488470102209, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.16200000047683716, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.016494123019099097, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 3.146822452545166, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.988, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.858, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.72, + "eval_anthropic_toxic_prompts_num_pred_words": 36.364, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 23.262030813524362, + "eval_anthropic_toxic_prompts_pred_num_tokens": 53.046875, + "eval_anthropic_toxic_prompts_rouge_score": 0.2569124557583795, + "eval_anthropic_toxic_prompts_runtime": 8.5843, + "eval_anthropic_toxic_prompts_samples_per_second": 58.246, + "eval_anthropic_toxic_prompts_steps_per_second": 0.116, + "eval_anthropic_toxic_prompts_token_set_f1": 0.3357822926624641, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005603619060244223, + "eval_anthropic_toxic_prompts_token_set_precision": 0.43718307712451415, + "eval_anthropic_toxic_prompts_token_set_recall": 0.29577861821149204, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 1953 + }, + { + "epoch": 8.6, + "eval_arxiv_accuracy": 0.42340625, + "eval_arxiv_bleu_score": 4.469706868664844, + "eval_arxiv_bleu_score_sem": 0.12689372985136127, + "eval_arxiv_emb_cos_sim": 0.7521007657051086, + "eval_arxiv_emb_cos_sim_sem": 0.004824712034021417, + "eval_arxiv_emb_top1_equal": 0.2280000001192093, + "eval_arxiv_emb_top1_equal_sem": 0.018781307089698163, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.0552661418914795, + "eval_arxiv_n_ngrams_match_1": 15.32, + "eval_arxiv_n_ngrams_match_2": 2.954, + "eval_arxiv_n_ngrams_match_3": 0.672, + "eval_arxiv_num_pred_words": 37.658, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 21.226834124998554, + "eval_arxiv_pred_num_tokens": 60.109375, + "eval_arxiv_rouge_score": 0.3712890829953238, + "eval_arxiv_runtime": 8.9036, + "eval_arxiv_samples_per_second": 56.157, + "eval_arxiv_steps_per_second": 0.112, + "eval_arxiv_token_set_f1": 0.36978878421186867, + "eval_arxiv_token_set_f1_sem": 0.004399200540521649, + "eval_arxiv_token_set_precision": 0.3202345342560703, + "eval_arxiv_token_set_recall": 0.45431508454570024, + "eval_arxiv_true_num_tokens": 64.0, + "step": 1953 + }, + { + "epoch": 8.6, + "eval_python_code_alpaca_accuracy": 0.15228125, + "eval_python_code_alpaca_bleu_score": 5.802510448307412, + "eval_python_code_alpaca_bleu_score_sem": 0.19450827928089162, + "eval_python_code_alpaca_emb_cos_sim": 0.7649307250976562, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.004133352827137784, + "eval_python_code_alpaca_emb_top1_equal": 0.16599999368190765, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.01665661404240883, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.7348744869232178, + "eval_python_code_alpaca_n_ngrams_match_1": 9.686, + "eval_python_code_alpaca_n_ngrams_match_2": 2.746, + "eval_python_code_alpaca_n_ngrams_match_3": 0.916, + "eval_python_code_alpaca_num_pred_words": 34.002, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 15.40780943299212, + "eval_python_code_alpaca_pred_num_tokens": 53.484375, + "eval_python_code_alpaca_rouge_score": 0.40480305406942174, + "eval_python_code_alpaca_runtime": 11.2595, + "eval_python_code_alpaca_samples_per_second": 44.407, + "eval_python_code_alpaca_steps_per_second": 0.089, + "eval_python_code_alpaca_token_set_f1": 0.4745207619123493, + "eval_python_code_alpaca_token_set_f1_sem": 0.005881419094198025, + "eval_python_code_alpaca_token_set_precision": 0.5407649612484423, + "eval_python_code_alpaca_token_set_recall": 0.43923169222918024, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 1953 + }, + { + "epoch": 8.6, + "eval_wikibio_accuracy": 0.35765625, + "eval_wikibio_bleu_score": 4.715955204214835, + "eval_wikibio_bleu_score_sem": 0.19740124186338762, + "eval_wikibio_emb_cos_sim": 0.7062340378761292, + "eval_wikibio_emb_cos_sim_sem": 0.006724205133960606, + "eval_wikibio_emb_top1_equal": 0.17000000178813934, + "eval_wikibio_emb_top1_equal_sem": 0.016815633120741882, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.3458058834075928, + "eval_wikibio_n_ngrams_match_1": 7.826, + "eval_wikibio_n_ngrams_match_2": 2.344, + "eval_wikibio_n_ngrams_match_3": 0.886, + "eval_wikibio_num_pred_words": 29.11, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 28.383440195847996, + "eval_wikibio_pred_num_tokens": 61.578125, + "eval_wikibio_rouge_score": 0.2898479176930867, + "eval_wikibio_runtime": 8.5106, + "eval_wikibio_samples_per_second": 58.751, + "eval_wikibio_steps_per_second": 0.118, + "eval_wikibio_token_set_f1": 0.26292399366093444, + "eval_wikibio_token_set_f1_sem": 0.0069327366813689865, + "eval_wikibio_token_set_precision": 0.25331391833809414, + "eval_wikibio_token_set_recall": 0.29977228313329735, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 1953 + }, + { + "epoch": 8.6, + "eval_bias-bios_accuracy": 0.5214375, + "eval_bias-bios_bleu_score": 19.096584666724308, + "eval_bias-bios_bleu_score_sem": 0.8134426930137699, + "eval_bias-bios_emb_cos_sim": 0.8831885457038879, + "eval_bias-bios_emb_cos_sim_sem": 0.0026979201593416726, + "eval_bias-bios_emb_top1_equal": 0.328000009059906, + "eval_bias-bios_emb_top1_equal_sem": 0.02101702640661987, + "eval_bias-bios_exact_match": 0.0, + "eval_bias-bios_exact_match_sem": 0.0, + "eval_bias-bios_loss": 1.730255365371704, + "eval_bias-bios_n_ngrams_match_1": 22.402, + "eval_bias-bios_n_ngrams_match_2": 10.26, + "eval_bias-bios_n_ngrams_match_3": 5.776, + "eval_bias-bios_num_pred_words": 43.138, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 5.642094520043632, + "eval_bias-bios_pred_num_tokens": 57.28125, + "eval_bias-bios_rouge_score": 0.5385177520846558, + "eval_bias-bios_runtime": 10.0512, + "eval_bias-bios_samples_per_second": 49.745, + "eval_bias-bios_steps_per_second": 0.099, + "eval_bias-bios_token_set_f1": 0.561331969332709, + "eval_bias-bios_token_set_f1_sem": 0.006692673176916008, + "eval_bias-bios_token_set_precision": 0.5433084362152154, + "eval_bias-bios_token_set_recall": 0.5896360367294446, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 1953 + }, + { + "epoch": 8.62, + "learning_rate": 0.001, + "loss": 1.7706, + "step": 1956 + }, + { + "epoch": 8.67, + "learning_rate": 0.001, + "loss": 1.6184, + "step": 1968 + }, + { + "epoch": 8.72, + "learning_rate": 0.001, + "loss": 1.9561, + "step": 1980 + }, + { + "epoch": 8.74, + "eval_ag_news_accuracy": 0.2984375, + "eval_ag_news_bleu_score": 4.711518363848454, + "eval_ag_news_bleu_score_sem": 0.1532454630156928, + "eval_ag_news_emb_cos_sim": 0.8103645443916321, + "eval_ag_news_emb_cos_sim_sem": 0.004439122889552799, + "eval_ag_news_emb_top1_equal": 0.2540000081062317, + "eval_ag_news_emb_top1_equal_sem": 0.019486597059300604, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.5887601375579834, + "eval_ag_news_n_ngrams_match_1": 13.628, + "eval_ag_news_n_ngrams_match_2": 2.846, + "eval_ag_news_n_ngrams_match_3": 0.774, + "eval_ag_news_num_pred_words": 42.722, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 36.18917849568472, + "eval_ag_news_pred_num_tokens": 62.578125, + "eval_ag_news_rouge_score": 0.3583171385544124, + "eval_ag_news_runtime": 8.9128, + "eval_ag_news_samples_per_second": 56.099, + "eval_ag_news_steps_per_second": 0.112, + "eval_ag_news_token_set_f1": 0.3518442756676556, + "eval_ag_news_token_set_f1_sem": 0.004538612728904295, + "eval_ag_news_token_set_precision": 0.33050553019565915, + "eval_ag_news_token_set_recall": 0.39305225223561946, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 1984 + }, + { + "epoch": 8.74, + "eval_anthropic_toxic_prompts_accuracy": 0.10534375, + "eval_anthropic_toxic_prompts_bleu_score": 3.4700986196870534, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12543073827880039, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6862208843231201, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004841854845643796, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.13600000739097595, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015345323732734733, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 3.1650032997131348, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.18, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.874, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.682, + "eval_anthropic_toxic_prompts_num_pred_words": 41.556, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 23.688822199019885, + "eval_anthropic_toxic_prompts_pred_num_tokens": 60.3515625, + "eval_anthropic_toxic_prompts_rouge_score": 0.24235614608709247, + "eval_anthropic_toxic_prompts_runtime": 13.8753, + "eval_anthropic_toxic_prompts_samples_per_second": 36.035, + "eval_anthropic_toxic_prompts_steps_per_second": 0.072, + "eval_anthropic_toxic_prompts_token_set_f1": 0.324157927386135, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005188058466257896, + "eval_anthropic_toxic_prompts_token_set_precision": 0.45317186235301055, + "eval_anthropic_toxic_prompts_token_set_recall": 0.27235036851456745, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 1984 + }, + { + "epoch": 8.74, + "eval_arxiv_accuracy": 0.4249375, + "eval_arxiv_bleu_score": 4.5550681043846515, + "eval_arxiv_bleu_score_sem": 0.12490632330266863, + "eval_arxiv_emb_cos_sim": 0.7511861324310303, + "eval_arxiv_emb_cos_sim_sem": 0.005204222942380189, + "eval_arxiv_emb_top1_equal": 0.2720000147819519, + "eval_arxiv_emb_top1_equal_sem": 0.019920483557355567, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.0397071838378906, + "eval_arxiv_n_ngrams_match_1": 15.462, + "eval_arxiv_n_ngrams_match_2": 3.074, + "eval_arxiv_n_ngrams_match_3": 0.686, + "eval_arxiv_num_pred_words": 39.26, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 20.89912273813464, + "eval_arxiv_pred_num_tokens": 62.1875, + "eval_arxiv_rouge_score": 0.3649035759644474, + "eval_arxiv_runtime": 9.0376, + "eval_arxiv_samples_per_second": 55.324, + "eval_arxiv_steps_per_second": 0.111, + "eval_arxiv_token_set_f1": 0.37007363238592483, + "eval_arxiv_token_set_f1_sem": 0.004455040383833329, + "eval_arxiv_token_set_precision": 0.3223344413721305, + "eval_arxiv_token_set_recall": 0.45127944355291727, + "eval_arxiv_true_num_tokens": 64.0, + "step": 1984 + }, + { + "epoch": 8.74, + "eval_python_code_alpaca_accuracy": 0.15215625, + "eval_python_code_alpaca_bleu_score": 5.232190399641075, + "eval_python_code_alpaca_bleu_score_sem": 0.17118683274174132, + "eval_python_code_alpaca_emb_cos_sim": 0.7645835280418396, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.0038300940938292218, + "eval_python_code_alpaca_emb_top1_equal": 0.1720000058412552, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.01689386850274998, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.7257423400878906, + "eval_python_code_alpaca_n_ngrams_match_1": 9.856, + "eval_python_code_alpaca_n_ngrams_match_2": 2.808, + "eval_python_code_alpaca_n_ngrams_match_3": 0.948, + "eval_python_code_alpaca_num_pred_words": 38.478, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 15.267743579221799, + "eval_python_code_alpaca_pred_num_tokens": 59.984375, + "eval_python_code_alpaca_rouge_score": 0.38401352446211545, + "eval_python_code_alpaca_runtime": 8.9935, + "eval_python_code_alpaca_samples_per_second": 55.596, + "eval_python_code_alpaca_steps_per_second": 0.111, + "eval_python_code_alpaca_token_set_f1": 0.4612661815110851, + "eval_python_code_alpaca_token_set_f1_sem": 0.005052258479426189, + "eval_python_code_alpaca_token_set_precision": 0.5531760300949725, + "eval_python_code_alpaca_token_set_recall": 0.41410391470065155, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 1984 + }, + { + "epoch": 8.74, + "eval_wikibio_accuracy": 0.36490625, + "eval_wikibio_bleu_score": 5.001279621214367, + "eval_wikibio_bleu_score_sem": 0.1826826365219488, + "eval_wikibio_emb_cos_sim": 0.7376046776771545, + "eval_wikibio_emb_cos_sim_sem": 0.005935431880873798, + "eval_wikibio_emb_top1_equal": 0.1720000058412552, + "eval_wikibio_emb_top1_equal_sem": 0.016893869835550357, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.3209877014160156, + "eval_wikibio_n_ngrams_match_1": 9.054, + "eval_wikibio_n_ngrams_match_2": 2.696, + "eval_wikibio_n_ngrams_match_3": 0.962, + "eval_wikibio_num_pred_words": 33.432, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 27.687684222458053, + "eval_wikibio_pred_num_tokens": 62.8203125, + "eval_wikibio_rouge_score": 0.3192760141503558, + "eval_wikibio_runtime": 9.256, + "eval_wikibio_samples_per_second": 54.019, + "eval_wikibio_steps_per_second": 0.108, + "eval_wikibio_token_set_f1": 0.29227717249567686, + "eval_wikibio_token_set_f1_sem": 0.006152932662747309, + "eval_wikibio_token_set_precision": 0.2899212240419065, + "eval_wikibio_token_set_recall": 0.31631044337697983, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 1984 + }, + { + "epoch": 8.74, + "eval_bias-bios_accuracy": 0.51871875, + "eval_bias-bios_bleu_score": 18.589985258771534, + "eval_bias-bios_bleu_score_sem": 0.7979796988758581, + "eval_bias-bios_emb_cos_sim": 0.87850421667099, + "eval_bias-bios_emb_cos_sim_sem": 0.00308360126703616, + "eval_bias-bios_emb_top1_equal": 0.36399999260902405, + "eval_bias-bios_emb_top1_equal_sem": 0.021539170945502367, + "eval_bias-bios_exact_match": 0.002, + "eval_bias-bios_exact_match_sem": 0.002, + "eval_bias-bios_loss": 1.7309093475341797, + "eval_bias-bios_n_ngrams_match_1": 22.936, + "eval_bias-bios_n_ngrams_match_2": 10.564, + "eval_bias-bios_n_ngrams_match_3": 5.958, + "eval_bias-bios_num_pred_words": 46.596, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 5.645785556023036, + "eval_bias-bios_pred_num_tokens": 61.7265625, + "eval_bias-bios_rouge_score": 0.5292722044570275, + "eval_bias-bios_runtime": 8.9433, + "eval_bias-bios_samples_per_second": 55.908, + "eval_bias-bios_steps_per_second": 0.112, + "eval_bias-bios_token_set_f1": 0.5576777941607378, + "eval_bias-bios_token_set_f1_sem": 0.006638163225364711, + "eval_bias-bios_token_set_precision": 0.5529639838978428, + "eval_bias-bios_token_set_recall": 0.5721815762275477, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 1984 + }, + { + "epoch": 8.78, + "learning_rate": 0.001, + "loss": 1.9147, + "step": 1992 + }, + { + "epoch": 8.83, + "learning_rate": 0.001, + "loss": 1.7913, + "step": 2004 + }, + { + "epoch": 8.88, + "eval_ag_news_accuracy": 0.2978125, + "eval_ag_news_bleu_score": 4.281196440555999, + "eval_ag_news_bleu_score_sem": 0.14904247872139, + "eval_ag_news_emb_cos_sim": 0.79570472240448, + "eval_ag_news_emb_cos_sim_sem": 0.005238732809288028, + "eval_ag_news_emb_top1_equal": 0.2879999876022339, + "eval_ag_news_emb_top1_equal_sem": 0.020271503192099565, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.697861909866333, + "eval_ag_news_n_ngrams_match_1": 11.452, + "eval_ag_news_n_ngrams_match_2": 2.336, + "eval_ag_news_n_ngrams_match_3": 0.616, + "eval_ag_news_num_pred_words": 30.118, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 40.36091676282594, + "eval_ag_news_pred_num_tokens": 46.9296875, + "eval_ag_news_rouge_score": 0.35785020490253794, + "eval_ag_news_runtime": 8.5687, + "eval_ag_news_samples_per_second": 58.352, + "eval_ag_news_steps_per_second": 0.117, + "eval_ag_news_token_set_f1": 0.33766916509866857, + "eval_ag_news_token_set_f1_sem": 0.00485462271797604, + "eval_ag_news_token_set_precision": 0.29302357484649894, + "eval_ag_news_token_set_recall": 0.4163851421960052, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 2015 + }, + { + "epoch": 8.88, + "eval_anthropic_toxic_prompts_accuracy": 0.10834375, + "eval_anthropic_toxic_prompts_bleu_score": 5.503328095222172, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.2081873652662082, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6974567770957947, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.005060578715972177, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.15399999916553497, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.016158283980625493, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 3.032620668411255, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.684, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.742, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.648, + "eval_anthropic_toxic_prompts_num_pred_words": 25.888, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 20.751544308456207, + "eval_anthropic_toxic_prompts_pred_num_tokens": 36.640625, + "eval_anthropic_toxic_prompts_rouge_score": 0.31427257546174336, + "eval_anthropic_toxic_prompts_runtime": 8.3584, + "eval_anthropic_toxic_prompts_samples_per_second": 59.82, + "eval_anthropic_toxic_prompts_steps_per_second": 0.12, + "eval_anthropic_toxic_prompts_token_set_f1": 0.34967349844245615, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005804893266715575, + "eval_anthropic_toxic_prompts_token_set_precision": 0.4261249444076511, + "eval_anthropic_toxic_prompts_token_set_recall": 0.32047917547306215, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 2015 + }, + { + "epoch": 8.88, + "eval_arxiv_accuracy": 0.41240625, + "eval_arxiv_bleu_score": 3.8906961921992056, + "eval_arxiv_bleu_score_sem": 0.11585900533149257, + "eval_arxiv_emb_cos_sim": 0.7511284947395325, + "eval_arxiv_emb_cos_sim_sem": 0.004930338129734693, + "eval_arxiv_emb_top1_equal": 0.20399999618530273, + "eval_arxiv_emb_top1_equal_sem": 0.018039369108186407, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.1257741451263428, + "eval_arxiv_n_ngrams_match_1": 13.67, + "eval_arxiv_n_ngrams_match_2": 2.592, + "eval_arxiv_n_ngrams_match_3": 0.59, + "eval_arxiv_num_pred_words": 30.658, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 22.777521377159953, + "eval_arxiv_pred_num_tokens": 50.5703125, + "eval_arxiv_rouge_score": 0.3649159407180931, + "eval_arxiv_runtime": 9.1547, + "eval_arxiv_samples_per_second": 54.617, + "eval_arxiv_steps_per_second": 0.109, + "eval_arxiv_token_set_f1": 0.3608202993888602, + "eval_arxiv_token_set_f1_sem": 0.004479796341346729, + "eval_arxiv_token_set_precision": 0.30272902785643907, + "eval_arxiv_token_set_recall": 0.46174145285100326, + "eval_arxiv_true_num_tokens": 64.0, + "step": 2015 + }, + { + "epoch": 8.88, + "eval_python_code_alpaca_accuracy": 0.15665625, + "eval_python_code_alpaca_bleu_score": 7.371272378741536, + "eval_python_code_alpaca_bleu_score_sem": 0.23850916870772282, + "eval_python_code_alpaca_emb_cos_sim": 0.7900562882423401, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.0038222738876293495, + "eval_python_code_alpaca_emb_top1_equal": 0.21400000154972076, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.018359796975924752, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.597472906112671, + "eval_python_code_alpaca_n_ngrams_match_1": 9.35, + "eval_python_code_alpaca_n_ngrams_match_2": 2.518, + "eval_python_code_alpaca_n_ngrams_match_3": 0.796, + "eval_python_code_alpaca_num_pred_words": 25.446, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 13.429756859807119, + "eval_python_code_alpaca_pred_num_tokens": 39.7109375, + "eval_python_code_alpaca_rouge_score": 0.46722847470855877, + "eval_python_code_alpaca_runtime": 8.8159, + "eval_python_code_alpaca_samples_per_second": 56.715, + "eval_python_code_alpaca_steps_per_second": 0.113, + "eval_python_code_alpaca_token_set_f1": 0.49105008465089256, + "eval_python_code_alpaca_token_set_f1_sem": 0.005520499061597566, + "eval_python_code_alpaca_token_set_precision": 0.5310244980548657, + "eval_python_code_alpaca_token_set_recall": 0.47358586937672026, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 2015 + }, + { + "epoch": 8.88, + "eval_wikibio_accuracy": 0.35096875, + "eval_wikibio_bleu_score": 5.4778327942374, + "eval_wikibio_bleu_score_sem": 0.21824048592552908, + "eval_wikibio_emb_cos_sim": 0.729748547077179, + "eval_wikibio_emb_cos_sim_sem": 0.006024370316292732, + "eval_wikibio_emb_top1_equal": 0.16599999368190765, + "eval_wikibio_emb_top1_equal_sem": 0.01665661670800958, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.4867711067199707, + "eval_wikibio_n_ngrams_match_1": 8.13, + "eval_wikibio_n_ngrams_match_2": 2.482, + "eval_wikibio_n_ngrams_match_3": 0.888, + "eval_wikibio_num_pred_words": 27.738, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 32.68025610528006, + "eval_wikibio_pred_num_tokens": 53.921875, + "eval_wikibio_rouge_score": 0.3262775647584429, + "eval_wikibio_runtime": 8.7759, + "eval_wikibio_samples_per_second": 56.974, + "eval_wikibio_steps_per_second": 0.114, + "eval_wikibio_token_set_f1": 0.28553711803580656, + "eval_wikibio_token_set_f1_sem": 0.006105552369558703, + "eval_wikibio_token_set_precision": 0.2709517628431276, + "eval_wikibio_token_set_recall": 0.3200790729630114, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 2015 + }, + { + "epoch": 8.88, + "eval_bias-bios_accuracy": 0.5211875, + "eval_bias-bios_bleu_score": 19.690638801615393, + "eval_bias-bios_bleu_score_sem": 0.8750117263932384, + "eval_bias-bios_emb_cos_sim": 0.8750105500221252, + "eval_bias-bios_emb_cos_sim_sem": 0.0031565084463454018, + "eval_bias-bios_emb_top1_equal": 0.3140000104904175, + "eval_bias-bios_emb_top1_equal_sem": 0.020776702507015268, + "eval_bias-bios_exact_match": 0.004, + "eval_bias-bios_exact_match_sem": 0.002825591608118863, + "eval_bias-bios_loss": 1.7455610036849976, + "eval_bias-bios_n_ngrams_match_1": 20.688, + "eval_bias-bios_n_ngrams_match_2": 9.66, + "eval_bias-bios_n_ngrams_match_3": 5.574, + "eval_bias-bios_num_pred_words": 34.442, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 5.729114628460026, + "eval_bias-bios_pred_num_tokens": 46.53125, + "eval_bias-bios_rouge_score": 0.556540741298637, + "eval_bias-bios_runtime": 8.9271, + "eval_bias-bios_samples_per_second": 56.009, + "eval_bias-bios_steps_per_second": 0.112, + "eval_bias-bios_token_set_f1": 0.564996792506389, + "eval_bias-bios_token_set_f1_sem": 0.006915302425054524, + "eval_bias-bios_token_set_precision": 0.519362727301722, + "eval_bias-bios_token_set_recall": 0.6305880276758675, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 2015 + }, + { + "epoch": 8.88, + "learning_rate": 0.001, + "loss": 1.5533, + "step": 2016 + }, + { + "epoch": 8.93, + "learning_rate": 0.001, + "loss": 1.9502, + "step": 2028 + }, + { + "epoch": 8.99, + "learning_rate": 0.001, + "loss": 1.7723, + "step": 2040 + }, + { + "epoch": 9.01, + "eval_ag_news_accuracy": 0.2991875, + "eval_ag_news_bleu_score": 4.30447564205776, + "eval_ag_news_bleu_score_sem": 0.1472288899842663, + "eval_ag_news_emb_cos_sim": 0.7993799448013306, + "eval_ag_news_emb_cos_sim_sem": 0.00450738725875599, + "eval_ag_news_emb_top1_equal": 0.2639999985694885, + "eval_ag_news_emb_top1_equal_sem": 0.019732885240582997, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.6647632122039795, + "eval_ag_news_n_ngrams_match_1": 11.316, + "eval_ag_news_n_ngrams_match_2": 2.278, + "eval_ag_news_n_ngrams_match_3": 0.614, + "eval_ag_news_num_pred_words": 31.038, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 39.046889241518514, + "eval_ag_news_pred_num_tokens": 45.71875, + "eval_ag_news_rouge_score": 0.34739004258080053, + "eval_ag_news_runtime": 8.7567, + "eval_ag_news_samples_per_second": 57.099, + "eval_ag_news_steps_per_second": 0.114, + "eval_ag_news_token_set_f1": 0.3335748310879103, + "eval_ag_news_token_set_f1_sem": 0.004691210695222084, + "eval_ag_news_token_set_precision": 0.2911811558728018, + "eval_ag_news_token_set_recall": 0.40806454549989735, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 2046 + }, + { + "epoch": 9.01, + "eval_anthropic_toxic_prompts_accuracy": 0.111, + "eval_anthropic_toxic_prompts_bleu_score": 5.890646812535753, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.22763323642991268, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6962231993675232, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.005150838289762755, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1679999977350235, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.016736554076096456, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 3.001394510269165, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.268, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.514, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.542, + "eval_anthropic_toxic_prompts_num_pred_words": 22.254, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 20.11356594952904, + "eval_anthropic_toxic_prompts_pred_num_tokens": 31.28125, + "eval_anthropic_toxic_prompts_rouge_score": 0.3268142602299797, + "eval_anthropic_toxic_prompts_runtime": 7.0964, + "eval_anthropic_toxic_prompts_samples_per_second": 70.459, + "eval_anthropic_toxic_prompts_steps_per_second": 0.141, + "eval_anthropic_toxic_prompts_token_set_f1": 0.3395884278036088, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0062451772266664705, + "eval_anthropic_toxic_prompts_token_set_precision": 0.4012655734234844, + "eval_anthropic_toxic_prompts_token_set_recall": 0.32073276005169205, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 2046 + }, + { + "epoch": 9.01, + "eval_arxiv_accuracy": 0.41378125, + "eval_arxiv_bleu_score": 3.6087119904118774, + "eval_arxiv_bleu_score_sem": 0.0994488717730809, + "eval_arxiv_emb_cos_sim": 0.7521159052848816, + "eval_arxiv_emb_cos_sim_sem": 0.004161717817917032, + "eval_arxiv_emb_top1_equal": 0.20000000298023224, + "eval_arxiv_emb_top1_equal_sem": 0.017906459589198134, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.1329307556152344, + "eval_arxiv_n_ngrams_match_1": 13.584, + "eval_arxiv_n_ngrams_match_2": 2.43, + "eval_arxiv_n_ngrams_match_3": 0.492, + "eval_arxiv_num_pred_words": 30.74, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 22.941115918528485, + "eval_arxiv_pred_num_tokens": 49.3515625, + "eval_arxiv_rouge_score": 0.36391134537924674, + "eval_arxiv_runtime": 7.4784, + "eval_arxiv_samples_per_second": 66.86, + "eval_arxiv_steps_per_second": 0.134, + "eval_arxiv_token_set_f1": 0.3633853973300188, + "eval_arxiv_token_set_f1_sem": 0.004066752835657384, + "eval_arxiv_token_set_precision": 0.30449794792905727, + "eval_arxiv_token_set_recall": 0.4635120505169177, + "eval_arxiv_true_num_tokens": 64.0, + "step": 2046 + }, + { + "epoch": 9.01, + "eval_python_code_alpaca_accuracy": 0.1635625, + "eval_python_code_alpaca_bleu_score": 7.749931235668791, + "eval_python_code_alpaca_bleu_score_sem": 0.25897742129416856, + "eval_python_code_alpaca_emb_cos_sim": 0.7957867980003357, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.0036585930067849024, + "eval_python_code_alpaca_emb_top1_equal": 0.18400000035762787, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.017346174301986407, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.4820594787597656, + "eval_python_code_alpaca_n_ngrams_match_1": 8.966, + "eval_python_code_alpaca_n_ngrams_match_2": 2.316, + "eval_python_code_alpaca_n_ngrams_match_3": 0.736, + "eval_python_code_alpaca_num_pred_words": 23.268, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 11.96588253983069, + "eval_python_code_alpaca_pred_num_tokens": 36.1875, + "eval_python_code_alpaca_rouge_score": 0.4742782948523391, + "eval_python_code_alpaca_runtime": 8.4943, + "eval_python_code_alpaca_samples_per_second": 58.863, + "eval_python_code_alpaca_steps_per_second": 0.118, + "eval_python_code_alpaca_token_set_f1": 0.4923373640218391, + "eval_python_code_alpaca_token_set_f1_sem": 0.005667700385172624, + "eval_python_code_alpaca_token_set_precision": 0.5204886057719729, + "eval_python_code_alpaca_token_set_recall": 0.4850051527947253, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 2046 + }, + { + "epoch": 9.01, + "eval_wikibio_accuracy": 0.35090625, + "eval_wikibio_bleu_score": 5.393127917759537, + "eval_wikibio_bleu_score_sem": 0.1942594604313065, + "eval_wikibio_emb_cos_sim": 0.7504561543464661, + "eval_wikibio_emb_cos_sim_sem": 0.004878221969874094, + "eval_wikibio_emb_top1_equal": 0.15399999916553497, + "eval_wikibio_emb_top1_equal_sem": 0.016158283980625493, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.490276575088501, + "eval_wikibio_n_ngrams_match_1": 9.23, + "eval_wikibio_n_ngrams_match_2": 2.8, + "eval_wikibio_n_ngrams_match_3": 0.996, + "eval_wikibio_num_pred_words": 33.05, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 32.79501673669647, + "eval_wikibio_pred_num_tokens": 59.5625, + "eval_wikibio_rouge_score": 0.3395944756830169, + "eval_wikibio_runtime": 9.6425, + "eval_wikibio_samples_per_second": 51.854, + "eval_wikibio_steps_per_second": 0.104, + "eval_wikibio_token_set_f1": 0.30709651202586963, + "eval_wikibio_token_set_f1_sem": 0.005262512950887301, + "eval_wikibio_token_set_precision": 0.3053592113623277, + "eval_wikibio_token_set_recall": 0.3229125854108825, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 2046 + }, + { + "epoch": 9.01, + "eval_bias-bios_accuracy": 0.51809375, + "eval_bias-bios_bleu_score": 18.31660780382149, + "eval_bias-bios_bleu_score_sem": 0.8767916065522813, + "eval_bias-bios_emb_cos_sim": 0.8683634400367737, + "eval_bias-bios_emb_cos_sim_sem": 0.003222134537206428, + "eval_bias-bios_emb_top1_equal": 0.3400000035762787, + "eval_bias-bios_emb_top1_equal_sem": 0.021206117459812355, + "eval_bias-bios_exact_match": 0.004, + "eval_bias-bios_exact_match_sem": 0.002825591608118863, + "eval_bias-bios_loss": 1.7671860456466675, + "eval_bias-bios_n_ngrams_match_1": 19.582, + "eval_bias-bios_n_ngrams_match_2": 8.978, + "eval_bias-bios_n_ngrams_match_3": 5.232, + "eval_bias-bios_num_pred_words": 32.48, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 5.8543562699255185, + "eval_bias-bios_pred_num_tokens": 44.546875, + "eval_bias-bios_rouge_score": 0.5402457226413455, + "eval_bias-bios_runtime": 10.5114, + "eval_bias-bios_samples_per_second": 47.568, + "eval_bias-bios_steps_per_second": 0.095, + "eval_bias-bios_token_set_f1": 0.5531321383277782, + "eval_bias-bios_token_set_f1_sem": 0.007013550276523037, + "eval_bias-bios_token_set_precision": 0.501690836944203, + "eval_bias-bios_token_set_recall": 0.6315241644945384, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 2046 + }, + { + "epoch": 9.04, + "learning_rate": 0.001, + "loss": 1.8543, + "step": 2052 + }, + { + "epoch": 9.09, + "learning_rate": 0.001, + "loss": 1.8932, + "step": 2064 + }, + { + "epoch": 9.15, + "learning_rate": 0.001, + "loss": 1.8602, + "step": 2076 + }, + { + "epoch": 9.15, + "eval_ag_news_accuracy": 0.29896875, + "eval_ag_news_bleu_score": 4.784236208697559, + "eval_ag_news_bleu_score_sem": 0.15460864144388536, + "eval_ag_news_emb_cos_sim": 0.8137236833572388, + "eval_ag_news_emb_cos_sim_sem": 0.004588369542336808, + "eval_ag_news_emb_top1_equal": 0.28600001335144043, + "eval_ag_news_emb_top1_equal_sem": 0.020229345383440313, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.6021766662597656, + "eval_ag_news_n_ngrams_match_1": 13.402, + "eval_ag_news_n_ngrams_match_2": 2.744, + "eval_ag_news_n_ngrams_match_3": 0.744, + "eval_ag_news_num_pred_words": 41.766, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 36.6779833476461, + "eval_ag_news_pred_num_tokens": 60.9921875, + "eval_ag_news_rouge_score": 0.3599304555911331, + "eval_ag_news_runtime": 7.6467, + "eval_ag_news_samples_per_second": 65.388, + "eval_ag_news_steps_per_second": 0.131, + "eval_ag_news_token_set_f1": 0.34965679144064016, + "eval_ag_news_token_set_f1_sem": 0.004684602734156572, + "eval_ag_news_token_set_precision": 0.3302881209684665, + "eval_ag_news_token_set_recall": 0.38649755707507244, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 2077 + }, + { + "epoch": 9.15, + "eval_anthropic_toxic_prompts_accuracy": 0.1051875, + "eval_anthropic_toxic_prompts_bleu_score": 3.964390298136568, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.14222773439203992, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6894749999046326, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004599546073080835, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1599999964237213, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01641154137506837, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 3.1385340690612793, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.052, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.904, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.714, + "eval_anthropic_toxic_prompts_num_pred_words": 37.216, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 23.070022997931947, + "eval_anthropic_toxic_prompts_pred_num_tokens": 53.09375, + "eval_anthropic_toxic_prompts_rouge_score": 0.2580413938262478, + "eval_anthropic_toxic_prompts_runtime": 9.432, + "eval_anthropic_toxic_prompts_samples_per_second": 53.011, + "eval_anthropic_toxic_prompts_steps_per_second": 0.106, + "eval_anthropic_toxic_prompts_token_set_f1": 0.337508001789652, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005664451580114572, + "eval_anthropic_toxic_prompts_token_set_precision": 0.44401354128900383, + "eval_anthropic_toxic_prompts_token_set_recall": 0.2945522601233305, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 2077 + }, + { + "epoch": 9.15, + "eval_arxiv_accuracy": 0.4218125, + "eval_arxiv_bleu_score": 4.489080872238089, + "eval_arxiv_bleu_score_sem": 0.11303639098293934, + "eval_arxiv_emb_cos_sim": 0.758951723575592, + "eval_arxiv_emb_cos_sim_sem": 0.004843240958033718, + "eval_arxiv_emb_top1_equal": 0.2680000066757202, + "eval_arxiv_emb_top1_equal_sem": 0.019827715320059287, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.0513577461242676, + "eval_arxiv_n_ngrams_match_1": 15.63, + "eval_arxiv_n_ngrams_match_2": 3.016, + "eval_arxiv_n_ngrams_match_3": 0.674, + "eval_arxiv_num_pred_words": 39.66, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 21.1440331712043, + "eval_arxiv_pred_num_tokens": 61.859375, + "eval_arxiv_rouge_score": 0.3685153218719503, + "eval_arxiv_runtime": 9.1478, + "eval_arxiv_samples_per_second": 54.658, + "eval_arxiv_steps_per_second": 0.109, + "eval_arxiv_token_set_f1": 0.37061277220553285, + "eval_arxiv_token_set_f1_sem": 0.004351648539318885, + "eval_arxiv_token_set_precision": 0.32616954864681463, + "eval_arxiv_token_set_recall": 0.44792024372594574, + "eval_arxiv_true_num_tokens": 64.0, + "step": 2077 + }, + { + "epoch": 9.15, + "eval_python_code_alpaca_accuracy": 0.15096875, + "eval_python_code_alpaca_bleu_score": 5.555247370237761, + "eval_python_code_alpaca_bleu_score_sem": 0.177653537319343, + "eval_python_code_alpaca_emb_cos_sim": 0.7676288485527039, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.0038828899818810327, + "eval_python_code_alpaca_emb_top1_equal": 0.19200000166893005, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.01763218126724194, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.7803382873535156, + "eval_python_code_alpaca_n_ngrams_match_1": 9.9, + "eval_python_code_alpaca_n_ngrams_match_2": 2.82, + "eval_python_code_alpaca_n_ngrams_match_3": 0.944, + "eval_python_code_alpaca_num_pred_words": 36.2, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 16.124474731386275, + "eval_python_code_alpaca_pred_num_tokens": 56.4921875, + "eval_python_code_alpaca_rouge_score": 0.3968651941156845, + "eval_python_code_alpaca_runtime": 9.8269, + "eval_python_code_alpaca_samples_per_second": 50.881, + "eval_python_code_alpaca_steps_per_second": 0.102, + "eval_python_code_alpaca_token_set_f1": 0.4758023187041246, + "eval_python_code_alpaca_token_set_f1_sem": 0.005450858689322465, + "eval_python_code_alpaca_token_set_precision": 0.5504250236468058, + "eval_python_code_alpaca_token_set_recall": 0.4366312452197343, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 2077 + }, + { + "epoch": 9.15, + "eval_wikibio_accuracy": 0.36121875, + "eval_wikibio_bleu_score": 4.974402129355878, + "eval_wikibio_bleu_score_sem": 0.20370528008271044, + "eval_wikibio_emb_cos_sim": 0.7261828780174255, + "eval_wikibio_emb_cos_sim_sem": 0.005940349247857084, + "eval_wikibio_emb_top1_equal": 0.15399999916553497, + "eval_wikibio_emb_top1_equal_sem": 0.016158283980625493, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.302112340927124, + "eval_wikibio_n_ngrams_match_1": 8.336, + "eval_wikibio_n_ngrams_match_2": 2.586, + "eval_wikibio_n_ngrams_match_3": 0.944, + "eval_wikibio_num_pred_words": 30.934, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 27.169970588188452, + "eval_wikibio_pred_num_tokens": 62.8515625, + "eval_wikibio_rouge_score": 0.3019658582188841, + "eval_wikibio_runtime": 10.5935, + "eval_wikibio_samples_per_second": 47.199, + "eval_wikibio_steps_per_second": 0.094, + "eval_wikibio_token_set_f1": 0.27333695218102066, + "eval_wikibio_token_set_f1_sem": 0.00662283612815362, + "eval_wikibio_token_set_precision": 0.26494879182019215, + "eval_wikibio_token_set_recall": 0.30625239575202884, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 2077 + }, + { + "epoch": 9.15, + "eval_bias-bios_accuracy": 0.52196875, + "eval_bias-bios_bleu_score": 19.35869072751502, + "eval_bias-bios_bleu_score_sem": 0.8266302646813208, + "eval_bias-bios_emb_cos_sim": 0.8836058974266052, + "eval_bias-bios_emb_cos_sim_sem": 0.0028305407938486794, + "eval_bias-bios_emb_top1_equal": 0.33799999952316284, + "eval_bias-bios_emb_top1_equal_sem": 0.02117566563684607, + "eval_bias-bios_exact_match": 0.0, + "eval_bias-bios_exact_match_sem": 0.0, + "eval_bias-bios_loss": 1.7248351573944092, + "eval_bias-bios_n_ngrams_match_1": 22.862, + "eval_bias-bios_n_ngrams_match_2": 10.558, + "eval_bias-bios_n_ngrams_match_3": 5.998, + "eval_bias-bios_num_pred_words": 44.866, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 5.611595923353254, + "eval_bias-bios_pred_num_tokens": 59.171875, + "eval_bias-bios_rouge_score": 0.5393844436690122, + "eval_bias-bios_runtime": 9.8175, + "eval_bias-bios_samples_per_second": 50.93, + "eval_bias-bios_steps_per_second": 0.102, + "eval_bias-bios_token_set_f1": 0.5655314980286692, + "eval_bias-bios_token_set_f1_sem": 0.00664708407861911, + "eval_bias-bios_token_set_precision": 0.5518918963501629, + "eval_bias-bios_token_set_recall": 0.5894865301239679, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 2077 + }, + { + "epoch": 9.2, + "learning_rate": 0.001, + "loss": 1.5989, + "step": 2088 + }, + { + "epoch": 9.25, + "learning_rate": 0.001, + "loss": 1.7685, + "step": 2100 + }, + { + "epoch": 9.29, + "eval_ag_news_accuracy": 0.2963125, + "eval_ag_news_bleu_score": 4.742361383468992, + "eval_ag_news_bleu_score_sem": 0.16001156054221022, + "eval_ag_news_emb_cos_sim": 0.8078240156173706, + "eval_ag_news_emb_cos_sim_sem": 0.004800064223487835, + "eval_ag_news_emb_top1_equal": 0.2680000066757202, + "eval_ag_news_emb_top1_equal_sem": 0.019827715320059287, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.606048822402954, + "eval_ag_news_n_ngrams_match_1": 13.284, + "eval_ag_news_n_ngrams_match_2": 2.808, + "eval_ag_news_n_ngrams_match_3": 0.822, + "eval_ag_news_num_pred_words": 42.118, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 36.82028154881562, + "eval_ag_news_pred_num_tokens": 62.4296875, + "eval_ag_news_rouge_score": 0.3538727301681347, + "eval_ag_news_runtime": 10.1283, + "eval_ag_news_samples_per_second": 49.366, + "eval_ag_news_steps_per_second": 0.099, + "eval_ag_news_token_set_f1": 0.3470712549807732, + "eval_ag_news_token_set_f1_sem": 0.004668509288448099, + "eval_ag_news_token_set_precision": 0.32390128066439394, + "eval_ag_news_token_set_recall": 0.3932473776896412, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 2108 + }, + { + "epoch": 9.29, + "eval_anthropic_toxic_prompts_accuracy": 0.104625, + "eval_anthropic_toxic_prompts_bleu_score": 3.816677653825337, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.14619119461894714, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6866650581359863, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004563866340643905, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.12999999523162842, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015055010489467818, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 3.0957064628601074, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.12, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.908, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.744, + "eval_anthropic_toxic_prompts_num_pred_words": 40.54, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 22.102847864310338, + "eval_anthropic_toxic_prompts_pred_num_tokens": 57.0703125, + "eval_anthropic_toxic_prompts_rouge_score": 0.24599181752231808, + "eval_anthropic_toxic_prompts_runtime": 9.553, + "eval_anthropic_toxic_prompts_samples_per_second": 52.339, + "eval_anthropic_toxic_prompts_steps_per_second": 0.105, + "eval_anthropic_toxic_prompts_token_set_f1": 0.3365827504338421, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005591265270104003, + "eval_anthropic_toxic_prompts_token_set_precision": 0.4504800028610584, + "eval_anthropic_toxic_prompts_token_set_recall": 0.29152372627313916, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 2108 + }, + { + "epoch": 9.29, + "eval_arxiv_accuracy": 0.42315625, + "eval_arxiv_bleu_score": 4.544794268192885, + "eval_arxiv_bleu_score_sem": 0.12591766464787646, + "eval_arxiv_emb_cos_sim": 0.7467482089996338, + "eval_arxiv_emb_cos_sim_sem": 0.005067803826804646, + "eval_arxiv_emb_top1_equal": 0.2639999985694885, + "eval_arxiv_emb_top1_equal_sem": 0.019732885240582997, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.0610029697418213, + "eval_arxiv_n_ngrams_match_1": 15.25, + "eval_arxiv_n_ngrams_match_2": 2.944, + "eval_arxiv_n_ngrams_match_3": 0.72, + "eval_arxiv_num_pred_words": 39.27, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 21.3489587873246, + "eval_arxiv_pred_num_tokens": 62.5234375, + "eval_arxiv_rouge_score": 0.3583765104716006, + "eval_arxiv_runtime": 9.2107, + "eval_arxiv_samples_per_second": 54.285, + "eval_arxiv_steps_per_second": 0.109, + "eval_arxiv_token_set_f1": 0.36628400169248776, + "eval_arxiv_token_set_f1_sem": 0.004341865017628588, + "eval_arxiv_token_set_precision": 0.3198325113572577, + "eval_arxiv_token_set_recall": 0.44349729752138284, + "eval_arxiv_true_num_tokens": 64.0, + "step": 2108 + }, + { + "epoch": 9.29, + "eval_python_code_alpaca_accuracy": 0.1504375, + "eval_python_code_alpaca_bleu_score": 5.339483500068607, + "eval_python_code_alpaca_bleu_score_sem": 0.17147023537882844, + "eval_python_code_alpaca_emb_cos_sim": 0.7684195637702942, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.0035968480306156577, + "eval_python_code_alpaca_emb_top1_equal": 0.16599999368190765, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.016656615375209204, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.650757074356079, + "eval_python_code_alpaca_n_ngrams_match_1": 9.746, + "eval_python_code_alpaca_n_ngrams_match_2": 2.816, + "eval_python_code_alpaca_n_ngrams_match_3": 0.892, + "eval_python_code_alpaca_num_pred_words": 37.3, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 14.164758362368579, + "eval_python_code_alpaca_pred_num_tokens": 56.8671875, + "eval_python_code_alpaca_rouge_score": 0.3932239250461642, + "eval_python_code_alpaca_runtime": 9.2918, + "eval_python_code_alpaca_samples_per_second": 53.811, + "eval_python_code_alpaca_steps_per_second": 0.108, + "eval_python_code_alpaca_token_set_f1": 0.4701602719431107, + "eval_python_code_alpaca_token_set_f1_sem": 0.00509986315936943, + "eval_python_code_alpaca_token_set_precision": 0.5467301722042317, + "eval_python_code_alpaca_token_set_recall": 0.4302604505118198, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 2108 + }, + { + "epoch": 9.29, + "eval_wikibio_accuracy": 0.36921875, + "eval_wikibio_bleu_score": 4.925348408283992, + "eval_wikibio_bleu_score_sem": 0.18491607742008886, + "eval_wikibio_emb_cos_sim": 0.7267881035804749, + "eval_wikibio_emb_cos_sim_sem": 0.006228084188798708, + "eval_wikibio_emb_top1_equal": 0.17599999904632568, + "eval_wikibio_emb_top1_equal_sem": 0.017047853594066943, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.313004732131958, + "eval_wikibio_n_ngrams_match_1": 8.734, + "eval_wikibio_n_ngrams_match_2": 2.648, + "eval_wikibio_n_ngrams_match_3": 0.934, + "eval_wikibio_num_pred_words": 32.936, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 27.467534184397877, + "eval_wikibio_pred_num_tokens": 62.8828125, + "eval_wikibio_rouge_score": 0.3109781321220584, + "eval_wikibio_runtime": 8.1563, + "eval_wikibio_samples_per_second": 61.302, + "eval_wikibio_steps_per_second": 0.123, + "eval_wikibio_token_set_f1": 0.28620409585063805, + "eval_wikibio_token_set_f1_sem": 0.006271836547776463, + "eval_wikibio_token_set_precision": 0.27991496187706066, + "eval_wikibio_token_set_recall": 0.31658886375293016, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 2108 + }, + { + "epoch": 9.29, + "eval_bias-bios_accuracy": 0.519375, + "eval_bias-bios_bleu_score": 18.704773391130743, + "eval_bias-bios_bleu_score_sem": 0.7916581464984886, + "eval_bias-bios_emb_cos_sim": 0.8804818391799927, + "eval_bias-bios_emb_cos_sim_sem": 0.00273221327958854, + "eval_bias-bios_emb_top1_equal": 0.3540000021457672, + "eval_bias-bios_emb_top1_equal_sem": 0.021407582231685648, + "eval_bias-bios_exact_match": 0.0, + "eval_bias-bios_exact_match_sem": 0.0, + "eval_bias-bios_loss": 1.734951138496399, + "eval_bias-bios_n_ngrams_match_1": 22.898, + "eval_bias-bios_n_ngrams_match_2": 10.586, + "eval_bias-bios_n_ngrams_match_3": 5.936, + "eval_bias-bios_num_pred_words": 46.224, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 5.668650818235701, + "eval_bias-bios_pred_num_tokens": 61.2734375, + "eval_bias-bios_rouge_score": 0.5303838349796598, + "eval_bias-bios_runtime": 9.2802, + "eval_bias-bios_samples_per_second": 53.878, + "eval_bias-bios_steps_per_second": 0.108, + "eval_bias-bios_token_set_f1": 0.5603213543459662, + "eval_bias-bios_token_set_f1_sem": 0.006540485270971654, + "eval_bias-bios_token_set_precision": 0.547956585093052, + "eval_bias-bios_token_set_recall": 0.5836179991680309, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 2108 + }, + { + "epoch": 9.3, + "learning_rate": 0.001, + "loss": 1.9071, + "step": 2112 + }, + { + "epoch": 9.36, + "learning_rate": 0.001, + "loss": 1.8768, + "step": 2124 + }, + { + "epoch": 9.41, + "learning_rate": 0.001, + "loss": 1.656, + "step": 2136 + }, + { + "epoch": 9.42, + "eval_ag_news_accuracy": 0.29778125, + "eval_ag_news_bleu_score": 4.602006773557938, + "eval_ag_news_bleu_score_sem": 0.14925154576540084, + "eval_ag_news_emb_cos_sim": 0.8055858612060547, + "eval_ag_news_emb_cos_sim_sem": 0.00523150236725406, + "eval_ag_news_emb_top1_equal": 0.2800000011920929, + "eval_ag_news_emb_top1_equal_sem": 0.02009995045904072, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.692465305328369, + "eval_ag_news_n_ngrams_match_1": 12.23, + "eval_ag_news_n_ngrams_match_2": 2.478, + "eval_ag_news_n_ngrams_match_3": 0.658, + "eval_ag_news_num_pred_words": 33.72, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 40.14369152281874, + "eval_ag_news_pred_num_tokens": 50.9296875, + "eval_ag_news_rouge_score": 0.36367578767071507, + "eval_ag_news_runtime": 8.1497, + "eval_ag_news_samples_per_second": 61.352, + "eval_ag_news_steps_per_second": 0.123, + "eval_ag_news_token_set_f1": 0.3436143762432177, + "eval_ag_news_token_set_f1_sem": 0.00480620750082883, + "eval_ag_news_token_set_precision": 0.3081705922040173, + "eval_ag_news_token_set_recall": 0.4031165490812298, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 2139 + }, + { + "epoch": 9.42, + "eval_anthropic_toxic_prompts_accuracy": 0.10625, + "eval_anthropic_toxic_prompts_bleu_score": 5.066131426147549, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.18945199564446616, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6970410346984863, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004803729424518879, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.15800000727176666, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01632805076118194, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 3.0718159675598145, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.762, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.792, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.712, + "eval_anthropic_toxic_prompts_num_pred_words": 28.788, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 21.58105761272503, + "eval_anthropic_toxic_prompts_pred_num_tokens": 41.046875, + "eval_anthropic_toxic_prompts_rouge_score": 0.29877063955843924, + "eval_anthropic_toxic_prompts_runtime": 9.0343, + "eval_anthropic_toxic_prompts_samples_per_second": 55.344, + "eval_anthropic_toxic_prompts_steps_per_second": 0.111, + "eval_anthropic_toxic_prompts_token_set_f1": 0.3459221599188793, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005996514735593316, + "eval_anthropic_toxic_prompts_token_set_precision": 0.4299008213906788, + "eval_anthropic_toxic_prompts_token_set_recall": 0.31319329335249113, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 2139 + }, + { + "epoch": 9.42, + "eval_arxiv_accuracy": 0.41684375, + "eval_arxiv_bleu_score": 4.282260926566434, + "eval_arxiv_bleu_score_sem": 0.11797367084225056, + "eval_arxiv_emb_cos_sim": 0.764333188533783, + "eval_arxiv_emb_cos_sim_sem": 0.004310273080506548, + "eval_arxiv_emb_top1_equal": 0.22200000286102295, + "eval_arxiv_emb_top1_equal_sem": 0.01860441382553699, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.1021311283111572, + "eval_arxiv_n_ngrams_match_1": 14.612, + "eval_arxiv_n_ngrams_match_2": 2.786, + "eval_arxiv_n_ngrams_match_3": 0.66, + "eval_arxiv_num_pred_words": 33.678, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 22.245308408000255, + "eval_arxiv_pred_num_tokens": 53.53125, + "eval_arxiv_rouge_score": 0.37436243044694695, + "eval_arxiv_runtime": 7.9047, + "eval_arxiv_samples_per_second": 63.254, + "eval_arxiv_steps_per_second": 0.127, + "eval_arxiv_token_set_f1": 0.3727697548259072, + "eval_arxiv_token_set_f1_sem": 0.0039577237427073746, + "eval_arxiv_token_set_precision": 0.3203496820674592, + "eval_arxiv_token_set_recall": 0.4578201411971948, + "eval_arxiv_true_num_tokens": 64.0, + "step": 2139 + }, + { + "epoch": 9.42, + "eval_python_code_alpaca_accuracy": 0.1565625, + "eval_python_code_alpaca_bleu_score": 6.878507736852894, + "eval_python_code_alpaca_bleu_score_sem": 0.22342270711564582, + "eval_python_code_alpaca_emb_cos_sim": 0.7866730093955994, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.0038430362518699314, + "eval_python_code_alpaca_emb_top1_equal": 0.21400000154972076, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.018359796975924752, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.614943742752075, + "eval_python_code_alpaca_n_ngrams_match_1": 9.52, + "eval_python_code_alpaca_n_ngrams_match_2": 2.618, + "eval_python_code_alpaca_n_ngrams_match_3": 0.838, + "eval_python_code_alpaca_num_pred_words": 27.932, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 13.666447519540702, + "eval_python_code_alpaca_pred_num_tokens": 43.03125, + "eval_python_code_alpaca_rouge_score": 0.44403574820208813, + "eval_python_code_alpaca_runtime": 7.8167, + "eval_python_code_alpaca_samples_per_second": 63.965, + "eval_python_code_alpaca_steps_per_second": 0.128, + "eval_python_code_alpaca_token_set_f1": 0.4872379721570455, + "eval_python_code_alpaca_token_set_f1_sem": 0.0053259176733194856, + "eval_python_code_alpaca_token_set_precision": 0.5347430179351469, + "eval_python_code_alpaca_token_set_recall": 0.46333195077411227, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 2139 + }, + { + "epoch": 9.42, + "eval_wikibio_accuracy": 0.35528125, + "eval_wikibio_bleu_score": 5.5707561638950605, + "eval_wikibio_bleu_score_sem": 0.20852715862239565, + "eval_wikibio_emb_cos_sim": 0.7497373223304749, + "eval_wikibio_emb_cos_sim_sem": 0.005253118056934688, + "eval_wikibio_emb_top1_equal": 0.1679999977350235, + "eval_wikibio_emb_top1_equal_sem": 0.016736554076096456, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.4478862285614014, + "eval_wikibio_n_ngrams_match_1": 8.882, + "eval_wikibio_n_ngrams_match_2": 2.648, + "eval_wikibio_n_ngrams_match_3": 0.968, + "eval_wikibio_num_pred_words": 30.796, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 31.43387800196767, + "eval_wikibio_pred_num_tokens": 56.734375, + "eval_wikibio_rouge_score": 0.3351312538709482, + "eval_wikibio_runtime": 8.6543, + "eval_wikibio_samples_per_second": 57.775, + "eval_wikibio_steps_per_second": 0.116, + "eval_wikibio_token_set_f1": 0.30050983298174566, + "eval_wikibio_token_set_f1_sem": 0.005767539340825794, + "eval_wikibio_token_set_precision": 0.29309326972903244, + "eval_wikibio_token_set_recall": 0.32473088203684336, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 2139 + }, + { + "epoch": 9.42, + "eval_bias-bios_accuracy": 0.524625, + "eval_bias-bios_bleu_score": 19.932481798405632, + "eval_bias-bios_bleu_score_sem": 0.8520770726409266, + "eval_bias-bios_emb_cos_sim": 0.8823494911193848, + "eval_bias-bios_emb_cos_sim_sem": 0.002854128028483915, + "eval_bias-bios_emb_top1_equal": 0.3440000116825104, + "eval_bias-bios_emb_top1_equal_sem": 0.021265758943789875, + "eval_bias-bios_exact_match": 0.0, + "eval_bias-bios_exact_match_sem": 0.0, + "eval_bias-bios_loss": 1.7244471311569214, + "eval_bias-bios_n_ngrams_match_1": 21.46, + "eval_bias-bios_n_ngrams_match_2": 9.906, + "eval_bias-bios_n_ngrams_match_3": 5.628, + "eval_bias-bios_num_pred_words": 37.35, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 5.609418899299354, + "eval_bias-bios_pred_num_tokens": 50.25, + "eval_bias-bios_rouge_score": 0.5569036405399674, + "eval_bias-bios_runtime": 8.2143, + "eval_bias-bios_samples_per_second": 60.869, + "eval_bias-bios_steps_per_second": 0.122, + "eval_bias-bios_token_set_f1": 0.5652699740770379, + "eval_bias-bios_token_set_f1_sem": 0.006749920522407458, + "eval_bias-bios_token_set_precision": 0.5302845452267541, + "eval_bias-bios_token_set_recall": 0.6149457522776463, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 2139 + }, + { + "epoch": 9.46, + "learning_rate": 0.001, + "loss": 1.704, + "step": 2148 + }, + { + "epoch": 9.52, + "learning_rate": 0.001, + "loss": 1.914, + "step": 2160 + }, + { + "epoch": 9.56, + "eval_ag_news_accuracy": 0.2976875, + "eval_ag_news_bleu_score": 4.662313290914308, + "eval_ag_news_bleu_score_sem": 0.14449223368201164, + "eval_ag_news_emb_cos_sim": 0.8208157420158386, + "eval_ag_news_emb_cos_sim_sem": 0.004164131852596115, + "eval_ag_news_emb_top1_equal": 0.3179999887943268, + "eval_ag_news_emb_top1_equal_sem": 0.02084757283415153, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.6028475761413574, + "eval_ag_news_n_ngrams_match_1": 13.692, + "eval_ag_news_n_ngrams_match_2": 2.838, + "eval_ag_news_n_ngrams_match_3": 0.758, + "eval_ag_news_num_pred_words": 42.6, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 36.70259922570546, + "eval_ag_news_pred_num_tokens": 62.671875, + "eval_ag_news_rouge_score": 0.3604608500886415, + "eval_ag_news_runtime": 8.2458, + "eval_ag_news_samples_per_second": 60.637, + "eval_ag_news_steps_per_second": 0.121, + "eval_ag_news_token_set_f1": 0.3520480981442656, + "eval_ag_news_token_set_f1_sem": 0.004477106239124451, + "eval_ag_news_token_set_precision": 0.33254875465414585, + "eval_ag_news_token_set_recall": 0.39049022931431326, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 2170 + }, + { + "epoch": 9.56, + "eval_anthropic_toxic_prompts_accuracy": 0.10446875, + "eval_anthropic_toxic_prompts_bleu_score": 3.4787612684393774, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12096052765266524, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6876007318496704, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004465850535071452, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1379999965429306, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01543984193692329, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 3.2157742977142334, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.258, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.948, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.752, + "eval_anthropic_toxic_prompts_num_pred_words": 43.164, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 24.922581940318025, + "eval_anthropic_toxic_prompts_pred_num_tokens": 62.0078125, + "eval_anthropic_toxic_prompts_rouge_score": 0.23859345959584935, + "eval_anthropic_toxic_prompts_runtime": 7.188, + "eval_anthropic_toxic_prompts_samples_per_second": 69.56, + "eval_anthropic_toxic_prompts_steps_per_second": 0.139, + "eval_anthropic_toxic_prompts_token_set_f1": 0.3262175803043801, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005114010427873442, + "eval_anthropic_toxic_prompts_token_set_precision": 0.4622129056213488, + "eval_anthropic_toxic_prompts_token_set_recall": 0.27428680446357634, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 2170 + }, + { + "epoch": 9.56, + "eval_arxiv_accuracy": 0.42478125, + "eval_arxiv_bleu_score": 4.5261234443477205, + "eval_arxiv_bleu_score_sem": 0.12124453371716433, + "eval_arxiv_emb_cos_sim": 0.760948657989502, + "eval_arxiv_emb_cos_sim_sem": 0.004511617567146002, + "eval_arxiv_emb_top1_equal": 0.3059999942779541, + "eval_arxiv_emb_top1_equal_sem": 0.02062957067522617, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.0417191982269287, + "eval_arxiv_n_ngrams_match_1": 15.466, + "eval_arxiv_n_ngrams_match_2": 3.018, + "eval_arxiv_n_ngrams_match_3": 0.668, + "eval_arxiv_num_pred_words": 39.328, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 20.941214404121222, + "eval_arxiv_pred_num_tokens": 62.7734375, + "eval_arxiv_rouge_score": 0.3653128340776683, + "eval_arxiv_runtime": 8.437, + "eval_arxiv_samples_per_second": 59.263, + "eval_arxiv_steps_per_second": 0.119, + "eval_arxiv_token_set_f1": 0.37110011938398535, + "eval_arxiv_token_set_f1_sem": 0.004186995057375558, + "eval_arxiv_token_set_precision": 0.3248995450752447, + "eval_arxiv_token_set_recall": 0.44557839135742766, + "eval_arxiv_true_num_tokens": 64.0, + "step": 2170 + }, + { + "epoch": 9.56, + "eval_python_code_alpaca_accuracy": 0.15040625, + "eval_python_code_alpaca_bleu_score": 4.897251390417854, + "eval_python_code_alpaca_bleu_score_sem": 0.15472477985283284, + "eval_python_code_alpaca_emb_cos_sim": 0.7662152051925659, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.003257823266045889, + "eval_python_code_alpaca_emb_top1_equal": 0.15600000321865082, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.01624363651663569, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.789936065673828, + "eval_python_code_alpaca_n_ngrams_match_1": 9.924, + "eval_python_code_alpaca_n_ngrams_match_2": 2.752, + "eval_python_code_alpaca_n_ngrams_match_3": 0.896, + "eval_python_code_alpaca_num_pred_words": 40.184, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 16.279978919032434, + "eval_python_code_alpaca_pred_num_tokens": 61.9296875, + "eval_python_code_alpaca_rouge_score": 0.3780064651438805, + "eval_python_code_alpaca_runtime": 7.1858, + "eval_python_code_alpaca_samples_per_second": 69.581, + "eval_python_code_alpaca_steps_per_second": 0.139, + "eval_python_code_alpaca_token_set_f1": 0.4557865078169055, + "eval_python_code_alpaca_token_set_f1_sem": 0.004864382445439913, + "eval_python_code_alpaca_token_set_precision": 0.5547829884131548, + "eval_python_code_alpaca_token_set_recall": 0.40366635664875294, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 2170 + }, + { + "epoch": 9.56, + "eval_wikibio_accuracy": 0.36859375, + "eval_wikibio_bleu_score": 5.140089255979534, + "eval_wikibio_bleu_score_sem": 0.19882621034566597, + "eval_wikibio_emb_cos_sim": 0.7363477945327759, + "eval_wikibio_emb_cos_sim_sem": 0.005824229015191742, + "eval_wikibio_emb_top1_equal": 0.17599999904632568, + "eval_wikibio_emb_top1_equal_sem": 0.017047853594066943, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.316140651702881, + "eval_wikibio_n_ngrams_match_1": 8.868, + "eval_wikibio_n_ngrams_match_2": 2.662, + "eval_wikibio_n_ngrams_match_3": 1.014, + "eval_wikibio_num_pred_words": 32.484, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 27.553805361448823, + "eval_wikibio_pred_num_tokens": 62.96875, + "eval_wikibio_rouge_score": 0.3147196406028563, + "eval_wikibio_runtime": 7.1483, + "eval_wikibio_samples_per_second": 69.947, + "eval_wikibio_steps_per_second": 0.14, + "eval_wikibio_token_set_f1": 0.288499220535135, + "eval_wikibio_token_set_f1_sem": 0.0062498790382535625, + "eval_wikibio_token_set_precision": 0.2838371780069014, + "eval_wikibio_token_set_recall": 0.31298672925789045, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 2170 + }, + { + "epoch": 9.56, + "eval_bias-bios_accuracy": 0.52065625, + "eval_bias-bios_bleu_score": 18.525149542142778, + "eval_bias-bios_bleu_score_sem": 0.7829670022152398, + "eval_bias-bios_emb_cos_sim": 0.8845528960227966, + "eval_bias-bios_emb_cos_sim_sem": 0.002710936454403237, + "eval_bias-bios_emb_top1_equal": 0.3400000035762787, + "eval_bias-bios_emb_top1_equal_sem": 0.02120611612701198, + "eval_bias-bios_exact_match": 0.002, + "eval_bias-bios_exact_match_sem": 0.002, + "eval_bias-bios_loss": 1.7260913848876953, + "eval_bias-bios_n_ngrams_match_1": 23.064, + "eval_bias-bios_n_ngrams_match_2": 10.62, + "eval_bias-bios_n_ngrams_match_3": 5.954, + "eval_bias-bios_num_pred_words": 46.594, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 5.618649794138943, + "eval_bias-bios_pred_num_tokens": 61.9375, + "eval_bias-bios_rouge_score": 0.5330248094854184, + "eval_bias-bios_runtime": 7.5812, + "eval_bias-bios_samples_per_second": 65.952, + "eval_bias-bios_steps_per_second": 0.132, + "eval_bias-bios_token_set_f1": 0.5622923685230573, + "eval_bias-bios_token_set_f1_sem": 0.006591145208419194, + "eval_bias-bios_token_set_precision": 0.5571964713315781, + "eval_bias-bios_token_set_recall": 0.5784252887077622, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 2170 + }, + { + "epoch": 9.57, + "learning_rate": 0.001, + "loss": 1.8839, + "step": 2172 + }, + { + "epoch": 9.62, + "learning_rate": 0.001, + "loss": 1.7105, + "step": 2184 + }, + { + "epoch": 9.67, + "learning_rate": 0.001, + "loss": 1.618, + "step": 2196 + }, + { + "epoch": 9.7, + "eval_ag_news_accuracy": 0.30134375, + "eval_ag_news_bleu_score": 4.71286424512986, + "eval_ag_news_bleu_score_sem": 0.15146065256691588, + "eval_ag_news_emb_cos_sim": 0.8077859282493591, + "eval_ag_news_emb_cos_sim_sem": 0.004614233200012415, + "eval_ag_news_emb_top1_equal": 0.2980000078678131, + "eval_ag_news_emb_top1_equal_sem": 0.020475119103777986, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.58840274810791, + "eval_ag_news_n_ngrams_match_1": 12.806, + "eval_ag_news_n_ngrams_match_2": 2.646, + "eval_ag_news_n_ngrams_match_3": 0.714, + "eval_ag_news_num_pred_words": 37.05, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 36.17624717597981, + "eval_ag_news_pred_num_tokens": 55.4375, + "eval_ag_news_rouge_score": 0.3616265811172079, + "eval_ag_news_runtime": 8.2263, + "eval_ag_news_samples_per_second": 60.781, + "eval_ag_news_steps_per_second": 0.122, + "eval_ag_news_token_set_f1": 0.34669352755643745, + "eval_ag_news_token_set_f1_sem": 0.004696687693622286, + "eval_ag_news_token_set_precision": 0.31449899140963855, + "eval_ag_news_token_set_recall": 0.4041313425662957, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 2201 + }, + { + "epoch": 9.7, + "eval_anthropic_toxic_prompts_accuracy": 0.105625, + "eval_anthropic_toxic_prompts_bleu_score": 5.247525795504296, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.18949880274254094, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6975520849227905, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004827754150877183, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.15600000321865082, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01624363651663569, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 3.0993876457214355, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.908, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.898, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.724, + "eval_anthropic_toxic_prompts_num_pred_words": 29.092, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 22.184362432018744, + "eval_anthropic_toxic_prompts_pred_num_tokens": 40.7421875, + "eval_anthropic_toxic_prompts_rouge_score": 0.2998625990799938, + "eval_anthropic_toxic_prompts_runtime": 7.1556, + "eval_anthropic_toxic_prompts_samples_per_second": 69.876, + "eval_anthropic_toxic_prompts_steps_per_second": 0.14, + "eval_anthropic_toxic_prompts_token_set_f1": 0.351969265526759, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005811501239388595, + "eval_anthropic_toxic_prompts_token_set_precision": 0.436701861568256, + "eval_anthropic_toxic_prompts_token_set_recall": 0.31632956198925377, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 2201 + }, + { + "epoch": 9.7, + "eval_arxiv_accuracy": 0.41996875, + "eval_arxiv_bleu_score": 4.343432537348339, + "eval_arxiv_bleu_score_sem": 0.12501379181401626, + "eval_arxiv_emb_cos_sim": 0.7525537610054016, + "eval_arxiv_emb_cos_sim_sem": 0.0055605577850193905, + "eval_arxiv_emb_top1_equal": 0.23000000417232513, + "eval_arxiv_emb_top1_equal_sem": 0.018839050665941787, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.061976909637451, + "eval_arxiv_n_ngrams_match_1": 14.962, + "eval_arxiv_n_ngrams_match_2": 2.88, + "eval_arxiv_n_ngrams_match_3": 0.634, + "eval_arxiv_num_pred_words": 35.798, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 21.369761518678352, + "eval_arxiv_pred_num_tokens": 57.390625, + "eval_arxiv_rouge_score": 0.3687784695393248, + "eval_arxiv_runtime": 7.4289, + "eval_arxiv_samples_per_second": 67.305, + "eval_arxiv_steps_per_second": 0.135, + "eval_arxiv_token_set_f1": 0.3704525693635287, + "eval_arxiv_token_set_f1_sem": 0.004514654454041064, + "eval_arxiv_token_set_precision": 0.31772398129801444, + "eval_arxiv_token_set_recall": 0.4578272797297293, + "eval_arxiv_true_num_tokens": 64.0, + "step": 2201 + }, + { + "epoch": 9.7, + "eval_python_code_alpaca_accuracy": 0.15540625, + "eval_python_code_alpaca_bleu_score": 6.813732587814551, + "eval_python_code_alpaca_bleu_score_sem": 0.2352598823468727, + "eval_python_code_alpaca_emb_cos_sim": 0.784164309501648, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.0037966154812114805, + "eval_python_code_alpaca_emb_top1_equal": 0.18799999356269836, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.017490679184236527, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.620197296142578, + "eval_python_code_alpaca_n_ngrams_match_1": 9.592, + "eval_python_code_alpaca_n_ngrams_match_2": 2.604, + "eval_python_code_alpaca_n_ngrams_match_3": 0.836, + "eval_python_code_alpaca_num_pred_words": 28.782, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 13.738433857711156, + "eval_python_code_alpaca_pred_num_tokens": 45.59375, + "eval_python_code_alpaca_rouge_score": 0.44568837350944746, + "eval_python_code_alpaca_runtime": 7.0229, + "eval_python_code_alpaca_samples_per_second": 71.196, + "eval_python_code_alpaca_steps_per_second": 0.142, + "eval_python_code_alpaca_token_set_f1": 0.49671498346282555, + "eval_python_code_alpaca_token_set_f1_sem": 0.005645253796668827, + "eval_python_code_alpaca_token_set_precision": 0.5401766662770758, + "eval_python_code_alpaca_token_set_recall": 0.4764558513237111, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 2201 + }, + { + "epoch": 9.7, + "eval_wikibio_accuracy": 0.3660625, + "eval_wikibio_bleu_score": 5.173119518560294, + "eval_wikibio_bleu_score_sem": 0.20925532304214553, + "eval_wikibio_emb_cos_sim": 0.7356558442115784, + "eval_wikibio_emb_cos_sim_sem": 0.005945926351025957, + "eval_wikibio_emb_top1_equal": 0.1679999977350235, + "eval_wikibio_emb_top1_equal_sem": 0.016736554076096456, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.33961820602417, + "eval_wikibio_n_ngrams_match_1": 8.662, + "eval_wikibio_n_ngrams_match_2": 2.558, + "eval_wikibio_n_ngrams_match_3": 0.926, + "eval_wikibio_num_pred_words": 31.312, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 28.208354869271222, + "eval_wikibio_pred_num_tokens": 60.8671875, + "eval_wikibio_rouge_score": 0.3186093278020351, + "eval_wikibio_runtime": 7.6126, + "eval_wikibio_samples_per_second": 65.681, + "eval_wikibio_steps_per_second": 0.131, + "eval_wikibio_token_set_f1": 0.2882805023245171, + "eval_wikibio_token_set_f1_sem": 0.0063211158691996346, + "eval_wikibio_token_set_precision": 0.27934300035101545, + "eval_wikibio_token_set_recall": 0.3159991648885542, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 2201 + }, + { + "epoch": 9.7, + "eval_bias-bios_accuracy": 0.5226875, + "eval_bias-bios_bleu_score": 19.813183783213244, + "eval_bias-bios_bleu_score_sem": 0.876898543066567, + "eval_bias-bios_emb_cos_sim": 0.8768750429153442, + "eval_bias-bios_emb_cos_sim_sem": 0.0033438308734403625, + "eval_bias-bios_emb_top1_equal": 0.3400000035762787, + "eval_bias-bios_emb_top1_equal_sem": 0.021206117459812355, + "eval_bias-bios_exact_match": 0.002, + "eval_bias-bios_exact_match_sem": 0.002, + "eval_bias-bios_loss": 1.7114582061767578, + "eval_bias-bios_n_ngrams_match_1": 21.73, + "eval_bias-bios_n_ngrams_match_2": 9.894, + "eval_bias-bios_n_ngrams_match_3": 5.61, + "eval_bias-bios_num_pred_words": 38.77, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 5.5370297245558175, + "eval_bias-bios_pred_num_tokens": 52.84375, + "eval_bias-bios_rouge_score": 0.5543715036682836, + "eval_bias-bios_runtime": 7.8561, + "eval_bias-bios_samples_per_second": 63.645, + "eval_bias-bios_steps_per_second": 0.127, + "eval_bias-bios_token_set_f1": 0.5675258217455457, + "eval_bias-bios_token_set_f1_sem": 0.006914693327907323, + "eval_bias-bios_token_set_precision": 0.5323334251485022, + "eval_bias-bios_token_set_recall": 0.6194974352714948, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 2201 + }, + { + "epoch": 9.73, + "learning_rate": 0.001, + "loss": 1.9294, + "step": 2208 + }, + { + "epoch": 9.78, + "learning_rate": 0.001, + "loss": 1.8799, + "step": 2220 + }, + { + "epoch": 9.83, + "learning_rate": 0.001, + "loss": 1.7465, + "step": 2232 + }, + { + "epoch": 9.83, + "eval_ag_news_accuracy": 0.2993125, + "eval_ag_news_bleu_score": 4.552771903506971, + "eval_ag_news_bleu_score_sem": 0.1476329454929656, + "eval_ag_news_emb_cos_sim": 0.8088976144790649, + "eval_ag_news_emb_cos_sim_sem": 0.004970004601293117, + "eval_ag_news_emb_top1_equal": 0.2720000147819519, + "eval_ag_news_emb_top1_equal_sem": 0.019920483557355567, + "eval_ag_news_exact_match": 0.0, + "eval_ag_news_exact_match_sem": 0.0, + "eval_ag_news_loss": 3.615370512008667, + "eval_ag_news_n_ngrams_match_1": 12.698, + "eval_ag_news_n_ngrams_match_2": 2.48, + "eval_ag_news_n_ngrams_match_3": 0.602, + "eval_ag_news_num_pred_words": 37.68, + "eval_ag_news_num_true_words": 39.994, + "eval_ag_news_perplexity": 37.16511349581766, + "eval_ag_news_pred_num_tokens": 55.8203125, + "eval_ag_news_rouge_score": 0.35895464585649733, + "eval_ag_news_runtime": 7.5991, + "eval_ag_news_samples_per_second": 65.798, + "eval_ag_news_steps_per_second": 0.132, + "eval_ag_news_token_set_f1": 0.34484115121913256, + "eval_ag_news_token_set_f1_sem": 0.004831306493464716, + "eval_ag_news_token_set_precision": 0.3160684717490578, + "eval_ag_news_token_set_recall": 0.39293489358284867, + "eval_ag_news_true_num_tokens": 56.09375, + "step": 2232 + }, + { + "epoch": 9.83, + "eval_anthropic_toxic_prompts_accuracy": 0.10490625, + "eval_anthropic_toxic_prompts_bleu_score": 4.10741998542506, + "eval_anthropic_toxic_prompts_bleu_score_sem": 0.14748491850601175, + "eval_anthropic_toxic_prompts_emb_cos_sim": 0.692173421382904, + "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.004653590795083951, + "eval_anthropic_toxic_prompts_emb_top1_equal": 0.17000000178813934, + "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.016815633120741882, + "eval_anthropic_toxic_prompts_exact_match": 0.0, + "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, + "eval_anthropic_toxic_prompts_loss": 3.105919361114502, + "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.944, + "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.78, + "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.616, + "eval_anthropic_toxic_prompts_num_pred_words": 33.656, + "eval_anthropic_toxic_prompts_num_true_words": 14.584, + "eval_anthropic_toxic_prompts_perplexity": 22.329738634739794, + "eval_anthropic_toxic_prompts_pred_num_tokens": 48.140625, + "eval_anthropic_toxic_prompts_rouge_score": 0.27032811691109593, + "eval_anthropic_toxic_prompts_runtime": 8.4776, + "eval_anthropic_toxic_prompts_samples_per_second": 58.979, + "eval_anthropic_toxic_prompts_steps_per_second": 0.118, + "eval_anthropic_toxic_prompts_token_set_f1": 0.33804776599997466, + "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005822921546573262, + "eval_anthropic_toxic_prompts_token_set_precision": 0.43360700078282016, + "eval_anthropic_toxic_prompts_token_set_recall": 0.3003765734989987, + "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, + "step": 2232 + }, + { + "epoch": 9.83, + "eval_arxiv_accuracy": 0.42009375, + "eval_arxiv_bleu_score": 4.3336030432396875, + "eval_arxiv_bleu_score_sem": 0.11746066880184015, + "eval_arxiv_emb_cos_sim": 0.7599811553955078, + "eval_arxiv_emb_cos_sim_sem": 0.0044633248783609684, + "eval_arxiv_emb_top1_equal": 0.24400000274181366, + "eval_arxiv_emb_top1_equal_sem": 0.0192267343061996, + "eval_arxiv_exact_match": 0.0, + "eval_arxiv_exact_match_sem": 0.0, + "eval_arxiv_loss": 3.081174373626709, + "eval_arxiv_n_ngrams_match_1": 15.182, + "eval_arxiv_n_ngrams_match_2": 2.782, + "eval_arxiv_n_ngrams_match_3": 0.62, + "eval_arxiv_num_pred_words": 36.522, + "eval_arxiv_num_true_words": 43.592, + "eval_arxiv_perplexity": 21.783969900093172, + "eval_arxiv_pred_num_tokens": 58.0078125, + "eval_arxiv_rouge_score": 0.3726513634142933, + "eval_arxiv_runtime": 7.4, + "eval_arxiv_samples_per_second": 67.568, + "eval_arxiv_steps_per_second": 0.135, + "eval_arxiv_token_set_f1": 0.37323648339023796, + "eval_arxiv_token_set_f1_sem": 0.004144558142027552, + "eval_arxiv_token_set_precision": 0.3254934337714673, + "eval_arxiv_token_set_recall": 0.4520853257568568, + "eval_arxiv_true_num_tokens": 64.0, + "step": 2232 + }, + { + "epoch": 9.83, + "eval_python_code_alpaca_accuracy": 0.1516875, + "eval_python_code_alpaca_bleu_score": 5.6993663718319345, + "eval_python_code_alpaca_bleu_score_sem": 0.1847532408187973, + "eval_python_code_alpaca_emb_cos_sim": 0.7660696506500244, + "eval_python_code_alpaca_emb_cos_sim_sem": 0.003645446597086736, + "eval_python_code_alpaca_emb_top1_equal": 0.17800000309944153, + "eval_python_code_alpaca_emb_top1_equal_sem": 0.01712362329538143, + "eval_python_code_alpaca_exact_match": 0.0, + "eval_python_code_alpaca_exact_match_sem": 0.0, + "eval_python_code_alpaca_loss": 2.730116367340088, + "eval_python_code_alpaca_n_ngrams_match_1": 9.632, + "eval_python_code_alpaca_n_ngrams_match_2": 2.586, + "eval_python_code_alpaca_n_ngrams_match_3": 0.846, + "eval_python_code_alpaca_num_pred_words": 33.144, + "eval_python_code_alpaca_num_true_words": 18.128, + "eval_python_code_alpaca_perplexity": 15.334671371003653, + "eval_python_code_alpaca_pred_num_tokens": 53.25, + "eval_python_code_alpaca_rouge_score": 0.412255658616845, + "eval_python_code_alpaca_runtime": 7.191, + "eval_python_code_alpaca_samples_per_second": 69.531, + "eval_python_code_alpaca_steps_per_second": 0.139, + "eval_python_code_alpaca_token_set_f1": 0.47325705061843437, + "eval_python_code_alpaca_token_set_f1_sem": 0.005397233971667218, + "eval_python_code_alpaca_token_set_precision": 0.5404911472732544, + "eval_python_code_alpaca_token_set_recall": 0.43638181985541336, + "eval_python_code_alpaca_true_num_tokens": 23.359375, + "step": 2232 + }, + { + "epoch": 9.83, + "eval_wikibio_accuracy": 0.36203125, + "eval_wikibio_bleu_score": 5.0191180628988965, + "eval_wikibio_bleu_score_sem": 0.19563339003404953, + "eval_wikibio_emb_cos_sim": 0.7325459718704224, + "eval_wikibio_emb_cos_sim_sem": 0.005758308708647951, + "eval_wikibio_emb_top1_equal": 0.164000004529953, + "eval_wikibio_emb_top1_equal_sem": 0.016575810354078253, + "eval_wikibio_exact_match": 0.0, + "eval_wikibio_exact_match_sem": 0.0, + "eval_wikibio_loss": 3.3490078449249268, + "eval_wikibio_n_ngrams_match_1": 8.456, + "eval_wikibio_n_ngrams_match_2": 2.51, + "eval_wikibio_n_ngrams_match_3": 0.916, + "eval_wikibio_num_pred_words": 31.194, + "eval_wikibio_num_true_words": 29.41, + "eval_wikibio_perplexity": 28.47446853593286, + "eval_wikibio_pred_num_tokens": 61.1171875, + "eval_wikibio_rouge_score": 0.31073899515076675, + "eval_wikibio_runtime": 7.1404, + "eval_wikibio_samples_per_second": 70.024, + "eval_wikibio_steps_per_second": 0.14, + "eval_wikibio_token_set_f1": 0.28317539584992235, + "eval_wikibio_token_set_f1_sem": 0.006279021614211278, + "eval_wikibio_token_set_precision": 0.27439931371489285, + "eval_wikibio_token_set_recall": 0.31139234908194147, + "eval_wikibio_true_num_tokens": 61.1328125, + "step": 2232 + }, + { + "epoch": 9.83, + "eval_bias-bios_accuracy": 0.52453125, + "eval_bias-bios_bleu_score": 19.547165413925843, + "eval_bias-bios_bleu_score_sem": 0.8244631545646848, + "eval_bias-bios_emb_cos_sim": 0.8838831186294556, + "eval_bias-bios_emb_cos_sim_sem": 0.002780134283669016, + "eval_bias-bios_emb_top1_equal": 0.3479999899864197, + "eval_bias-bios_emb_top1_equal_sem": 0.021323727763296864, + "eval_bias-bios_exact_match": 0.002, + "eval_bias-bios_exact_match_sem": 0.002, + "eval_bias-bios_loss": 1.7094721794128418, + "eval_bias-bios_n_ngrams_match_1": 22.43, + "eval_bias-bios_n_ngrams_match_2": 10.338, + "eval_bias-bios_n_ngrams_match_3": 5.81, + "eval_bias-bios_num_pred_words": 41.41, + "eval_bias-bios_num_true_words": 40.706, + "eval_bias-bios_perplexity": 5.526043947964353, + "eval_bias-bios_pred_num_tokens": 57.0390625, + "eval_bias-bios_rouge_score": 0.5483517989330391, + "eval_bias-bios_runtime": 8.0059, + "eval_bias-bios_samples_per_second": 62.454, + "eval_bias-bios_steps_per_second": 0.125, + "eval_bias-bios_token_set_f1": 0.5685466671620878, + "eval_bias-bios_token_set_f1_sem": 0.006663139402344389, + "eval_bias-bios_token_set_precision": 0.547655818008668, + "eval_bias-bios_token_set_recall": 0.60030696946573, + "eval_bias-bios_true_num_tokens": 55.1796875, + "step": 2232 + } + ], + "logging_steps": 12, + "max_steps": 2270, + "num_train_epochs": 10, + "save_steps": 62, + "total_flos": 1.1894837433768346e+17, + "trial_name": null, + "trial_params": null +}